// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
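 *
 * (For reference, in the code below the first submission maps to an
 * INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET(GUC_CONTEXT_ENABLE) H2G and later
 * submissions to INTEL_GUC_ACTION_SCHED_CONTEXT H2Gs; see __guc_add_request().)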
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
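/*
 * For reference (illustrative numbers only): with the full 64k guc_id space
 * described above this works out to 64k / 16 = 4096 ids reserved for
 * multi-lrc; the actual reservation scales with submission_state.num_guc_ids.
 */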
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	SCHED_STATE_CLOSED | \
	SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0					guc_process_desc
 * + sizeof(struct guc_process_desc)	child go
 * + CACHELINE_BYTES			child join[0]
 * ...
 * + CACHELINE_BYTES			child join[n - 1]
 * ...					unused
 * PARENT_SCRATCH_SIZE / 2		work queue start
 * ...					work queue
 * PARENT_SCRATCH_SIZE - 1		work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of the head pointer
	 * in the intel_context structure in order to reduce the number of
	 * accesses to shared GPU memory which may be across a PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have an xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have an xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request was resubmitted after the context was banned.
	 */
	if (unlikely(!intel_context_is_schedulable(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in the correct state before updating the work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret;

	if (unlikely(!intel_context_is_schedulable(ce)))
		return 0;

	ret = __guc_wq_item_append(rq);
	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
	       !intel_context_is_schedulable(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but the
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine the validity of these values. Instead we read the values multiple
 * times until they are consistent. In test runs, 3 attempts result in
 * consistent values. The upper bound is set to 6 attempts and may need to be
 * tuned as per any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	cancel_delayed_work_sync(&guc->timestamp.work);

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
		guc_update_engine_gt_clks(engine);
		engine->stats.guc.prev_total = 0;
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc->timestamp.last_stat_jiffies = jiffies;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id)
		guc_update_engine_gt_clks(engine);

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_timestamp_ping(struct work_struct *wrk)
{
	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int srcu, ret;

	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats. NB: can't actually block waiting
	 * for a reset to complete as the reset requires flushing out
	 * this worker thread if started. So waiting would deadlock.
	 */
	ret = intel_gt_reset_trylock(gt, &srcu);
	if (ret)
		return;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		__update_guc_busyness_stats(guc);

	intel_gt_reset_unlock(gt, srcu);

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
		offset,
		0,
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static void guc_init_engine_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
		int ret = guc_action_enable_usage_stats(guc);

		if (ret)
			drm_err(&gt->i915->drm,
				"Failed to enable usage stats: %d!\n", ret);
	}
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;

	if (!guc_submission_initialized(guc))
		return;

	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
	 * synchronously here.
	 */
	cancel_delayed_work_sync(&guc->timestamp.work);

	/*
	 * Before parking, we should sample engine busyness stats if we need to.
	 * We can skip it if we are less than half a ping from the last time we
	 * sampled the busyness stats.
	 */
	if (guc->timestamp.last_stat_jiffies &&
	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
			(guc->timestamp.ping_delay / 2)))
		return;

	__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;
	unsigned long flags;
	ktime_t unused;

	if (!guc_submission_initialized(guc))
		return;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	guc_update_pm_timestamp(guc, &unused);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static inline bool
submission_disabled(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	return unlikely(!sched_engine ||
			!__tasklet_is_enabled(&sched_engine->tasklet) ||
			intel_gt_is_wedged(guc_to_gt(guc)));
}

static void disable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);
		__tasklet_disable_sync_once(&sched_engine->tasklet);
		sched_engine->tasklet.callback = NULL;
	}
}

static void enable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&guc->sched_engine->lock, flags);
	sched_engine->tasklet.callback = guc_submission_tasklet;
	wmb(); /* Make sure callback visible */
	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
	    __tasklet_enable(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);

		/* And kick in case we missed a new request submission. */
		tasklet_hi_schedule(&sched_engine->tasklet);
	}
	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
}

static void guc_flush_submissions(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void guc_flush_destroyed_contexts(struct intel_guc *guc);

void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	intel_gt_park_heartbeats(guc_to_gt(guc));
	disable_submission(guc);
	guc->interrupts.disable(guc);
	__reset_guc_busyness_stats(guc);

	/* Flush IRQ handler */
	spin_lock_irq(guc_to_gt(guc)->irq_lock);
	spin_unlock_irq(guc_to_gt(guc)->irq_lock);

	guc_flush_submissions(guc);
	guc_flush_destroyed_contexts(guc);
	flush_work(&guc->ct.requests.worker);

	scrub_guc_desc_for_outstanding_g2h(guc);
}

static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;
	unsigned int num_siblings = 0;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (num_siblings++ == sibling)
			return engine;

	return NULL;
}

static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (intel_engine_is_virtual(engine))
		engine = guc_virtual_get_sibling(engine, 0);

	return engine;
}

static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
{
	struct intel_engine_cs *engine = __context_to_physical_engine(ce);

	if (!intel_context_is_schedulable(ce))
		return;

	GEM_BUG_ON(!intel_context_is_pinned(ce));

	/*
	 * We want a simple context + ring to execute the breadcrumb update.
	 * We cannot rely on the context being intact across the GPU hang,
	 * so clear it and rebuild just what we need for the breadcrumb.
	 * All pending requests for this context will be zapped, and any
	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
	 */
	if (scrub)
		lrc_init_regs(ce, engine, true);

	/* Rerun the request; its payload has been neutered (if guilty). */
	lrc_update_regs(ce, engine, head);
}

static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
		return;

	intel_engine_stop_cs(engine);

	/*
	 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
	 * to wait for any pending mi force wakeups
	 */
	intel_engine_wait_for_pending_mi_fw(engine);
}

static void guc_reset_nop(struct intel_engine_cs *engine)
{
}

static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
{
}

static void
__unwind_incomplete_requests(struct intel_context *ce)
{
	struct i915_request *rq, *rn;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;
	struct i915_sched_engine * const sched_engine =
		ce->engine->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry_safe_reverse(rq, rn,
					 &ce->guc_state.requests,
					 sched.link) {
		if (i915_request_completed(rq))
			continue;

		list_del_init(&rq->sched.link);
		__i915_request_unsubmit(rq);

		/* Push the request back into the queue for later resubmission. */
		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(sched_engine, prio);
		}
		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));

		list_add(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	}
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
	bool guilty;
	struct i915_request *rq;
	unsigned long flags;
	u32 head;
	int i, number_children = ce->parallel.number_children;
	struct intel_context *parent = ce;

	GEM_BUG_ON(intel_context_is_child(ce));

	intel_context_get(ce);

	/*
	 * GuC will implicitly mark the context as non-schedulable when it sends
	 * the reset notification. Make sure our state reflects this change. The
	 * context will be marked enabled on resubmission.
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	clr_context_enabled(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	/*
	 * For each context in the relationship find the hanging request
	 * resetting each context / request as needed
	 */
	for (i = 0; i < number_children + 1; ++i) {
		if (!intel_context_is_pinned(ce))
			goto next_context;

		guilty = false;
		rq = intel_context_find_active_request(ce);
		if (!rq) {
			head = ce->ring->tail;
			goto out_replay;
		}

		if (i915_request_started(rq))
			guilty = stalled & ce->engine->mask;

		GEM_BUG_ON(i915_active_is_idle(&ce->active));
		head = intel_ring_wrap(ce->ring, rq->head);

		__i915_request_reset(rq, guilty);
out_replay:
		guc_reset_state(ce, head, guilty);
next_context:
		if (i != number_children)
			ce = list_next_entry(ce, parallel.child_link);
	}

	__unwind_incomplete_requests(parent);
	intel_context_put(parent);
}

void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		xa_unlock(&guc->context_lookup);

		if (intel_context_is_pinned(ce) &&
		    !intel_context_is_child(ce))
			__guc_reset_context(ce, stalled);

		intel_context_put(ce);

		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	/* GuC is blown away, drop all references to contexts */
	xa_destroy(&guc->context_lookup);
}

static void guc_cancel_context_requests(struct intel_context *ce)
{
	struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
	struct i915_request *rq;
	unsigned long flags;

	/* Mark all executing requests as skipped. */
	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
		i915_request_put(i915_request_mark_eio(rq));
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
{
	struct i915_request *rq, *rn;
	struct rb_node *rb;
	unsigned long flags;

	/* Can be called during boot if GuC fails to load */
	if (!sched_engine)
		return;

	/*
	 * Before we call engine->cancel_requests(), we should have exclusive
	 * access to the submission state. This is arranged for us by the
	 * caller disabling the interrupt generation, the tasklet and other
	 * threads that may then access the same state, giving us a free hand
	 * to reset state. However, we still need to let lockdep be aware that
	 * we know this state may be accessed in hardirq context, so we
	 * disable the irq around this manipulation and we want to keep
	 * the spinlock focused on its duties and not accidentally conflate
	 * coverage to the submission's irq state. (Similarly, although we
(Similarly, although we 1796 * shouldn't need to disable irq around the manipulation of the 1797 * submission's irq state, we also wish to remind ourselves that 1798 * it is irq state.) 1799 */ 1800 spin_lock_irqsave(&sched_engine->lock, flags); 1801 1802 /* Flush the queued requests to the timeline list (for retiring). */ 1803 while ((rb = rb_first_cached(&sched_engine->queue))) { 1804 struct i915_priolist *p = to_priolist(rb); 1805 1806 priolist_for_each_request_consume(rq, rn, p) { 1807 list_del_init(&rq->sched.link); 1808 1809 __i915_request_submit(rq); 1810 1811 i915_request_put(i915_request_mark_eio(rq)); 1812 } 1813 1814 rb_erase_cached(&p->node, &sched_engine->queue); 1815 i915_priolist_free(p); 1816 } 1817 1818 /* Remaining _unready_ requests will be nop'ed when submitted */ 1819 1820 sched_engine->queue_priority_hint = INT_MIN; 1821 sched_engine->queue = RB_ROOT_CACHED; 1822 1823 spin_unlock_irqrestore(&sched_engine->lock, flags); 1824 } 1825 1826 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1827 { 1828 struct intel_context *ce; 1829 unsigned long index; 1830 unsigned long flags; 1831 1832 xa_lock_irqsave(&guc->context_lookup, flags); 1833 xa_for_each(&guc->context_lookup, index, ce) { 1834 if (!kref_get_unless_zero(&ce->ref)) 1835 continue; 1836 1837 xa_unlock(&guc->context_lookup); 1838 1839 if (intel_context_is_pinned(ce) && 1840 !intel_context_is_child(ce)) 1841 guc_cancel_context_requests(ce); 1842 1843 intel_context_put(ce); 1844 1845 xa_lock(&guc->context_lookup); 1846 } 1847 xa_unlock_irqrestore(&guc->context_lookup, flags); 1848 1849 guc_cancel_sched_engine_requests(guc->sched_engine); 1850 1851 /* GuC is blown away, drop all references to contexts */ 1852 xa_destroy(&guc->context_lookup); 1853 } 1854 1855 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1856 { 1857 /* Reset called during driver load or during wedge? */ 1858 if (unlikely(!guc_submission_initialized(guc) || 1859 intel_gt_is_wedged(guc_to_gt(guc)))) { 1860 return; 1861 } 1862 1863 /* 1864 * Technically possible for either of these values to be non-zero here, 1865 * but very unlikely + harmless. Regardless let's add a warn so we can 1866 * see in CI if this happens frequently / a precursor to taking down the 1867 * machine. 1868 */ 1869 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1870 atomic_set(&guc->outstanding_submission_g2h, 0); 1871 1872 intel_guc_global_policies_update(guc); 1873 enable_submission(guc); 1874 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1875 } 1876 1877 static void destroyed_worker_func(struct work_struct *w); 1878 static void reset_fail_worker_func(struct work_struct *w); 1879 1880 /* 1881 * Set up the memory resources to be shared with the GuC (via the GGTT) 1882 * at firmware loading time. 
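* For GuC firmware older than v70 this also creates the v69 LRC descriptor pool.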
1883 */ 1884 int intel_guc_submission_init(struct intel_guc *guc) 1885 { 1886 struct intel_gt *gt = guc_to_gt(guc); 1887 int ret; 1888 1889 if (guc->submission_initialized) 1890 return 0; 1891 1892 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { 1893 ret = guc_lrc_desc_pool_create_v69(guc); 1894 if (ret) 1895 return ret; 1896 } 1897 1898 guc->submission_state.guc_ids_bitmap = 1899 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1900 if (!guc->submission_state.guc_ids_bitmap) { 1901 ret = -ENOMEM; 1902 goto destroy_pool; 1903 } 1904 1905 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1906 guc->timestamp.shift = gpm_timestamp_shift(gt); 1907 guc->submission_initialized = true; 1908 1909 return 0; 1910 1911 destroy_pool: 1912 guc_lrc_desc_pool_destroy_v69(guc); 1913 1914 return ret; 1915 } 1916 1917 void intel_guc_submission_fini(struct intel_guc *guc) 1918 { 1919 if (!guc->submission_initialized) 1920 return; 1921 1922 guc_flush_destroyed_contexts(guc); 1923 guc_lrc_desc_pool_destroy_v69(guc); 1924 i915_sched_engine_put(guc->sched_engine); 1925 bitmap_free(guc->submission_state.guc_ids_bitmap); 1926 guc->submission_initialized = false; 1927 } 1928 1929 static inline void queue_request(struct i915_sched_engine *sched_engine, 1930 struct i915_request *rq, 1931 int prio) 1932 { 1933 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1934 list_add_tail(&rq->sched.link, 1935 i915_sched_lookup_priolist(sched_engine, prio)); 1936 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1937 tasklet_hi_schedule(&sched_engine->tasklet); 1938 } 1939 1940 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1941 struct i915_request *rq) 1942 { 1943 int ret = 0; 1944 1945 __i915_request_submit(rq); 1946 1947 trace_i915_request_in(rq, 0); 1948 1949 if (is_multi_lrc_rq(rq)) { 1950 if (multi_lrc_submit(rq)) { 1951 ret = guc_wq_item_append(guc, rq); 1952 if (!ret) 1953 ret = guc_add_request(guc, rq); 1954 } 1955 } else { 1956 guc_set_lrc_tail(rq); 1957 ret = guc_add_request(guc, rq); 1958 } 1959 1960 if (unlikely(ret == -EPIPE)) 1961 disable_submission(guc); 1962 1963 return ret; 1964 } 1965 1966 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1967 { 1968 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1969 struct intel_context *ce = request_to_scheduling_context(rq); 1970 1971 return submission_disabled(guc) || guc->stalled_request || 1972 !i915_sched_engine_is_empty(sched_engine) || 1973 !ctx_id_mapped(guc, ce->guc_id.id); 1974 } 1975 1976 static void guc_submit_request(struct i915_request *rq) 1977 { 1978 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1979 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1980 unsigned long flags; 1981 1982 /* Will be called from irq-context when using foreign fences. 
*/ 1983 spin_lock_irqsave(&sched_engine->lock, flags); 1984 1985 if (need_tasklet(guc, rq)) 1986 queue_request(sched_engine, rq, rq_prio(rq)); 1987 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1988 tasklet_hi_schedule(&sched_engine->tasklet); 1989 1990 spin_unlock_irqrestore(&sched_engine->lock, flags); 1991 } 1992 1993 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1994 { 1995 int ret; 1996 1997 GEM_BUG_ON(intel_context_is_child(ce)); 1998 1999 if (intel_context_is_parent(ce)) 2000 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2001 NUMBER_MULTI_LRC_GUC_ID(guc), 2002 order_base_2(ce->parallel.number_children 2003 + 1)); 2004 else 2005 ret = ida_simple_get(&guc->submission_state.guc_ids, 2006 NUMBER_MULTI_LRC_GUC_ID(guc), 2007 guc->submission_state.num_guc_ids, 2008 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2009 __GFP_NOWARN); 2010 if (unlikely(ret < 0)) 2011 return ret; 2012 2013 if (!intel_context_is_parent(ce)) 2014 ++guc->submission_state.guc_ids_in_use; 2015 2016 ce->guc_id.id = ret; 2017 return 0; 2018 } 2019 2020 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2021 { 2022 GEM_BUG_ON(intel_context_is_child(ce)); 2023 2024 if (!context_guc_id_invalid(ce)) { 2025 if (intel_context_is_parent(ce)) { 2026 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2027 ce->guc_id.id, 2028 order_base_2(ce->parallel.number_children 2029 + 1)); 2030 } else { 2031 --guc->submission_state.guc_ids_in_use; 2032 ida_simple_remove(&guc->submission_state.guc_ids, 2033 ce->guc_id.id); 2034 } 2035 clr_ctx_id_mapping(guc, ce->guc_id.id); 2036 set_context_guc_id_invalid(ce); 2037 } 2038 if (!list_empty(&ce->guc_id.link)) 2039 list_del_init(&ce->guc_id.link); 2040 } 2041 2042 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2043 { 2044 unsigned long flags; 2045 2046 spin_lock_irqsave(&guc->submission_state.lock, flags); 2047 __release_guc_id(guc, ce); 2048 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2049 } 2050 2051 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2052 { 2053 struct intel_context *cn; 2054 2055 lockdep_assert_held(&guc->submission_state.lock); 2056 GEM_BUG_ON(intel_context_is_child(ce)); 2057 GEM_BUG_ON(intel_context_is_parent(ce)); 2058 2059 if (!list_empty(&guc->submission_state.guc_id_list)) { 2060 cn = list_first_entry(&guc->submission_state.guc_id_list, 2061 struct intel_context, 2062 guc_id.link); 2063 2064 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2065 GEM_BUG_ON(context_guc_id_invalid(cn)); 2066 GEM_BUG_ON(intel_context_is_child(cn)); 2067 GEM_BUG_ON(intel_context_is_parent(cn)); 2068 2069 list_del_init(&cn->guc_id.link); 2070 ce->guc_id.id = cn->guc_id.id; 2071 2072 spin_lock(&cn->guc_state.lock); 2073 clr_context_registered(cn); 2074 spin_unlock(&cn->guc_state.lock); 2075 2076 set_context_guc_id_invalid(cn); 2077 2078 #ifdef CONFIG_DRM_I915_SELFTEST 2079 guc->number_guc_id_stolen++; 2080 #endif 2081 2082 return 0; 2083 } else { 2084 return -EAGAIN; 2085 } 2086 } 2087 2088 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2089 { 2090 int ret; 2091 2092 lockdep_assert_held(&guc->submission_state.lock); 2093 GEM_BUG_ON(intel_context_is_child(ce)); 2094 2095 ret = new_guc_id(guc, ce); 2096 if (unlikely(ret < 0)) { 2097 if (intel_context_is_parent(ce)) 2098 return -ENOSPC; 2099 2100 ret = steal_guc_id(guc, ce); 2101 if (ret < 0) 2102 return ret; 2103 } 2104 2105 if (intel_context_is_parent(ce)) { 2106 struct 
intel_context *child;
2107 int i = 1;
2108
2109 for_each_child(ce, child)
2110 child->guc_id.id = ce->guc_id.id + i++;
2111 }
2112
2113 return 0;
2114 }
2115
2116 #define PIN_GUC_ID_TRIES 4
2117 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2118 {
2119 int ret = 0;
2120 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2121
2122 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2123
2124 try_again:
2125 spin_lock_irqsave(&guc->submission_state.lock, flags);
2126
2127 might_lock(&ce->guc_state.lock);
2128
2129 if (context_guc_id_invalid(ce)) {
2130 ret = assign_guc_id(guc, ce);
2131 if (ret)
2132 goto out_unlock;
2133 ret = 1; /* Indicates newly assigned guc_id */
2134 }
2135 if (!list_empty(&ce->guc_id.link))
2136 list_del_init(&ce->guc_id.link);
2137 atomic_inc(&ce->guc_id.ref);
2138
2139 out_unlock:
2140 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2141
2142 /*
2143 * -EAGAIN indicates no guc_ids are available, let's retire any
2144 * outstanding requests to see if that frees up a guc_id. If the first
2145 * retire didn't help, insert a sleep with the timeslice duration before
2146 * attempting to retire more requests. Double the sleep period each
2147 * subsequent pass before finally giving up. The sleep period has a
2148 * maximum of 100ms and a minimum of 1ms.
2149 */
2150 if (ret == -EAGAIN && --tries) {
2151 if (PIN_GUC_ID_TRIES - tries > 1) {
2152 unsigned int timeslice_shifted =
2153 ce->engine->props.timeslice_duration_ms <<
2154 (PIN_GUC_ID_TRIES - tries - 2);
2155 unsigned int max = min_t(unsigned int, 100,
2156 timeslice_shifted);
2157
2158 msleep(max_t(unsigned int, max, 1));
2159 }
2160 intel_gt_retire_requests(guc_to_gt(guc));
2161 goto try_again;
2162 }
2163
2164 return ret;
2165 }
2166
2167 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2168 {
2169 unsigned long flags;
2170
2171 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2172 GEM_BUG_ON(intel_context_is_child(ce));
2173
2174 if (unlikely(context_guc_id_invalid(ce) ||
2175 intel_context_is_parent(ce)))
2176 return;
2177
2178 spin_lock_irqsave(&guc->submission_state.lock, flags);
2179 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2180 !atomic_read(&ce->guc_id.ref))
2181 list_add_tail(&ce->guc_id.link,
2182 &guc->submission_state.guc_id_list);
2183 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2184 }
2185
2186 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2187 struct intel_context *ce,
2188 u32 guc_id,
2189 u32 offset,
2190 bool loop)
2191 {
2192 struct intel_context *child;
2193 u32 action[4 + MAX_ENGINE_INSTANCE];
2194 int len = 0;
2195
2196 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2197
2198 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2199 action[len++] = guc_id;
2200 action[len++] = ce->parallel.number_children + 1;
2201 action[len++] = offset;
2202 for_each_child(ce, child) {
2203 offset += sizeof(struct guc_lrc_desc_v69);
2204 action[len++] = offset;
2205 }
2206
2207 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2208 }
2209
2210 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2211 struct intel_context *ce,
2212 struct guc_ctxt_registration_info *info,
2213 bool loop)
2214 {
2215 struct intel_context *child;
2216 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2217 int len = 0;
2218 u32 next_id;
2219
2220 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2221
2222 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
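/*
 * Payload layout for the v70 multi-lrc registration: the registration info
 * (flags, context index, engine class and submit mask, work queue
 * descriptor, base and size), the number of contexts in the relationship,
 * the parent's LRCA and then one 64 bit LRCA per child appended below.
 */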
2223 action[len++] = info->flags; 2224 action[len++] = info->context_idx; 2225 action[len++] = info->engine_class; 2226 action[len++] = info->engine_submit_mask; 2227 action[len++] = info->wq_desc_lo; 2228 action[len++] = info->wq_desc_hi; 2229 action[len++] = info->wq_base_lo; 2230 action[len++] = info->wq_base_hi; 2231 action[len++] = info->wq_size; 2232 action[len++] = ce->parallel.number_children + 1; 2233 action[len++] = info->hwlrca_lo; 2234 action[len++] = info->hwlrca_hi; 2235 2236 next_id = info->context_idx + 1; 2237 for_each_child(ce, child) { 2238 GEM_BUG_ON(next_id++ != child->guc_id.id); 2239 2240 /* 2241 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2242 * only supports 32 bit currently. 2243 */ 2244 action[len++] = lower_32_bits(child->lrc.lrca); 2245 action[len++] = upper_32_bits(child->lrc.lrca); 2246 } 2247 2248 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2249 2250 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2251 } 2252 2253 static int __guc_action_register_context_v69(struct intel_guc *guc, 2254 u32 guc_id, 2255 u32 offset, 2256 bool loop) 2257 { 2258 u32 action[] = { 2259 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2260 guc_id, 2261 offset, 2262 }; 2263 2264 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2265 0, loop); 2266 } 2267 2268 static int __guc_action_register_context_v70(struct intel_guc *guc, 2269 struct guc_ctxt_registration_info *info, 2270 bool loop) 2271 { 2272 u32 action[] = { 2273 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2274 info->flags, 2275 info->context_idx, 2276 info->engine_class, 2277 info->engine_submit_mask, 2278 info->wq_desc_lo, 2279 info->wq_desc_hi, 2280 info->wq_base_lo, 2281 info->wq_base_hi, 2282 info->wq_size, 2283 info->hwlrca_lo, 2284 info->hwlrca_hi, 2285 }; 2286 2287 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2288 0, loop); 2289 } 2290 2291 static void prepare_context_registration_info_v69(struct intel_context *ce); 2292 static void prepare_context_registration_info_v70(struct intel_context *ce, 2293 struct guc_ctxt_registration_info *info); 2294 2295 static int 2296 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2297 { 2298 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2299 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2300 2301 prepare_context_registration_info_v69(ce); 2302 2303 if (intel_context_is_parent(ce)) 2304 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2305 offset, loop); 2306 else 2307 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2308 offset, loop); 2309 } 2310 2311 static int 2312 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2313 { 2314 struct guc_ctxt_registration_info info; 2315 2316 prepare_context_registration_info_v70(ce, &info); 2317 2318 if (intel_context_is_parent(ce)) 2319 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2320 else 2321 return __guc_action_register_context_v70(guc, &info, loop); 2322 } 2323 2324 static int register_context(struct intel_context *ce, bool loop) 2325 { 2326 struct intel_guc *guc = ce_to_guc(ce); 2327 int ret; 2328 2329 GEM_BUG_ON(intel_context_is_child(ce)); 2330 trace_intel_context_register(ce); 2331 2332 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2333 ret = register_context_v70(guc, ce, loop); 2334 else 2335 ret = register_context_v69(guc, ce, loop); 2336 2337 if (likely(!ret)) { 2338 unsigned long flags; 2339 2340 spin_lock_irqsave(&ce->guc_state.lock, flags); 2341 
set_context_registered(ce); 2342 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2343 2344 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2345 guc_context_policy_init_v70(ce, loop); 2346 } 2347 2348 return ret; 2349 } 2350 2351 static int __guc_action_deregister_context(struct intel_guc *guc, 2352 u32 guc_id) 2353 { 2354 u32 action[] = { 2355 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2356 guc_id, 2357 }; 2358 2359 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2360 G2H_LEN_DW_DEREGISTER_CONTEXT, 2361 true); 2362 } 2363 2364 static int deregister_context(struct intel_context *ce, u32 guc_id) 2365 { 2366 struct intel_guc *guc = ce_to_guc(ce); 2367 2368 GEM_BUG_ON(intel_context_is_child(ce)); 2369 trace_intel_context_deregister(ce); 2370 2371 return __guc_action_deregister_context(guc, guc_id); 2372 } 2373 2374 static inline void clear_children_join_go_memory(struct intel_context *ce) 2375 { 2376 struct parent_scratch *ps = __get_parent_scratch(ce); 2377 int i; 2378 2379 ps->go.semaphore = 0; 2380 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2381 ps->join[i].semaphore = 0; 2382 } 2383 2384 static inline u32 get_children_go_value(struct intel_context *ce) 2385 { 2386 return __get_parent_scratch(ce)->go.semaphore; 2387 } 2388 2389 static inline u32 get_children_join_value(struct intel_context *ce, 2390 u8 child_index) 2391 { 2392 return __get_parent_scratch(ce)->join[child_index].semaphore; 2393 } 2394 2395 struct context_policy { 2396 u32 count; 2397 struct guc_update_context_policy h2g; 2398 }; 2399 2400 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2401 { 2402 size_t bytes = sizeof(policy->h2g.header) + 2403 (sizeof(policy->h2g.klv[0]) * policy->count); 2404 2405 return bytes / sizeof(u32); 2406 } 2407 2408 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2409 { 2410 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2411 policy->h2g.header.ctx_id = guc_id; 2412 policy->count = 0; 2413 } 2414 2415 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2416 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2417 { \ 2418 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2419 policy->h2g.klv[policy->count].kl = \ 2420 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2421 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2422 policy->h2g.klv[policy->count].value = data; \ 2423 policy->count++; \ 2424 } 2425 2426 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2427 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2428 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2429 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2430 2431 #undef MAKE_CONTEXT_POLICY_ADD 2432 2433 static int __guc_context_set_context_policies(struct intel_guc *guc, 2434 struct context_policy *policy, 2435 bool loop) 2436 { 2437 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2438 __guc_context_policy_action_size(policy), 2439 0, loop); 2440 } 2441 2442 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2443 { 2444 struct intel_engine_cs *engine = ce->engine; 2445 struct intel_guc *guc = &engine->gt->uc.guc; 2446 struct context_policy policy; 2447 u32 execution_quantum; 2448 u32 preemption_timeout; 2449 unsigned long flags; 2450 int ret; 2451 2452 /* NB: For both of these, zero means disabled. 
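* The engine properties are in milliseconds and are scaled to microseconds (* 1000) for the GuC below.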
*/
2453 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2454 execution_quantum));
2455 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2456 preemption_timeout));
2457 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2458 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2459
2460 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2461
2462 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2463 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2464 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2465
2466 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2467 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2468
2469 ret = __guc_context_set_context_policies(guc, &policy, loop);
2470
2471 spin_lock_irqsave(&ce->guc_state.lock, flags);
2472 if (ret != 0)
2473 set_context_policy_required(ce);
2474 else
2475 clr_context_policy_required(ce);
2476 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2477
2478 return ret;
2479 }
2480
2481 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2482 struct guc_lrc_desc_v69 *desc)
2483 {
2484 desc->policy_flags = 0;
2485
2486 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2487 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2488
2489 /* NB: For both of these, zero means disabled. */
2490 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2491 desc->execution_quantum));
2492 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2493 desc->preemption_timeout));
2494 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2495 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2496 }
2497
2498 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2499 {
2500 /*
2501 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2502 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2503 */
2504 switch (prio) {
2505 default:
2506 MISSING_CASE(prio);
2507 fallthrough;
2508 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2509 return GEN12_CTX_PRIORITY_NORMAL;
2510 case GUC_CLIENT_PRIORITY_NORMAL:
2511 return GEN12_CTX_PRIORITY_LOW;
2512 case GUC_CLIENT_PRIORITY_HIGH:
2513 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2514 return GEN12_CTX_PRIORITY_HIGH;
2515 }
2516 }
2517
2518 static void prepare_context_registration_info_v69(struct intel_context *ce)
2519 {
2520 struct intel_engine_cs *engine = ce->engine;
2521 struct intel_guc *guc = &engine->gt->uc.guc;
2522 u32 ctx_id = ce->guc_id.id;
2523 struct guc_lrc_desc_v69 *desc;
2524 struct intel_context *child;
2525
2526 GEM_BUG_ON(!engine->mask);
2527
2528 /*
2529 * Ensure LRC + CT vmas are in the same region as the write barrier is
2530 * done based on the CT vma region.
2531 */
2532 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2533 i915_gem_object_is_lmem(ce->ring->vma->obj));
2534
2535 desc = __get_lrc_desc_v69(guc, ctx_id);
2536 desc->engine_class = engine_class_to_guc_class(engine->class);
2537 desc->engine_submit_mask = engine->logical_mask;
2538 desc->hw_context_desc = ce->lrc.lrca;
2539 desc->priority = ce->guc_state.prio;
2540 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2541 guc_context_policy_init_v69(engine, desc);
2542
2543 /*
2544 * If context is a parent, we need to register a process descriptor
2545 * describing a work queue and register all child contexts.
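* For v69 the queue's head, tail and status live in the guc_process_desc_v69 initialised below.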
2546 */ 2547 if (intel_context_is_parent(ce)) { 2548 struct guc_process_desc_v69 *pdesc; 2549 2550 ce->parallel.guc.wqi_tail = 0; 2551 ce->parallel.guc.wqi_head = 0; 2552 2553 desc->process_desc = i915_ggtt_offset(ce->state) + 2554 __get_parent_scratch_offset(ce); 2555 desc->wq_addr = i915_ggtt_offset(ce->state) + 2556 __get_wq_offset(ce); 2557 desc->wq_size = WQ_SIZE; 2558 2559 pdesc = __get_process_desc_v69(ce); 2560 memset(pdesc, 0, sizeof(*(pdesc))); 2561 pdesc->stage_id = ce->guc_id.id; 2562 pdesc->wq_base_addr = desc->wq_addr; 2563 pdesc->wq_size_bytes = desc->wq_size; 2564 pdesc->wq_status = WQ_STATUS_ACTIVE; 2565 2566 ce->parallel.guc.wq_head = &pdesc->head; 2567 ce->parallel.guc.wq_tail = &pdesc->tail; 2568 ce->parallel.guc.wq_status = &pdesc->wq_status; 2569 2570 for_each_child(ce, child) { 2571 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2572 2573 desc->engine_class = 2574 engine_class_to_guc_class(engine->class); 2575 desc->hw_context_desc = child->lrc.lrca; 2576 desc->priority = ce->guc_state.prio; 2577 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2578 guc_context_policy_init_v69(engine, desc); 2579 } 2580 2581 clear_children_join_go_memory(ce); 2582 } 2583 } 2584 2585 static void prepare_context_registration_info_v70(struct intel_context *ce, 2586 struct guc_ctxt_registration_info *info) 2587 { 2588 struct intel_engine_cs *engine = ce->engine; 2589 struct intel_guc *guc = &engine->gt->uc.guc; 2590 u32 ctx_id = ce->guc_id.id; 2591 2592 GEM_BUG_ON(!engine->mask); 2593 2594 /* 2595 * Ensure LRC + CT vmas are is same region as write barrier is done 2596 * based on CT vma region. 2597 */ 2598 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2599 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2600 2601 memset(info, 0, sizeof(*info)); 2602 info->context_idx = ctx_id; 2603 info->engine_class = engine_class_to_guc_class(engine->class); 2604 info->engine_submit_mask = engine->logical_mask; 2605 /* 2606 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2607 * only supports 32 bit currently. 2608 */ 2609 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2610 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2611 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2612 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2613 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2614 2615 /* 2616 * If context is a parent, we need to register a process descriptor 2617 * describing a work queue and register all child contexts. 
2618 */ 2619 if (intel_context_is_parent(ce)) { 2620 struct guc_sched_wq_desc *wq_desc; 2621 u64 wq_desc_offset, wq_base_offset; 2622 2623 ce->parallel.guc.wqi_tail = 0; 2624 ce->parallel.guc.wqi_head = 0; 2625 2626 wq_desc_offset = i915_ggtt_offset(ce->state) + 2627 __get_parent_scratch_offset(ce); 2628 wq_base_offset = i915_ggtt_offset(ce->state) + 2629 __get_wq_offset(ce); 2630 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2631 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2632 info->wq_base_lo = lower_32_bits(wq_base_offset); 2633 info->wq_base_hi = upper_32_bits(wq_base_offset); 2634 info->wq_size = WQ_SIZE; 2635 2636 wq_desc = __get_wq_desc_v70(ce); 2637 memset(wq_desc, 0, sizeof(*wq_desc)); 2638 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2639 2640 ce->parallel.guc.wq_head = &wq_desc->head; 2641 ce->parallel.guc.wq_tail = &wq_desc->tail; 2642 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2643 2644 clear_children_join_go_memory(ce); 2645 } 2646 } 2647 2648 static int try_context_registration(struct intel_context *ce, bool loop) 2649 { 2650 struct intel_engine_cs *engine = ce->engine; 2651 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2652 struct intel_guc *guc = &engine->gt->uc.guc; 2653 intel_wakeref_t wakeref; 2654 u32 ctx_id = ce->guc_id.id; 2655 bool context_registered; 2656 int ret = 0; 2657 2658 GEM_BUG_ON(!sched_state_is_init(ce)); 2659 2660 context_registered = ctx_id_mapped(guc, ctx_id); 2661 2662 clr_ctx_id_mapping(guc, ctx_id); 2663 set_ctx_id_mapping(guc, ctx_id, ce); 2664 2665 /* 2666 * The context_lookup xarray is used to determine if the hardware 2667 * context is currently registered. There are two cases in which it 2668 * could be registered either the guc_id has been stolen from another 2669 * context or the lrc descriptor address of this context has changed. In 2670 * either case the context needs to be deregistered with the GuC before 2671 * registering this context. 2672 */ 2673 if (context_registered) { 2674 bool disabled; 2675 unsigned long flags; 2676 2677 trace_intel_context_steal_guc_id(ce); 2678 GEM_BUG_ON(!loop); 2679 2680 /* Seal race with Reset */ 2681 spin_lock_irqsave(&ce->guc_state.lock, flags); 2682 disabled = submission_disabled(guc); 2683 if (likely(!disabled)) { 2684 set_context_wait_for_deregister_to_register(ce); 2685 intel_context_get(ce); 2686 } 2687 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2688 if (unlikely(disabled)) { 2689 clr_ctx_id_mapping(guc, ctx_id); 2690 return 0; /* Will get registered later */ 2691 } 2692 2693 /* 2694 * If stealing the guc_id, this ce has the same guc_id as the 2695 * context whose guc_id was stolen. 
2696 */ 2697 with_intel_runtime_pm(runtime_pm, wakeref) 2698 ret = deregister_context(ce, ce->guc_id.id); 2699 if (unlikely(ret == -ENODEV)) 2700 ret = 0; /* Will get registered later */ 2701 } else { 2702 with_intel_runtime_pm(runtime_pm, wakeref) 2703 ret = register_context(ce, loop); 2704 if (unlikely(ret == -EBUSY)) { 2705 clr_ctx_id_mapping(guc, ctx_id); 2706 } else if (unlikely(ret == -ENODEV)) { 2707 clr_ctx_id_mapping(guc, ctx_id); 2708 ret = 0; /* Will get registered later */ 2709 } 2710 } 2711 2712 return ret; 2713 } 2714 2715 static int __guc_context_pre_pin(struct intel_context *ce, 2716 struct intel_engine_cs *engine, 2717 struct i915_gem_ww_ctx *ww, 2718 void **vaddr) 2719 { 2720 return lrc_pre_pin(ce, engine, ww, vaddr); 2721 } 2722 2723 static int __guc_context_pin(struct intel_context *ce, 2724 struct intel_engine_cs *engine, 2725 void *vaddr) 2726 { 2727 if (i915_ggtt_offset(ce->state) != 2728 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2729 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2730 2731 /* 2732 * GuC context gets pinned in guc_request_alloc. See that function for 2733 * explaination of why. 2734 */ 2735 2736 return lrc_pin(ce, engine, vaddr); 2737 } 2738 2739 static int guc_context_pre_pin(struct intel_context *ce, 2740 struct i915_gem_ww_ctx *ww, 2741 void **vaddr) 2742 { 2743 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2744 } 2745 2746 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2747 { 2748 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2749 2750 if (likely(!ret && !intel_context_is_barrier(ce))) 2751 intel_engine_pm_get(ce->engine); 2752 2753 return ret; 2754 } 2755 2756 static void guc_context_unpin(struct intel_context *ce) 2757 { 2758 struct intel_guc *guc = ce_to_guc(ce); 2759 2760 unpin_guc_id(guc, ce); 2761 lrc_unpin(ce); 2762 2763 if (likely(!intel_context_is_barrier(ce))) 2764 intel_engine_pm_put_async(ce->engine); 2765 } 2766 2767 static void guc_context_post_unpin(struct intel_context *ce) 2768 { 2769 lrc_post_unpin(ce); 2770 } 2771 2772 static void __guc_context_sched_enable(struct intel_guc *guc, 2773 struct intel_context *ce) 2774 { 2775 u32 action[] = { 2776 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2777 ce->guc_id.id, 2778 GUC_CONTEXT_ENABLE 2779 }; 2780 2781 trace_intel_context_sched_enable(ce); 2782 2783 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2784 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2785 } 2786 2787 static void __guc_context_sched_disable(struct intel_guc *guc, 2788 struct intel_context *ce, 2789 u16 guc_id) 2790 { 2791 u32 action[] = { 2792 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2793 guc_id, /* ce->guc_id.id not stable */ 2794 GUC_CONTEXT_DISABLE 2795 }; 2796 2797 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2798 2799 GEM_BUG_ON(intel_context_is_child(ce)); 2800 trace_intel_context_sched_disable(ce); 2801 2802 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2803 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2804 } 2805 2806 static void guc_blocked_fence_complete(struct intel_context *ce) 2807 { 2808 lockdep_assert_held(&ce->guc_state.lock); 2809 2810 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2811 i915_sw_fence_complete(&ce->guc_state.blocked); 2812 } 2813 2814 static void guc_blocked_fence_reinit(struct intel_context *ce) 2815 { 2816 lockdep_assert_held(&ce->guc_state.lock); 2817 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2818 2819 /* 2820 * This fence is always complete unless a pending schedule disable is 2821 * outstanding. 
We arm the fence here and complete it when we receive 2822 * the pending schedule disable complete message. 2823 */ 2824 i915_sw_fence_fini(&ce->guc_state.blocked); 2825 i915_sw_fence_reinit(&ce->guc_state.blocked); 2826 i915_sw_fence_await(&ce->guc_state.blocked); 2827 i915_sw_fence_commit(&ce->guc_state.blocked); 2828 } 2829 2830 static u16 prep_context_pending_disable(struct intel_context *ce) 2831 { 2832 lockdep_assert_held(&ce->guc_state.lock); 2833 2834 set_context_pending_disable(ce); 2835 clr_context_enabled(ce); 2836 guc_blocked_fence_reinit(ce); 2837 intel_context_get(ce); 2838 2839 return ce->guc_id.id; 2840 } 2841 2842 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2843 { 2844 struct intel_guc *guc = ce_to_guc(ce); 2845 unsigned long flags; 2846 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2847 intel_wakeref_t wakeref; 2848 u16 guc_id; 2849 bool enabled; 2850 2851 GEM_BUG_ON(intel_context_is_child(ce)); 2852 2853 spin_lock_irqsave(&ce->guc_state.lock, flags); 2854 2855 incr_context_blocked(ce); 2856 2857 enabled = context_enabled(ce); 2858 if (unlikely(!enabled || submission_disabled(guc))) { 2859 if (enabled) 2860 clr_context_enabled(ce); 2861 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2862 return &ce->guc_state.blocked; 2863 } 2864 2865 /* 2866 * We add +2 here as the schedule disable complete CTB handler calls 2867 * intel_context_sched_disable_unpin (-2 to pin_count). 2868 */ 2869 atomic_add(2, &ce->pin_count); 2870 2871 guc_id = prep_context_pending_disable(ce); 2872 2873 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2874 2875 with_intel_runtime_pm(runtime_pm, wakeref) 2876 __guc_context_sched_disable(guc, ce, guc_id); 2877 2878 return &ce->guc_state.blocked; 2879 } 2880 2881 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2882 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2883 #define SCHED_STATE_NO_UNBLOCK \ 2884 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2885 SCHED_STATE_PENDING_DISABLE | \ 2886 SCHED_STATE_BANNED) 2887 2888 static bool context_cant_unblock(struct intel_context *ce) 2889 { 2890 lockdep_assert_held(&ce->guc_state.lock); 2891 2892 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2893 context_guc_id_invalid(ce) || 2894 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2895 !intel_context_is_pinned(ce); 2896 } 2897 2898 static void guc_context_unblock(struct intel_context *ce) 2899 { 2900 struct intel_guc *guc = ce_to_guc(ce); 2901 unsigned long flags; 2902 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2903 intel_wakeref_t wakeref; 2904 bool enable; 2905 2906 GEM_BUG_ON(context_enabled(ce)); 2907 GEM_BUG_ON(intel_context_is_child(ce)); 2908 2909 spin_lock_irqsave(&ce->guc_state.lock, flags); 2910 2911 if (unlikely(submission_disabled(guc) || 2912 context_cant_unblock(ce))) { 2913 enable = false; 2914 } else { 2915 enable = true; 2916 set_context_pending_enable(ce); 2917 set_context_enabled(ce); 2918 intel_context_get(ce); 2919 } 2920 2921 decr_context_blocked(ce); 2922 2923 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2924 2925 if (enable) { 2926 with_intel_runtime_pm(runtime_pm, wakeref) 2927 __guc_context_sched_enable(guc, ce); 2928 } 2929 } 2930 2931 static void guc_context_cancel_request(struct intel_context *ce, 2932 struct i915_request *rq) 2933 { 2934 struct intel_context *block_context = 2935 request_to_scheduling_context(rq); 2936 2937 if (i915_sw_fence_signaled(&rq->submit)) { 2938 struct i915_sw_fence *fence; 2939 2940 intel_context_get(ce); 2941 fence = 
guc_context_block(block_context); 2942 i915_sw_fence_wait(fence); 2943 if (!i915_request_completed(rq)) { 2944 __i915_request_skip(rq); 2945 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2946 true); 2947 } 2948 2949 guc_context_unblock(block_context); 2950 intel_context_put(ce); 2951 } 2952 } 2953 2954 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2955 u16 guc_id, 2956 u32 preemption_timeout) 2957 { 2958 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 2959 struct context_policy policy; 2960 2961 __guc_context_policy_start_klv(&policy, guc_id); 2962 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2963 __guc_context_set_context_policies(guc, &policy, true); 2964 } else { 2965 u32 action[] = { 2966 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 2967 guc_id, 2968 preemption_timeout 2969 }; 2970 2971 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2972 } 2973 } 2974 2975 static void 2976 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 2977 unsigned int preempt_timeout_ms) 2978 { 2979 struct intel_guc *guc = ce_to_guc(ce); 2980 struct intel_runtime_pm *runtime_pm = 2981 &ce->engine->gt->i915->runtime_pm; 2982 intel_wakeref_t wakeref; 2983 unsigned long flags; 2984 2985 GEM_BUG_ON(intel_context_is_child(ce)); 2986 2987 guc_flush_submissions(guc); 2988 2989 spin_lock_irqsave(&ce->guc_state.lock, flags); 2990 set_context_banned(ce); 2991 2992 if (submission_disabled(guc) || 2993 (!context_enabled(ce) && !context_pending_disable(ce))) { 2994 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2995 2996 guc_cancel_context_requests(ce); 2997 intel_engine_signal_breadcrumbs(ce->engine); 2998 } else if (!context_pending_disable(ce)) { 2999 u16 guc_id; 3000 3001 /* 3002 * We add +2 here as the schedule disable complete CTB handler 3003 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3004 */ 3005 atomic_add(2, &ce->pin_count); 3006 3007 guc_id = prep_context_pending_disable(ce); 3008 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3009 3010 /* 3011 * In addition to disabling scheduling, set the preemption 3012 * timeout to the minimum value (1 us) so the banned context 3013 * gets kicked off the HW ASAP. 
3014 */ 3015 with_intel_runtime_pm(runtime_pm, wakeref) { 3016 __guc_context_set_preemption_timeout(guc, guc_id, 3017 preempt_timeout_ms); 3018 __guc_context_sched_disable(guc, ce, guc_id); 3019 } 3020 } else { 3021 if (!context_guc_id_invalid(ce)) 3022 with_intel_runtime_pm(runtime_pm, wakeref) 3023 __guc_context_set_preemption_timeout(guc, 3024 ce->guc_id.id, 3025 preempt_timeout_ms); 3026 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3027 } 3028 } 3029 3030 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3031 unsigned long flags) 3032 __releases(ce->guc_state.lock) 3033 { 3034 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3035 intel_wakeref_t wakeref; 3036 u16 guc_id; 3037 3038 lockdep_assert_held(&ce->guc_state.lock); 3039 guc_id = prep_context_pending_disable(ce); 3040 3041 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3042 3043 with_intel_runtime_pm(runtime_pm, wakeref) 3044 __guc_context_sched_disable(guc, ce, guc_id); 3045 } 3046 3047 static bool bypass_sched_disable(struct intel_guc *guc, 3048 struct intel_context *ce) 3049 { 3050 lockdep_assert_held(&ce->guc_state.lock); 3051 GEM_BUG_ON(intel_context_is_child(ce)); 3052 3053 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3054 !ctx_id_mapped(guc, ce->guc_id.id)) { 3055 clr_context_enabled(ce); 3056 return true; 3057 } 3058 3059 return !context_enabled(ce); 3060 } 3061 3062 static void __delay_sched_disable(struct work_struct *wrk) 3063 { 3064 struct intel_context *ce = 3065 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3066 struct intel_guc *guc = ce_to_guc(ce); 3067 unsigned long flags; 3068 3069 spin_lock_irqsave(&ce->guc_state.lock, flags); 3070 3071 if (bypass_sched_disable(guc, ce)) { 3072 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3073 intel_context_sched_disable_unpin(ce); 3074 } else { 3075 do_sched_disable(guc, ce, flags); 3076 } 3077 } 3078 3079 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3080 { 3081 /* 3082 * parent contexts are perma-pinned, if we are unpinning do schedule 3083 * disable immediately. 3084 */ 3085 if (intel_context_is_parent(ce)) 3086 return true; 3087 3088 /* 3089 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
3090 */ 3091 return guc->submission_state.guc_ids_in_use > 3092 guc->submission_state.sched_disable_gucid_threshold; 3093 } 3094 3095 static void guc_context_sched_disable(struct intel_context *ce) 3096 { 3097 struct intel_guc *guc = ce_to_guc(ce); 3098 u64 delay = guc->submission_state.sched_disable_delay_ms; 3099 unsigned long flags; 3100 3101 spin_lock_irqsave(&ce->guc_state.lock, flags); 3102 3103 if (bypass_sched_disable(guc, ce)) { 3104 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3105 intel_context_sched_disable_unpin(ce); 3106 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3107 delay) { 3108 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3109 mod_delayed_work(system_unbound_wq, 3110 &ce->guc_state.sched_disable_delay_work, 3111 msecs_to_jiffies(delay)); 3112 } else { 3113 do_sched_disable(guc, ce, flags); 3114 } 3115 } 3116 3117 static void guc_context_close(struct intel_context *ce) 3118 { 3119 unsigned long flags; 3120 3121 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3122 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3123 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3124 3125 spin_lock_irqsave(&ce->guc_state.lock, flags); 3126 set_context_close_done(ce); 3127 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3128 } 3129 3130 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3131 { 3132 struct intel_guc *guc = ce_to_guc(ce); 3133 struct intel_gt *gt = guc_to_gt(guc); 3134 unsigned long flags; 3135 bool disabled; 3136 3137 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3138 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3139 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3140 GEM_BUG_ON(context_enabled(ce)); 3141 3142 /* Seal race with Reset */ 3143 spin_lock_irqsave(&ce->guc_state.lock, flags); 3144 disabled = submission_disabled(guc); 3145 if (likely(!disabled)) { 3146 __intel_gt_pm_get(gt); 3147 set_context_destroyed(ce); 3148 clr_context_registered(ce); 3149 } 3150 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3151 if (unlikely(disabled)) { 3152 release_guc_id(guc, ce); 3153 __guc_context_destroy(ce); 3154 return; 3155 } 3156 3157 deregister_context(ce, ce->guc_id.id); 3158 } 3159 3160 static void __guc_context_destroy(struct intel_context *ce) 3161 { 3162 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3163 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3164 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3165 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3166 3167 lrc_fini(ce); 3168 intel_context_fini(ce); 3169 3170 if (intel_engine_is_virtual(ce->engine)) { 3171 struct guc_virtual_engine *ve = 3172 container_of(ce, typeof(*ve), context); 3173 3174 if (ve->base.breadcrumbs) 3175 intel_breadcrumbs_put(ve->base.breadcrumbs); 3176 3177 kfree(ve); 3178 } else { 3179 intel_context_free(ce); 3180 } 3181 } 3182 3183 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3184 { 3185 struct intel_context *ce; 3186 unsigned long flags; 3187 3188 GEM_BUG_ON(!submission_disabled(guc) && 3189 guc_submission_initialized(guc)); 3190 3191 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3192 spin_lock_irqsave(&guc->submission_state.lock, flags); 3193 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3194 struct intel_context, 3195 destroyed_link); 3196 if (ce) 3197 list_del_init(&ce->destroyed_link); 3198 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3199 3200 if (!ce) 3201 break; 
3202 3203 release_guc_id(guc, ce); 3204 __guc_context_destroy(ce); 3205 } 3206 } 3207 3208 static void deregister_destroyed_contexts(struct intel_guc *guc) 3209 { 3210 struct intel_context *ce; 3211 unsigned long flags; 3212 3213 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3214 spin_lock_irqsave(&guc->submission_state.lock, flags); 3215 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3216 struct intel_context, 3217 destroyed_link); 3218 if (ce) 3219 list_del_init(&ce->destroyed_link); 3220 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3221 3222 if (!ce) 3223 break; 3224 3225 guc_lrc_desc_unpin(ce); 3226 } 3227 } 3228 3229 static void destroyed_worker_func(struct work_struct *w) 3230 { 3231 struct intel_guc *guc = container_of(w, struct intel_guc, 3232 submission_state.destroyed_worker); 3233 struct intel_gt *gt = guc_to_gt(guc); 3234 int tmp; 3235 3236 with_intel_gt_pm(gt, tmp) 3237 deregister_destroyed_contexts(guc); 3238 } 3239 3240 static void guc_context_destroy(struct kref *kref) 3241 { 3242 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3243 struct intel_guc *guc = ce_to_guc(ce); 3244 unsigned long flags; 3245 bool destroy; 3246 3247 /* 3248 * If the guc_id is invalid this context has been stolen and we can free 3249 * it immediately. Also can be freed immediately if the context is not 3250 * registered with the GuC or the GuC is in the middle of a reset. 3251 */ 3252 spin_lock_irqsave(&guc->submission_state.lock, flags); 3253 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3254 !ctx_id_mapped(guc, ce->guc_id.id); 3255 if (likely(!destroy)) { 3256 if (!list_empty(&ce->guc_id.link)) 3257 list_del_init(&ce->guc_id.link); 3258 list_add_tail(&ce->destroyed_link, 3259 &guc->submission_state.destroyed_contexts); 3260 } else { 3261 __release_guc_id(guc, ce); 3262 } 3263 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3264 if (unlikely(destroy)) { 3265 __guc_context_destroy(ce); 3266 return; 3267 } 3268 3269 /* 3270 * We use a worker to issue the H2G to deregister the context as we can 3271 * take the GT PM for the first time which isn't allowed from an atomic 3272 * context. 
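* The worker (destroyed_worker_func) takes a GT PM wakeref around the deregistration.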
3273 */ 3274 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3275 } 3276 3277 static int guc_context_alloc(struct intel_context *ce) 3278 { 3279 return lrc_alloc(ce, ce->engine); 3280 } 3281 3282 static void __guc_context_set_prio(struct intel_guc *guc, 3283 struct intel_context *ce) 3284 { 3285 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 3286 struct context_policy policy; 3287 3288 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3289 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3290 __guc_context_set_context_policies(guc, &policy, true); 3291 } else { 3292 u32 action[] = { 3293 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3294 ce->guc_id.id, 3295 ce->guc_state.prio, 3296 }; 3297 3298 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3299 } 3300 } 3301 3302 static void guc_context_set_prio(struct intel_guc *guc, 3303 struct intel_context *ce, 3304 u8 prio) 3305 { 3306 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3307 prio > GUC_CLIENT_PRIORITY_NORMAL); 3308 lockdep_assert_held(&ce->guc_state.lock); 3309 3310 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3311 !context_registered(ce)) { 3312 ce->guc_state.prio = prio; 3313 return; 3314 } 3315 3316 ce->guc_state.prio = prio; 3317 __guc_context_set_prio(guc, ce); 3318 3319 trace_intel_context_set_prio(ce); 3320 } 3321 3322 static inline u8 map_i915_prio_to_guc_prio(int prio) 3323 { 3324 if (prio == I915_PRIORITY_NORMAL) 3325 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3326 else if (prio < I915_PRIORITY_NORMAL) 3327 return GUC_CLIENT_PRIORITY_NORMAL; 3328 else if (prio < I915_PRIORITY_DISPLAY) 3329 return GUC_CLIENT_PRIORITY_HIGH; 3330 else 3331 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3332 } 3333 3334 static inline void add_context_inflight_prio(struct intel_context *ce, 3335 u8 guc_prio) 3336 { 3337 lockdep_assert_held(&ce->guc_state.lock); 3338 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3339 3340 ++ce->guc_state.prio_count[guc_prio]; 3341 3342 /* Overflow protection */ 3343 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3344 } 3345 3346 static inline void sub_context_inflight_prio(struct intel_context *ce, 3347 u8 guc_prio) 3348 { 3349 lockdep_assert_held(&ce->guc_state.lock); 3350 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3351 3352 /* Underflow protection */ 3353 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3354 3355 --ce->guc_state.prio_count[guc_prio]; 3356 } 3357 3358 static inline void update_context_prio(struct intel_context *ce) 3359 { 3360 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3361 int i; 3362 3363 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3364 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3365 3366 lockdep_assert_held(&ce->guc_state.lock); 3367 3368 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3369 if (ce->guc_state.prio_count[i]) { 3370 guc_context_set_prio(guc, ce, i); 3371 break; 3372 } 3373 } 3374 } 3375 3376 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3377 { 3378 /* Lower value is higher priority */ 3379 return new_guc_prio < old_guc_prio; 3380 } 3381 3382 static void add_to_context(struct i915_request *rq) 3383 { 3384 struct intel_context *ce = request_to_scheduling_context(rq); 3385 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3386 3387 GEM_BUG_ON(intel_context_is_child(ce)); 3388 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3389 3390 spin_lock(&ce->guc_state.lock); 3391 
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3392 3393 if (rq->guc_prio == GUC_PRIO_INIT) { 3394 rq->guc_prio = new_guc_prio; 3395 add_context_inflight_prio(ce, rq->guc_prio); 3396 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3397 sub_context_inflight_prio(ce, rq->guc_prio); 3398 rq->guc_prio = new_guc_prio; 3399 add_context_inflight_prio(ce, rq->guc_prio); 3400 } 3401 update_context_prio(ce); 3402 3403 spin_unlock(&ce->guc_state.lock); 3404 } 3405 3406 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3407 { 3408 lockdep_assert_held(&ce->guc_state.lock); 3409 3410 if (rq->guc_prio != GUC_PRIO_INIT && 3411 rq->guc_prio != GUC_PRIO_FINI) { 3412 sub_context_inflight_prio(ce, rq->guc_prio); 3413 update_context_prio(ce); 3414 } 3415 rq->guc_prio = GUC_PRIO_FINI; 3416 } 3417 3418 static void remove_from_context(struct i915_request *rq) 3419 { 3420 struct intel_context *ce = request_to_scheduling_context(rq); 3421 3422 GEM_BUG_ON(intel_context_is_child(ce)); 3423 3424 spin_lock_irq(&ce->guc_state.lock); 3425 3426 list_del_init(&rq->sched.link); 3427 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3428 3429 /* Prevent further __await_execution() registering a cb, then flush */ 3430 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3431 3432 guc_prio_fini(rq, ce); 3433 3434 spin_unlock_irq(&ce->guc_state.lock); 3435 3436 atomic_dec(&ce->guc_id.ref); 3437 i915_request_notify_execute_cb_imm(rq); 3438 } 3439 3440 static const struct intel_context_ops guc_context_ops = { 3441 .alloc = guc_context_alloc, 3442 3443 .close = guc_context_close, 3444 3445 .pre_pin = guc_context_pre_pin, 3446 .pin = guc_context_pin, 3447 .unpin = guc_context_unpin, 3448 .post_unpin = guc_context_post_unpin, 3449 3450 .revoke = guc_context_revoke, 3451 3452 .cancel_request = guc_context_cancel_request, 3453 3454 .enter = intel_context_enter_engine, 3455 .exit = intel_context_exit_engine, 3456 3457 .sched_disable = guc_context_sched_disable, 3458 3459 .reset = lrc_reset, 3460 .destroy = guc_context_destroy, 3461 3462 .create_virtual = guc_create_virtual, 3463 .create_parallel = guc_create_parallel, 3464 }; 3465 3466 static void submit_work_cb(struct irq_work *wrk) 3467 { 3468 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3469 3470 might_lock(&rq->engine->sched_engine->lock); 3471 i915_sw_fence_complete(&rq->submit); 3472 } 3473 3474 static void __guc_signal_context_fence(struct intel_context *ce) 3475 { 3476 struct i915_request *rq, *rn; 3477 3478 lockdep_assert_held(&ce->guc_state.lock); 3479 3480 if (!list_empty(&ce->guc_state.fences)) 3481 trace_intel_context_fence_release(ce); 3482 3483 /* 3484 * Use an IRQ to ensure locking order of sched_engine->lock -> 3485 * ce->guc_state.lock is preserved. 
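* Completing the submit fence may take sched_engine->lock (see the might_lock() annotation in submit_work_cb()), so it is deferred to an irq_work.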
3486 */ 3487 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3488 guc_fence_link) { 3489 list_del(&rq->guc_fence_link); 3490 irq_work_queue(&rq->submit_work); 3491 } 3492 3493 INIT_LIST_HEAD(&ce->guc_state.fences); 3494 } 3495 3496 static void guc_signal_context_fence(struct intel_context *ce) 3497 { 3498 unsigned long flags; 3499 3500 GEM_BUG_ON(intel_context_is_child(ce)); 3501 3502 spin_lock_irqsave(&ce->guc_state.lock, flags); 3503 clr_context_wait_for_deregister_to_register(ce); 3504 __guc_signal_context_fence(ce); 3505 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3506 } 3507 3508 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3509 { 3510 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3511 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3512 !submission_disabled(ce_to_guc(ce)); 3513 } 3514 3515 static void guc_context_init(struct intel_context *ce) 3516 { 3517 const struct i915_gem_context *ctx; 3518 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3519 3520 rcu_read_lock(); 3521 ctx = rcu_dereference(ce->gem_context); 3522 if (ctx) 3523 prio = ctx->sched.priority; 3524 rcu_read_unlock(); 3525 3526 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3527 3528 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3529 __delay_sched_disable); 3530 3531 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3532 } 3533 3534 static int guc_request_alloc(struct i915_request *rq) 3535 { 3536 struct intel_context *ce = request_to_scheduling_context(rq); 3537 struct intel_guc *guc = ce_to_guc(ce); 3538 unsigned long flags; 3539 int ret; 3540 3541 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3542 3543 /* 3544 * Flush enough space to reduce the likelihood of waiting after 3545 * we start building the request - in which case we will just 3546 * have to repeat work. 3547 */ 3548 rq->reserved_space += GUC_REQUEST_SIZE; 3549 3550 /* 3551 * Note that after this point, we have committed to using 3552 * this request as it is being used to both track the 3553 * state of engine initialisation and liveness of the 3554 * golden renderstate above. Think twice before you try 3555 * to cancel/unwind this request now. 3556 */ 3557 3558 /* Unconditionally invalidate GPU caches and TLBs. */ 3559 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3560 if (ret) 3561 return ret; 3562 3563 rq->reserved_space -= GUC_REQUEST_SIZE; 3564 3565 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3566 guc_context_init(ce); 3567 3568 /* 3569 * If the context gets closed while the execbuf is ongoing, the context 3570 * close code will race with the below code to cancel the delayed work. 3571 * If the context close wins the race and cancels the work, it will 3572 * immediately call the sched disable (see guc_context_close), so there 3573 * is a chance we can get past this check while the sched_disable code 3574 * is being executed. To make sure that code completes before we check 3575 * the status further down, we wait for the close process to complete. 3576 * Else, this code path could send a request down thinking that the 3577 * context is still in a schedule-enable mode while the GuC ends up 3578 * dropping the request completely because the disable did go from the 3579 * context_close path right to GuC just prior. In the event the CT is 3580 * full, we could potentially need to wait up to 1.5 seconds. 
3581 */ 3582 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3583 intel_context_sched_disable_unpin(ce); 3584 else if (intel_context_is_closed(ce)) 3585 if (wait_for(context_close_done(ce), 1500)) 3586 drm_warn(&guc_to_gt(guc)->i915->drm, 3587 "timed out waiting on context sched close before realloc\n"); 3588 /* 3589 * Call pin_guc_id here rather than in the pinning step as with 3590 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3591 * guc_id and creating horrible race conditions. This is especially bad 3592 * when guc_id are being stolen due to over subscription. By the time 3593 * this function is reached, it is guaranteed that the guc_id will be 3594 * persistent until the generated request is retired. Thus, sealing these 3595 * race conditions. It is still safe to fail here if guc_id are 3596 * exhausted and return -EAGAIN to the user indicating that they can try 3597 * again in the future. 3598 * 3599 * There is no need for a lock here as the timeline mutex ensures at 3600 * most one context can be executing this code path at once. The 3601 * guc_id_ref is incremented once for every request in flight and 3602 * decremented on each retire. When it is zero, a lock around the 3603 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3604 */ 3605 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3606 goto out; 3607 3608 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3609 if (unlikely(ret < 0)) 3610 return ret; 3611 if (context_needs_register(ce, !!ret)) { 3612 ret = try_context_registration(ce, true); 3613 if (unlikely(ret)) { /* unwind */ 3614 if (ret == -EPIPE) { 3615 disable_submission(guc); 3616 goto out; /* GPU will be reset */ 3617 } 3618 atomic_dec(&ce->guc_id.ref); 3619 unpin_guc_id(guc, ce); 3620 return ret; 3621 } 3622 } 3623 3624 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3625 3626 out: 3627 /* 3628 * We block all requests on this context if a G2H is pending for a 3629 * schedule disable or context deregistration as the GuC will fail a 3630 * schedule enable or context registration if either G2H is pending 3631 * respectfully. Once a G2H returns, the fence is released that is 3632 * blocking these requests (see guc_signal_context_fence). 
3633 */ 3634 spin_lock_irqsave(&ce->guc_state.lock, flags); 3635 if (context_wait_for_deregister_to_register(ce) || 3636 context_pending_disable(ce)) { 3637 init_irq_work(&rq->submit_work, submit_work_cb); 3638 i915_sw_fence_await(&rq->submit); 3639 3640 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3641 } 3642 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3643 3644 return 0; 3645 } 3646 3647 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3648 struct i915_gem_ww_ctx *ww, 3649 void **vaddr) 3650 { 3651 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3652 3653 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3654 } 3655 3656 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3657 { 3658 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3659 int ret = __guc_context_pin(ce, engine, vaddr); 3660 intel_engine_mask_t tmp, mask = ce->engine->mask; 3661 3662 if (likely(!ret)) 3663 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3664 intel_engine_pm_get(engine); 3665 3666 return ret; 3667 } 3668 3669 static void guc_virtual_context_unpin(struct intel_context *ce) 3670 { 3671 intel_engine_mask_t tmp, mask = ce->engine->mask; 3672 struct intel_engine_cs *engine; 3673 struct intel_guc *guc = ce_to_guc(ce); 3674 3675 GEM_BUG_ON(context_enabled(ce)); 3676 GEM_BUG_ON(intel_context_is_barrier(ce)); 3677 3678 unpin_guc_id(guc, ce); 3679 lrc_unpin(ce); 3680 3681 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3682 intel_engine_pm_put_async(engine); 3683 } 3684 3685 static void guc_virtual_context_enter(struct intel_context *ce) 3686 { 3687 intel_engine_mask_t tmp, mask = ce->engine->mask; 3688 struct intel_engine_cs *engine; 3689 3690 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3691 intel_engine_pm_get(engine); 3692 3693 intel_timeline_enter(ce->timeline); 3694 } 3695 3696 static void guc_virtual_context_exit(struct intel_context *ce) 3697 { 3698 intel_engine_mask_t tmp, mask = ce->engine->mask; 3699 struct intel_engine_cs *engine; 3700 3701 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3702 intel_engine_pm_put(engine); 3703 3704 intel_timeline_exit(ce->timeline); 3705 } 3706 3707 static int guc_virtual_context_alloc(struct intel_context *ce) 3708 { 3709 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3710 3711 return lrc_alloc(ce, engine); 3712 } 3713 3714 static const struct intel_context_ops virtual_guc_context_ops = { 3715 .alloc = guc_virtual_context_alloc, 3716 3717 .close = guc_context_close, 3718 3719 .pre_pin = guc_virtual_context_pre_pin, 3720 .pin = guc_virtual_context_pin, 3721 .unpin = guc_virtual_context_unpin, 3722 .post_unpin = guc_context_post_unpin, 3723 3724 .revoke = guc_context_revoke, 3725 3726 .cancel_request = guc_context_cancel_request, 3727 3728 .enter = guc_virtual_context_enter, 3729 .exit = guc_virtual_context_exit, 3730 3731 .sched_disable = guc_context_sched_disable, 3732 3733 .destroy = guc_context_destroy, 3734 3735 .get_sibling = guc_virtual_get_sibling, 3736 }; 3737 3738 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3739 { 3740 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3741 struct intel_guc *guc = ce_to_guc(ce); 3742 int ret; 3743 3744 GEM_BUG_ON(!intel_context_is_parent(ce)); 3745 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3746 3747 ret = pin_guc_id(guc, ce); 3748 if (unlikely(ret < 0)) 3749 return ret; 3750 3751 return 
__guc_context_pin(ce, engine, vaddr); 3752 } 3753 3754 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3755 { 3756 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3757 3758 GEM_BUG_ON(!intel_context_is_child(ce)); 3759 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3760 3761 __intel_context_pin(ce->parallel.parent); 3762 return __guc_context_pin(ce, engine, vaddr); 3763 } 3764 3765 static void guc_parent_context_unpin(struct intel_context *ce) 3766 { 3767 struct intel_guc *guc = ce_to_guc(ce); 3768 3769 GEM_BUG_ON(context_enabled(ce)); 3770 GEM_BUG_ON(intel_context_is_barrier(ce)); 3771 GEM_BUG_ON(!intel_context_is_parent(ce)); 3772 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3773 3774 unpin_guc_id(guc, ce); 3775 lrc_unpin(ce); 3776 } 3777 3778 static void guc_child_context_unpin(struct intel_context *ce) 3779 { 3780 GEM_BUG_ON(context_enabled(ce)); 3781 GEM_BUG_ON(intel_context_is_barrier(ce)); 3782 GEM_BUG_ON(!intel_context_is_child(ce)); 3783 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3784 3785 lrc_unpin(ce); 3786 } 3787 3788 static void guc_child_context_post_unpin(struct intel_context *ce) 3789 { 3790 GEM_BUG_ON(!intel_context_is_child(ce)); 3791 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3792 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3793 3794 lrc_post_unpin(ce); 3795 intel_context_unpin(ce->parallel.parent); 3796 } 3797 3798 static void guc_child_context_destroy(struct kref *kref) 3799 { 3800 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3801 3802 __guc_context_destroy(ce); 3803 } 3804 3805 static const struct intel_context_ops virtual_parent_context_ops = { 3806 .alloc = guc_virtual_context_alloc, 3807 3808 .close = guc_context_close, 3809 3810 .pre_pin = guc_context_pre_pin, 3811 .pin = guc_parent_context_pin, 3812 .unpin = guc_parent_context_unpin, 3813 .post_unpin = guc_context_post_unpin, 3814 3815 .revoke = guc_context_revoke, 3816 3817 .cancel_request = guc_context_cancel_request, 3818 3819 .enter = guc_virtual_context_enter, 3820 .exit = guc_virtual_context_exit, 3821 3822 .sched_disable = guc_context_sched_disable, 3823 3824 .destroy = guc_context_destroy, 3825 3826 .get_sibling = guc_virtual_get_sibling, 3827 }; 3828 3829 static const struct intel_context_ops virtual_child_context_ops = { 3830 .alloc = guc_virtual_context_alloc, 3831 3832 .pre_pin = guc_context_pre_pin, 3833 .pin = guc_child_context_pin, 3834 .unpin = guc_child_context_unpin, 3835 .post_unpin = guc_child_context_post_unpin, 3836 3837 .cancel_request = guc_context_cancel_request, 3838 3839 .enter = guc_virtual_context_enter, 3840 .exit = guc_virtual_context_exit, 3841 3842 .destroy = guc_child_context_destroy, 3843 3844 .get_sibling = guc_virtual_get_sibling, 3845 }; 3846 3847 /* 3848 * The below override of the breadcrumbs is enabled when the user configures a 3849 * context for parallel submission (multi-lrc, parent-child). 3850 * 3851 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3852 * safely preempt all the hw contexts configured for parallel submission 3853 * between each BB. The contract between the i915 and GuC is if the parent 3854 * context can be preempted, all the children can be preempted, and the GuC will 3855 * always try to preempt the parent before the children. A handshake between the 3856 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3857 * creating a window to preempt between each set of BBs. 
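 *
 * A rough sketch of that handshake, as implemented by the emit_* helpers
 * further down (PARENT_GO_* / CHILD_GO_* are the semaphore payloads):
 *
 *   child BB start:   signal its join semaphore, wait for the parent's go
 *                     semaphore, disable arbitration, jump to the batch
 *   parent BB start:  wait for every child's join semaphore, disable
 *                     arbitration, write the go semaphore, jump to the batch
 *   fini breadcrumbs: the mirror image with the *_FINI_BREADCRUMB values,
 *                     re-enabling arbitration so the GuC can preempt again
 *                     between sets of BBs.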
3858 */ 3859 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3860 u64 offset, u32 len, 3861 const unsigned int flags); 3862 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3863 u64 offset, u32 len, 3864 const unsigned int flags); 3865 static u32 * 3866 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3867 u32 *cs); 3868 static u32 * 3869 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3870 u32 *cs); 3871 3872 static struct intel_context * 3873 guc_create_parallel(struct intel_engine_cs **engines, 3874 unsigned int num_siblings, 3875 unsigned int width) 3876 { 3877 struct intel_engine_cs **siblings = NULL; 3878 struct intel_context *parent = NULL, *ce, *err; 3879 int i, j; 3880 3881 siblings = kmalloc_array(num_siblings, 3882 sizeof(*siblings), 3883 GFP_KERNEL); 3884 if (!siblings) 3885 return ERR_PTR(-ENOMEM); 3886 3887 for (i = 0; i < width; ++i) { 3888 for (j = 0; j < num_siblings; ++j) 3889 siblings[j] = engines[i * num_siblings + j]; 3890 3891 ce = intel_engine_create_virtual(siblings, num_siblings, 3892 FORCE_VIRTUAL); 3893 if (IS_ERR(ce)) { 3894 err = ERR_CAST(ce); 3895 goto unwind; 3896 } 3897 3898 if (i == 0) { 3899 parent = ce; 3900 parent->ops = &virtual_parent_context_ops; 3901 } else { 3902 ce->ops = &virtual_child_context_ops; 3903 intel_context_bind_parent_child(parent, ce); 3904 } 3905 } 3906 3907 parent->parallel.fence_context = dma_fence_context_alloc(1); 3908 3909 parent->engine->emit_bb_start = 3910 emit_bb_start_parent_no_preempt_mid_batch; 3911 parent->engine->emit_fini_breadcrumb = 3912 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3913 parent->engine->emit_fini_breadcrumb_dw = 3914 12 + 4 * parent->parallel.number_children; 3915 for_each_child(parent, ce) { 3916 ce->engine->emit_bb_start = 3917 emit_bb_start_child_no_preempt_mid_batch; 3918 ce->engine->emit_fini_breadcrumb = 3919 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3920 ce->engine->emit_fini_breadcrumb_dw = 16; 3921 } 3922 3923 kfree(siblings); 3924 return parent; 3925 3926 unwind: 3927 if (parent) 3928 intel_context_put(parent); 3929 kfree(siblings); 3930 return err; 3931 } 3932 3933 static bool 3934 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3935 { 3936 struct intel_engine_cs *sibling; 3937 intel_engine_mask_t tmp, mask = b->engine_mask; 3938 bool result = false; 3939 3940 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3941 result |= intel_engine_irq_enable(sibling); 3942 3943 return result; 3944 } 3945 3946 static void 3947 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3948 { 3949 struct intel_engine_cs *sibling; 3950 intel_engine_mask_t tmp, mask = b->engine_mask; 3951 3952 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3953 intel_engine_irq_disable(sibling); 3954 } 3955 3956 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3957 { 3958 int i; 3959 3960 /* 3961 * In GuC submission mode we do not know which physical engine a request 3962 * will be scheduled on, this creates a problem because the breadcrumb 3963 * interrupt is per physical engine. To work around this we attach 3964 * requests and direct all breadcrumb interrupts to the first instance 3965 * of an engine per class. In addition all breadcrumb interrupts are 3966 * enabled / disabled across an engine class in unison. 
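 *
 * For example, on a part with several video engines the second and later
 * instances drop their own breadcrumbs object below and take a reference
 * on the first instance's, so a single user interrupt services the whole
 * class.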
3967 */ 3968 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3969 struct intel_engine_cs *sibling = 3970 engine->gt->engine_class[engine->class][i]; 3971 3972 if (sibling) { 3973 if (engine->breadcrumbs != sibling->breadcrumbs) { 3974 intel_breadcrumbs_put(engine->breadcrumbs); 3975 engine->breadcrumbs = 3976 intel_breadcrumbs_get(sibling->breadcrumbs); 3977 } 3978 break; 3979 } 3980 } 3981 3982 if (engine->breadcrumbs) { 3983 engine->breadcrumbs->engine_mask |= engine->mask; 3984 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3985 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3986 } 3987 } 3988 3989 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3990 int prio) 3991 { 3992 struct intel_context *ce = request_to_scheduling_context(rq); 3993 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3994 3995 /* Short circuit function */ 3996 if (prio < I915_PRIORITY_NORMAL || 3997 rq->guc_prio == GUC_PRIO_FINI || 3998 (rq->guc_prio != GUC_PRIO_INIT && 3999 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4000 return; 4001 4002 spin_lock(&ce->guc_state.lock); 4003 if (rq->guc_prio != GUC_PRIO_FINI) { 4004 if (rq->guc_prio != GUC_PRIO_INIT) 4005 sub_context_inflight_prio(ce, rq->guc_prio); 4006 rq->guc_prio = new_guc_prio; 4007 add_context_inflight_prio(ce, rq->guc_prio); 4008 update_context_prio(ce); 4009 } 4010 spin_unlock(&ce->guc_state.lock); 4011 } 4012 4013 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4014 { 4015 struct intel_context *ce = request_to_scheduling_context(rq); 4016 4017 spin_lock(&ce->guc_state.lock); 4018 guc_prio_fini(rq, ce); 4019 spin_unlock(&ce->guc_state.lock); 4020 } 4021 4022 static void sanitize_hwsp(struct intel_engine_cs *engine) 4023 { 4024 struct intel_timeline *tl; 4025 4026 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4027 intel_timeline_reset_seqno(tl); 4028 } 4029 4030 static void guc_sanitize(struct intel_engine_cs *engine) 4031 { 4032 /* 4033 * Poison residual state on resume, in case the suspend didn't! 4034 * 4035 * We have to assume that across suspend/resume (or other loss 4036 * of control) that the contents of our pinned buffers has been 4037 * lost, replaced by garbage. Since this doesn't always happen, 4038 * let's poison such state so that we more quickly spot when 4039 * we falsely assume it has been preserved. 4040 */ 4041 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4042 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4043 4044 /* 4045 * The kernel_context HWSP is stored in the status_page. As above, 4046 * that may be lost on resume/initialisation, and so we need to 4047 * reset the value in the HWSP. 
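 *
 * sanitize_hwsp() does that by walking engine->status_page.timelines and
 * calling intel_timeline_reset_seqno() on each timeline.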
4048 */ 4049 sanitize_hwsp(engine); 4050 4051 /* And scrub the dirty cachelines for the HWSP */ 4052 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4053 4054 intel_engine_reset_pinned_contexts(engine); 4055 } 4056 4057 static void setup_hwsp(struct intel_engine_cs *engine) 4058 { 4059 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4060 4061 ENGINE_WRITE_FW(engine, 4062 RING_HWS_PGA, 4063 i915_ggtt_offset(engine->status_page.vma)); 4064 } 4065 4066 static void start_engine(struct intel_engine_cs *engine) 4067 { 4068 ENGINE_WRITE_FW(engine, 4069 RING_MODE_GEN7, 4070 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4071 4072 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4073 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4074 } 4075 4076 static int guc_resume(struct intel_engine_cs *engine) 4077 { 4078 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4079 4080 intel_mocs_init_engine(engine); 4081 4082 intel_breadcrumbs_reset(engine->breadcrumbs); 4083 4084 setup_hwsp(engine); 4085 start_engine(engine); 4086 4087 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4088 xehp_enable_ccs_engines(engine); 4089 4090 return 0; 4091 } 4092 4093 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4094 { 4095 return !sched_engine->tasklet.callback; 4096 } 4097 4098 static void guc_set_default_submission(struct intel_engine_cs *engine) 4099 { 4100 engine->submit_request = guc_submit_request; 4101 } 4102 4103 static inline void guc_kernel_context_pin(struct intel_guc *guc, 4104 struct intel_context *ce) 4105 { 4106 /* 4107 * Note: we purposefully do not check the returns below because 4108 * the registration can only fail if a reset is just starting. 4109 * This is called at the end of reset so presumably another reset 4110 * isn't happening and, even if it did, this code would be run again. 4111 */ 4112 4113 if (context_guc_id_invalid(ce)) 4114 pin_guc_id(guc, ce); 4115 4116 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4117 guc_context_init(ce); 4118 4119 try_context_registration(ce, true); 4120 } 4121 4122 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 4123 { 4124 struct intel_gt *gt = guc_to_gt(guc); 4125 struct intel_engine_cs *engine; 4126 enum intel_engine_id id; 4127 4128 /* make sure all descriptors are clean... */ 4129 xa_destroy(&guc->context_lookup); 4130 4131 /* 4132 * A reset might have occurred while we had a pending stalled request, 4133 * so make sure we clean that up. 4134 */ 4135 guc->stalled_request = NULL; 4136 guc->submission_stall_reason = STALL_NONE; 4137 4138 /* 4139 * Some contexts might have been pinned before we enabled GuC 4140 * submission, so we need to add them to the GuC bookkeeping. 4141 * Also, after a reset of the GuC we want to make sure that the 4142 * information shared with GuC is properly reset. The kernel LRCs are 4143 * not attached to the gem_context, so they need to be added separately.
4144 */ 4145 for_each_engine(engine, gt, id) { 4146 struct intel_context *ce; 4147 4148 list_for_each_entry(ce, &engine->pinned_contexts_list, 4149 pinned_contexts_link) 4150 guc_kernel_context_pin(guc, ce); 4151 } 4152 } 4153 4154 static void guc_release(struct intel_engine_cs *engine) 4155 { 4156 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4157 4158 intel_engine_cleanup_common(engine); 4159 lrc_fini_wa_ctx(engine); 4160 } 4161 4162 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4163 { 4164 struct intel_engine_cs *e; 4165 intel_engine_mask_t tmp, mask = engine->mask; 4166 4167 for_each_engine_masked(e, engine->gt, mask, tmp) 4168 e->serial++; 4169 } 4170 4171 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4172 { 4173 /* Default vfuncs which can be overridden by each engine. */ 4174 4175 engine->resume = guc_resume; 4176 4177 engine->cops = &guc_context_ops; 4178 engine->request_alloc = guc_request_alloc; 4179 engine->add_active_request = add_to_context; 4180 engine->remove_active_request = remove_from_context; 4181 4182 engine->sched_engine->schedule = i915_schedule; 4183 4184 engine->reset.prepare = guc_engine_reset_prepare; 4185 engine->reset.rewind = guc_rewind_nop; 4186 engine->reset.cancel = guc_reset_nop; 4187 engine->reset.finish = guc_reset_nop; 4188 4189 engine->emit_flush = gen8_emit_flush_xcs; 4190 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4191 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4192 if (GRAPHICS_VER(engine->i915) >= 12) { 4193 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4194 engine->emit_flush = gen12_emit_flush_xcs; 4195 } 4196 engine->set_default_submission = guc_set_default_submission; 4197 engine->busyness = guc_engine_busyness; 4198 4199 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4200 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4201 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4202 4203 /* Wa_14014475959:dg2 */ 4204 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 4205 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4206 4207 /* 4208 * TODO: GuC supports timeslicing and semaphores as well, but they're 4209 * handled by the firmware so some minor tweaks are required before 4210 * enabling. 
4211 * 4212 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4213 */ 4214 4215 engine->emit_bb_start = gen8_emit_bb_start; 4216 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4217 engine->emit_bb_start = xehp_emit_bb_start; 4218 } 4219 4220 static void rcs_submission_override(struct intel_engine_cs *engine) 4221 { 4222 switch (GRAPHICS_VER(engine->i915)) { 4223 case 12: 4224 engine->emit_flush = gen12_emit_flush_rcs; 4225 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4226 break; 4227 case 11: 4228 engine->emit_flush = gen11_emit_flush_rcs; 4229 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4230 break; 4231 default: 4232 engine->emit_flush = gen8_emit_flush_rcs; 4233 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4234 break; 4235 } 4236 } 4237 4238 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4239 { 4240 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4241 intel_engine_set_irq_handler(engine, cs_irq_handler); 4242 } 4243 4244 static void guc_sched_engine_destroy(struct kref *kref) 4245 { 4246 struct i915_sched_engine *sched_engine = 4247 container_of(kref, typeof(*sched_engine), ref); 4248 struct intel_guc *guc = sched_engine->private_data; 4249 4250 guc->sched_engine = NULL; 4251 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4252 kfree(sched_engine); 4253 } 4254 4255 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4256 { 4257 struct drm_i915_private *i915 = engine->i915; 4258 struct intel_guc *guc = &engine->gt->uc.guc; 4259 4260 /* 4261 * The setup relies on several assumptions (e.g. irqs always enabled) 4262 * that are only valid on gen11+ 4263 */ 4264 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4265 4266 if (!guc->sched_engine) { 4267 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4268 if (!guc->sched_engine) 4269 return -ENOMEM; 4270 4271 guc->sched_engine->schedule = i915_schedule; 4272 guc->sched_engine->disabled = guc_sched_engine_disabled; 4273 guc->sched_engine->private_data = guc; 4274 guc->sched_engine->destroy = guc_sched_engine_destroy; 4275 guc->sched_engine->bump_inflight_request_prio = 4276 guc_bump_inflight_request_prio; 4277 guc->sched_engine->retire_inflight_request_prio = 4278 guc_retire_inflight_request_prio; 4279 tasklet_setup(&guc->sched_engine->tasklet, 4280 guc_submission_tasklet); 4281 } 4282 i915_sched_engine_put(engine->sched_engine); 4283 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4284 4285 guc_default_vfuncs(engine); 4286 guc_default_irqs(engine); 4287 guc_init_breadcrumbs(engine); 4288 4289 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4290 rcs_submission_override(engine); 4291 4292 lrc_init_wa_ctx(engine); 4293 4294 /* Finally, take ownership and responsibility for cleanup! 
*/ 4295 engine->sanitize = guc_sanitize; 4296 engine->release = guc_release; 4297 4298 return 0; 4299 } 4300 4301 struct scheduling_policy { 4302 /* internal data */ 4303 u32 max_words, num_words; 4304 u32 count; 4305 /* API data */ 4306 struct guc_update_scheduling_policy h2g; 4307 }; 4308 4309 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4310 { 4311 u32 *start = (void *)&policy->h2g; 4312 u32 *end = policy->h2g.data + policy->num_words; 4313 size_t delta = end - start; 4314 4315 return delta; 4316 } 4317 4318 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4319 { 4320 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4321 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4322 policy->num_words = 0; 4323 policy->count = 0; 4324 4325 return policy; 4326 } 4327 4328 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4329 u32 action, u32 *data, u32 len) 4330 { 4331 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4332 4333 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4334 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4335 FIELD_PREP(GUC_KLV_0_LEN, len); 4336 memcpy(klv_ptr, data, sizeof(u32) * len); 4337 policy->num_words += 1 + len; 4338 policy->count++; 4339 } 4340 4341 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4342 struct scheduling_policy *policy) 4343 { 4344 int ret; 4345 4346 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4347 __guc_scheduling_policy_action_size(policy)); 4348 if (ret < 0) 4349 return ret; 4350 4351 if (ret != policy->count) { 4352 drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!", 4353 ret, policy->count); 4354 if (ret > policy->count) 4355 return -EPROTO; 4356 } 4357 4358 return 0; 4359 } 4360 4361 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4362 { 4363 struct scheduling_policy policy; 4364 struct intel_gt *gt = guc_to_gt(guc); 4365 intel_wakeref_t wakeref; 4366 int ret = 0; 4367 4368 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) 4369 return 0; 4370 4371 __guc_scheduling_policy_start_klv(&policy); 4372 4373 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4374 u32 yield[] = { 4375 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4376 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4377 }; 4378 4379 __guc_scheduling_policy_add_klv(&policy, 4380 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4381 yield, ARRAY_SIZE(yield)); 4382 4383 ret = __guc_action_set_scheduling_policies(guc, &policy); 4384 if (ret) 4385 i915_probe_error(gt->i915, 4386 "Failed to configure global scheduling policies: %pe!\n", 4387 ERR_PTR(ret)); 4388 } 4389 4390 return ret; 4391 } 4392 4393 void intel_guc_submission_enable(struct intel_guc *guc) 4394 { 4395 struct intel_gt *gt = guc_to_gt(guc); 4396 4397 /* Enable and route to GuC */ 4398 if (GRAPHICS_VER(gt->i915) >= 12) 4399 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 4400 GUC_SEM_INTR_ROUTE_TO_GUC | 4401 GUC_SEM_INTR_ENABLE_ALL); 4402 4403 guc_init_lrc_mapping(guc); 4404 guc_init_engine_stats(guc); 4405 guc_init_global_schedule_policy(guc); 4406 } 4407 4408 void intel_guc_submission_disable(struct intel_guc *guc) 4409 { 4410 struct intel_gt *gt = guc_to_gt(guc); 4411 4412 /* Note: By the time we're here, GuC may have already been reset */ 4413 4414 /* Disable and route to host */ 4415 if (GRAPHICS_VER(gt->i915) >= 12) 4416 intel_uncore_write(gt->uncore, 
GEN12_GUC_SEM_INTR_ENABLES, 0x0); 4417 } 4418 4419 static bool __guc_submission_supported(struct intel_guc *guc) 4420 { 4421 /* GuC submission is unavailable for pre-Gen11 */ 4422 return intel_guc_is_supported(guc) && 4423 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4424 } 4425 4426 static bool __guc_submission_selected(struct intel_guc *guc) 4427 { 4428 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4429 4430 if (!intel_guc_submission_is_supported(guc)) 4431 return false; 4432 4433 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4434 } 4435 4436 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4437 { 4438 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4439 } 4440 4441 /* 4442 * This default of 34 milliseconds (1000 ms / 30 fps ~= 33.3 ms, rounded up) ensures 4443 * 30fps or higher workloads are able to enjoy the latency reduction when delaying 4444 * the schedule-disable operation. This matches the 30fps game-render + encode 4445 * (real world) workload this knob was tested against. 4446 */ 4447 #define SCHED_DISABLE_DELAY_MS 34 4448 4449 /* 4450 * A threshold of 75% is a reasonable starting point considering that real world apps 4451 * generally don't get anywhere near this. 4452 */ 4453 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4454 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4) 4455 4456 void intel_guc_submission_init_early(struct intel_guc *guc) 4457 { 4458 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4459 4460 spin_lock_init(&guc->submission_state.lock); 4461 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4462 ida_init(&guc->submission_state.guc_ids); 4463 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4464 INIT_WORK(&guc->submission_state.destroyed_worker, 4465 destroyed_worker_func); 4466 INIT_WORK(&guc->submission_state.reset_fail_worker, 4467 reset_fail_worker_func); 4468 4469 spin_lock_init(&guc->timestamp.lock); 4470 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4471 4472 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4473 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4474 guc->submission_state.sched_disable_gucid_threshold = 4475 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4476 guc->submission_supported = __guc_submission_supported(guc); 4477 guc->submission_selected = __guc_submission_selected(guc); 4478 } 4479 4480 static inline struct intel_context * 4481 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4482 { 4483 struct intel_context *ce; 4484 4485 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4486 drm_err(&guc_to_gt(guc)->i915->drm, 4487 "Invalid ctx_id %u\n", ctx_id); 4488 return NULL; 4489 } 4490 4491 ce = __get_context(guc, ctx_id); 4492 if (unlikely(!ce)) { 4493 drm_err(&guc_to_gt(guc)->i915->drm, 4494 "Context is NULL, ctx_id %u\n", ctx_id); 4495 return NULL; 4496 } 4497 4498 if (unlikely(intel_context_is_child(ce))) { 4499 drm_err(&guc_to_gt(guc)->i915->drm, 4500 "Context is child, ctx_id %u\n", ctx_id); 4501 return NULL; 4502 } 4503 4504 return ce; 4505 } 4506 4507 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4508 const u32 *msg, 4509 u32 len) 4510 { 4511 struct intel_context *ce; 4512 u32 ctx_id; 4513 4514 if (unlikely(len < 1)) { 4515 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4516 return -EPROTO; 4517 } 4518 ctx_id = msg[0]; 4519 4520 ce = g2h_context_lookup(guc, ctx_id); 4521 if (unlikely(!ce)) 4522 return -EPROTO; 4523 4524
trace_intel_context_deregister_done(ce); 4525 4526 #ifdef CONFIG_DRM_I915_SELFTEST 4527 if (unlikely(ce->drop_deregister)) { 4528 ce->drop_deregister = false; 4529 return 0; 4530 } 4531 #endif 4532 4533 if (context_wait_for_deregister_to_register(ce)) { 4534 struct intel_runtime_pm *runtime_pm = 4535 &ce->engine->gt->i915->runtime_pm; 4536 intel_wakeref_t wakeref; 4537 4538 /* 4539 * The previous owner of this guc_id has been deregistered, so it 4540 * is now safe to register this context. 4541 */ 4542 with_intel_runtime_pm(runtime_pm, wakeref) 4543 register_context(ce, true); 4544 guc_signal_context_fence(ce); 4545 intel_context_put(ce); 4546 } else if (context_destroyed(ce)) { 4547 /* Context has been destroyed */ 4548 intel_gt_pm_put_async(guc_to_gt(guc)); 4549 release_guc_id(guc, ce); 4550 __guc_context_destroy(ce); 4551 } 4552 4553 decr_outstanding_submission_g2h(guc); 4554 4555 return 0; 4556 } 4557 4558 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4559 const u32 *msg, 4560 u32 len) 4561 { 4562 struct intel_context *ce; 4563 unsigned long flags; 4564 u32 ctx_id; 4565 4566 if (unlikely(len < 2)) { 4567 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4568 return -EPROTO; 4569 } 4570 ctx_id = msg[0]; 4571 4572 ce = g2h_context_lookup(guc, ctx_id); 4573 if (unlikely(!ce)) 4574 return -EPROTO; 4575 4576 if (unlikely(context_destroyed(ce) || 4577 (!context_pending_enable(ce) && 4578 !context_pending_disable(ce)))) { 4579 drm_err(&guc_to_gt(guc)->i915->drm, 4580 "Bad context sched_state 0x%x, ctx_id %u\n", 4581 ce->guc_state.sched_state, ctx_id); 4582 return -EPROTO; 4583 } 4584 4585 trace_intel_context_sched_done(ce); 4586 4587 if (context_pending_enable(ce)) { 4588 #ifdef CONFIG_DRM_I915_SELFTEST 4589 if (unlikely(ce->drop_schedule_enable)) { 4590 ce->drop_schedule_enable = false; 4591 return 0; 4592 } 4593 #endif 4594 4595 spin_lock_irqsave(&ce->guc_state.lock, flags); 4596 clr_context_pending_enable(ce); 4597 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4598 } else if (context_pending_disable(ce)) { 4599 bool banned; 4600 4601 #ifdef CONFIG_DRM_I915_SELFTEST 4602 if (unlikely(ce->drop_schedule_disable)) { 4603 ce->drop_schedule_disable = false; 4604 return 0; 4605 } 4606 #endif 4607 4608 /* 4609 * Unpin must be done before __guc_signal_context_fence, 4610 * otherwise a race exists between the requests getting 4611 * submitted + retired before this unpin completes, resulting in 4612 * the pin_count going to zero and the context still being 4613 * enabled.
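 *
 * i.e. the required order below is: intel_context_sched_disable_unpin(),
 * then, under ce->guc_state.lock, clear the pending_disable / banned bits
 * and signal the blocked fences.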
4614 */ 4615 intel_context_sched_disable_unpin(ce); 4616 4617 spin_lock_irqsave(&ce->guc_state.lock, flags); 4618 banned = context_banned(ce); 4619 clr_context_banned(ce); 4620 clr_context_pending_disable(ce); 4621 __guc_signal_context_fence(ce); 4622 guc_blocked_fence_complete(ce); 4623 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4624 4625 if (banned) { 4626 guc_cancel_context_requests(ce); 4627 intel_engine_signal_breadcrumbs(ce->engine); 4628 } 4629 } 4630 4631 decr_outstanding_submission_g2h(guc); 4632 intel_context_put(ce); 4633 4634 return 0; 4635 } 4636 4637 static void capture_error_state(struct intel_guc *guc, 4638 struct intel_context *ce) 4639 { 4640 struct intel_gt *gt = guc_to_gt(guc); 4641 struct drm_i915_private *i915 = gt->i915; 4642 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4643 intel_wakeref_t wakeref; 4644 4645 intel_engine_set_hung_context(engine, ce); 4646 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4647 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4648 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4649 } 4650 4651 static void guc_context_replay(struct intel_context *ce) 4652 { 4653 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4654 4655 __guc_reset_context(ce, ce->engine->mask); 4656 tasklet_hi_schedule(&sched_engine->tasklet); 4657 } 4658 4659 static void guc_handle_context_reset(struct intel_guc *guc, 4660 struct intel_context *ce) 4661 { 4662 trace_intel_context_reset(ce); 4663 4664 if (likely(intel_context_is_schedulable(ce))) { 4665 capture_error_state(guc, ce); 4666 guc_context_replay(ce); 4667 } else { 4668 drm_info(&guc_to_gt(guc)->i915->drm, 4669 "Ignoring context reset notification of exiting context 0x%04X on %s", 4670 ce->guc_id.id, ce->engine->name); 4671 } 4672 } 4673 4674 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4675 const u32 *msg, u32 len) 4676 { 4677 struct intel_context *ce; 4678 unsigned long flags; 4679 int ctx_id; 4680 4681 if (unlikely(len != 1)) { 4682 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4683 return -EPROTO; 4684 } 4685 4686 ctx_id = msg[0]; 4687 4688 /* 4689 * The context lookup uses the xarray but lookups only require an RCU lock 4690 * not the full spinlock. So take the lock explicitly and keep it until the 4691 * context has been reference count locked to ensure it can't be destroyed 4692 * asynchronously until the reset is done. 
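 *
 * A similar pattern (lock the xarray, take a context reference, then drop
 * the lock before doing the real work) is used by
 * intel_guc_find_hung_context() and intel_guc_dump_active_requests()
 * further down.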
4693 */ 4694 xa_lock_irqsave(&guc->context_lookup, flags); 4695 ce = g2h_context_lookup(guc, ctx_id); 4696 if (ce) 4697 intel_context_get(ce); 4698 xa_unlock_irqrestore(&guc->context_lookup, flags); 4699 4700 if (unlikely(!ce)) 4701 return -EPROTO; 4702 4703 guc_handle_context_reset(guc, ce); 4704 intel_context_put(ce); 4705 4706 return 0; 4707 } 4708 4709 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4710 const u32 *msg, u32 len) 4711 { 4712 u32 status; 4713 4714 if (unlikely(len != 1)) { 4715 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4716 return -EPROTO; 4717 } 4718 4719 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4720 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4721 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4722 4723 intel_guc_capture_process(guc); 4724 4725 return 0; 4726 } 4727 4728 struct intel_engine_cs * 4729 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4730 { 4731 struct intel_gt *gt = guc_to_gt(guc); 4732 u8 engine_class = guc_class_to_engine_class(guc_class); 4733 4734 /* Class index is checked in class converter */ 4735 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4736 4737 return gt->engine_class[engine_class][instance]; 4738 } 4739 4740 static void reset_fail_worker_func(struct work_struct *w) 4741 { 4742 struct intel_guc *guc = container_of(w, struct intel_guc, 4743 submission_state.reset_fail_worker); 4744 struct intel_gt *gt = guc_to_gt(guc); 4745 intel_engine_mask_t reset_fail_mask; 4746 unsigned long flags; 4747 4748 spin_lock_irqsave(&guc->submission_state.lock, flags); 4749 reset_fail_mask = guc->submission_state.reset_fail_mask; 4750 guc->submission_state.reset_fail_mask = 0; 4751 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4752 4753 if (likely(reset_fail_mask)) 4754 intel_gt_handle_error(gt, reset_fail_mask, 4755 I915_ERROR_CAPTURE, 4756 "GuC failed to reset engine mask=0x%x\n", 4757 reset_fail_mask); 4758 } 4759 4760 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4761 const u32 *msg, u32 len) 4762 { 4763 struct intel_engine_cs *engine; 4764 struct intel_gt *gt = guc_to_gt(guc); 4765 u8 guc_class, instance; 4766 u32 reason; 4767 unsigned long flags; 4768 4769 if (unlikely(len != 3)) { 4770 drm_err(>->i915->drm, "Invalid length %u", len); 4771 return -EPROTO; 4772 } 4773 4774 guc_class = msg[0]; 4775 instance = msg[1]; 4776 reason = msg[2]; 4777 4778 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4779 if (unlikely(!engine)) { 4780 drm_err(>->i915->drm, 4781 "Invalid engine %d:%d", guc_class, instance); 4782 return -EPROTO; 4783 } 4784 4785 /* 4786 * This is an unexpected failure of a hardware feature. So, log a real 4787 * error message not just the informational that comes with the reset. 4788 */ 4789 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4790 guc_class, instance, engine->name, reason); 4791 4792 spin_lock_irqsave(&guc->submission_state.lock, flags); 4793 guc->submission_state.reset_fail_mask |= engine->mask; 4794 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4795 4796 /* 4797 * A GT reset flushes this worker queue (G2H handler) so we must use 4798 * another worker to trigger a GT reset. 
4799 */ 4800 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4801 4802 return 0; 4803 } 4804 4805 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4806 { 4807 struct intel_guc *guc = &engine->gt->uc.guc; 4808 struct intel_context *ce; 4809 struct i915_request *rq; 4810 unsigned long index; 4811 unsigned long flags; 4812 4813 /* Reset called during driver load? GuC not yet initialised! */ 4814 if (unlikely(!guc_submission_initialized(guc))) 4815 return; 4816 4817 xa_lock_irqsave(&guc->context_lookup, flags); 4818 xa_for_each(&guc->context_lookup, index, ce) { 4819 if (!kref_get_unless_zero(&ce->ref)) 4820 continue; 4821 4822 xa_unlock(&guc->context_lookup); 4823 4824 if (!intel_context_is_pinned(ce)) 4825 goto next; 4826 4827 if (intel_engine_is_virtual(ce->engine)) { 4828 if (!(ce->engine->mask & engine->mask)) 4829 goto next; 4830 } else { 4831 if (ce->engine != engine) 4832 goto next; 4833 } 4834 4835 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4836 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4837 continue; 4838 4839 intel_engine_set_hung_context(engine, ce); 4840 4841 /* Can only cope with one hang at a time... */ 4842 intel_context_put(ce); 4843 xa_lock(&guc->context_lookup); 4844 goto done; 4845 } 4846 next: 4847 intel_context_put(ce); 4848 xa_lock(&guc->context_lookup); 4849 } 4850 done: 4851 xa_unlock_irqrestore(&guc->context_lookup, flags); 4852 } 4853 4854 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4855 struct i915_request *hung_rq, 4856 struct drm_printer *m) 4857 { 4858 struct intel_guc *guc = &engine->gt->uc.guc; 4859 struct intel_context *ce; 4860 unsigned long index; 4861 unsigned long flags; 4862 4863 /* Reset called during driver load? GuC not yet initialised! 
*/ 4864 if (unlikely(!guc_submission_initialized(guc))) 4865 return; 4866 4867 xa_lock_irqsave(&guc->context_lookup, flags); 4868 xa_for_each(&guc->context_lookup, index, ce) { 4869 if (!kref_get_unless_zero(&ce->ref)) 4870 continue; 4871 4872 xa_unlock(&guc->context_lookup); 4873 4874 if (!intel_context_is_pinned(ce)) 4875 goto next; 4876 4877 if (intel_engine_is_virtual(ce->engine)) { 4878 if (!(ce->engine->mask & engine->mask)) 4879 goto next; 4880 } else { 4881 if (ce->engine != engine) 4882 goto next; 4883 } 4884 4885 spin_lock(&ce->guc_state.lock); 4886 intel_engine_dump_active_requests(&ce->guc_state.requests, 4887 hung_rq, m); 4888 spin_unlock(&ce->guc_state.lock); 4889 4890 next: 4891 intel_context_put(ce); 4892 xa_lock(&guc->context_lookup); 4893 } 4894 xa_unlock_irqrestore(&guc->context_lookup, flags); 4895 } 4896 4897 void intel_guc_submission_print_info(struct intel_guc *guc, 4898 struct drm_printer *p) 4899 { 4900 struct i915_sched_engine *sched_engine = guc->sched_engine; 4901 struct rb_node *rb; 4902 unsigned long flags; 4903 4904 if (!sched_engine) 4905 return; 4906 4907 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4908 atomic_read(&guc->outstanding_submission_g2h)); 4909 drm_printf(p, "GuC tasklet count: %u\n", 4910 atomic_read(&sched_engine->tasklet.count)); 4911 4912 spin_lock_irqsave(&sched_engine->lock, flags); 4913 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4914 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4915 struct i915_priolist *pl = to_priolist(rb); 4916 struct i915_request *rq; 4917 4918 priolist_for_each_request(rq, pl) 4919 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4920 rq->context->guc_id.id, 4921 rq->fence.seqno); 4922 } 4923 spin_unlock_irqrestore(&sched_engine->lock, flags); 4924 drm_printf(p, "\n"); 4925 } 4926 4927 static inline void guc_log_context_priority(struct drm_printer *p, 4928 struct intel_context *ce) 4929 { 4930 int i; 4931 4932 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4933 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4934 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4935 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4936 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4937 i, ce->guc_state.prio_count[i]); 4938 } 4939 drm_printf(p, "\n"); 4940 } 4941 4942 static inline void guc_log_context(struct drm_printer *p, 4943 struct intel_context *ce) 4944 { 4945 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4946 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4947 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4948 ce->ring->head, 4949 ce->lrc_reg_state[CTX_RING_HEAD]); 4950 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4951 ce->ring->tail, 4952 ce->lrc_reg_state[CTX_RING_TAIL]); 4953 drm_printf(p, "\t\tContext Pin Count: %u\n", 4954 atomic_read(&ce->pin_count)); 4955 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4956 atomic_read(&ce->guc_id.ref)); 4957 drm_printf(p, "\t\tSchedule State: 0x%x\n", 4958 ce->guc_state.sched_state); 4959 } 4960 4961 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4962 struct drm_printer *p) 4963 { 4964 struct intel_context *ce; 4965 unsigned long index; 4966 unsigned long flags; 4967 4968 xa_lock_irqsave(&guc->context_lookup, flags); 4969 xa_for_each(&guc->context_lookup, index, ce) { 4970 GEM_BUG_ON(intel_context_is_child(ce)); 4971 4972 guc_log_context(p, ce); 4973 guc_log_context_priority(p, ce); 4974 4975 if (intel_context_is_parent(ce)) { 4976 struct 
intel_context *child; 4977 4978 drm_printf(p, "\t\tNumber children: %u\n", 4979 ce->parallel.number_children); 4980 4981 if (ce->parallel.guc.wq_status) { 4982 drm_printf(p, "\t\tWQI Head: %u\n", 4983 READ_ONCE(*ce->parallel.guc.wq_head)); 4984 drm_printf(p, "\t\tWQI Tail: %u\n", 4985 READ_ONCE(*ce->parallel.guc.wq_tail)); 4986 drm_printf(p, "\t\tWQI Status: %u\n", 4987 READ_ONCE(*ce->parallel.guc.wq_status)); 4988 } 4989 4990 if (ce->engine->emit_bb_start == 4991 emit_bb_start_parent_no_preempt_mid_batch) { 4992 u8 i; 4993 4994 drm_printf(p, "\t\tChildren Go: %u\n", 4995 get_children_go_value(ce)); 4996 for (i = 0; i < ce->parallel.number_children; ++i) 4997 drm_printf(p, "\t\tChildren Join: %u\n", 4998 get_children_join_value(ce, i)); 4999 } 5000 5001 for_each_child(ce, child) 5002 guc_log_context(p, child); 5003 } 5004 } 5005 xa_unlock_irqrestore(&guc->context_lookup, flags); 5006 } 5007 5008 static inline u32 get_children_go_addr(struct intel_context *ce) 5009 { 5010 GEM_BUG_ON(!intel_context_is_parent(ce)); 5011 5012 return i915_ggtt_offset(ce->state) + 5013 __get_parent_scratch_offset(ce) + 5014 offsetof(struct parent_scratch, go.semaphore); 5015 } 5016 5017 static inline u32 get_children_join_addr(struct intel_context *ce, 5018 u8 child_index) 5019 { 5020 GEM_BUG_ON(!intel_context_is_parent(ce)); 5021 5022 return i915_ggtt_offset(ce->state) + 5023 __get_parent_scratch_offset(ce) + 5024 offsetof(struct parent_scratch, join[child_index].semaphore); 5025 } 5026 5027 #define PARENT_GO_BB 1 5028 #define PARENT_GO_FINI_BREADCRUMB 0 5029 #define CHILD_GO_BB 1 5030 #define CHILD_GO_FINI_BREADCRUMB 0 5031 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5032 u64 offset, u32 len, 5033 const unsigned int flags) 5034 { 5035 struct intel_context *ce = rq->context; 5036 u32 *cs; 5037 u8 i; 5038 5039 GEM_BUG_ON(!intel_context_is_parent(ce)); 5040 5041 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5042 if (IS_ERR(cs)) 5043 return PTR_ERR(cs); 5044 5045 /* Wait on children */ 5046 for (i = 0; i < ce->parallel.number_children; ++i) { 5047 *cs++ = (MI_SEMAPHORE_WAIT | 5048 MI_SEMAPHORE_GLOBAL_GTT | 5049 MI_SEMAPHORE_POLL | 5050 MI_SEMAPHORE_SAD_EQ_SDD); 5051 *cs++ = PARENT_GO_BB; 5052 *cs++ = get_children_join_addr(ce, i); 5053 *cs++ = 0; 5054 } 5055 5056 /* Turn off preemption */ 5057 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5058 *cs++ = MI_NOOP; 5059 5060 /* Tell children go */ 5061 cs = gen8_emit_ggtt_write(cs, 5062 CHILD_GO_BB, 5063 get_children_go_addr(ce), 5064 0); 5065 5066 /* Jump to batch */ 5067 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5068 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 5069 *cs++ = lower_32_bits(offset); 5070 *cs++ = upper_32_bits(offset); 5071 *cs++ = MI_NOOP; 5072 5073 intel_ring_advance(rq, cs); 5074 5075 return 0; 5076 } 5077 5078 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 5079 u64 offset, u32 len, 5080 const unsigned int flags) 5081 { 5082 struct intel_context *ce = rq->context; 5083 struct intel_context *parent = intel_context_to_parent(ce); 5084 u32 *cs; 5085 5086 GEM_BUG_ON(!intel_context_is_child(ce)); 5087 5088 cs = intel_ring_begin(rq, 12); 5089 if (IS_ERR(cs)) 5090 return PTR_ERR(cs); 5091 5092 /* Signal parent */ 5093 cs = gen8_emit_ggtt_write(cs, 5094 PARENT_GO_BB, 5095 get_children_join_addr(parent, 5096 ce->parallel.child_index), 5097 0); 5098 5099 /* Wait on parent for go */ 5100 *cs++ = (MI_SEMAPHORE_WAIT | 5101 MI_SEMAPHORE_GLOBAL_GTT | 5102 MI_SEMAPHORE_POLL | 5103 MI_SEMAPHORE_SAD_EQ_SDD); 5104 *cs++ = CHILD_GO_BB; 5105 *cs++ = get_children_go_addr(parent); 5106 *cs++ = 0; 5107 5108 /* Turn off preemption */ 5109 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5110 5111 /* Jump to batch */ 5112 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5113 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 5114 *cs++ = lower_32_bits(offset); 5115 *cs++ = upper_32_bits(offset); 5116 5117 intel_ring_advance(rq, cs); 5118 5119 return 0; 5120 } 5121 5122 static u32 * 5123 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5124 u32 *cs) 5125 { 5126 struct intel_context *ce = rq->context; 5127 u8 i; 5128 5129 GEM_BUG_ON(!intel_context_is_parent(ce)); 5130 5131 /* Wait on children */ 5132 for (i = 0; i < ce->parallel.number_children; ++i) { 5133 *cs++ = (MI_SEMAPHORE_WAIT | 5134 MI_SEMAPHORE_GLOBAL_GTT | 5135 MI_SEMAPHORE_POLL | 5136 MI_SEMAPHORE_SAD_EQ_SDD); 5137 *cs++ = PARENT_GO_FINI_BREADCRUMB; 5138 *cs++ = get_children_join_addr(ce, i); 5139 *cs++ = 0; 5140 } 5141 5142 /* Turn on preemption */ 5143 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5144 *cs++ = MI_NOOP; 5145 5146 /* Tell children go */ 5147 cs = gen8_emit_ggtt_write(cs, 5148 CHILD_GO_FINI_BREADCRUMB, 5149 get_children_go_addr(ce), 5150 0); 5151 5152 return cs; 5153 } 5154 5155 /* 5156 * If this is true, a submission of multi-lrc requests had an error and the 5157 * requests need to be skipped. The front end (execbuf IOCTL) should've called 5158 * i915_request_skip which squashes the BB but we still need to emit the fini 5159 * breadcrumb seqno write. At this point we don't know how many of the 5160 * requests in the multi-lrc submission were generated so we can't do the 5161 * handshake between the parent and children (e.g. if 4 requests should be 5162 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 5163 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 5164 * has occurred on any of the requests in the submission / relationship.
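 *
 * For reference, the tail that is never skipped is accounted by
 * NON_SKIP_LEN below: the 4 dwords of the gen8_emit_ggtt_write() of the
 * fence seqno plus MI_USER_INTERRUPT and a MI_NOOP, which is exactly what
 * the GEM_BUG_ONs in the emitters verify.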
5165 */ 5166 static inline bool skip_handshake(struct i915_request *rq) 5167 { 5168 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5169 } 5170 5171 #define NON_SKIP_LEN 6 5172 static u32 * 5173 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5174 u32 *cs) 5175 { 5176 struct intel_context *ce = rq->context; 5177 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5178 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5179 5180 GEM_BUG_ON(!intel_context_is_parent(ce)); 5181 5182 if (unlikely(skip_handshake(rq))) { 5183 /* 5184 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5185 * the NON_SKIP_LEN comes from the length of the emits below. 5186 */ 5187 memset(cs, 0, sizeof(u32) * 5188 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5189 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5190 } else { 5191 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5192 } 5193 5194 /* Emit fini breadcrumb */ 5195 before_fini_breadcrumb_user_interrupt_cs = cs; 5196 cs = gen8_emit_ggtt_write(cs, 5197 rq->fence.seqno, 5198 i915_request_active_timeline(rq)->hwsp_offset, 5199 0); 5200 5201 /* User interrupt */ 5202 *cs++ = MI_USER_INTERRUPT; 5203 *cs++ = MI_NOOP; 5204 5205 /* Ensure our math for skip + emit is correct */ 5206 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5207 cs); 5208 GEM_BUG_ON(start_fini_breadcrumb_cs + 5209 ce->engine->emit_fini_breadcrumb_dw != cs); 5210 5211 rq->tail = intel_ring_offset(rq, cs); 5212 5213 return cs; 5214 } 5215 5216 static u32 * 5217 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5218 u32 *cs) 5219 { 5220 struct intel_context *ce = rq->context; 5221 struct intel_context *parent = intel_context_to_parent(ce); 5222 5223 GEM_BUG_ON(!intel_context_is_child(ce)); 5224 5225 /* Turn on preemption */ 5226 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5227 *cs++ = MI_NOOP; 5228 5229 /* Signal parent */ 5230 cs = gen8_emit_ggtt_write(cs, 5231 PARENT_GO_FINI_BREADCRUMB, 5232 get_children_join_addr(parent, 5233 ce->parallel.child_index), 5234 0); 5235 5236 /* Wait parent on for go */ 5237 *cs++ = (MI_SEMAPHORE_WAIT | 5238 MI_SEMAPHORE_GLOBAL_GTT | 5239 MI_SEMAPHORE_POLL | 5240 MI_SEMAPHORE_SAD_EQ_SDD); 5241 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5242 *cs++ = get_children_go_addr(parent); 5243 *cs++ = 0; 5244 5245 return cs; 5246 } 5247 5248 static u32 * 5249 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5250 u32 *cs) 5251 { 5252 struct intel_context *ce = rq->context; 5253 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5254 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5255 5256 GEM_BUG_ON(!intel_context_is_child(ce)); 5257 5258 if (unlikely(skip_handshake(rq))) { 5259 /* 5260 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5261 * the NON_SKIP_LEN comes from the length of the emits below. 
5262 */ 5263 memset(cs, 0, sizeof(u32) * 5264 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5265 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5266 } else { 5267 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5268 } 5269 5270 /* Emit fini breadcrumb */ 5271 before_fini_breadcrumb_user_interrupt_cs = cs; 5272 cs = gen8_emit_ggtt_write(cs, 5273 rq->fence.seqno, 5274 i915_request_active_timeline(rq)->hwsp_offset, 5275 0); 5276 5277 /* User interrupt */ 5278 *cs++ = MI_USER_INTERRUPT; 5279 *cs++ = MI_NOOP; 5280 5281 /* Ensure our math for skip + emit is correct */ 5282 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5283 cs); 5284 GEM_BUG_ON(start_fini_breadcrumb_cs + 5285 ce->engine->emit_fini_breadcrumb_dw != cs); 5286 5287 rq->tail = intel_ring_offset(rq, cs); 5288 5289 return cs; 5290 } 5291 5292 #undef NON_SKIP_LEN 5293 5294 static struct intel_context * 5295 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5296 unsigned long flags) 5297 { 5298 struct guc_virtual_engine *ve; 5299 struct intel_guc *guc; 5300 unsigned int n; 5301 int err; 5302 5303 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5304 if (!ve) 5305 return ERR_PTR(-ENOMEM); 5306 5307 guc = &siblings[0]->gt->uc.guc; 5308 5309 ve->base.i915 = siblings[0]->i915; 5310 ve->base.gt = siblings[0]->gt; 5311 ve->base.uncore = siblings[0]->uncore; 5312 ve->base.id = -1; 5313 5314 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5315 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5316 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5317 ve->base.saturated = ALL_ENGINES; 5318 5319 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5320 5321 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5322 5323 ve->base.cops = &virtual_guc_context_ops; 5324 ve->base.request_alloc = guc_request_alloc; 5325 ve->base.bump_serial = virtual_guc_bump_serial; 5326 5327 ve->base.submit_request = guc_submit_request; 5328 5329 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5330 5331 intel_context_init(&ve->context, &ve->base); 5332 5333 for (n = 0; n < count; n++) { 5334 struct intel_engine_cs *sibling = siblings[n]; 5335 5336 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5337 if (sibling->mask & ve->base.mask) { 5338 DRM_DEBUG("duplicate %s entry in load balancer\n", 5339 sibling->name); 5340 err = -EINVAL; 5341 goto err_put; 5342 } 5343 5344 ve->base.mask |= sibling->mask; 5345 ve->base.logical_mask |= sibling->logical_mask; 5346 5347 if (n != 0 && ve->base.class != sibling->class) { 5348 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5349 sibling->class, ve->base.class); 5350 err = -EINVAL; 5351 goto err_put; 5352 } else if (n == 0) { 5353 ve->base.class = sibling->class; 5354 ve->base.uabi_class = sibling->uabi_class; 5355 snprintf(ve->base.name, sizeof(ve->base.name), 5356 "v%dx%d", ve->base.class, count); 5357 ve->base.context_size = sibling->context_size; 5358 5359 ve->base.add_active_request = 5360 sibling->add_active_request; 5361 ve->base.remove_active_request = 5362 sibling->remove_active_request; 5363 ve->base.emit_bb_start = sibling->emit_bb_start; 5364 ve->base.emit_flush = sibling->emit_flush; 5365 ve->base.emit_init_breadcrumb = 5366 sibling->emit_init_breadcrumb; 5367 ve->base.emit_fini_breadcrumb = 5368 sibling->emit_fini_breadcrumb; 5369 ve->base.emit_fini_breadcrumb_dw = 5370 sibling->emit_fini_breadcrumb_dw; 5371 ve->base.breadcrumbs = 5372 intel_breadcrumbs_get(sibling->breadcrumbs); 5373 5374 
ve->base.flags |= sibling->flags; 5375 5376 ve->base.props.timeslice_duration_ms = 5377 sibling->props.timeslice_duration_ms; 5378 ve->base.props.preempt_timeout_ms = 5379 sibling->props.preempt_timeout_ms; 5380 } 5381 } 5382 5383 return &ve->context; 5384 5385 err_put: 5386 intel_context_put(&ve->context); 5387 return ERR_PTR(err); 5388 } 5389 5390 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5391 { 5392 struct intel_engine_cs *engine; 5393 intel_engine_mask_t tmp, mask = ve->mask; 5394 5395 for_each_engine_masked(engine, ve->gt, mask, tmp) 5396 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5397 return true; 5398 5399 return false; 5400 } 5401 5402 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5403 #include "selftest_guc.c" 5404 #include "selftest_guc_multi_lrc.c" 5405 #include "selftest_guc_hangcheck.c" 5406 #endif 5407