// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

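/*
 * Illustrative note (not part of the upstream driver): the sched_state
 * helpers above and below follow one pattern - the test helpers read
 * ce->guc_state.sched_state directly, while every setter/clearer asserts
 * that ce->guc_state.lock is held. A hypothetical caller flipping the
 * BANNED bit would therefore look roughly like:
 *
 *	spin_lock_irqsave(&ce->guc_state.lock, flags);
 *	set_context_banned(ce);
 *	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 *
 * so that all sched_state transitions stay serialized by the per-context
 * lock, per the locking rules in the DOC comment above.
 */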
static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in the work queue. We cache a value of the head
	 * pointer in the intel_context structure in order to reduce the
	 * number of accesses to shared GPU memory which may be across a
	 * PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request resubmitted after the context was banned.
	 */
	if (unlikely(!intel_context_is_schedulable(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state updating work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret;

	if (unlikely(!intel_context_is_schedulable(ce)))
		return 0;

	ret = __guc_wq_item_append(rq);
	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
	       !intel_context_is_schedulable(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but a
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with the above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after the I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	cancel_delayed_work_sync(&guc->timestamp.work);

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
		guc_update_engine_gt_clks(engine);
		engine->stats.guc.prev_total = 0;
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc->timestamp.last_stat_jiffies = jiffies;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id)
		guc_update_engine_gt_clks(engine);

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_timestamp_ping(struct work_struct *wrk)
{
	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int srcu, ret;

	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats. NB: can't actually block waiting
	 * for a reset to complete as the reset requires flushing out
	 * this worker thread if started. So waiting would deadlock.
	 */
	ret = intel_gt_reset_trylock(gt, &srcu);
	if (ret)
		return;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		__update_guc_busyness_stats(guc);

	intel_gt_reset_unlock(gt, srcu);

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
		offset,
		0,
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static void guc_init_engine_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
		int ret = guc_action_enable_usage_stats(guc);

		if (ret)
			drm_err(&gt->i915->drm,
				"Failed to enable usage stats: %d!\n", ret);
	}
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;

	if (!guc_submission_initialized(guc))
		return;

	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
	 * synchronously here.
	 */
	cancel_delayed_work_sync(&guc->timestamp.work);

	/*
	 * Before parking, we should sample engine busyness stats if we need to.
	 * We can skip it if we are less than half a ping from the last time we
	 * sampled the busyness stats.
	 */
	if (guc->timestamp.last_stat_jiffies &&
	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
			(guc->timestamp.ping_delay / 2)))
		return;

	__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;
	unsigned long flags;
	ktime_t unused;

	if (!guc_submission_initialized(guc))
		return;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	guc_update_pm_timestamp(guc, &unused);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static inline bool
submission_disabled(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	return unlikely(!sched_engine ||
			!__tasklet_is_enabled(&sched_engine->tasklet) ||
			intel_gt_is_wedged(guc_to_gt(guc)));
}

static void disable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);
		__tasklet_disable_sync_once(&sched_engine->tasklet);
		sched_engine->tasklet.callback = NULL;
	}
}

static void enable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&guc->sched_engine->lock, flags);
	sched_engine->tasklet.callback = guc_submission_tasklet;
	wmb();	/* Make sure callback visible */
	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
	    __tasklet_enable(&sched_engine->tasklet)) {
GEM_BUG_ON(!guc->ct.enabled); 1526 1527 /* And kick in case we missed a new request submission. */ 1528 tasklet_hi_schedule(&sched_engine->tasklet); 1529 } 1530 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1531 } 1532 1533 static void guc_flush_submissions(struct intel_guc *guc) 1534 { 1535 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1536 unsigned long flags; 1537 1538 spin_lock_irqsave(&sched_engine->lock, flags); 1539 spin_unlock_irqrestore(&sched_engine->lock, flags); 1540 } 1541 1542 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1543 1544 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1545 { 1546 if (unlikely(!guc_submission_initialized(guc))) { 1547 /* Reset called during driver load? GuC not yet initialised! */ 1548 return; 1549 } 1550 1551 intel_gt_park_heartbeats(guc_to_gt(guc)); 1552 disable_submission(guc); 1553 guc->interrupts.disable(guc); 1554 __reset_guc_busyness_stats(guc); 1555 1556 /* Flush IRQ handler */ 1557 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1558 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1559 1560 guc_flush_submissions(guc); 1561 guc_flush_destroyed_contexts(guc); 1562 flush_work(&guc->ct.requests.worker); 1563 1564 scrub_guc_desc_for_outstanding_g2h(guc); 1565 } 1566 1567 static struct intel_engine_cs * 1568 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1569 { 1570 struct intel_engine_cs *engine; 1571 intel_engine_mask_t tmp, mask = ve->mask; 1572 unsigned int num_siblings = 0; 1573 1574 for_each_engine_masked(engine, ve->gt, mask, tmp) 1575 if (num_siblings++ == sibling) 1576 return engine; 1577 1578 return NULL; 1579 } 1580 1581 static inline struct intel_engine_cs * 1582 __context_to_physical_engine(struct intel_context *ce) 1583 { 1584 struct intel_engine_cs *engine = ce->engine; 1585 1586 if (intel_engine_is_virtual(engine)) 1587 engine = guc_virtual_get_sibling(engine, 0); 1588 1589 return engine; 1590 } 1591 1592 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1593 { 1594 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1595 1596 if (!intel_context_is_schedulable(ce)) 1597 return; 1598 1599 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1600 1601 /* 1602 * We want a simple context + ring to execute the breadcrumb update. 1603 * We cannot rely on the context being intact across the GPU hang, 1604 * so clear it and rebuild just what we need for the breadcrumb. 1605 * All pending requests for this context will be zapped, and any 1606 * future request will be after userspace has had the opportunity 1607 * to recreate its own state. 1608 */ 1609 if (scrub) 1610 lrc_init_regs(ce, engine, true); 1611 1612 /* Rerun the request; its payload has been neutered (if guilty). 
*/ 1613 lrc_update_regs(ce, engine, head); 1614 } 1615 1616 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1617 { 1618 if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) 1619 return; 1620 1621 intel_engine_stop_cs(engine); 1622 1623 /* 1624 * Wa_22011802037: In addition to stopping the cs, we need 1625 * to wait for any pending mi force wakeups 1626 */ 1627 intel_engine_wait_for_pending_mi_fw(engine); 1628 } 1629 1630 static void guc_reset_nop(struct intel_engine_cs *engine) 1631 { 1632 } 1633 1634 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1635 { 1636 } 1637 1638 static void 1639 __unwind_incomplete_requests(struct intel_context *ce) 1640 { 1641 struct i915_request *rq, *rn; 1642 struct list_head *pl; 1643 int prio = I915_PRIORITY_INVALID; 1644 struct i915_sched_engine * const sched_engine = 1645 ce->engine->sched_engine; 1646 unsigned long flags; 1647 1648 spin_lock_irqsave(&sched_engine->lock, flags); 1649 spin_lock(&ce->guc_state.lock); 1650 list_for_each_entry_safe_reverse(rq, rn, 1651 &ce->guc_state.requests, 1652 sched.link) { 1653 if (i915_request_completed(rq)) 1654 continue; 1655 1656 list_del_init(&rq->sched.link); 1657 __i915_request_unsubmit(rq); 1658 1659 /* Push the request back into the queue for later resubmission. */ 1660 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1661 if (rq_prio(rq) != prio) { 1662 prio = rq_prio(rq); 1663 pl = i915_sched_lookup_priolist(sched_engine, prio); 1664 } 1665 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1666 1667 list_add(&rq->sched.link, pl); 1668 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1669 } 1670 spin_unlock(&ce->guc_state.lock); 1671 spin_unlock_irqrestore(&sched_engine->lock, flags); 1672 } 1673 1674 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1675 { 1676 bool guilty; 1677 struct i915_request *rq; 1678 unsigned long flags; 1679 u32 head; 1680 int i, number_children = ce->parallel.number_children; 1681 struct intel_context *parent = ce; 1682 1683 GEM_BUG_ON(intel_context_is_child(ce)); 1684 1685 intel_context_get(ce); 1686 1687 /* 1688 * GuC will implicitly mark the context as non-schedulable when it sends 1689 * the reset notification. Make sure our state reflects this change. The 1690 * context will be marked enabled on resubmission. 
1691 */ 1692 spin_lock_irqsave(&ce->guc_state.lock, flags); 1693 clr_context_enabled(ce); 1694 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1695 1696 /* 1697 * For each context in the relationship find the hanging request 1698 * resetting each context / request as needed 1699 */ 1700 for (i = 0; i < number_children + 1; ++i) { 1701 if (!intel_context_is_pinned(ce)) 1702 goto next_context; 1703 1704 guilty = false; 1705 rq = intel_context_find_active_request(ce); 1706 if (!rq) { 1707 head = ce->ring->tail; 1708 goto out_replay; 1709 } 1710 1711 if (i915_request_started(rq)) 1712 guilty = stalled & ce->engine->mask; 1713 1714 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1715 head = intel_ring_wrap(ce->ring, rq->head); 1716 1717 __i915_request_reset(rq, guilty); 1718 out_replay: 1719 guc_reset_state(ce, head, guilty); 1720 next_context: 1721 if (i != number_children) 1722 ce = list_next_entry(ce, parallel.child_link); 1723 } 1724 1725 __unwind_incomplete_requests(parent); 1726 intel_context_put(parent); 1727 } 1728 1729 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1730 { 1731 struct intel_context *ce; 1732 unsigned long index; 1733 unsigned long flags; 1734 1735 if (unlikely(!guc_submission_initialized(guc))) { 1736 /* Reset called during driver load? GuC not yet initialised! */ 1737 return; 1738 } 1739 1740 xa_lock_irqsave(&guc->context_lookup, flags); 1741 xa_for_each(&guc->context_lookup, index, ce) { 1742 if (!kref_get_unless_zero(&ce->ref)) 1743 continue; 1744 1745 xa_unlock(&guc->context_lookup); 1746 1747 if (intel_context_is_pinned(ce) && 1748 !intel_context_is_child(ce)) 1749 __guc_reset_context(ce, stalled); 1750 1751 intel_context_put(ce); 1752 1753 xa_lock(&guc->context_lookup); 1754 } 1755 xa_unlock_irqrestore(&guc->context_lookup, flags); 1756 1757 /* GuC is blown away, drop all references to contexts */ 1758 xa_destroy(&guc->context_lookup); 1759 } 1760 1761 static void guc_cancel_context_requests(struct intel_context *ce) 1762 { 1763 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1764 struct i915_request *rq; 1765 unsigned long flags; 1766 1767 /* Mark all executing requests as skipped. */ 1768 spin_lock_irqsave(&sched_engine->lock, flags); 1769 spin_lock(&ce->guc_state.lock); 1770 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1771 i915_request_put(i915_request_mark_eio(rq)); 1772 spin_unlock(&ce->guc_state.lock); 1773 spin_unlock_irqrestore(&sched_engine->lock, flags); 1774 } 1775 1776 static void 1777 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1778 { 1779 struct i915_request *rq, *rn; 1780 struct rb_node *rb; 1781 unsigned long flags; 1782 1783 /* Can be called during boot if GuC fails to load */ 1784 if (!sched_engine) 1785 return; 1786 1787 /* 1788 * Before we call engine->cancel_requests(), we should have exclusive 1789 * access to the submission state. This is arranged for us by the 1790 * caller disabling the interrupt generation, the tasklet and other 1791 * threads that may then access the same state, giving us a free hand 1792 * to reset state. However, we still need to let lockdep be aware that 1793 * we know this state may be accessed in hardirq context, so we 1794 * disable the irq around this manipulation and we want to keep 1795 * the spinlock focused on its duties and not accidentally conflate 1796 * coverage to the submission's irq state. 
(Similarly, although we 1797 * shouldn't need to disable irq around the manipulation of the 1798 * submission's irq state, we also wish to remind ourselves that 1799 * it is irq state.) 1800 */ 1801 spin_lock_irqsave(&sched_engine->lock, flags); 1802 1803 /* Flush the queued requests to the timeline list (for retiring). */ 1804 while ((rb = rb_first_cached(&sched_engine->queue))) { 1805 struct i915_priolist *p = to_priolist(rb); 1806 1807 priolist_for_each_request_consume(rq, rn, p) { 1808 list_del_init(&rq->sched.link); 1809 1810 __i915_request_submit(rq); 1811 1812 i915_request_put(i915_request_mark_eio(rq)); 1813 } 1814 1815 rb_erase_cached(&p->node, &sched_engine->queue); 1816 i915_priolist_free(p); 1817 } 1818 1819 /* Remaining _unready_ requests will be nop'ed when submitted */ 1820 1821 sched_engine->queue_priority_hint = INT_MIN; 1822 sched_engine->queue = RB_ROOT_CACHED; 1823 1824 spin_unlock_irqrestore(&sched_engine->lock, flags); 1825 } 1826 1827 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1828 { 1829 struct intel_context *ce; 1830 unsigned long index; 1831 unsigned long flags; 1832 1833 xa_lock_irqsave(&guc->context_lookup, flags); 1834 xa_for_each(&guc->context_lookup, index, ce) { 1835 if (!kref_get_unless_zero(&ce->ref)) 1836 continue; 1837 1838 xa_unlock(&guc->context_lookup); 1839 1840 if (intel_context_is_pinned(ce) && 1841 !intel_context_is_child(ce)) 1842 guc_cancel_context_requests(ce); 1843 1844 intel_context_put(ce); 1845 1846 xa_lock(&guc->context_lookup); 1847 } 1848 xa_unlock_irqrestore(&guc->context_lookup, flags); 1849 1850 guc_cancel_sched_engine_requests(guc->sched_engine); 1851 1852 /* GuC is blown away, drop all references to contexts */ 1853 xa_destroy(&guc->context_lookup); 1854 } 1855 1856 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1857 { 1858 /* Reset called during driver load or during wedge? */ 1859 if (unlikely(!guc_submission_initialized(guc) || 1860 intel_gt_is_wedged(guc_to_gt(guc)))) { 1861 return; 1862 } 1863 1864 /* 1865 * Technically possible for either of these values to be non-zero here, 1866 * but very unlikely + harmless. Regardless let's add a warn so we can 1867 * see in CI if this happens frequently / a precursor to taking down the 1868 * machine. 1869 */ 1870 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1871 atomic_set(&guc->outstanding_submission_g2h, 0); 1872 1873 intel_guc_global_policies_update(guc); 1874 enable_submission(guc); 1875 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1876 } 1877 1878 static void destroyed_worker_func(struct work_struct *w); 1879 static void reset_fail_worker_func(struct work_struct *w); 1880 1881 /* 1882 * Set up the memory resources to be shared with the GuC (via the GGTT) 1883 * at firmware loading time. 
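 *
 * Concretely, the function below allocates the legacy (pre-1.0, v69 GuC
 * interface) LRC descriptor pool, the bitmap used to hand out contiguous
 * multi-lrc guc_id blocks, and the GuC timestamp ping parameters.
 * Illustrative pairing (both entry points are guarded by
 * guc->submission_initialized, so calling either twice is a no-op):
 *
 *	err = intel_guc_submission_init(guc);	/* idempotent */
 *	...
 *	intel_guc_submission_fini(guc);		/* no-op if init never ran */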
1884 */ 1885 int intel_guc_submission_init(struct intel_guc *guc) 1886 { 1887 struct intel_gt *gt = guc_to_gt(guc); 1888 int ret; 1889 1890 if (guc->submission_initialized) 1891 return 0; 1892 1893 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 1894 ret = guc_lrc_desc_pool_create_v69(guc); 1895 if (ret) 1896 return ret; 1897 } 1898 1899 guc->submission_state.guc_ids_bitmap = 1900 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1901 if (!guc->submission_state.guc_ids_bitmap) { 1902 ret = -ENOMEM; 1903 goto destroy_pool; 1904 } 1905 1906 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1907 guc->timestamp.shift = gpm_timestamp_shift(gt); 1908 guc->submission_initialized = true; 1909 1910 return 0; 1911 1912 destroy_pool: 1913 guc_lrc_desc_pool_destroy_v69(guc); 1914 1915 return ret; 1916 } 1917 1918 void intel_guc_submission_fini(struct intel_guc *guc) 1919 { 1920 if (!guc->submission_initialized) 1921 return; 1922 1923 guc_flush_destroyed_contexts(guc); 1924 guc_lrc_desc_pool_destroy_v69(guc); 1925 i915_sched_engine_put(guc->sched_engine); 1926 bitmap_free(guc->submission_state.guc_ids_bitmap); 1927 guc->submission_initialized = false; 1928 } 1929 1930 static inline void queue_request(struct i915_sched_engine *sched_engine, 1931 struct i915_request *rq, 1932 int prio) 1933 { 1934 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1935 list_add_tail(&rq->sched.link, 1936 i915_sched_lookup_priolist(sched_engine, prio)); 1937 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1938 tasklet_hi_schedule(&sched_engine->tasklet); 1939 } 1940 1941 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1942 struct i915_request *rq) 1943 { 1944 int ret = 0; 1945 1946 __i915_request_submit(rq); 1947 1948 trace_i915_request_in(rq, 0); 1949 1950 if (is_multi_lrc_rq(rq)) { 1951 if (multi_lrc_submit(rq)) { 1952 ret = guc_wq_item_append(guc, rq); 1953 if (!ret) 1954 ret = guc_add_request(guc, rq); 1955 } 1956 } else { 1957 guc_set_lrc_tail(rq); 1958 ret = guc_add_request(guc, rq); 1959 } 1960 1961 if (unlikely(ret == -EPIPE)) 1962 disable_submission(guc); 1963 1964 return ret; 1965 } 1966 1967 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1968 { 1969 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1970 struct intel_context *ce = request_to_scheduling_context(rq); 1971 1972 return submission_disabled(guc) || guc->stalled_request || 1973 !i915_sched_engine_is_empty(sched_engine) || 1974 !ctx_id_mapped(guc, ce->guc_id.id); 1975 } 1976 1977 static void guc_submit_request(struct i915_request *rq) 1978 { 1979 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1980 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1981 unsigned long flags; 1982 1983 /* Will be called from irq-context when using foreign fences. 
*/ 1984 spin_lock_irqsave(&sched_engine->lock, flags); 1985 1986 if (need_tasklet(guc, rq)) 1987 queue_request(sched_engine, rq, rq_prio(rq)); 1988 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1989 tasklet_hi_schedule(&sched_engine->tasklet); 1990 1991 spin_unlock_irqrestore(&sched_engine->lock, flags); 1992 } 1993 1994 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1995 { 1996 int ret; 1997 1998 GEM_BUG_ON(intel_context_is_child(ce)); 1999 2000 if (intel_context_is_parent(ce)) 2001 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2002 NUMBER_MULTI_LRC_GUC_ID(guc), 2003 order_base_2(ce->parallel.number_children 2004 + 1)); 2005 else 2006 ret = ida_simple_get(&guc->submission_state.guc_ids, 2007 NUMBER_MULTI_LRC_GUC_ID(guc), 2008 guc->submission_state.num_guc_ids, 2009 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2010 __GFP_NOWARN); 2011 if (unlikely(ret < 0)) 2012 return ret; 2013 2014 if (!intel_context_is_parent(ce)) 2015 ++guc->submission_state.guc_ids_in_use; 2016 2017 ce->guc_id.id = ret; 2018 return 0; 2019 } 2020 2021 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2022 { 2023 GEM_BUG_ON(intel_context_is_child(ce)); 2024 2025 if (!context_guc_id_invalid(ce)) { 2026 if (intel_context_is_parent(ce)) { 2027 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2028 ce->guc_id.id, 2029 order_base_2(ce->parallel.number_children 2030 + 1)); 2031 } else { 2032 --guc->submission_state.guc_ids_in_use; 2033 ida_simple_remove(&guc->submission_state.guc_ids, 2034 ce->guc_id.id); 2035 } 2036 clr_ctx_id_mapping(guc, ce->guc_id.id); 2037 set_context_guc_id_invalid(ce); 2038 } 2039 if (!list_empty(&ce->guc_id.link)) 2040 list_del_init(&ce->guc_id.link); 2041 } 2042 2043 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2044 { 2045 unsigned long flags; 2046 2047 spin_lock_irqsave(&guc->submission_state.lock, flags); 2048 __release_guc_id(guc, ce); 2049 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2050 } 2051 2052 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2053 { 2054 struct intel_context *cn; 2055 2056 lockdep_assert_held(&guc->submission_state.lock); 2057 GEM_BUG_ON(intel_context_is_child(ce)); 2058 GEM_BUG_ON(intel_context_is_parent(ce)); 2059 2060 if (!list_empty(&guc->submission_state.guc_id_list)) { 2061 cn = list_first_entry(&guc->submission_state.guc_id_list, 2062 struct intel_context, 2063 guc_id.link); 2064 2065 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2066 GEM_BUG_ON(context_guc_id_invalid(cn)); 2067 GEM_BUG_ON(intel_context_is_child(cn)); 2068 GEM_BUG_ON(intel_context_is_parent(cn)); 2069 2070 list_del_init(&cn->guc_id.link); 2071 ce->guc_id.id = cn->guc_id.id; 2072 2073 spin_lock(&cn->guc_state.lock); 2074 clr_context_registered(cn); 2075 spin_unlock(&cn->guc_state.lock); 2076 2077 set_context_guc_id_invalid(cn); 2078 2079 #ifdef CONFIG_DRM_I915_SELFTEST 2080 guc->number_guc_id_stolen++; 2081 #endif 2082 2083 return 0; 2084 } else { 2085 return -EAGAIN; 2086 } 2087 } 2088 2089 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2090 { 2091 int ret; 2092 2093 lockdep_assert_held(&guc->submission_state.lock); 2094 GEM_BUG_ON(intel_context_is_child(ce)); 2095 2096 ret = new_guc_id(guc, ce); 2097 if (unlikely(ret < 0)) { 2098 if (intel_context_is_parent(ce)) 2099 return -ENOSPC; 2100 2101 ret = steal_guc_id(guc, ce); 2102 if (ret < 0) 2103 return ret; 2104 } 2105 2106 if (intel_context_is_parent(ce)) { 2107 struct 
intel_context *child; 2108 int i = 1; 2109 2110 for_each_child(ce, child) 2111 child->guc_id.id = ce->guc_id.id + i++; 2112 } 2113 2114 return 0; 2115 } 2116 2117 #define PIN_GUC_ID_TRIES 4 2118 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2119 { 2120 int ret = 0; 2121 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2122 2123 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2124 2125 try_again: 2126 spin_lock_irqsave(&guc->submission_state.lock, flags); 2127 2128 might_lock(&ce->guc_state.lock); 2129 2130 if (context_guc_id_invalid(ce)) { 2131 ret = assign_guc_id(guc, ce); 2132 if (ret) 2133 goto out_unlock; 2134 ret = 1; /* Indidcates newly assigned guc_id */ 2135 } 2136 if (!list_empty(&ce->guc_id.link)) 2137 list_del_init(&ce->guc_id.link); 2138 atomic_inc(&ce->guc_id.ref); 2139 2140 out_unlock: 2141 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2142 2143 /* 2144 * -EAGAIN indicates no guc_id are available, let's retire any 2145 * outstanding requests to see if that frees up a guc_id. If the first 2146 * retire didn't help, insert a sleep with the timeslice duration before 2147 * attempting to retire more requests. Double the sleep period each 2148 * subsequent pass before finally giving up. The sleep period has max of 2149 * 100ms and minimum of 1ms. 2150 */ 2151 if (ret == -EAGAIN && --tries) { 2152 if (PIN_GUC_ID_TRIES - tries > 1) { 2153 unsigned int timeslice_shifted = 2154 ce->engine->props.timeslice_duration_ms << 2155 (PIN_GUC_ID_TRIES - tries - 2); 2156 unsigned int max = min_t(unsigned int, 100, 2157 timeslice_shifted); 2158 2159 msleep(max_t(unsigned int, max, 1)); 2160 } 2161 intel_gt_retire_requests(guc_to_gt(guc)); 2162 goto try_again; 2163 } 2164 2165 return ret; 2166 } 2167 2168 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2169 { 2170 unsigned long flags; 2171 2172 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2173 GEM_BUG_ON(intel_context_is_child(ce)); 2174 2175 if (unlikely(context_guc_id_invalid(ce) || 2176 intel_context_is_parent(ce))) 2177 return; 2178 2179 spin_lock_irqsave(&guc->submission_state.lock, flags); 2180 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2181 !atomic_read(&ce->guc_id.ref)) 2182 list_add_tail(&ce->guc_id.link, 2183 &guc->submission_state.guc_id_list); 2184 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2185 } 2186 2187 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2188 struct intel_context *ce, 2189 u32 guc_id, 2190 u32 offset, 2191 bool loop) 2192 { 2193 struct intel_context *child; 2194 u32 action[4 + MAX_ENGINE_INSTANCE]; 2195 int len = 0; 2196 2197 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2198 2199 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2200 action[len++] = guc_id; 2201 action[len++] = ce->parallel.number_children + 1; 2202 action[len++] = offset; 2203 for_each_child(ce, child) { 2204 offset += sizeof(struct guc_lrc_desc_v69); 2205 action[len++] = offset; 2206 } 2207 2208 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2209 } 2210 2211 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2212 struct intel_context *ce, 2213 struct guc_ctxt_registration_info *info, 2214 bool loop) 2215 { 2216 struct intel_context *child; 2217 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2218 int len = 0; 2219 u32 next_id; 2220 2221 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2222 2223 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 
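	/*
	 * Condensed view of the H2G payload assembled below, in the order the
	 * statements that follow append it: flags, context_idx (parent
	 * guc_id), engine_class, engine_submit_mask, wq_desc lo/hi, wq_base
	 * lo/hi, wq_size, number of contexts (children + 1), parent LRCA
	 * lo/hi, then one 64 bit LRCA (lo/hi) per child.
	 */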
2224 action[len++] = info->flags; 2225 action[len++] = info->context_idx; 2226 action[len++] = info->engine_class; 2227 action[len++] = info->engine_submit_mask; 2228 action[len++] = info->wq_desc_lo; 2229 action[len++] = info->wq_desc_hi; 2230 action[len++] = info->wq_base_lo; 2231 action[len++] = info->wq_base_hi; 2232 action[len++] = info->wq_size; 2233 action[len++] = ce->parallel.number_children + 1; 2234 action[len++] = info->hwlrca_lo; 2235 action[len++] = info->hwlrca_hi; 2236 2237 next_id = info->context_idx + 1; 2238 for_each_child(ce, child) { 2239 GEM_BUG_ON(next_id++ != child->guc_id.id); 2240 2241 /* 2242 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2243 * only supports 32 bit currently. 2244 */ 2245 action[len++] = lower_32_bits(child->lrc.lrca); 2246 action[len++] = upper_32_bits(child->lrc.lrca); 2247 } 2248 2249 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2250 2251 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2252 } 2253 2254 static int __guc_action_register_context_v69(struct intel_guc *guc, 2255 u32 guc_id, 2256 u32 offset, 2257 bool loop) 2258 { 2259 u32 action[] = { 2260 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2261 guc_id, 2262 offset, 2263 }; 2264 2265 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2266 0, loop); 2267 } 2268 2269 static int __guc_action_register_context_v70(struct intel_guc *guc, 2270 struct guc_ctxt_registration_info *info, 2271 bool loop) 2272 { 2273 u32 action[] = { 2274 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2275 info->flags, 2276 info->context_idx, 2277 info->engine_class, 2278 info->engine_submit_mask, 2279 info->wq_desc_lo, 2280 info->wq_desc_hi, 2281 info->wq_base_lo, 2282 info->wq_base_hi, 2283 info->wq_size, 2284 info->hwlrca_lo, 2285 info->hwlrca_hi, 2286 }; 2287 2288 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2289 0, loop); 2290 } 2291 2292 static void prepare_context_registration_info_v69(struct intel_context *ce); 2293 static void prepare_context_registration_info_v70(struct intel_context *ce, 2294 struct guc_ctxt_registration_info *info); 2295 2296 static int 2297 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2298 { 2299 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2300 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2301 2302 prepare_context_registration_info_v69(ce); 2303 2304 if (intel_context_is_parent(ce)) 2305 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2306 offset, loop); 2307 else 2308 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2309 offset, loop); 2310 } 2311 2312 static int 2313 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2314 { 2315 struct guc_ctxt_registration_info info; 2316 2317 prepare_context_registration_info_v70(ce, &info); 2318 2319 if (intel_context_is_parent(ce)) 2320 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2321 else 2322 return __guc_action_register_context_v70(guc, &info, loop); 2323 } 2324 2325 static int register_context(struct intel_context *ce, bool loop) 2326 { 2327 struct intel_guc *guc = ce_to_guc(ce); 2328 int ret; 2329 2330 GEM_BUG_ON(intel_context_is_child(ce)); 2331 trace_intel_context_register(ce); 2332 2333 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2334 ret = register_context_v70(guc, ce, loop); 2335 else 2336 ret = register_context_v69(guc, ce, loop); 2337 2338 if (likely(!ret)) { 2339 unsigned long flags; 2340 2341 spin_lock_irqsave(&ce->guc_state.lock, flags); 
2342 set_context_registered(ce); 2343 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2344 2345 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2346 guc_context_policy_init_v70(ce, loop); 2347 } 2348 2349 return ret; 2350 } 2351 2352 static int __guc_action_deregister_context(struct intel_guc *guc, 2353 u32 guc_id) 2354 { 2355 u32 action[] = { 2356 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2357 guc_id, 2358 }; 2359 2360 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2361 G2H_LEN_DW_DEREGISTER_CONTEXT, 2362 true); 2363 } 2364 2365 static int deregister_context(struct intel_context *ce, u32 guc_id) 2366 { 2367 struct intel_guc *guc = ce_to_guc(ce); 2368 2369 GEM_BUG_ON(intel_context_is_child(ce)); 2370 trace_intel_context_deregister(ce); 2371 2372 return __guc_action_deregister_context(guc, guc_id); 2373 } 2374 2375 static inline void clear_children_join_go_memory(struct intel_context *ce) 2376 { 2377 struct parent_scratch *ps = __get_parent_scratch(ce); 2378 int i; 2379 2380 ps->go.semaphore = 0; 2381 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2382 ps->join[i].semaphore = 0; 2383 } 2384 2385 static inline u32 get_children_go_value(struct intel_context *ce) 2386 { 2387 return __get_parent_scratch(ce)->go.semaphore; 2388 } 2389 2390 static inline u32 get_children_join_value(struct intel_context *ce, 2391 u8 child_index) 2392 { 2393 return __get_parent_scratch(ce)->join[child_index].semaphore; 2394 } 2395 2396 struct context_policy { 2397 u32 count; 2398 struct guc_update_context_policy h2g; 2399 }; 2400 2401 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2402 { 2403 size_t bytes = sizeof(policy->h2g.header) + 2404 (sizeof(policy->h2g.klv[0]) * policy->count); 2405 2406 return bytes / sizeof(u32); 2407 } 2408 2409 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2410 { 2411 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2412 policy->h2g.header.ctx_id = guc_id; 2413 policy->count = 0; 2414 } 2415 2416 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2417 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2418 { \ 2419 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2420 policy->h2g.klv[policy->count].kl = \ 2421 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2422 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2423 policy->h2g.klv[policy->count].value = data; \ 2424 policy->count++; \ 2425 } 2426 2427 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2428 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2429 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2430 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2431 2432 #undef MAKE_CONTEXT_POLICY_ADD 2433 2434 static int __guc_context_set_context_policies(struct intel_guc *guc, 2435 struct context_policy *policy, 2436 bool loop) 2437 { 2438 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2439 __guc_context_policy_action_size(policy), 2440 0, loop); 2441 } 2442 2443 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2444 { 2445 struct intel_engine_cs *engine = ce->engine; 2446 struct intel_guc *guc = &engine->gt->uc.guc; 2447 struct context_policy policy; 2448 u32 execution_quantum; 2449 u32 preemption_timeout; 2450 unsigned long flags; 2451 int ret; 2452 2453 /* NB: For both of these, zero means disabled. 
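 * (Worked example, purely illustrative values: a 10 ms timeslice and a
 * 640 ms preemption timeout are sent to the GuC as 10000 and 640000, i.e.
 * microseconds. Each policy added below is encoded by
 * MAKE_CONTEXT_POLICY_ADD() as a two-dword KLV, roughly:
 *
 *	.kl    = FIELD_PREP(GUC_KLV_0_KEY,
 *			    GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM) |
 *		 FIELD_PREP(GUC_KLV_0_LEN, 1),
 *	.value = 10000,
 * )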
*/ 2454 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2455 execution_quantum)); 2456 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2457 preemption_timeout)); 2458 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2459 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2460 2461 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2462 2463 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2464 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2465 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2466 2467 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2468 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2469 2470 ret = __guc_context_set_context_policies(guc, &policy, loop); 2471 2472 spin_lock_irqsave(&ce->guc_state.lock, flags); 2473 if (ret != 0) 2474 set_context_policy_required(ce); 2475 else 2476 clr_context_policy_required(ce); 2477 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2478 2479 return ret; 2480 } 2481 2482 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2483 struct guc_lrc_desc_v69 *desc) 2484 { 2485 desc->policy_flags = 0; 2486 2487 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2488 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2489 2490 /* NB: For both of these, zero means disabled. */ 2491 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2492 desc->execution_quantum)); 2493 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2494 desc->preemption_timeout)); 2495 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2496 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2497 } 2498 2499 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2500 { 2501 /* 2502 * This matches the mapping we do in map_i915_prio_to_guc_prio() 2503 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2504 */ 2505 switch (prio) { 2506 default: 2507 MISSING_CASE(prio); 2508 fallthrough; 2509 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2510 return GEN12_CTX_PRIORITY_NORMAL; 2511 case GUC_CLIENT_PRIORITY_NORMAL: 2512 return GEN12_CTX_PRIORITY_LOW; 2513 case GUC_CLIENT_PRIORITY_HIGH: 2514 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2515 return GEN12_CTX_PRIORITY_HIGH; 2516 } 2517 } 2518 2519 static void prepare_context_registration_info_v69(struct intel_context *ce) 2520 { 2521 struct intel_engine_cs *engine = ce->engine; 2522 struct intel_guc *guc = &engine->gt->uc.guc; 2523 u32 ctx_id = ce->guc_id.id; 2524 struct guc_lrc_desc_v69 *desc; 2525 struct intel_context *child; 2526 2527 GEM_BUG_ON(!engine->mask); 2528 2529 /* 2530 * Ensure the LRC + CT vmas are in the same region, as the write barrier 2531 * is done based on the CT vma region. 2532 */ 2533 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2534 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2535 2536 desc = __get_lrc_desc_v69(guc, ctx_id); 2537 GEM_BUG_ON(!desc); 2538 desc->engine_class = engine_class_to_guc_class(engine->class); 2539 desc->engine_submit_mask = engine->logical_mask; 2540 desc->hw_context_desc = ce->lrc.lrca; 2541 desc->priority = ce->guc_state.prio; 2542 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2543 guc_context_policy_init_v69(engine, desc); 2544 2545 /* 2546 * If the context is a parent, we need to register a process descriptor 2547 * describing a work queue and register all child contexts.
2548 */ 2549 if (intel_context_is_parent(ce)) { 2550 struct guc_process_desc_v69 *pdesc; 2551 2552 ce->parallel.guc.wqi_tail = 0; 2553 ce->parallel.guc.wqi_head = 0; 2554 2555 desc->process_desc = i915_ggtt_offset(ce->state) + 2556 __get_parent_scratch_offset(ce); 2557 desc->wq_addr = i915_ggtt_offset(ce->state) + 2558 __get_wq_offset(ce); 2559 desc->wq_size = WQ_SIZE; 2560 2561 pdesc = __get_process_desc_v69(ce); 2562 memset(pdesc, 0, sizeof(*(pdesc))); 2563 pdesc->stage_id = ce->guc_id.id; 2564 pdesc->wq_base_addr = desc->wq_addr; 2565 pdesc->wq_size_bytes = desc->wq_size; 2566 pdesc->wq_status = WQ_STATUS_ACTIVE; 2567 2568 ce->parallel.guc.wq_head = &pdesc->head; 2569 ce->parallel.guc.wq_tail = &pdesc->tail; 2570 ce->parallel.guc.wq_status = &pdesc->wq_status; 2571 2572 for_each_child(ce, child) { 2573 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2574 2575 desc->engine_class = 2576 engine_class_to_guc_class(engine->class); 2577 desc->hw_context_desc = child->lrc.lrca; 2578 desc->priority = ce->guc_state.prio; 2579 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2580 guc_context_policy_init_v69(engine, desc); 2581 } 2582 2583 clear_children_join_go_memory(ce); 2584 } 2585 } 2586 2587 static void prepare_context_registration_info_v70(struct intel_context *ce, 2588 struct guc_ctxt_registration_info *info) 2589 { 2590 struct intel_engine_cs *engine = ce->engine; 2591 struct intel_guc *guc = &engine->gt->uc.guc; 2592 u32 ctx_id = ce->guc_id.id; 2593 2594 GEM_BUG_ON(!engine->mask); 2595 2596 /* 2597 * Ensure LRC + CT vmas are is same region as write barrier is done 2598 * based on CT vma region. 2599 */ 2600 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2601 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2602 2603 memset(info, 0, sizeof(*info)); 2604 info->context_idx = ctx_id; 2605 info->engine_class = engine_class_to_guc_class(engine->class); 2606 info->engine_submit_mask = engine->logical_mask; 2607 /* 2608 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2609 * only supports 32 bit currently. 2610 */ 2611 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2612 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2613 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2614 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2615 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2616 2617 /* 2618 * If context is a parent, we need to register a process descriptor 2619 * describing a work queue and register all child contexts. 
2620 */ 2621 if (intel_context_is_parent(ce)) { 2622 struct guc_sched_wq_desc *wq_desc; 2623 u64 wq_desc_offset, wq_base_offset; 2624 2625 ce->parallel.guc.wqi_tail = 0; 2626 ce->parallel.guc.wqi_head = 0; 2627 2628 wq_desc_offset = i915_ggtt_offset(ce->state) + 2629 __get_parent_scratch_offset(ce); 2630 wq_base_offset = i915_ggtt_offset(ce->state) + 2631 __get_wq_offset(ce); 2632 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2633 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2634 info->wq_base_lo = lower_32_bits(wq_base_offset); 2635 info->wq_base_hi = upper_32_bits(wq_base_offset); 2636 info->wq_size = WQ_SIZE; 2637 2638 wq_desc = __get_wq_desc_v70(ce); 2639 memset(wq_desc, 0, sizeof(*wq_desc)); 2640 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2641 2642 ce->parallel.guc.wq_head = &wq_desc->head; 2643 ce->parallel.guc.wq_tail = &wq_desc->tail; 2644 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2645 2646 clear_children_join_go_memory(ce); 2647 } 2648 } 2649 2650 static int try_context_registration(struct intel_context *ce, bool loop) 2651 { 2652 struct intel_engine_cs *engine = ce->engine; 2653 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2654 struct intel_guc *guc = &engine->gt->uc.guc; 2655 intel_wakeref_t wakeref; 2656 u32 ctx_id = ce->guc_id.id; 2657 bool context_registered; 2658 int ret = 0; 2659 2660 GEM_BUG_ON(!sched_state_is_init(ce)); 2661 2662 context_registered = ctx_id_mapped(guc, ctx_id); 2663 2664 clr_ctx_id_mapping(guc, ctx_id); 2665 set_ctx_id_mapping(guc, ctx_id, ce); 2666 2667 /* 2668 * The context_lookup xarray is used to determine if the hardware 2669 * context is currently registered. There are two cases in which it 2670 * could be registered either the guc_id has been stolen from another 2671 * context or the lrc descriptor address of this context has changed. In 2672 * either case the context needs to be deregistered with the GuC before 2673 * registering this context. 2674 */ 2675 if (context_registered) { 2676 bool disabled; 2677 unsigned long flags; 2678 2679 trace_intel_context_steal_guc_id(ce); 2680 GEM_BUG_ON(!loop); 2681 2682 /* Seal race with Reset */ 2683 spin_lock_irqsave(&ce->guc_state.lock, flags); 2684 disabled = submission_disabled(guc); 2685 if (likely(!disabled)) { 2686 set_context_wait_for_deregister_to_register(ce); 2687 intel_context_get(ce); 2688 } 2689 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2690 if (unlikely(disabled)) { 2691 clr_ctx_id_mapping(guc, ctx_id); 2692 return 0; /* Will get registered later */ 2693 } 2694 2695 /* 2696 * If stealing the guc_id, this ce has the same guc_id as the 2697 * context whose guc_id was stolen. 
2698 */ 2699 with_intel_runtime_pm(runtime_pm, wakeref) 2700 ret = deregister_context(ce, ce->guc_id.id); 2701 if (unlikely(ret == -ENODEV)) 2702 ret = 0; /* Will get registered later */ 2703 } else { 2704 with_intel_runtime_pm(runtime_pm, wakeref) 2705 ret = register_context(ce, loop); 2706 if (unlikely(ret == -EBUSY)) { 2707 clr_ctx_id_mapping(guc, ctx_id); 2708 } else if (unlikely(ret == -ENODEV)) { 2709 clr_ctx_id_mapping(guc, ctx_id); 2710 ret = 0; /* Will get registered later */ 2711 } 2712 } 2713 2714 return ret; 2715 } 2716 2717 static int __guc_context_pre_pin(struct intel_context *ce, 2718 struct intel_engine_cs *engine, 2719 struct i915_gem_ww_ctx *ww, 2720 void **vaddr) 2721 { 2722 return lrc_pre_pin(ce, engine, ww, vaddr); 2723 } 2724 2725 static int __guc_context_pin(struct intel_context *ce, 2726 struct intel_engine_cs *engine, 2727 void *vaddr) 2728 { 2729 if (i915_ggtt_offset(ce->state) != 2730 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2731 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2732 2733 /* 2734 * GuC context gets pinned in guc_request_alloc. See that function for 2735 * explaination of why. 2736 */ 2737 2738 return lrc_pin(ce, engine, vaddr); 2739 } 2740 2741 static int guc_context_pre_pin(struct intel_context *ce, 2742 struct i915_gem_ww_ctx *ww, 2743 void **vaddr) 2744 { 2745 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2746 } 2747 2748 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2749 { 2750 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2751 2752 if (likely(!ret && !intel_context_is_barrier(ce))) 2753 intel_engine_pm_get(ce->engine); 2754 2755 return ret; 2756 } 2757 2758 static void guc_context_unpin(struct intel_context *ce) 2759 { 2760 struct intel_guc *guc = ce_to_guc(ce); 2761 2762 unpin_guc_id(guc, ce); 2763 lrc_unpin(ce); 2764 2765 if (likely(!intel_context_is_barrier(ce))) 2766 intel_engine_pm_put_async(ce->engine); 2767 } 2768 2769 static void guc_context_post_unpin(struct intel_context *ce) 2770 { 2771 lrc_post_unpin(ce); 2772 } 2773 2774 static void __guc_context_sched_enable(struct intel_guc *guc, 2775 struct intel_context *ce) 2776 { 2777 u32 action[] = { 2778 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2779 ce->guc_id.id, 2780 GUC_CONTEXT_ENABLE 2781 }; 2782 2783 trace_intel_context_sched_enable(ce); 2784 2785 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2786 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2787 } 2788 2789 static void __guc_context_sched_disable(struct intel_guc *guc, 2790 struct intel_context *ce, 2791 u16 guc_id) 2792 { 2793 u32 action[] = { 2794 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2795 guc_id, /* ce->guc_id.id not stable */ 2796 GUC_CONTEXT_DISABLE 2797 }; 2798 2799 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2800 2801 GEM_BUG_ON(intel_context_is_child(ce)); 2802 trace_intel_context_sched_disable(ce); 2803 2804 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2805 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2806 } 2807 2808 static void guc_blocked_fence_complete(struct intel_context *ce) 2809 { 2810 lockdep_assert_held(&ce->guc_state.lock); 2811 2812 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2813 i915_sw_fence_complete(&ce->guc_state.blocked); 2814 } 2815 2816 static void guc_blocked_fence_reinit(struct intel_context *ce) 2817 { 2818 lockdep_assert_held(&ce->guc_state.lock); 2819 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2820 2821 /* 2822 * This fence is always complete unless a pending schedule disable is 2823 * outstanding. 
We arm the fence here and complete it when we receive 2824 * the pending schedule disable complete message. 2825 */ 2826 i915_sw_fence_fini(&ce->guc_state.blocked); 2827 i915_sw_fence_reinit(&ce->guc_state.blocked); 2828 i915_sw_fence_await(&ce->guc_state.blocked); 2829 i915_sw_fence_commit(&ce->guc_state.blocked); 2830 } 2831 2832 static u16 prep_context_pending_disable(struct intel_context *ce) 2833 { 2834 lockdep_assert_held(&ce->guc_state.lock); 2835 2836 set_context_pending_disable(ce); 2837 clr_context_enabled(ce); 2838 guc_blocked_fence_reinit(ce); 2839 intel_context_get(ce); 2840 2841 return ce->guc_id.id; 2842 } 2843 2844 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2845 { 2846 struct intel_guc *guc = ce_to_guc(ce); 2847 unsigned long flags; 2848 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2849 intel_wakeref_t wakeref; 2850 u16 guc_id; 2851 bool enabled; 2852 2853 GEM_BUG_ON(intel_context_is_child(ce)); 2854 2855 spin_lock_irqsave(&ce->guc_state.lock, flags); 2856 2857 incr_context_blocked(ce); 2858 2859 enabled = context_enabled(ce); 2860 if (unlikely(!enabled || submission_disabled(guc))) { 2861 if (enabled) 2862 clr_context_enabled(ce); 2863 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2864 return &ce->guc_state.blocked; 2865 } 2866 2867 /* 2868 * We add +2 here as the schedule disable complete CTB handler calls 2869 * intel_context_sched_disable_unpin (-2 to pin_count). 2870 */ 2871 atomic_add(2, &ce->pin_count); 2872 2873 guc_id = prep_context_pending_disable(ce); 2874 2875 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2876 2877 with_intel_runtime_pm(runtime_pm, wakeref) 2878 __guc_context_sched_disable(guc, ce, guc_id); 2879 2880 return &ce->guc_state.blocked; 2881 } 2882 2883 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2884 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2885 #define SCHED_STATE_NO_UNBLOCK \ 2886 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2887 SCHED_STATE_PENDING_DISABLE | \ 2888 SCHED_STATE_BANNED) 2889 2890 static bool context_cant_unblock(struct intel_context *ce) 2891 { 2892 lockdep_assert_held(&ce->guc_state.lock); 2893 2894 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2895 context_guc_id_invalid(ce) || 2896 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2897 !intel_context_is_pinned(ce); 2898 } 2899 2900 static void guc_context_unblock(struct intel_context *ce) 2901 { 2902 struct intel_guc *guc = ce_to_guc(ce); 2903 unsigned long flags; 2904 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2905 intel_wakeref_t wakeref; 2906 bool enable; 2907 2908 GEM_BUG_ON(context_enabled(ce)); 2909 GEM_BUG_ON(intel_context_is_child(ce)); 2910 2911 spin_lock_irqsave(&ce->guc_state.lock, flags); 2912 2913 if (unlikely(submission_disabled(guc) || 2914 context_cant_unblock(ce))) { 2915 enable = false; 2916 } else { 2917 enable = true; 2918 set_context_pending_enable(ce); 2919 set_context_enabled(ce); 2920 intel_context_get(ce); 2921 } 2922 2923 decr_context_blocked(ce); 2924 2925 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2926 2927 if (enable) { 2928 with_intel_runtime_pm(runtime_pm, wakeref) 2929 __guc_context_sched_enable(guc, ce); 2930 } 2931 } 2932 2933 static void guc_context_cancel_request(struct intel_context *ce, 2934 struct i915_request *rq) 2935 { 2936 struct intel_context *block_context = 2937 request_to_scheduling_context(rq); 2938 2939 if (i915_sw_fence_signaled(&rq->submit)) { 2940 struct i915_sw_fence *fence; 2941 2942 intel_context_get(ce); 2943 fence = 
guc_context_block(block_context); 2944 i915_sw_fence_wait(fence); 2945 if (!i915_request_completed(rq)) { 2946 __i915_request_skip(rq); 2947 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2948 true); 2949 } 2950 2951 guc_context_unblock(block_context); 2952 intel_context_put(ce); 2953 } 2954 } 2955 2956 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2957 u16 guc_id, 2958 u32 preemption_timeout) 2959 { 2960 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 2961 struct context_policy policy; 2962 2963 __guc_context_policy_start_klv(&policy, guc_id); 2964 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2965 __guc_context_set_context_policies(guc, &policy, true); 2966 } else { 2967 u32 action[] = { 2968 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 2969 guc_id, 2970 preemption_timeout 2971 }; 2972 2973 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2974 } 2975 } 2976 2977 static void 2978 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 2979 unsigned int preempt_timeout_ms) 2980 { 2981 struct intel_guc *guc = ce_to_guc(ce); 2982 struct intel_runtime_pm *runtime_pm = 2983 &ce->engine->gt->i915->runtime_pm; 2984 intel_wakeref_t wakeref; 2985 unsigned long flags; 2986 2987 GEM_BUG_ON(intel_context_is_child(ce)); 2988 2989 guc_flush_submissions(guc); 2990 2991 spin_lock_irqsave(&ce->guc_state.lock, flags); 2992 set_context_banned(ce); 2993 2994 if (submission_disabled(guc) || 2995 (!context_enabled(ce) && !context_pending_disable(ce))) { 2996 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2997 2998 guc_cancel_context_requests(ce); 2999 intel_engine_signal_breadcrumbs(ce->engine); 3000 } else if (!context_pending_disable(ce)) { 3001 u16 guc_id; 3002 3003 /* 3004 * We add +2 here as the schedule disable complete CTB handler 3005 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3006 */ 3007 atomic_add(2, &ce->pin_count); 3008 3009 guc_id = prep_context_pending_disable(ce); 3010 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3011 3012 /* 3013 * In addition to disabling scheduling, set the preemption 3014 * timeout to the minimum value (1 us) so the banned context 3015 * gets kicked off the HW ASAP. 
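 * Both updates below go out under a single runtime-PM wakeref: first the
 * preemption-timeout policy (a KLV on v70+, the dedicated v69 action
 * otherwise, see __guc_context_set_preemption_timeout()), then the
 * schedule-disable H2G. The actual request cleanup happens once the
 * disable-done G2H comes back (handler not shown in this section).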
3016 */ 3017 with_intel_runtime_pm(runtime_pm, wakeref) { 3018 __guc_context_set_preemption_timeout(guc, guc_id, 3019 preempt_timeout_ms); 3020 __guc_context_sched_disable(guc, ce, guc_id); 3021 } 3022 } else { 3023 if (!context_guc_id_invalid(ce)) 3024 with_intel_runtime_pm(runtime_pm, wakeref) 3025 __guc_context_set_preemption_timeout(guc, 3026 ce->guc_id.id, 3027 preempt_timeout_ms); 3028 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3029 } 3030 } 3031 3032 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3033 unsigned long flags) 3034 __releases(ce->guc_state.lock) 3035 { 3036 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3037 intel_wakeref_t wakeref; 3038 u16 guc_id; 3039 3040 lockdep_assert_held(&ce->guc_state.lock); 3041 guc_id = prep_context_pending_disable(ce); 3042 3043 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3044 3045 with_intel_runtime_pm(runtime_pm, wakeref) 3046 __guc_context_sched_disable(guc, ce, guc_id); 3047 } 3048 3049 static bool bypass_sched_disable(struct intel_guc *guc, 3050 struct intel_context *ce) 3051 { 3052 lockdep_assert_held(&ce->guc_state.lock); 3053 GEM_BUG_ON(intel_context_is_child(ce)); 3054 3055 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3056 !ctx_id_mapped(guc, ce->guc_id.id)) { 3057 clr_context_enabled(ce); 3058 return true; 3059 } 3060 3061 return !context_enabled(ce); 3062 } 3063 3064 static void __delay_sched_disable(struct work_struct *wrk) 3065 { 3066 struct intel_context *ce = 3067 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3068 struct intel_guc *guc = ce_to_guc(ce); 3069 unsigned long flags; 3070 3071 spin_lock_irqsave(&ce->guc_state.lock, flags); 3072 3073 if (bypass_sched_disable(guc, ce)) { 3074 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3075 intel_context_sched_disable_unpin(ce); 3076 } else { 3077 do_sched_disable(guc, ce, flags); 3078 } 3079 } 3080 3081 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3082 { 3083 /* 3084 * parent contexts are perma-pinned, if we are unpinning do schedule 3085 * disable immediately. 3086 */ 3087 if (intel_context_is_parent(ce)) 3088 return true; 3089 3090 /* 3091 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
3092 */ 3093 return guc->submission_state.guc_ids_in_use > 3094 guc->submission_state.sched_disable_gucid_threshold; 3095 } 3096 3097 static void guc_context_sched_disable(struct intel_context *ce) 3098 { 3099 struct intel_guc *guc = ce_to_guc(ce); 3100 u64 delay = guc->submission_state.sched_disable_delay_ms; 3101 unsigned long flags; 3102 3103 spin_lock_irqsave(&ce->guc_state.lock, flags); 3104 3105 if (bypass_sched_disable(guc, ce)) { 3106 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3107 intel_context_sched_disable_unpin(ce); 3108 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3109 delay) { 3110 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3111 mod_delayed_work(system_unbound_wq, 3112 &ce->guc_state.sched_disable_delay_work, 3113 msecs_to_jiffies(delay)); 3114 } else { 3115 do_sched_disable(guc, ce, flags); 3116 } 3117 } 3118 3119 static void guc_context_close(struct intel_context *ce) 3120 { 3121 unsigned long flags; 3122 3123 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3124 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3125 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3126 3127 spin_lock_irqsave(&ce->guc_state.lock, flags); 3128 set_context_close_done(ce); 3129 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3130 } 3131 3132 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3133 { 3134 struct intel_guc *guc = ce_to_guc(ce); 3135 struct intel_gt *gt = guc_to_gt(guc); 3136 unsigned long flags; 3137 bool disabled; 3138 3139 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3140 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3141 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3142 GEM_BUG_ON(context_enabled(ce)); 3143 3144 /* Seal race with Reset */ 3145 spin_lock_irqsave(&ce->guc_state.lock, flags); 3146 disabled = submission_disabled(guc); 3147 if (likely(!disabled)) { 3148 __intel_gt_pm_get(gt); 3149 set_context_destroyed(ce); 3150 clr_context_registered(ce); 3151 } 3152 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3153 if (unlikely(disabled)) { 3154 release_guc_id(guc, ce); 3155 __guc_context_destroy(ce); 3156 return; 3157 } 3158 3159 deregister_context(ce, ce->guc_id.id); 3160 } 3161 3162 static void __guc_context_destroy(struct intel_context *ce) 3163 { 3164 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3165 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3166 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3167 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3168 3169 lrc_fini(ce); 3170 intel_context_fini(ce); 3171 3172 if (intel_engine_is_virtual(ce->engine)) { 3173 struct guc_virtual_engine *ve = 3174 container_of(ce, typeof(*ve), context); 3175 3176 if (ve->base.breadcrumbs) 3177 intel_breadcrumbs_put(ve->base.breadcrumbs); 3178 3179 kfree(ve); 3180 } else { 3181 intel_context_free(ce); 3182 } 3183 } 3184 3185 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3186 { 3187 struct intel_context *ce; 3188 unsigned long flags; 3189 3190 GEM_BUG_ON(!submission_disabled(guc) && 3191 guc_submission_initialized(guc)); 3192 3193 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3194 spin_lock_irqsave(&guc->submission_state.lock, flags); 3195 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3196 struct intel_context, 3197 destroyed_link); 3198 if (ce) 3199 list_del_init(&ce->destroyed_link); 3200 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3201 3202 if (!ce) 3203 break; 
3204 3205 release_guc_id(guc, ce); 3206 __guc_context_destroy(ce); 3207 } 3208 } 3209 3210 static void deregister_destroyed_contexts(struct intel_guc *guc) 3211 { 3212 struct intel_context *ce; 3213 unsigned long flags; 3214 3215 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3216 spin_lock_irqsave(&guc->submission_state.lock, flags); 3217 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3218 struct intel_context, 3219 destroyed_link); 3220 if (ce) 3221 list_del_init(&ce->destroyed_link); 3222 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3223 3224 if (!ce) 3225 break; 3226 3227 guc_lrc_desc_unpin(ce); 3228 } 3229 } 3230 3231 static void destroyed_worker_func(struct work_struct *w) 3232 { 3233 struct intel_guc *guc = container_of(w, struct intel_guc, 3234 submission_state.destroyed_worker); 3235 struct intel_gt *gt = guc_to_gt(guc); 3236 int tmp; 3237 3238 with_intel_gt_pm(gt, tmp) 3239 deregister_destroyed_contexts(guc); 3240 } 3241 3242 static void guc_context_destroy(struct kref *kref) 3243 { 3244 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3245 struct intel_guc *guc = ce_to_guc(ce); 3246 unsigned long flags; 3247 bool destroy; 3248 3249 /* 3250 * If the guc_id is invalid this context has been stolen and we can free 3251 * it immediately. Also can be freed immediately if the context is not 3252 * registered with the GuC or the GuC is in the middle of a reset. 3253 */ 3254 spin_lock_irqsave(&guc->submission_state.lock, flags); 3255 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3256 !ctx_id_mapped(guc, ce->guc_id.id); 3257 if (likely(!destroy)) { 3258 if (!list_empty(&ce->guc_id.link)) 3259 list_del_init(&ce->guc_id.link); 3260 list_add_tail(&ce->destroyed_link, 3261 &guc->submission_state.destroyed_contexts); 3262 } else { 3263 __release_guc_id(guc, ce); 3264 } 3265 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3266 if (unlikely(destroy)) { 3267 __guc_context_destroy(ce); 3268 return; 3269 } 3270 3271 /* 3272 * We use a worker to issue the H2G to deregister the context as we can 3273 * take the GT PM for the first time which isn't allowed from an atomic 3274 * context. 
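 * The final kref put can happen in atomic context, so the context was
 * parked on submission_state.destroyed_contexts above and
 * destroyed_worker_func() / deregister_destroyed_contexts() drain that
 * list from system_unbound_wq with the GT PM held via with_intel_gt_pm().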
3275 */ 3276 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3277 } 3278 3279 static int guc_context_alloc(struct intel_context *ce) 3280 { 3281 return lrc_alloc(ce, ce->engine); 3282 } 3283 3284 static void __guc_context_set_prio(struct intel_guc *guc, 3285 struct intel_context *ce) 3286 { 3287 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3288 struct context_policy policy; 3289 3290 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3291 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3292 __guc_context_set_context_policies(guc, &policy, true); 3293 } else { 3294 u32 action[] = { 3295 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3296 ce->guc_id.id, 3297 ce->guc_state.prio, 3298 }; 3299 3300 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3301 } 3302 } 3303 3304 static void guc_context_set_prio(struct intel_guc *guc, 3305 struct intel_context *ce, 3306 u8 prio) 3307 { 3308 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3309 prio > GUC_CLIENT_PRIORITY_NORMAL); 3310 lockdep_assert_held(&ce->guc_state.lock); 3311 3312 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3313 !context_registered(ce)) { 3314 ce->guc_state.prio = prio; 3315 return; 3316 } 3317 3318 ce->guc_state.prio = prio; 3319 __guc_context_set_prio(guc, ce); 3320 3321 trace_intel_context_set_prio(ce); 3322 } 3323 3324 static inline u8 map_i915_prio_to_guc_prio(int prio) 3325 { 3326 if (prio == I915_PRIORITY_NORMAL) 3327 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3328 else if (prio < I915_PRIORITY_NORMAL) 3329 return GUC_CLIENT_PRIORITY_NORMAL; 3330 else if (prio < I915_PRIORITY_DISPLAY) 3331 return GUC_CLIENT_PRIORITY_HIGH; 3332 else 3333 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3334 } 3335 3336 static inline void add_context_inflight_prio(struct intel_context *ce, 3337 u8 guc_prio) 3338 { 3339 lockdep_assert_held(&ce->guc_state.lock); 3340 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3341 3342 ++ce->guc_state.prio_count[guc_prio]; 3343 3344 /* Overflow protection */ 3345 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3346 } 3347 3348 static inline void sub_context_inflight_prio(struct intel_context *ce, 3349 u8 guc_prio) 3350 { 3351 lockdep_assert_held(&ce->guc_state.lock); 3352 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3353 3354 /* Underflow protection */ 3355 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3356 3357 --ce->guc_state.prio_count[guc_prio]; 3358 } 3359 3360 static inline void update_context_prio(struct intel_context *ce) 3361 { 3362 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3363 int i; 3364 3365 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3366 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3367 3368 lockdep_assert_held(&ce->guc_state.lock); 3369 3370 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3371 if (ce->guc_state.prio_count[i]) { 3372 guc_context_set_prio(guc, ce, i); 3373 break; 3374 } 3375 } 3376 } 3377 3378 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3379 { 3380 /* Lower value is higher priority */ 3381 return new_guc_prio < old_guc_prio; 3382 } 3383 3384 static void add_to_context(struct i915_request *rq) 3385 { 3386 struct intel_context *ce = request_to_scheduling_context(rq); 3387 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3388 3389 GEM_BUG_ON(intel_context_is_child(ce)); 3390 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3391 3392 spin_lock(&ce->guc_state.lock); 3393 
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3394 3395 if (rq->guc_prio == GUC_PRIO_INIT) { 3396 rq->guc_prio = new_guc_prio; 3397 add_context_inflight_prio(ce, rq->guc_prio); 3398 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3399 sub_context_inflight_prio(ce, rq->guc_prio); 3400 rq->guc_prio = new_guc_prio; 3401 add_context_inflight_prio(ce, rq->guc_prio); 3402 } 3403 update_context_prio(ce); 3404 3405 spin_unlock(&ce->guc_state.lock); 3406 } 3407 3408 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3409 { 3410 lockdep_assert_held(&ce->guc_state.lock); 3411 3412 if (rq->guc_prio != GUC_PRIO_INIT && 3413 rq->guc_prio != GUC_PRIO_FINI) { 3414 sub_context_inflight_prio(ce, rq->guc_prio); 3415 update_context_prio(ce); 3416 } 3417 rq->guc_prio = GUC_PRIO_FINI; 3418 } 3419 3420 static void remove_from_context(struct i915_request *rq) 3421 { 3422 struct intel_context *ce = request_to_scheduling_context(rq); 3423 3424 GEM_BUG_ON(intel_context_is_child(ce)); 3425 3426 spin_lock_irq(&ce->guc_state.lock); 3427 3428 list_del_init(&rq->sched.link); 3429 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3430 3431 /* Prevent further __await_execution() registering a cb, then flush */ 3432 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3433 3434 guc_prio_fini(rq, ce); 3435 3436 spin_unlock_irq(&ce->guc_state.lock); 3437 3438 atomic_dec(&ce->guc_id.ref); 3439 i915_request_notify_execute_cb_imm(rq); 3440 } 3441 3442 static const struct intel_context_ops guc_context_ops = { 3443 .alloc = guc_context_alloc, 3444 3445 .close = guc_context_close, 3446 3447 .pre_pin = guc_context_pre_pin, 3448 .pin = guc_context_pin, 3449 .unpin = guc_context_unpin, 3450 .post_unpin = guc_context_post_unpin, 3451 3452 .revoke = guc_context_revoke, 3453 3454 .cancel_request = guc_context_cancel_request, 3455 3456 .enter = intel_context_enter_engine, 3457 .exit = intel_context_exit_engine, 3458 3459 .sched_disable = guc_context_sched_disable, 3460 3461 .reset = lrc_reset, 3462 .destroy = guc_context_destroy, 3463 3464 .create_virtual = guc_create_virtual, 3465 .create_parallel = guc_create_parallel, 3466 }; 3467 3468 static void submit_work_cb(struct irq_work *wrk) 3469 { 3470 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3471 3472 might_lock(&rq->engine->sched_engine->lock); 3473 i915_sw_fence_complete(&rq->submit); 3474 } 3475 3476 static void __guc_signal_context_fence(struct intel_context *ce) 3477 { 3478 struct i915_request *rq, *rn; 3479 3480 lockdep_assert_held(&ce->guc_state.lock); 3481 3482 if (!list_empty(&ce->guc_state.fences)) 3483 trace_intel_context_fence_release(ce); 3484 3485 /* 3486 * Use an IRQ to ensure locking order of sched_engine->lock -> 3487 * ce->guc_state.lock is preserved. 
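 * Completing rq->submit directly here could take sched_engine->lock while
 * ce->guc_state.lock is already held, inverting the documented order, so
 * submit_work_cb() runs from irq_work context instead. The request side
 * arms this in guc_request_alloc(), roughly:
 *
 *	init_irq_work(&rq->submit_work, submit_work_cb);
 *	i915_sw_fence_await(&rq->submit);
 *	list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);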
3488 */ 3489 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3490 guc_fence_link) { 3491 list_del(&rq->guc_fence_link); 3492 irq_work_queue(&rq->submit_work); 3493 } 3494 3495 INIT_LIST_HEAD(&ce->guc_state.fences); 3496 } 3497 3498 static void guc_signal_context_fence(struct intel_context *ce) 3499 { 3500 unsigned long flags; 3501 3502 GEM_BUG_ON(intel_context_is_child(ce)); 3503 3504 spin_lock_irqsave(&ce->guc_state.lock, flags); 3505 clr_context_wait_for_deregister_to_register(ce); 3506 __guc_signal_context_fence(ce); 3507 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3508 } 3509 3510 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3511 { 3512 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3513 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3514 !submission_disabled(ce_to_guc(ce)); 3515 } 3516 3517 static void guc_context_init(struct intel_context *ce) 3518 { 3519 const struct i915_gem_context *ctx; 3520 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3521 3522 rcu_read_lock(); 3523 ctx = rcu_dereference(ce->gem_context); 3524 if (ctx) 3525 prio = ctx->sched.priority; 3526 rcu_read_unlock(); 3527 3528 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3529 3530 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3531 __delay_sched_disable); 3532 3533 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3534 } 3535 3536 static int guc_request_alloc(struct i915_request *rq) 3537 { 3538 struct intel_context *ce = request_to_scheduling_context(rq); 3539 struct intel_guc *guc = ce_to_guc(ce); 3540 unsigned long flags; 3541 int ret; 3542 3543 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3544 3545 /* 3546 * Flush enough space to reduce the likelihood of waiting after 3547 * we start building the request - in which case we will just 3548 * have to repeat work. 3549 */ 3550 rq->reserved_space += GUC_REQUEST_SIZE; 3551 3552 /* 3553 * Note that after this point, we have committed to using 3554 * this request as it is being used to both track the 3555 * state of engine initialisation and liveness of the 3556 * golden renderstate above. Think twice before you try 3557 * to cancel/unwind this request now. 3558 */ 3559 3560 /* Unconditionally invalidate GPU caches and TLBs. */ 3561 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3562 if (ret) 3563 return ret; 3564 3565 rq->reserved_space -= GUC_REQUEST_SIZE; 3566 3567 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3568 guc_context_init(ce); 3569 3570 /* 3571 * If the context gets closed while the execbuf is ongoing, the context 3572 * close code will race with the below code to cancel the delayed work. 3573 * If the context close wins the race and cancels the work, it will 3574 * immediately call the sched disable (see guc_context_close), so there 3575 * is a chance we can get past this check while the sched_disable code 3576 * is being executed. To make sure that code completes before we check 3577 * the status further down, we wait for the close process to complete. 3578 * Else, this code path could send a request down thinking that the 3579 * context is still in a schedule-enable mode while the GuC ends up 3580 * dropping the request completely because the disable did go from the 3581 * context_close path right to GuC just prior. In the event the CT is 3582 * full, we could potentially need to wait up to 1.5 seconds. 
3583 */ 3584 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3585 intel_context_sched_disable_unpin(ce); 3586 else if (intel_context_is_closed(ce)) 3587 if (wait_for(context_close_done(ce), 1500)) 3588 drm_warn(&guc_to_gt(guc)->i915->drm, 3589 "timed out waiting on context sched close before realloc\n"); 3590 /* 3591 * Call pin_guc_id here rather than in the pinning step as with 3592 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3593 * guc_id and creating horrible race conditions. This is especially bad 3594 * when guc_id are being stolen due to over subscription. By the time 3595 * this function is reached, it is guaranteed that the guc_id will be 3596 * persistent until the generated request is retired. Thus, sealing these 3597 * race conditions. It is still safe to fail here if guc_id are 3598 * exhausted and return -EAGAIN to the user indicating that they can try 3599 * again in the future. 3600 * 3601 * There is no need for a lock here as the timeline mutex ensures at 3602 * most one context can be executing this code path at once. The 3603 * guc_id_ref is incremented once for every request in flight and 3604 * decremented on each retire. When it is zero, a lock around the 3605 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3606 */ 3607 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3608 goto out; 3609 3610 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3611 if (unlikely(ret < 0)) 3612 return ret; 3613 if (context_needs_register(ce, !!ret)) { 3614 ret = try_context_registration(ce, true); 3615 if (unlikely(ret)) { /* unwind */ 3616 if (ret == -EPIPE) { 3617 disable_submission(guc); 3618 goto out; /* GPU will be reset */ 3619 } 3620 atomic_dec(&ce->guc_id.ref); 3621 unpin_guc_id(guc, ce); 3622 return ret; 3623 } 3624 } 3625 3626 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3627 3628 out: 3629 /* 3630 * We block all requests on this context if a G2H is pending for a 3631 * schedule disable or context deregistration as the GuC will fail a 3632 * schedule enable or context registration if either G2H is pending 3633 * respectfully. Once a G2H returns, the fence is released that is 3634 * blocking these requests (see guc_signal_context_fence). 
3635 */ 3636 spin_lock_irqsave(&ce->guc_state.lock, flags); 3637 if (context_wait_for_deregister_to_register(ce) || 3638 context_pending_disable(ce)) { 3639 init_irq_work(&rq->submit_work, submit_work_cb); 3640 i915_sw_fence_await(&rq->submit); 3641 3642 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3643 } 3644 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3645 3646 return 0; 3647 } 3648 3649 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3650 struct i915_gem_ww_ctx *ww, 3651 void **vaddr) 3652 { 3653 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3654 3655 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3656 } 3657 3658 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3659 { 3660 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3661 int ret = __guc_context_pin(ce, engine, vaddr); 3662 intel_engine_mask_t tmp, mask = ce->engine->mask; 3663 3664 if (likely(!ret)) 3665 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3666 intel_engine_pm_get(engine); 3667 3668 return ret; 3669 } 3670 3671 static void guc_virtual_context_unpin(struct intel_context *ce) 3672 { 3673 intel_engine_mask_t tmp, mask = ce->engine->mask; 3674 struct intel_engine_cs *engine; 3675 struct intel_guc *guc = ce_to_guc(ce); 3676 3677 GEM_BUG_ON(context_enabled(ce)); 3678 GEM_BUG_ON(intel_context_is_barrier(ce)); 3679 3680 unpin_guc_id(guc, ce); 3681 lrc_unpin(ce); 3682 3683 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3684 intel_engine_pm_put_async(engine); 3685 } 3686 3687 static void guc_virtual_context_enter(struct intel_context *ce) 3688 { 3689 intel_engine_mask_t tmp, mask = ce->engine->mask; 3690 struct intel_engine_cs *engine; 3691 3692 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3693 intel_engine_pm_get(engine); 3694 3695 intel_timeline_enter(ce->timeline); 3696 } 3697 3698 static void guc_virtual_context_exit(struct intel_context *ce) 3699 { 3700 intel_engine_mask_t tmp, mask = ce->engine->mask; 3701 struct intel_engine_cs *engine; 3702 3703 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3704 intel_engine_pm_put(engine); 3705 3706 intel_timeline_exit(ce->timeline); 3707 } 3708 3709 static int guc_virtual_context_alloc(struct intel_context *ce) 3710 { 3711 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3712 3713 return lrc_alloc(ce, engine); 3714 } 3715 3716 static const struct intel_context_ops virtual_guc_context_ops = { 3717 .alloc = guc_virtual_context_alloc, 3718 3719 .close = guc_context_close, 3720 3721 .pre_pin = guc_virtual_context_pre_pin, 3722 .pin = guc_virtual_context_pin, 3723 .unpin = guc_virtual_context_unpin, 3724 .post_unpin = guc_context_post_unpin, 3725 3726 .revoke = guc_context_revoke, 3727 3728 .cancel_request = guc_context_cancel_request, 3729 3730 .enter = guc_virtual_context_enter, 3731 .exit = guc_virtual_context_exit, 3732 3733 .sched_disable = guc_context_sched_disable, 3734 3735 .destroy = guc_context_destroy, 3736 3737 .get_sibling = guc_virtual_get_sibling, 3738 }; 3739 3740 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3741 { 3742 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3743 struct intel_guc *guc = ce_to_guc(ce); 3744 int ret; 3745 3746 GEM_BUG_ON(!intel_context_is_parent(ce)); 3747 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3748 3749 ret = pin_guc_id(guc, ce); 3750 if (unlikely(ret < 0)) 3751 return ret; 3752 3753 return 
__guc_context_pin(ce, engine, vaddr); 3754 } 3755 3756 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3757 { 3758 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3759 3760 GEM_BUG_ON(!intel_context_is_child(ce)); 3761 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3762 3763 __intel_context_pin(ce->parallel.parent); 3764 return __guc_context_pin(ce, engine, vaddr); 3765 } 3766 3767 static void guc_parent_context_unpin(struct intel_context *ce) 3768 { 3769 struct intel_guc *guc = ce_to_guc(ce); 3770 3771 GEM_BUG_ON(context_enabled(ce)); 3772 GEM_BUG_ON(intel_context_is_barrier(ce)); 3773 GEM_BUG_ON(!intel_context_is_parent(ce)); 3774 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3775 3776 unpin_guc_id(guc, ce); 3777 lrc_unpin(ce); 3778 } 3779 3780 static void guc_child_context_unpin(struct intel_context *ce) 3781 { 3782 GEM_BUG_ON(context_enabled(ce)); 3783 GEM_BUG_ON(intel_context_is_barrier(ce)); 3784 GEM_BUG_ON(!intel_context_is_child(ce)); 3785 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3786 3787 lrc_unpin(ce); 3788 } 3789 3790 static void guc_child_context_post_unpin(struct intel_context *ce) 3791 { 3792 GEM_BUG_ON(!intel_context_is_child(ce)); 3793 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3794 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3795 3796 lrc_post_unpin(ce); 3797 intel_context_unpin(ce->parallel.parent); 3798 } 3799 3800 static void guc_child_context_destroy(struct kref *kref) 3801 { 3802 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3803 3804 __guc_context_destroy(ce); 3805 } 3806 3807 static const struct intel_context_ops virtual_parent_context_ops = { 3808 .alloc = guc_virtual_context_alloc, 3809 3810 .close = guc_context_close, 3811 3812 .pre_pin = guc_context_pre_pin, 3813 .pin = guc_parent_context_pin, 3814 .unpin = guc_parent_context_unpin, 3815 .post_unpin = guc_context_post_unpin, 3816 3817 .revoke = guc_context_revoke, 3818 3819 .cancel_request = guc_context_cancel_request, 3820 3821 .enter = guc_virtual_context_enter, 3822 .exit = guc_virtual_context_exit, 3823 3824 .sched_disable = guc_context_sched_disable, 3825 3826 .destroy = guc_context_destroy, 3827 3828 .get_sibling = guc_virtual_get_sibling, 3829 }; 3830 3831 static const struct intel_context_ops virtual_child_context_ops = { 3832 .alloc = guc_virtual_context_alloc, 3833 3834 .pre_pin = guc_context_pre_pin, 3835 .pin = guc_child_context_pin, 3836 .unpin = guc_child_context_unpin, 3837 .post_unpin = guc_child_context_post_unpin, 3838 3839 .cancel_request = guc_context_cancel_request, 3840 3841 .enter = guc_virtual_context_enter, 3842 .exit = guc_virtual_context_exit, 3843 3844 .destroy = guc_child_context_destroy, 3845 3846 .get_sibling = guc_virtual_get_sibling, 3847 }; 3848 3849 /* 3850 * The below override of the breadcrumbs is enabled when the user configures a 3851 * context for parallel submission (multi-lrc, parent-child). 3852 * 3853 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3854 * safely preempt all the hw contexts configured for parallel submission 3855 * between each BB. The contract between the i915 and GuC is if the parent 3856 * context can be preempted, all the children can be preempted, and the GuC will 3857 * always try to preempt the parent before the children. A handshake between the 3858 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3859 * creating a window to preempt between each set of BBs. 
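*
* As an illustrative sketch (not the literal command streams emitted
* below), the BB-start handshake is:
*
*	parent:	for each child i: wait(join[i] == PARENT_GO_BB)
*		turn off preemption; write go = CHILD_GO_BB; jump to BB
*
*	child:	write join[child_index] = PARENT_GO_BB
*		wait(go == CHILD_GO_BB); turn off preemption; jump to BB
*
* The fini-breadcrumb handshake mirrors this with the *_FINI_BREADCRUMB
* values, re-enabling arbitration instead of disabling it.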
3860 */ 3861 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3862 u64 offset, u32 len, 3863 const unsigned int flags); 3864 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3865 u64 offset, u32 len, 3866 const unsigned int flags); 3867 static u32 * 3868 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3869 u32 *cs); 3870 static u32 * 3871 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3872 u32 *cs); 3873 3874 static struct intel_context * 3875 guc_create_parallel(struct intel_engine_cs **engines, 3876 unsigned int num_siblings, 3877 unsigned int width) 3878 { 3879 struct intel_engine_cs **siblings = NULL; 3880 struct intel_context *parent = NULL, *ce, *err; 3881 int i, j; 3882 3883 siblings = kmalloc_array(num_siblings, 3884 sizeof(*siblings), 3885 GFP_KERNEL); 3886 if (!siblings) 3887 return ERR_PTR(-ENOMEM); 3888 3889 for (i = 0; i < width; ++i) { 3890 for (j = 0; j < num_siblings; ++j) 3891 siblings[j] = engines[i * num_siblings + j]; 3892 3893 ce = intel_engine_create_virtual(siblings, num_siblings, 3894 FORCE_VIRTUAL); 3895 if (IS_ERR(ce)) { 3896 err = ERR_CAST(ce); 3897 goto unwind; 3898 } 3899 3900 if (i == 0) { 3901 parent = ce; 3902 parent->ops = &virtual_parent_context_ops; 3903 } else { 3904 ce->ops = &virtual_child_context_ops; 3905 intel_context_bind_parent_child(parent, ce); 3906 } 3907 } 3908 3909 parent->parallel.fence_context = dma_fence_context_alloc(1); 3910 3911 parent->engine->emit_bb_start = 3912 emit_bb_start_parent_no_preempt_mid_batch; 3913 parent->engine->emit_fini_breadcrumb = 3914 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3915 parent->engine->emit_fini_breadcrumb_dw = 3916 12 + 4 * parent->parallel.number_children; 3917 for_each_child(parent, ce) { 3918 ce->engine->emit_bb_start = 3919 emit_bb_start_child_no_preempt_mid_batch; 3920 ce->engine->emit_fini_breadcrumb = 3921 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3922 ce->engine->emit_fini_breadcrumb_dw = 16; 3923 } 3924 3925 kfree(siblings); 3926 return parent; 3927 3928 unwind: 3929 if (parent) 3930 intel_context_put(parent); 3931 kfree(siblings); 3932 return err; 3933 } 3934 3935 static bool 3936 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3937 { 3938 struct intel_engine_cs *sibling; 3939 intel_engine_mask_t tmp, mask = b->engine_mask; 3940 bool result = false; 3941 3942 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3943 result |= intel_engine_irq_enable(sibling); 3944 3945 return result; 3946 } 3947 3948 static void 3949 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3950 { 3951 struct intel_engine_cs *sibling; 3952 intel_engine_mask_t tmp, mask = b->engine_mask; 3953 3954 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3955 intel_engine_irq_disable(sibling); 3956 } 3957 3958 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3959 { 3960 int i; 3961 3962 /* 3963 * In GuC submission mode we do not know which physical engine a request 3964 * will be scheduled on, this creates a problem because the breadcrumb 3965 * interrupt is per physical engine. To work around this we attach 3966 * requests and direct all breadcrumb interrupts to the first instance 3967 * of an engine per class. In addition all breadcrumb interrupts are 3968 * enabled / disabled across an engine class in unison. 
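*
* For example (illustrative only, the engine set depends on the platform):
* with vcs0-vcs3 present, vcs1-vcs3 drop their own breadcrumbs object and
* take a reference on vcs0's, so that single breadcrumbs object ends up
* with all four VCS bits in its engine_mask and the irq_enable /
* irq_disable hooks below fan the enable / disable out to every sibling
* in the class.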
3969 */ 3970 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3971 struct intel_engine_cs *sibling = 3972 engine->gt->engine_class[engine->class][i]; 3973 3974 if (sibling) { 3975 if (engine->breadcrumbs != sibling->breadcrumbs) { 3976 intel_breadcrumbs_put(engine->breadcrumbs); 3977 engine->breadcrumbs = 3978 intel_breadcrumbs_get(sibling->breadcrumbs); 3979 } 3980 break; 3981 } 3982 } 3983 3984 if (engine->breadcrumbs) { 3985 engine->breadcrumbs->engine_mask |= engine->mask; 3986 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3987 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3988 } 3989 } 3990 3991 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3992 int prio) 3993 { 3994 struct intel_context *ce = request_to_scheduling_context(rq); 3995 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3996 3997 /* Short circuit function */ 3998 if (prio < I915_PRIORITY_NORMAL || 3999 rq->guc_prio == GUC_PRIO_FINI || 4000 (rq->guc_prio != GUC_PRIO_INIT && 4001 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4002 return; 4003 4004 spin_lock(&ce->guc_state.lock); 4005 if (rq->guc_prio != GUC_PRIO_FINI) { 4006 if (rq->guc_prio != GUC_PRIO_INIT) 4007 sub_context_inflight_prio(ce, rq->guc_prio); 4008 rq->guc_prio = new_guc_prio; 4009 add_context_inflight_prio(ce, rq->guc_prio); 4010 update_context_prio(ce); 4011 } 4012 spin_unlock(&ce->guc_state.lock); 4013 } 4014 4015 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4016 { 4017 struct intel_context *ce = request_to_scheduling_context(rq); 4018 4019 spin_lock(&ce->guc_state.lock); 4020 guc_prio_fini(rq, ce); 4021 spin_unlock(&ce->guc_state.lock); 4022 } 4023 4024 static void sanitize_hwsp(struct intel_engine_cs *engine) 4025 { 4026 struct intel_timeline *tl; 4027 4028 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4029 intel_timeline_reset_seqno(tl); 4030 } 4031 4032 static void guc_sanitize(struct intel_engine_cs *engine) 4033 { 4034 /* 4035 * Poison residual state on resume, in case the suspend didn't! 4036 * 4037 * We have to assume that across suspend/resume (or other loss 4038 * of control) that the contents of our pinned buffers has been 4039 * lost, replaced by garbage. Since this doesn't always happen, 4040 * let's poison such state so that we more quickly spot when 4041 * we falsely assume it has been preserved. 4042 */ 4043 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4044 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4045 4046 /* 4047 * The kernel_context HWSP is stored in the status_page. As above, 4048 * that may be lost on resume/initialisation, and so we need to 4049 * reset the value in the HWSP. 
4050 */
4051 sanitize_hwsp(engine);
4052
4053 /* And scrub the dirty cachelines for the HWSP */
4054 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4055
4056 intel_engine_reset_pinned_contexts(engine);
4057 }
4058
4059 static void setup_hwsp(struct intel_engine_cs *engine)
4060 {
4061 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4062
4063 ENGINE_WRITE_FW(engine,
4064 RING_HWS_PGA,
4065 i915_ggtt_offset(engine->status_page.vma));
4066 }
4067
4068 static void start_engine(struct intel_engine_cs *engine)
4069 {
4070 ENGINE_WRITE_FW(engine,
4071 RING_MODE_GEN7,
4072 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4073
4074 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4075 ENGINE_POSTING_READ(engine, RING_MI_MODE);
4076 }
4077
4078 static int guc_resume(struct intel_engine_cs *engine)
4079 {
4080 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4081
4082 intel_mocs_init_engine(engine);
4083
4084 intel_breadcrumbs_reset(engine->breadcrumbs);
4085
4086 setup_hwsp(engine);
4087 start_engine(engine);
4088
4089 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4090 xehp_enable_ccs_engines(engine);
4091
4092 return 0;
4093 }
4094
4095 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4096 {
4097 return !sched_engine->tasklet.callback;
4098 }
4099
4100 static void guc_set_default_submission(struct intel_engine_cs *engine)
4101 {
4102 engine->submit_request = guc_submit_request;
4103 }
4104
4105 static inline void guc_kernel_context_pin(struct intel_guc *guc,
4106 struct intel_context *ce)
4107 {
4108 /*
4109 * Note: we purposefully do not check the returns below because
4110 * the registration can only fail if a reset is just starting.
4111 * This is called at the end of reset so presumably another reset
4112 * isn't happening and even if it did this code would be run again.
4113 */
4114
4115 if (context_guc_id_invalid(ce))
4116 pin_guc_id(guc, ce);
4117
4118 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
4119 guc_context_init(ce);
4120
4121 try_context_registration(ce, true);
4122 }
4123
4124 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4125 {
4126 struct intel_gt *gt = guc_to_gt(guc);
4127 struct intel_engine_cs *engine;
4128 enum intel_engine_id id;
4129
4130 /* make sure all descriptors are clean... */
4131 xa_destroy(&guc->context_lookup);
4132
4133 /*
4134 * A reset might have occurred while we had a pending stalled request,
4135 * so make sure we clean that up.
4136 */
4137 guc->stalled_request = NULL;
4138 guc->submission_stall_reason = STALL_NONE;
4139
4140 /*
4141 * Some contexts might have been pinned before we enabled GuC
4142 * submission, so we need to add them to the GuC bookkeeping.
4143 * Also, after a reset of the GuC we want to make sure that the
4144 * information shared with GuC is properly reset. The kernel LRCs are
4145 * not attached to the gem_context, so they need to be added separately.
4146 */ 4147 for_each_engine(engine, gt, id) { 4148 struct intel_context *ce; 4149 4150 list_for_each_entry(ce, &engine->pinned_contexts_list, 4151 pinned_contexts_link) 4152 guc_kernel_context_pin(guc, ce); 4153 } 4154 } 4155 4156 static void guc_release(struct intel_engine_cs *engine) 4157 { 4158 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4159 4160 intel_engine_cleanup_common(engine); 4161 lrc_fini_wa_ctx(engine); 4162 } 4163 4164 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4165 { 4166 struct intel_engine_cs *e; 4167 intel_engine_mask_t tmp, mask = engine->mask; 4168 4169 for_each_engine_masked(e, engine->gt, mask, tmp) 4170 e->serial++; 4171 } 4172 4173 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4174 { 4175 /* Default vfuncs which can be overridden by each engine. */ 4176 4177 engine->resume = guc_resume; 4178 4179 engine->cops = &guc_context_ops; 4180 engine->request_alloc = guc_request_alloc; 4181 engine->add_active_request = add_to_context; 4182 engine->remove_active_request = remove_from_context; 4183 4184 engine->sched_engine->schedule = i915_schedule; 4185 4186 engine->reset.prepare = guc_engine_reset_prepare; 4187 engine->reset.rewind = guc_rewind_nop; 4188 engine->reset.cancel = guc_reset_nop; 4189 engine->reset.finish = guc_reset_nop; 4190 4191 engine->emit_flush = gen8_emit_flush_xcs; 4192 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4193 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4194 if (GRAPHICS_VER(engine->i915) >= 12) { 4195 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4196 engine->emit_flush = gen12_emit_flush_xcs; 4197 } 4198 engine->set_default_submission = guc_set_default_submission; 4199 engine->busyness = guc_engine_busyness; 4200 4201 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4202 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4203 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4204 4205 /* Wa_14014475959:dg2 */ 4206 if (engine->class == COMPUTE_CLASS) 4207 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || 4208 IS_DG2(engine->i915)) 4209 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4210 4211 /* 4212 * TODO: GuC supports timeslicing and semaphores as well, but they're 4213 * handled by the firmware so some minor tweaks are required before 4214 * enabling. 
4215 * 4216 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4217 */ 4218 4219 engine->emit_bb_start = gen8_emit_bb_start; 4220 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4221 engine->emit_bb_start = xehp_emit_bb_start; 4222 } 4223 4224 static void rcs_submission_override(struct intel_engine_cs *engine) 4225 { 4226 switch (GRAPHICS_VER(engine->i915)) { 4227 case 12: 4228 engine->emit_flush = gen12_emit_flush_rcs; 4229 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4230 break; 4231 case 11: 4232 engine->emit_flush = gen11_emit_flush_rcs; 4233 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4234 break; 4235 default: 4236 engine->emit_flush = gen8_emit_flush_rcs; 4237 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4238 break; 4239 } 4240 } 4241 4242 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4243 { 4244 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4245 intel_engine_set_irq_handler(engine, cs_irq_handler); 4246 } 4247 4248 static void guc_sched_engine_destroy(struct kref *kref) 4249 { 4250 struct i915_sched_engine *sched_engine = 4251 container_of(kref, typeof(*sched_engine), ref); 4252 struct intel_guc *guc = sched_engine->private_data; 4253 4254 guc->sched_engine = NULL; 4255 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4256 kfree(sched_engine); 4257 } 4258 4259 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4260 { 4261 struct drm_i915_private *i915 = engine->i915; 4262 struct intel_guc *guc = &engine->gt->uc.guc; 4263 4264 /* 4265 * The setup relies on several assumptions (e.g. irqs always enabled) 4266 * that are only valid on gen11+ 4267 */ 4268 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4269 4270 if (!guc->sched_engine) { 4271 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4272 if (!guc->sched_engine) 4273 return -ENOMEM; 4274 4275 guc->sched_engine->schedule = i915_schedule; 4276 guc->sched_engine->disabled = guc_sched_engine_disabled; 4277 guc->sched_engine->private_data = guc; 4278 guc->sched_engine->destroy = guc_sched_engine_destroy; 4279 guc->sched_engine->bump_inflight_request_prio = 4280 guc_bump_inflight_request_prio; 4281 guc->sched_engine->retire_inflight_request_prio = 4282 guc_retire_inflight_request_prio; 4283 tasklet_setup(&guc->sched_engine->tasklet, 4284 guc_submission_tasklet); 4285 } 4286 i915_sched_engine_put(engine->sched_engine); 4287 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4288 4289 guc_default_vfuncs(engine); 4290 guc_default_irqs(engine); 4291 guc_init_breadcrumbs(engine); 4292 4293 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4294 rcs_submission_override(engine); 4295 4296 lrc_init_wa_ctx(engine); 4297 4298 /* Finally, take ownership and responsibility for cleanup! 
*/
4299 engine->sanitize = guc_sanitize;
4300 engine->release = guc_release;
4301
4302 return 0;
4303 }
4304
4305 struct scheduling_policy {
4306 /* internal data */
4307 u32 max_words, num_words;
4308 u32 count;
4309 /* API data */
4310 struct guc_update_scheduling_policy h2g;
4311 };
4312
4313 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
4314 {
4315 u32 *start = (void *)&policy->h2g;
4316 u32 *end = policy->h2g.data + policy->num_words;
4317 size_t delta = end - start;
4318
4319 return delta;
4320 }
4321
4322 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
4323 {
4324 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
4325 policy->max_words = ARRAY_SIZE(policy->h2g.data);
4326 policy->num_words = 0;
4327 policy->count = 0;
4328
4329 return policy;
4330 }
4331
4332 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
4333 u32 action, u32 *data, u32 len)
4334 {
4335 u32 *klv_ptr = policy->h2g.data + policy->num_words;
4336
4337 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
4338 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
4339 FIELD_PREP(GUC_KLV_0_LEN, len);
4340 memcpy(klv_ptr, data, sizeof(u32) * len);
4341 policy->num_words += 1 + len;
4342 policy->count++;
4343 }
4344
4345 static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
4346 struct scheduling_policy *policy)
4347 {
4348 int ret;
4349
4350 ret = intel_guc_send(guc, (u32 *)&policy->h2g,
4351 __guc_scheduling_policy_action_size(policy));
4352 if (ret < 0)
4353 return ret;
4354
4355 if (ret != policy->count) {
4356 drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
4357 ret, policy->count);
4358 if (ret > policy->count)
4359 return -EPROTO;
4360 }
4361
4362 return 0;
4363 }
4364
4365 static int guc_init_global_schedule_policy(struct intel_guc *guc)
4366 {
4367 struct scheduling_policy policy;
4368 struct intel_gt *gt = guc_to_gt(guc);
4369 intel_wakeref_t wakeref;
4370 int ret = 0;
4371
4372 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
4373 return 0;
4374
4375 __guc_scheduling_policy_start_klv(&policy);
4376
4377 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
4378 u32 yield[] = {
4379 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
4380 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
4381 };
4382
4383 __guc_scheduling_policy_add_klv(&policy,
4384 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
4385 yield, ARRAY_SIZE(yield));
4386
4387 ret = __guc_action_set_scheduling_policies(guc, &policy);
4388 if (ret)
4389 i915_probe_error(gt->i915,
4390 "Failed to configure global scheduling policies: %pe!\n",
4391 ERR_PTR(ret));
4392 }
4393
4394 return ret;
4395 }
4396
4397 void intel_guc_submission_enable(struct intel_guc *guc)
4398 {
4399 struct intel_gt *gt = guc_to_gt(guc);
4400
4401 /* Enable and route to GuC */
4402 if (GRAPHICS_VER(gt->i915) >= 12)
4403 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
4404 GUC_SEM_INTR_ROUTE_TO_GUC |
4405 GUC_SEM_INTR_ENABLE_ALL);
4406
4407 guc_init_lrc_mapping(guc);
4408 guc_init_engine_stats(guc);
4409 guc_init_global_schedule_policy(guc);
4410 }
4411
4412 void intel_guc_submission_disable(struct intel_guc *guc)
4413 {
4414 struct intel_gt *gt = guc_to_gt(guc);
4415
4416 /* Note: By the time we're here, GuC may have already been reset */
4417
4418 /* Disable and route to host */
4419 if (GRAPHICS_VER(gt->i915) >= 12)
4420 intel_uncore_write(gt->uncore,
GEN12_GUC_SEM_INTR_ENABLES, 0x0);
4421 }
4422
4423 static bool __guc_submission_supported(struct intel_guc *guc)
4424 {
4425 /* GuC submission is unavailable for pre-Gen11 */
4426 return intel_guc_is_supported(guc) &&
4427 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4428 }
4429
4430 static bool __guc_submission_selected(struct intel_guc *guc)
4431 {
4432 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4433
4434 if (!intel_guc_submission_is_supported(guc))
4435 return false;
4436
4437 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4438 }
4439
4440 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
4441 {
4442 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
4443 }
4444
4445 /*
4446 * This default of 34 ms comes from 1000 ms / 30 fps = 33.3 ms, rounded up, so
4447 * that 30fps or higher workloads benefit from the latency reduction of delaying
4448 * the schedule-disable operation. This matches the 30fps game-render + encode
4449 * (real world) workload this knob was tested against.
4450 */
4451 #define SCHED_DISABLE_DELAY_MS 34
4452
4453 /*
4454 * A threshold of 75% is a reasonable starting point considering that real world
4455 * apps generally don't get anywhere near this.
4456 */
4457 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
4458 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
4459
4460 void intel_guc_submission_init_early(struct intel_guc *guc)
4461 {
4462 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4463
4464 spin_lock_init(&guc->submission_state.lock);
4465 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4466 ida_init(&guc->submission_state.guc_ids);
4467 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4468 INIT_WORK(&guc->submission_state.destroyed_worker,
4469 destroyed_worker_func);
4470 INIT_WORK(&guc->submission_state.reset_fail_worker,
4471 reset_fail_worker_func);
4472
4473 spin_lock_init(&guc->timestamp.lock);
4474 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4475
4476 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
4477 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4478 guc->submission_state.sched_disable_gucid_threshold =
4479 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
4480 guc->submission_supported = __guc_submission_supported(guc);
4481 guc->submission_selected = __guc_submission_selected(guc);
4482 }
4483
4484 static inline struct intel_context *
4485 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4486 {
4487 struct intel_context *ce;
4488
4489 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4490 drm_err(&guc_to_gt(guc)->i915->drm,
4491 "Invalid ctx_id %u\n", ctx_id);
4492 return NULL;
4493 }
4494
4495 ce = __get_context(guc, ctx_id);
4496 if (unlikely(!ce)) {
4497 drm_err(&guc_to_gt(guc)->i915->drm,
4498 "Context is NULL, ctx_id %u\n", ctx_id);
4499 return NULL;
4500 }
4501
4502 if (unlikely(intel_context_is_child(ce))) {
4503 drm_err(&guc_to_gt(guc)->i915->drm,
4504 "Context is child, ctx_id %u\n", ctx_id);
4505 return NULL;
4506 }
4507
4508 return ce;
4509 }
4510
4511 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4512 const u32 *msg,
4513 u32 len)
4514 {
4515 struct intel_context *ce;
4516 u32 ctx_id;
4517
4518 if (unlikely(len < 1)) {
4519 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4520 return -EPROTO;
4521 }
4522 ctx_id = msg[0];
4523
4524 ce = g2h_context_lookup(guc, ctx_id);
4525 if (unlikely(!ce))
4526 return -EPROTO;
4527
4528
trace_intel_context_deregister_done(ce); 4529 4530 #ifdef CONFIG_DRM_I915_SELFTEST 4531 if (unlikely(ce->drop_deregister)) { 4532 ce->drop_deregister = false; 4533 return 0; 4534 } 4535 #endif 4536 4537 if (context_wait_for_deregister_to_register(ce)) { 4538 struct intel_runtime_pm *runtime_pm = 4539 &ce->engine->gt->i915->runtime_pm; 4540 intel_wakeref_t wakeref; 4541 4542 /* 4543 * Previous owner of this guc_id has been deregistered, now safe 4544 * register this context. 4545 */ 4546 with_intel_runtime_pm(runtime_pm, wakeref) 4547 register_context(ce, true); 4548 guc_signal_context_fence(ce); 4549 intel_context_put(ce); 4550 } else if (context_destroyed(ce)) { 4551 /* Context has been destroyed */ 4552 intel_gt_pm_put_async(guc_to_gt(guc)); 4553 release_guc_id(guc, ce); 4554 __guc_context_destroy(ce); 4555 } 4556 4557 decr_outstanding_submission_g2h(guc); 4558 4559 return 0; 4560 } 4561 4562 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4563 const u32 *msg, 4564 u32 len) 4565 { 4566 struct intel_context *ce; 4567 unsigned long flags; 4568 u32 ctx_id; 4569 4570 if (unlikely(len < 2)) { 4571 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4572 return -EPROTO; 4573 } 4574 ctx_id = msg[0]; 4575 4576 ce = g2h_context_lookup(guc, ctx_id); 4577 if (unlikely(!ce)) 4578 return -EPROTO; 4579 4580 if (unlikely(context_destroyed(ce) || 4581 (!context_pending_enable(ce) && 4582 !context_pending_disable(ce)))) { 4583 drm_err(&guc_to_gt(guc)->i915->drm, 4584 "Bad context sched_state 0x%x, ctx_id %u\n", 4585 ce->guc_state.sched_state, ctx_id); 4586 return -EPROTO; 4587 } 4588 4589 trace_intel_context_sched_done(ce); 4590 4591 if (context_pending_enable(ce)) { 4592 #ifdef CONFIG_DRM_I915_SELFTEST 4593 if (unlikely(ce->drop_schedule_enable)) { 4594 ce->drop_schedule_enable = false; 4595 return 0; 4596 } 4597 #endif 4598 4599 spin_lock_irqsave(&ce->guc_state.lock, flags); 4600 clr_context_pending_enable(ce); 4601 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4602 } else if (context_pending_disable(ce)) { 4603 bool banned; 4604 4605 #ifdef CONFIG_DRM_I915_SELFTEST 4606 if (unlikely(ce->drop_schedule_disable)) { 4607 ce->drop_schedule_disable = false; 4608 return 0; 4609 } 4610 #endif 4611 4612 /* 4613 * Unpin must be done before __guc_signal_context_fence, 4614 * otherwise a race exists between the requests getting 4615 * submitted + retired before this unpin completes resulting in 4616 * the pin_count going to zero and the context still being 4617 * enabled. 
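*
* Illustrative ordering of the race being avoided if the fence were
* signalled first:
*
*	__guc_signal_context_fence()
*	  -> blocked requests submitted and retired
*	    -> pin_count drops to zero while the context is still
*	       marked as enabled
*
* Doing the unpin first closes that window.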
4618 */ 4619 intel_context_sched_disable_unpin(ce); 4620 4621 spin_lock_irqsave(&ce->guc_state.lock, flags); 4622 banned = context_banned(ce); 4623 clr_context_banned(ce); 4624 clr_context_pending_disable(ce); 4625 __guc_signal_context_fence(ce); 4626 guc_blocked_fence_complete(ce); 4627 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4628 4629 if (banned) { 4630 guc_cancel_context_requests(ce); 4631 intel_engine_signal_breadcrumbs(ce->engine); 4632 } 4633 } 4634 4635 decr_outstanding_submission_g2h(guc); 4636 intel_context_put(ce); 4637 4638 return 0; 4639 } 4640 4641 static void capture_error_state(struct intel_guc *guc, 4642 struct intel_context *ce) 4643 { 4644 struct intel_gt *gt = guc_to_gt(guc); 4645 struct drm_i915_private *i915 = gt->i915; 4646 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4647 intel_wakeref_t wakeref; 4648 4649 intel_engine_set_hung_context(engine, ce); 4650 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4651 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4652 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4653 } 4654 4655 static void guc_context_replay(struct intel_context *ce) 4656 { 4657 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4658 4659 __guc_reset_context(ce, ce->engine->mask); 4660 tasklet_hi_schedule(&sched_engine->tasklet); 4661 } 4662 4663 static void guc_handle_context_reset(struct intel_guc *guc, 4664 struct intel_context *ce) 4665 { 4666 trace_intel_context_reset(ce); 4667 4668 if (likely(intel_context_is_schedulable(ce))) { 4669 capture_error_state(guc, ce); 4670 guc_context_replay(ce); 4671 } else { 4672 drm_info(&guc_to_gt(guc)->i915->drm, 4673 "Ignoring context reset notification of exiting context 0x%04X on %s", 4674 ce->guc_id.id, ce->engine->name); 4675 } 4676 } 4677 4678 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4679 const u32 *msg, u32 len) 4680 { 4681 struct intel_context *ce; 4682 unsigned long flags; 4683 int ctx_id; 4684 4685 if (unlikely(len != 1)) { 4686 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4687 return -EPROTO; 4688 } 4689 4690 ctx_id = msg[0]; 4691 4692 /* 4693 * The context lookup uses the xarray but lookups only require an RCU lock 4694 * not the full spinlock. So take the lock explicitly and keep it until the 4695 * context has been reference count locked to ensure it can't be destroyed 4696 * asynchronously until the reset is done. 
4697 */
4698 xa_lock_irqsave(&guc->context_lookup, flags);
4699 ce = g2h_context_lookup(guc, ctx_id);
4700 if (ce)
4701 intel_context_get(ce);
4702 xa_unlock_irqrestore(&guc->context_lookup, flags);
4703
4704 if (unlikely(!ce))
4705 return -EPROTO;
4706
4707 guc_handle_context_reset(guc, ce);
4708 intel_context_put(ce);
4709
4710 return 0;
4711 }
4712
4713 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4714 const u32 *msg, u32 len)
4715 {
4716 u32 status;
4717
4718 if (unlikely(len != 1)) {
4719 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4720 return -EPROTO;
4721 }
4722
4723 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4724 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4725 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4726
4727 intel_guc_capture_process(guc);
4728
4729 return 0;
4730 }
4731
4732 struct intel_engine_cs *
4733 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4734 {
4735 struct intel_gt *gt = guc_to_gt(guc);
4736 u8 engine_class = guc_class_to_engine_class(guc_class);
4737
4738 /* Class index is checked in class converter */
4739 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4740
4741 return gt->engine_class[engine_class][instance];
4742 }
4743
4744 static void reset_fail_worker_func(struct work_struct *w)
4745 {
4746 struct intel_guc *guc = container_of(w, struct intel_guc,
4747 submission_state.reset_fail_worker);
4748 struct intel_gt *gt = guc_to_gt(guc);
4749 intel_engine_mask_t reset_fail_mask;
4750 unsigned long flags;
4751
4752 spin_lock_irqsave(&guc->submission_state.lock, flags);
4753 reset_fail_mask = guc->submission_state.reset_fail_mask;
4754 guc->submission_state.reset_fail_mask = 0;
4755 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4756
4757 if (likely(reset_fail_mask))
4758 intel_gt_handle_error(gt, reset_fail_mask,
4759 I915_ERROR_CAPTURE,
4760 "GuC failed to reset engine mask=0x%x\n",
4761 reset_fail_mask);
4762 }
4763
4764 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4765 const u32 *msg, u32 len)
4766 {
4767 struct intel_engine_cs *engine;
4768 struct intel_gt *gt = guc_to_gt(guc);
4769 u8 guc_class, instance;
4770 u32 reason;
4771 unsigned long flags;
4772
4773 if (unlikely(len != 3)) {
4774 drm_err(&gt->i915->drm, "Invalid length %u", len);
4775 return -EPROTO;
4776 }
4777
4778 guc_class = msg[0];
4779 instance = msg[1];
4780 reason = msg[2];
4781
4782 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4783 if (unlikely(!engine)) {
4784 drm_err(&gt->i915->drm,
4785 "Invalid engine %d:%d", guc_class, instance);
4786 return -EPROTO;
4787 }
4788
4789 /*
4790 * This is an unexpected failure of a hardware feature. So, log a real
4791 * error message, not just the informational one that comes with the reset.
4792 */
4793 drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4794 guc_class, instance, engine->name, reason);
4795
4796 spin_lock_irqsave(&guc->submission_state.lock, flags);
4797 guc->submission_state.reset_fail_mask |= engine->mask;
4798 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4799
4800 /*
4801 * A GT reset flushes this worker queue (G2H handler) so we must use
4802 * another worker to trigger a GT reset.
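* i.e. calling intel_gt_handle_error() directly from this handler would
* risk deadlocking against that flush, which is why reset_fail_worker_func()
* above issues it from the work item queued on system_unbound_wq below.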
4803 */ 4804 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4805 4806 return 0; 4807 } 4808 4809 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4810 { 4811 struct intel_guc *guc = &engine->gt->uc.guc; 4812 struct intel_context *ce; 4813 struct i915_request *rq; 4814 unsigned long index; 4815 unsigned long flags; 4816 4817 /* Reset called during driver load? GuC not yet initialised! */ 4818 if (unlikely(!guc_submission_initialized(guc))) 4819 return; 4820 4821 xa_lock_irqsave(&guc->context_lookup, flags); 4822 xa_for_each(&guc->context_lookup, index, ce) { 4823 if (!kref_get_unless_zero(&ce->ref)) 4824 continue; 4825 4826 xa_unlock(&guc->context_lookup); 4827 4828 if (!intel_context_is_pinned(ce)) 4829 goto next; 4830 4831 if (intel_engine_is_virtual(ce->engine)) { 4832 if (!(ce->engine->mask & engine->mask)) 4833 goto next; 4834 } else { 4835 if (ce->engine != engine) 4836 goto next; 4837 } 4838 4839 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4840 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4841 continue; 4842 4843 intel_engine_set_hung_context(engine, ce); 4844 4845 /* Can only cope with one hang at a time... */ 4846 intel_context_put(ce); 4847 xa_lock(&guc->context_lookup); 4848 goto done; 4849 } 4850 next: 4851 intel_context_put(ce); 4852 xa_lock(&guc->context_lookup); 4853 } 4854 done: 4855 xa_unlock_irqrestore(&guc->context_lookup, flags); 4856 } 4857 4858 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4859 struct i915_request *hung_rq, 4860 struct drm_printer *m) 4861 { 4862 struct intel_guc *guc = &engine->gt->uc.guc; 4863 struct intel_context *ce; 4864 unsigned long index; 4865 unsigned long flags; 4866 4867 /* Reset called during driver load? GuC not yet initialised! 
*/ 4868 if (unlikely(!guc_submission_initialized(guc))) 4869 return; 4870 4871 xa_lock_irqsave(&guc->context_lookup, flags); 4872 xa_for_each(&guc->context_lookup, index, ce) { 4873 if (!kref_get_unless_zero(&ce->ref)) 4874 continue; 4875 4876 xa_unlock(&guc->context_lookup); 4877 4878 if (!intel_context_is_pinned(ce)) 4879 goto next; 4880 4881 if (intel_engine_is_virtual(ce->engine)) { 4882 if (!(ce->engine->mask & engine->mask)) 4883 goto next; 4884 } else { 4885 if (ce->engine != engine) 4886 goto next; 4887 } 4888 4889 spin_lock(&ce->guc_state.lock); 4890 intel_engine_dump_active_requests(&ce->guc_state.requests, 4891 hung_rq, m); 4892 spin_unlock(&ce->guc_state.lock); 4893 4894 next: 4895 intel_context_put(ce); 4896 xa_lock(&guc->context_lookup); 4897 } 4898 xa_unlock_irqrestore(&guc->context_lookup, flags); 4899 } 4900 4901 void intel_guc_submission_print_info(struct intel_guc *guc, 4902 struct drm_printer *p) 4903 { 4904 struct i915_sched_engine *sched_engine = guc->sched_engine; 4905 struct rb_node *rb; 4906 unsigned long flags; 4907 4908 if (!sched_engine) 4909 return; 4910 4911 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 4912 guc->submission_version.major, guc->submission_version.minor, 4913 guc->submission_version.patch); 4914 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4915 atomic_read(&guc->outstanding_submission_g2h)); 4916 drm_printf(p, "GuC tasklet count: %u\n", 4917 atomic_read(&sched_engine->tasklet.count)); 4918 4919 spin_lock_irqsave(&sched_engine->lock, flags); 4920 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4921 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4922 struct i915_priolist *pl = to_priolist(rb); 4923 struct i915_request *rq; 4924 4925 priolist_for_each_request(rq, pl) 4926 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4927 rq->context->guc_id.id, 4928 rq->fence.seqno); 4929 } 4930 spin_unlock_irqrestore(&sched_engine->lock, flags); 4931 drm_printf(p, "\n"); 4932 } 4933 4934 static inline void guc_log_context_priority(struct drm_printer *p, 4935 struct intel_context *ce) 4936 { 4937 int i; 4938 4939 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4940 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4941 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4942 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4943 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4944 i, ce->guc_state.prio_count[i]); 4945 } 4946 drm_printf(p, "\n"); 4947 } 4948 4949 static inline void guc_log_context(struct drm_printer *p, 4950 struct intel_context *ce) 4951 { 4952 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4953 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4954 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4955 ce->ring->head, 4956 ce->lrc_reg_state[CTX_RING_HEAD]); 4957 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4958 ce->ring->tail, 4959 ce->lrc_reg_state[CTX_RING_TAIL]); 4960 drm_printf(p, "\t\tContext Pin Count: %u\n", 4961 atomic_read(&ce->pin_count)); 4962 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4963 atomic_read(&ce->guc_id.ref)); 4964 drm_printf(p, "\t\tSchedule State: 0x%x\n", 4965 ce->guc_state.sched_state); 4966 } 4967 4968 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4969 struct drm_printer *p) 4970 { 4971 struct intel_context *ce; 4972 unsigned long index; 4973 unsigned long flags; 4974 4975 xa_lock_irqsave(&guc->context_lookup, flags); 4976 xa_for_each(&guc->context_lookup, index, ce) { 4977 
GEM_BUG_ON(intel_context_is_child(ce)); 4978 4979 guc_log_context(p, ce); 4980 guc_log_context_priority(p, ce); 4981 4982 if (intel_context_is_parent(ce)) { 4983 struct intel_context *child; 4984 4985 drm_printf(p, "\t\tNumber children: %u\n", 4986 ce->parallel.number_children); 4987 4988 if (ce->parallel.guc.wq_status) { 4989 drm_printf(p, "\t\tWQI Head: %u\n", 4990 READ_ONCE(*ce->parallel.guc.wq_head)); 4991 drm_printf(p, "\t\tWQI Tail: %u\n", 4992 READ_ONCE(*ce->parallel.guc.wq_tail)); 4993 drm_printf(p, "\t\tWQI Status: %u\n", 4994 READ_ONCE(*ce->parallel.guc.wq_status)); 4995 } 4996 4997 if (ce->engine->emit_bb_start == 4998 emit_bb_start_parent_no_preempt_mid_batch) { 4999 u8 i; 5000 5001 drm_printf(p, "\t\tChildren Go: %u\n", 5002 get_children_go_value(ce)); 5003 for (i = 0; i < ce->parallel.number_children; ++i) 5004 drm_printf(p, "\t\tChildren Join: %u\n", 5005 get_children_join_value(ce, i)); 5006 } 5007 5008 for_each_child(ce, child) 5009 guc_log_context(p, child); 5010 } 5011 } 5012 xa_unlock_irqrestore(&guc->context_lookup, flags); 5013 } 5014 5015 static inline u32 get_children_go_addr(struct intel_context *ce) 5016 { 5017 GEM_BUG_ON(!intel_context_is_parent(ce)); 5018 5019 return i915_ggtt_offset(ce->state) + 5020 __get_parent_scratch_offset(ce) + 5021 offsetof(struct parent_scratch, go.semaphore); 5022 } 5023 5024 static inline u32 get_children_join_addr(struct intel_context *ce, 5025 u8 child_index) 5026 { 5027 GEM_BUG_ON(!intel_context_is_parent(ce)); 5028 5029 return i915_ggtt_offset(ce->state) + 5030 __get_parent_scratch_offset(ce) + 5031 offsetof(struct parent_scratch, join[child_index].semaphore); 5032 } 5033 5034 #define PARENT_GO_BB 1 5035 #define PARENT_GO_FINI_BREADCRUMB 0 5036 #define CHILD_GO_BB 1 5037 #define CHILD_GO_FINI_BREADCRUMB 0 5038 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5039 u64 offset, u32 len, 5040 const unsigned int flags) 5041 { 5042 struct intel_context *ce = rq->context; 5043 u32 *cs; 5044 u8 i; 5045 5046 GEM_BUG_ON(!intel_context_is_parent(ce)); 5047 5048 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5049 if (IS_ERR(cs)) 5050 return PTR_ERR(cs); 5051 5052 /* Wait on children */ 5053 for (i = 0; i < ce->parallel.number_children; ++i) { 5054 *cs++ = (MI_SEMAPHORE_WAIT | 5055 MI_SEMAPHORE_GLOBAL_GTT | 5056 MI_SEMAPHORE_POLL | 5057 MI_SEMAPHORE_SAD_EQ_SDD); 5058 *cs++ = PARENT_GO_BB; 5059 *cs++ = get_children_join_addr(ce, i); 5060 *cs++ = 0; 5061 } 5062 5063 /* Turn off preemption */ 5064 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5065 *cs++ = MI_NOOP; 5066 5067 /* Tell children go */ 5068 cs = gen8_emit_ggtt_write(cs, 5069 CHILD_GO_BB, 5070 get_children_go_addr(ce), 5071 0); 5072 5073 /* Jump to batch */ 5074 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5075 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8));
5076 *cs++ = lower_32_bits(offset);
5077 *cs++ = upper_32_bits(offset);
5078 *cs++ = MI_NOOP;
5079
5080 intel_ring_advance(rq, cs);
5081
5082 return 0;
5083 }
5084
5085 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5086 u64 offset, u32 len,
5087 const unsigned int flags)
5088 {
5089 struct intel_context *ce = rq->context;
5090 struct intel_context *parent = intel_context_to_parent(ce);
5091 u32 *cs;
5092
5093 GEM_BUG_ON(!intel_context_is_child(ce));
5094
5095 cs = intel_ring_begin(rq, 12);
5096 if (IS_ERR(cs))
5097 return PTR_ERR(cs);
5098
5099 /* Signal parent */
5100 cs = gen8_emit_ggtt_write(cs,
5101 PARENT_GO_BB,
5102 get_children_join_addr(parent,
5103 ce->parallel.child_index),
5104 0);
5105
5106 /* Wait on parent for go */
5107 *cs++ = (MI_SEMAPHORE_WAIT |
5108 MI_SEMAPHORE_GLOBAL_GTT |
5109 MI_SEMAPHORE_POLL |
5110 MI_SEMAPHORE_SAD_EQ_SDD);
5111 *cs++ = CHILD_GO_BB;
5112 *cs++ = get_children_go_addr(parent);
5113 *cs++ = 0;
5114
5115 /* Turn off preemption */
5116 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5117
5118 /* Jump to batch */
5119 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5120 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5121 *cs++ = lower_32_bits(offset);
5122 *cs++ = upper_32_bits(offset);
5123
5124 intel_ring_advance(rq, cs);
5125
5126 return 0;
5127 }
5128
5129 static u32 *
5130 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5131 u32 *cs)
5132 {
5133 struct intel_context *ce = rq->context;
5134 u8 i;
5135
5136 GEM_BUG_ON(!intel_context_is_parent(ce));
5137
5138 /* Wait on children */
5139 for (i = 0; i < ce->parallel.number_children; ++i) {
5140 *cs++ = (MI_SEMAPHORE_WAIT |
5141 MI_SEMAPHORE_GLOBAL_GTT |
5142 MI_SEMAPHORE_POLL |
5143 MI_SEMAPHORE_SAD_EQ_SDD);
5144 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5145 *cs++ = get_children_join_addr(ce, i);
5146 *cs++ = 0;
5147 }
5148
5149 /* Turn on preemption */
5150 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5151 *cs++ = MI_NOOP;
5152
5153 /* Tell children go */
5154 cs = gen8_emit_ggtt_write(cs,
5155 CHILD_GO_FINI_BREADCRUMB,
5156 get_children_go_addr(ce),
5157 0);
5158
5159 return cs;
5160 }
5161
5162 /*
5163 * If this is true, a submission of multi-lrc requests had an error and the
5164 * requests need to be skipped. The front end (execbuf IOCTL) should've called
5165 * i915_request_skip, which squashes the BB, but we still need to emit the fini
5166 * breadcrumb seqno write. At this point we don't know how many of the
5167 * requests in the multi-lrc submission were generated so we can't do the
5168 * handshake between the parent and children (e.g. if 4 requests should be
5169 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
5170 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5171 * has occurred on any of the requests in the submission / relationship.
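*
* For reference, the dword accounting behind the skip below (counting
* gen8_emit_ggtt_write() as 4 dwords, which the NON_SKIP_LEN == 6 checks
* below rely on):
*
*	parent: emit_fini_breadcrumb_dw = 12 + 4 * number_children
*		= 4 * children (semaphore waits) + 2 (arb enable + noop)
*		  + 4 (go write) + NON_SKIP_LEN (seqno write + interrupt + noop)
*
* so the memset in the skip path NOPs exactly the handshake portion.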
5172 */ 5173 static inline bool skip_handshake(struct i915_request *rq) 5174 { 5175 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5176 } 5177 5178 #define NON_SKIP_LEN 6 5179 static u32 * 5180 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5181 u32 *cs) 5182 { 5183 struct intel_context *ce = rq->context; 5184 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5185 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5186 5187 GEM_BUG_ON(!intel_context_is_parent(ce)); 5188 5189 if (unlikely(skip_handshake(rq))) { 5190 /* 5191 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5192 * the NON_SKIP_LEN comes from the length of the emits below. 5193 */ 5194 memset(cs, 0, sizeof(u32) * 5195 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5196 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5197 } else { 5198 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5199 } 5200 5201 /* Emit fini breadcrumb */ 5202 before_fini_breadcrumb_user_interrupt_cs = cs; 5203 cs = gen8_emit_ggtt_write(cs, 5204 rq->fence.seqno, 5205 i915_request_active_timeline(rq)->hwsp_offset, 5206 0); 5207 5208 /* User interrupt */ 5209 *cs++ = MI_USER_INTERRUPT; 5210 *cs++ = MI_NOOP; 5211 5212 /* Ensure our math for skip + emit is correct */ 5213 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5214 cs); 5215 GEM_BUG_ON(start_fini_breadcrumb_cs + 5216 ce->engine->emit_fini_breadcrumb_dw != cs); 5217 5218 rq->tail = intel_ring_offset(rq, cs); 5219 5220 return cs; 5221 } 5222 5223 static u32 * 5224 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5225 u32 *cs) 5226 { 5227 struct intel_context *ce = rq->context; 5228 struct intel_context *parent = intel_context_to_parent(ce); 5229 5230 GEM_BUG_ON(!intel_context_is_child(ce)); 5231 5232 /* Turn on preemption */ 5233 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5234 *cs++ = MI_NOOP; 5235 5236 /* Signal parent */ 5237 cs = gen8_emit_ggtt_write(cs, 5238 PARENT_GO_FINI_BREADCRUMB, 5239 get_children_join_addr(parent, 5240 ce->parallel.child_index), 5241 0); 5242 5243 /* Wait parent on for go */ 5244 *cs++ = (MI_SEMAPHORE_WAIT | 5245 MI_SEMAPHORE_GLOBAL_GTT | 5246 MI_SEMAPHORE_POLL | 5247 MI_SEMAPHORE_SAD_EQ_SDD); 5248 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5249 *cs++ = get_children_go_addr(parent); 5250 *cs++ = 0; 5251 5252 return cs; 5253 } 5254 5255 static u32 * 5256 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5257 u32 *cs) 5258 { 5259 struct intel_context *ce = rq->context; 5260 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5261 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5262 5263 GEM_BUG_ON(!intel_context_is_child(ce)); 5264 5265 if (unlikely(skip_handshake(rq))) { 5266 /* 5267 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5268 * the NON_SKIP_LEN comes from the length of the emits below. 
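*
* For the child this works out as 16 - NON_SKIP_LEN = 10 dwords:
* 2 (arb enable + noop) + 4 (join write) + 4 (semaphore wait), i.e.
* exactly what __emit_fini_breadcrumb_child_no_preempt_mid_batch emits.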
5269 */ 5270 memset(cs, 0, sizeof(u32) * 5271 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5272 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5273 } else { 5274 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5275 } 5276 5277 /* Emit fini breadcrumb */ 5278 before_fini_breadcrumb_user_interrupt_cs = cs; 5279 cs = gen8_emit_ggtt_write(cs, 5280 rq->fence.seqno, 5281 i915_request_active_timeline(rq)->hwsp_offset, 5282 0); 5283 5284 /* User interrupt */ 5285 *cs++ = MI_USER_INTERRUPT; 5286 *cs++ = MI_NOOP; 5287 5288 /* Ensure our math for skip + emit is correct */ 5289 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5290 cs); 5291 GEM_BUG_ON(start_fini_breadcrumb_cs + 5292 ce->engine->emit_fini_breadcrumb_dw != cs); 5293 5294 rq->tail = intel_ring_offset(rq, cs); 5295 5296 return cs; 5297 } 5298 5299 #undef NON_SKIP_LEN 5300 5301 static struct intel_context * 5302 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5303 unsigned long flags) 5304 { 5305 struct guc_virtual_engine *ve; 5306 struct intel_guc *guc; 5307 unsigned int n; 5308 int err; 5309 5310 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5311 if (!ve) 5312 return ERR_PTR(-ENOMEM); 5313 5314 guc = &siblings[0]->gt->uc.guc; 5315 5316 ve->base.i915 = siblings[0]->i915; 5317 ve->base.gt = siblings[0]->gt; 5318 ve->base.uncore = siblings[0]->uncore; 5319 ve->base.id = -1; 5320 5321 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5322 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5323 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5324 ve->base.saturated = ALL_ENGINES; 5325 5326 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5327 5328 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5329 5330 ve->base.cops = &virtual_guc_context_ops; 5331 ve->base.request_alloc = guc_request_alloc; 5332 ve->base.bump_serial = virtual_guc_bump_serial; 5333 5334 ve->base.submit_request = guc_submit_request; 5335 5336 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5337 5338 intel_context_init(&ve->context, &ve->base); 5339 5340 for (n = 0; n < count; n++) { 5341 struct intel_engine_cs *sibling = siblings[n]; 5342 5343 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5344 if (sibling->mask & ve->base.mask) { 5345 DRM_DEBUG("duplicate %s entry in load balancer\n", 5346 sibling->name); 5347 err = -EINVAL; 5348 goto err_put; 5349 } 5350 5351 ve->base.mask |= sibling->mask; 5352 ve->base.logical_mask |= sibling->logical_mask; 5353 5354 if (n != 0 && ve->base.class != sibling->class) { 5355 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5356 sibling->class, ve->base.class); 5357 err = -EINVAL; 5358 goto err_put; 5359 } else if (n == 0) { 5360 ve->base.class = sibling->class; 5361 ve->base.uabi_class = sibling->uabi_class; 5362 snprintf(ve->base.name, sizeof(ve->base.name), 5363 "v%dx%d", ve->base.class, count); 5364 ve->base.context_size = sibling->context_size; 5365 5366 ve->base.add_active_request = 5367 sibling->add_active_request; 5368 ve->base.remove_active_request = 5369 sibling->remove_active_request; 5370 ve->base.emit_bb_start = sibling->emit_bb_start; 5371 ve->base.emit_flush = sibling->emit_flush; 5372 ve->base.emit_init_breadcrumb = 5373 sibling->emit_init_breadcrumb; 5374 ve->base.emit_fini_breadcrumb = 5375 sibling->emit_fini_breadcrumb; 5376 ve->base.emit_fini_breadcrumb_dw = 5377 sibling->emit_fini_breadcrumb_dw; 5378 ve->base.breadcrumbs = 5379 intel_breadcrumbs_get(sibling->breadcrumbs); 5380 5381 
ve->base.flags |= sibling->flags; 5382 5383 ve->base.props.timeslice_duration_ms = 5384 sibling->props.timeslice_duration_ms; 5385 ve->base.props.preempt_timeout_ms = 5386 sibling->props.preempt_timeout_ms; 5387 } 5388 } 5389 5390 return &ve->context; 5391 5392 err_put: 5393 intel_context_put(&ve->context); 5394 return ERR_PTR(err); 5395 } 5396 5397 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5398 { 5399 struct intel_engine_cs *engine; 5400 intel_engine_mask_t tmp, mask = ve->mask; 5401 5402 for_each_engine_masked(engine, ve->gt, mask, tmp) 5403 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5404 return true; 5405 5406 return false; 5407 } 5408 5409 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5410 #include "selftest_guc.c" 5411 #include "selftest_guc_multi_lrc.c" 5412 #include "selftest_guc_hangcheck.c" 5413 #endif 5414