// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * The firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls, waiting for this update,
 * and then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and the GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
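 *
 * As an illustration (a simplified sketch of the H2G payload built by
 * __guc_add_request(), not the literal CTB wire format), the first
 * submission on a disabled context and a follow up submission look
 * roughly like:
 *
 *	action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		     ce->guc_id.id, GUC_CONTEXT_ENABLE };	(first)
 *	action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT,
 *		     ce->guc_id.id };				(follow up)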
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed, or if we steal its guc_id, we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes, so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and the list of
 * destroyed contexts.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects the list of inflight
 * requests on the context and the priority management state. The lock is
 * individual to each context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
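 *
 * As a sketch, mirroring what __unwind_incomplete_requests() below does,
 * code that walks a context's request list while manipulating the
 * sched_engine follows the first rule:
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	... walk or requeue ce->guc_state.requests ...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);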
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racey.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}
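
/*
 * A minimal usage sketch (not lifted from any one caller): queries and
 * updates of ce->guc_state.sched_state always happen under
 * ce->guc_state.lock, e.g.
 *
 *	spin_lock_irqsave(&ce->guc_state.lock, flags);
 *	if (!context_pending_disable(ce))
 *		set_context_pending_disable(ce);
 *	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 */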

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
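
/*
 * Worked example of the blocked count kept in the upper sched_state bits
 * (values are illustrative only): after two nested blocks and with no other
 * flags set, sched_state == 2 << SCHED_STATE_BLOCKED_SHIFT, i.e.
 * context_blocked(ce) == 2; each unblock subtracts SCHED_STATE_BLOCKED until
 * the count returns to zero.
 */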

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of the head pointer
	 * in the intel_context structure in order to reduce the number of
	 * accesses to shared GPU memory which may be across a PCIe bus.
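	 *
	 * As a worked example (numbers for illustration only): with a 4 KiB
	 * work queue, wqi_tail == 0xc00 and wqi_head == 0x400, the available
	 * space is
	 *
	 *	CIRC_SPACE(0xc00, 0x400, 0x1000) == (0x400 - 0xc00 - 1) & 0xfff
	 *					 == 0x7ff
	 *
	 * bytes, i.e. one byte is always left unused so that a full queue can
	 * be distinguished from an empty one.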
492 */ 493 #define AVAILABLE_SPACE \ 494 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 495 if (wqi_size > AVAILABLE_SPACE) { 496 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 497 498 if (wqi_size > AVAILABLE_SPACE) 499 return NULL; 500 } 501 #undef AVAILABLE_SPACE 502 503 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 504 } 505 506 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 507 { 508 struct intel_context *ce = xa_load(&guc->context_lookup, id); 509 510 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 511 512 return ce; 513 } 514 515 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 516 { 517 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 518 519 if (!base) 520 return NULL; 521 522 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 523 524 return &base[index]; 525 } 526 527 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 528 { 529 u32 size; 530 int ret; 531 532 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 533 GUC_MAX_CONTEXT_ID); 534 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 535 (void **)&guc->lrc_desc_pool_vaddr_v69); 536 if (ret) 537 return ret; 538 539 return 0; 540 } 541 542 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 543 { 544 if (!guc->lrc_desc_pool_vaddr_v69) 545 return; 546 547 guc->lrc_desc_pool_vaddr_v69 = NULL; 548 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 549 } 550 551 static inline bool guc_submission_initialized(struct intel_guc *guc) 552 { 553 return guc->submission_initialized; 554 } 555 556 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 557 { 558 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 559 560 if (desc) 561 memset(desc, 0, sizeof(*desc)); 562 } 563 564 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 565 { 566 return __get_context(guc, id); 567 } 568 569 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 570 struct intel_context *ce) 571 { 572 unsigned long flags; 573 574 /* 575 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 576 * lower level functions directly. 577 */ 578 xa_lock_irqsave(&guc->context_lookup, flags); 579 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 580 xa_unlock_irqrestore(&guc->context_lookup, flags); 581 } 582 583 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 584 { 585 unsigned long flags; 586 587 if (unlikely(!guc_submission_initialized(guc))) 588 return; 589 590 _reset_lrc_desc_v69(guc, id); 591 592 /* 593 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 594 * the lower level functions directly. 595 */ 596 xa_lock_irqsave(&guc->context_lookup, flags); 597 __xa_erase(&guc->context_lookup, id); 598 xa_unlock_irqrestore(&guc->context_lookup, flags); 599 } 600 601 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 602 { 603 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 604 wake_up_all(&guc->ct.wq); 605 } 606 607 static int guc_submission_send_busy_loop(struct intel_guc *guc, 608 const u32 *action, 609 u32 len, 610 u32 g2h_len_dw, 611 bool loop) 612 { 613 /* 614 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 615 * so we don't handle the case where we don't get a reply because we 616 * aborted the send due to the channel being busy. 
617 */ 618 GEM_BUG_ON(g2h_len_dw && !loop); 619 620 if (g2h_len_dw) 621 atomic_inc(&guc->outstanding_submission_g2h); 622 623 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 624 } 625 626 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 627 atomic_t *wait_var, 628 bool interruptible, 629 long timeout) 630 { 631 const int state = interruptible ? 632 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 633 DEFINE_WAIT(wait); 634 635 might_sleep(); 636 GEM_BUG_ON(timeout < 0); 637 638 if (!atomic_read(wait_var)) 639 return 0; 640 641 if (!timeout) 642 return -ETIME; 643 644 for (;;) { 645 prepare_to_wait(&guc->ct.wq, &wait, state); 646 647 if (!atomic_read(wait_var)) 648 break; 649 650 if (signal_pending_state(state, current)) { 651 timeout = -EINTR; 652 break; 653 } 654 655 if (!timeout) { 656 timeout = -ETIME; 657 break; 658 } 659 660 timeout = io_schedule_timeout(timeout); 661 } 662 finish_wait(&guc->ct.wq, &wait); 663 664 return (timeout < 0) ? timeout : 0; 665 } 666 667 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 668 { 669 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 670 return 0; 671 672 return intel_guc_wait_for_pending_msg(guc, 673 &guc->outstanding_submission_g2h, 674 true, timeout); 675 } 676 677 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 678 static int try_context_registration(struct intel_context *ce, bool loop); 679 680 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 681 { 682 int err = 0; 683 struct intel_context *ce = request_to_scheduling_context(rq); 684 u32 action[3]; 685 int len = 0; 686 u32 g2h_len_dw = 0; 687 bool enabled; 688 689 lockdep_assert_held(&rq->engine->sched_engine->lock); 690 691 /* 692 * Corner case where requests were sitting in the priority list or a 693 * request resubmitted after the context was banned. 694 */ 695 if (unlikely(!intel_context_is_schedulable(ce))) { 696 i915_request_put(i915_request_mark_eio(rq)); 697 intel_engine_signal_breadcrumbs(ce->engine); 698 return 0; 699 } 700 701 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 702 GEM_BUG_ON(context_guc_id_invalid(ce)); 703 704 if (context_policy_required(ce)) { 705 err = guc_context_policy_init_v70(ce, false); 706 if (err) 707 return err; 708 } 709 710 spin_lock(&ce->guc_state.lock); 711 712 /* 713 * The request / context will be run on the hardware when scheduling 714 * gets enabled in the unblock. For multi-lrc we still submit the 715 * context to move the LRC tails. 716 */ 717 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 718 goto out; 719 720 enabled = context_enabled(ce) || context_blocked(ce); 721 722 if (!enabled) { 723 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 724 action[len++] = ce->guc_id.id; 725 action[len++] = GUC_CONTEXT_ENABLE; 726 set_context_pending_enable(ce); 727 intel_context_get(ce); 728 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 729 } else { 730 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 731 action[len++] = ce->guc_id.id; 732 } 733 734 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 735 if (!enabled && !err) { 736 trace_intel_context_sched_enable(ce); 737 atomic_inc(&guc->outstanding_submission_g2h); 738 set_context_enabled(ce); 739 740 /* 741 * Without multi-lrc KMD does the submission step (moving the 742 * lrc tail) so enabling scheduling is sufficient to submit the 743 * context. 
This isn't the case in multi-lrc submission as the 744 * GuC needs to move the tails, hence the need for another H2G 745 * to submit a multi-lrc context after enabling scheduling. 746 */ 747 if (intel_context_is_parent(ce)) { 748 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 749 err = intel_guc_send_nb(guc, action, len - 1, 0); 750 } 751 } else if (!enabled) { 752 clr_context_pending_enable(ce); 753 intel_context_put(ce); 754 } 755 if (likely(!err)) 756 trace_i915_request_guc_submit(rq); 757 758 out: 759 spin_unlock(&ce->guc_state.lock); 760 return err; 761 } 762 763 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 764 { 765 int ret = __guc_add_request(guc, rq); 766 767 if (unlikely(ret == -EBUSY)) { 768 guc->stalled_request = rq; 769 guc->submission_stall_reason = STALL_ADD_REQUEST; 770 } 771 772 return ret; 773 } 774 775 static inline void guc_set_lrc_tail(struct i915_request *rq) 776 { 777 rq->context->lrc_reg_state[CTX_RING_TAIL] = 778 intel_ring_set_tail(rq->ring, rq->tail); 779 } 780 781 static inline int rq_prio(const struct i915_request *rq) 782 { 783 return rq->sched.attr.priority; 784 } 785 786 static bool is_multi_lrc_rq(struct i915_request *rq) 787 { 788 return intel_context_is_parallel(rq->context); 789 } 790 791 static bool can_merge_rq(struct i915_request *rq, 792 struct i915_request *last) 793 { 794 return request_to_scheduling_context(rq) == 795 request_to_scheduling_context(last); 796 } 797 798 static u32 wq_space_until_wrap(struct intel_context *ce) 799 { 800 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 801 } 802 803 static void write_wqi(struct intel_context *ce, u32 wqi_size) 804 { 805 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 806 807 /* 808 * Ensure WQI are visible before updating tail 809 */ 810 intel_guc_write_barrier(ce_to_guc(ce)); 811 812 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 813 (WQ_SIZE - 1); 814 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 815 } 816 817 static int guc_wq_noop_append(struct intel_context *ce) 818 { 819 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 820 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 821 822 if (!wqi) 823 return -EBUSY; 824 825 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 826 827 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 828 FIELD_PREP(WQ_LEN_MASK, len_dw); 829 ce->parallel.guc.wqi_tail = 0; 830 831 return 0; 832 } 833 834 static int __guc_wq_item_append(struct i915_request *rq) 835 { 836 struct intel_context *ce = request_to_scheduling_context(rq); 837 struct intel_context *child; 838 unsigned int wqi_size = (ce->parallel.number_children + 4) * 839 sizeof(u32); 840 u32 *wqi; 841 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 842 int ret; 843 844 /* Ensure context is in correct state updating work queue */ 845 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 846 GEM_BUG_ON(context_guc_id_invalid(ce)); 847 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 848 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 849 850 /* Insert NOOP if this work queue item will wrap the tail pointer. 
*/ 851 if (wqi_size > wq_space_until_wrap(ce)) { 852 ret = guc_wq_noop_append(ce); 853 if (ret) 854 return ret; 855 } 856 857 wqi = get_wq_pointer(ce, wqi_size); 858 if (!wqi) 859 return -EBUSY; 860 861 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 862 863 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 864 FIELD_PREP(WQ_LEN_MASK, len_dw); 865 *wqi++ = ce->lrc.lrca; 866 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 867 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 868 *wqi++ = 0; /* fence_id */ 869 for_each_child(ce, child) 870 *wqi++ = child->ring->tail / sizeof(u64); 871 872 write_wqi(ce, wqi_size); 873 874 return 0; 875 } 876 877 static int guc_wq_item_append(struct intel_guc *guc, 878 struct i915_request *rq) 879 { 880 struct intel_context *ce = request_to_scheduling_context(rq); 881 int ret; 882 883 if (unlikely(!intel_context_is_schedulable(ce))) 884 return 0; 885 886 ret = __guc_wq_item_append(rq); 887 if (unlikely(ret == -EBUSY)) { 888 guc->stalled_request = rq; 889 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 890 } 891 892 return ret; 893 } 894 895 static bool multi_lrc_submit(struct i915_request *rq) 896 { 897 struct intel_context *ce = request_to_scheduling_context(rq); 898 899 intel_ring_set_tail(rq->ring, rq->tail); 900 901 /* 902 * We expect the front end (execbuf IOCTL) to set this flag on the last 903 * request generated from a multi-BB submission. This indicates to the 904 * backend (GuC interface) that we should submit this context thus 905 * submitting all the requests generated in parallel. 906 */ 907 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 908 !intel_context_is_schedulable(ce); 909 } 910 911 static int guc_dequeue_one_context(struct intel_guc *guc) 912 { 913 struct i915_sched_engine * const sched_engine = guc->sched_engine; 914 struct i915_request *last = NULL; 915 bool submit = false; 916 struct rb_node *rb; 917 int ret; 918 919 lockdep_assert_held(&sched_engine->lock); 920 921 if (guc->stalled_request) { 922 submit = true; 923 last = guc->stalled_request; 924 925 switch (guc->submission_stall_reason) { 926 case STALL_REGISTER_CONTEXT: 927 goto register_context; 928 case STALL_MOVE_LRC_TAIL: 929 goto move_lrc_tail; 930 case STALL_ADD_REQUEST: 931 goto add_request; 932 default: 933 MISSING_CASE(guc->submission_stall_reason); 934 } 935 } 936 937 while ((rb = rb_first_cached(&sched_engine->queue))) { 938 struct i915_priolist *p = to_priolist(rb); 939 struct i915_request *rq, *rn; 940 941 priolist_for_each_request_consume(rq, rn, p) { 942 if (last && !can_merge_rq(rq, last)) 943 goto register_context; 944 945 list_del_init(&rq->sched.link); 946 947 __i915_request_submit(rq); 948 949 trace_i915_request_in(rq, 0); 950 last = rq; 951 952 if (is_multi_lrc_rq(rq)) { 953 /* 954 * We need to coalesce all multi-lrc requests in 955 * a relationship into a single H2G. We are 956 * guaranteed that all of these requests will be 957 * submitted sequentially. 
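				 *
				 * Sketch of the expected flow for a three-wide
				 * parallel submit (illustrative only): rq0 and
				 * rq1 only get __i915_request_submit()'d here,
				 * while rq2 carries
				 * I915_FENCE_FLAG_SUBMIT_PARALLEL so
				 * multi_lrc_submit() returns true for it and a
				 * single work queue item plus H2G then submits
				 * the whole relationship at once.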
958 */ 959 if (multi_lrc_submit(rq)) { 960 submit = true; 961 goto register_context; 962 } 963 } else { 964 submit = true; 965 } 966 } 967 968 rb_erase_cached(&p->node, &sched_engine->queue); 969 i915_priolist_free(p); 970 } 971 972 register_context: 973 if (submit) { 974 struct intel_context *ce = request_to_scheduling_context(last); 975 976 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 977 intel_context_is_schedulable(ce))) { 978 ret = try_context_registration(ce, false); 979 if (unlikely(ret == -EPIPE)) { 980 goto deadlk; 981 } else if (ret == -EBUSY) { 982 guc->stalled_request = last; 983 guc->submission_stall_reason = 984 STALL_REGISTER_CONTEXT; 985 goto schedule_tasklet; 986 } else if (ret != 0) { 987 GEM_WARN_ON(ret); /* Unexpected */ 988 goto deadlk; 989 } 990 } 991 992 move_lrc_tail: 993 if (is_multi_lrc_rq(last)) { 994 ret = guc_wq_item_append(guc, last); 995 if (ret == -EBUSY) { 996 goto schedule_tasklet; 997 } else if (ret != 0) { 998 GEM_WARN_ON(ret); /* Unexpected */ 999 goto deadlk; 1000 } 1001 } else { 1002 guc_set_lrc_tail(last); 1003 } 1004 1005 add_request: 1006 ret = guc_add_request(guc, last); 1007 if (unlikely(ret == -EPIPE)) { 1008 goto deadlk; 1009 } else if (ret == -EBUSY) { 1010 goto schedule_tasklet; 1011 } else if (ret != 0) { 1012 GEM_WARN_ON(ret); /* Unexpected */ 1013 goto deadlk; 1014 } 1015 } 1016 1017 guc->stalled_request = NULL; 1018 guc->submission_stall_reason = STALL_NONE; 1019 return submit; 1020 1021 deadlk: 1022 sched_engine->tasklet.callback = NULL; 1023 tasklet_disable_nosync(&sched_engine->tasklet); 1024 return false; 1025 1026 schedule_tasklet: 1027 tasklet_schedule(&sched_engine->tasklet); 1028 return false; 1029 } 1030 1031 static void guc_submission_tasklet(struct tasklet_struct *t) 1032 { 1033 struct i915_sched_engine *sched_engine = 1034 from_tasklet(sched_engine, t, tasklet); 1035 unsigned long flags; 1036 bool loop; 1037 1038 spin_lock_irqsave(&sched_engine->lock, flags); 1039 1040 do { 1041 loop = guc_dequeue_one_context(sched_engine->private_data); 1042 } while (loop); 1043 1044 i915_sched_engine_reset_on_empty(sched_engine); 1045 1046 spin_unlock_irqrestore(&sched_engine->lock, flags); 1047 } 1048 1049 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1050 { 1051 if (iir & GT_RENDER_USER_INTERRUPT) 1052 intel_engine_signal_breadcrumbs(engine); 1053 } 1054 1055 static void __guc_context_destroy(struct intel_context *ce); 1056 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1057 static void guc_signal_context_fence(struct intel_context *ce); 1058 static void guc_cancel_context_requests(struct intel_context *ce); 1059 static void guc_blocked_fence_complete(struct intel_context *ce); 1060 1061 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1062 { 1063 struct intel_context *ce; 1064 unsigned long index, flags; 1065 bool pending_disable, pending_enable, deregister, destroyed, banned; 1066 1067 xa_lock_irqsave(&guc->context_lookup, flags); 1068 xa_for_each(&guc->context_lookup, index, ce) { 1069 /* 1070 * Corner case where the ref count on the object is zero but and 1071 * deregister G2H was lost. In this case we don't touch the ref 1072 * count and finish the destroy of the context. 
1073 */ 1074 bool do_put = kref_get_unless_zero(&ce->ref); 1075 1076 xa_unlock(&guc->context_lookup); 1077 1078 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 1079 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) { 1080 /* successful cancel so jump straight to close it */ 1081 intel_context_sched_disable_unpin(ce); 1082 } 1083 1084 spin_lock(&ce->guc_state.lock); 1085 1086 /* 1087 * Once we are at this point submission_disabled() is guaranteed 1088 * to be visible to all callers who set the below flags (see above 1089 * flush and flushes in reset_prepare). If submission_disabled() 1090 * is set, the caller shouldn't set these flags. 1091 */ 1092 1093 destroyed = context_destroyed(ce); 1094 pending_enable = context_pending_enable(ce); 1095 pending_disable = context_pending_disable(ce); 1096 deregister = context_wait_for_deregister_to_register(ce); 1097 banned = context_banned(ce); 1098 init_sched_state(ce); 1099 1100 spin_unlock(&ce->guc_state.lock); 1101 1102 if (pending_enable || destroyed || deregister) { 1103 decr_outstanding_submission_g2h(guc); 1104 if (deregister) 1105 guc_signal_context_fence(ce); 1106 if (destroyed) { 1107 intel_gt_pm_put_async(guc_to_gt(guc)); 1108 release_guc_id(guc, ce); 1109 __guc_context_destroy(ce); 1110 } 1111 if (pending_enable || deregister) 1112 intel_context_put(ce); 1113 } 1114 1115 /* Not mutualy exclusive with above if statement. */ 1116 if (pending_disable) { 1117 guc_signal_context_fence(ce); 1118 if (banned) { 1119 guc_cancel_context_requests(ce); 1120 intel_engine_signal_breadcrumbs(ce->engine); 1121 } 1122 intel_context_sched_disable_unpin(ce); 1123 decr_outstanding_submission_g2h(guc); 1124 1125 spin_lock(&ce->guc_state.lock); 1126 guc_blocked_fence_complete(ce); 1127 spin_unlock(&ce->guc_state.lock); 1128 1129 intel_context_put(ce); 1130 } 1131 1132 if (do_put) 1133 intel_context_put(ce); 1134 xa_lock(&guc->context_lookup); 1135 } 1136 xa_unlock_irqrestore(&guc->context_lookup, flags); 1137 } 1138 1139 /* 1140 * GuC stores busyness stats for each engine at context in/out boundaries. A 1141 * context 'in' logs execution start time, 'out' adds in -> out delta to total. 1142 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with 1143 * GuC. 1144 * 1145 * __i915_pmu_event_read samples engine busyness. When sampling, if context id 1146 * is valid (!= ~0) and start is non-zero, the engine is considered to be 1147 * active. For an active engine total busyness = total + (now - start), where 1148 * 'now' is the time at which the busyness is sampled. For inactive engine, 1149 * total busyness = total. 1150 * 1151 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. 1152 * 1153 * The start and total values provided by GuC are 32 bits and wrap around in a 1154 * few minutes. Since perf pmu provides busyness as 64 bit monotonically 1155 * increasing ns values, there is a need for this implementation to account for 1156 * overflows and extend the GuC provided values to 64 bits before returning 1157 * busyness to the user. In order to do that, a worker runs periodically at 1158 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in 1159 * 27 seconds for a gt clock frequency of 19.2 MHz). 
1160 */ 1161 1162 #define WRAP_TIME_CLKS U32_MAX 1163 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) 1164 1165 static void 1166 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) 1167 { 1168 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1169 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); 1170 1171 if (new_start == lower_32_bits(*prev_start)) 1172 return; 1173 1174 /* 1175 * When gt is unparked, we update the gt timestamp and start the ping 1176 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt 1177 * is unparked, all switched in contexts will have a start time that is 1178 * within +/- POLL_TIME_CLKS of the most recent gt_stamp. 1179 * 1180 * If neither gt_stamp nor new_start has rolled over, then the 1181 * gt_stamp_hi does not need to be adjusted, however if one of them has 1182 * rolled over, we need to adjust gt_stamp_hi accordingly. 1183 * 1184 * The below conditions address the cases of new_start rollover and 1185 * gt_stamp_last rollover respectively. 1186 */ 1187 if (new_start < gt_stamp_last && 1188 (new_start - gt_stamp_last) <= POLL_TIME_CLKS) 1189 gt_stamp_hi++; 1190 1191 if (new_start > gt_stamp_last && 1192 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) 1193 gt_stamp_hi--; 1194 1195 *prev_start = ((u64)gt_stamp_hi << 32) | new_start; 1196 } 1197 1198 #define record_read(map_, field_) \ 1199 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_) 1200 1201 /* 1202 * GuC updates shared memory and KMD reads it. Since this is not synchronized, 1203 * we run into a race where the value read is inconsistent. Sometimes the 1204 * inconsistency is in reading the upper MSB bytes of the last_in value when 1205 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper 1206 * 24 bits are zero. Since these are non-zero values, it is non-trivial to 1207 * determine validity of these values. Instead we read the values multiple times 1208 * until they are consistent. In test runs, 3 attempts results in consistent 1209 * values. The upper bound is set to 6 attempts and may need to be tuned as per 1210 * any new occurences. 
1211 */ 1212 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1213 u32 *last_in, u32 *id, u32 *total) 1214 { 1215 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1216 int i = 0; 1217 1218 do { 1219 *last_in = record_read(&rec_map, last_switch_in_stamp); 1220 *id = record_read(&rec_map, current_context_index); 1221 *total = record_read(&rec_map, total_runtime); 1222 1223 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1224 record_read(&rec_map, current_context_index) == *id && 1225 record_read(&rec_map, total_runtime) == *total) 1226 break; 1227 } while (++i < 6); 1228 } 1229 1230 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1231 { 1232 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1233 struct intel_guc *guc = &engine->gt->uc.guc; 1234 u32 last_switch, ctx_id, total; 1235 1236 lockdep_assert_held(&guc->timestamp.lock); 1237 1238 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1239 1240 stats->running = ctx_id != ~0U && last_switch; 1241 if (stats->running) 1242 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1243 1244 /* 1245 * Instead of adjusting the total for overflow, just add the 1246 * difference from previous sample stats->total_gt_clks 1247 */ 1248 if (total && total != ~0U) { 1249 stats->total_gt_clks += (u32)(total - stats->prev_total); 1250 stats->prev_total = total; 1251 } 1252 } 1253 1254 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1255 { 1256 intel_wakeref_t wakeref; 1257 u32 reg, shift; 1258 1259 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1260 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1261 1262 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1263 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1264 1265 return 3 - shift; 1266 } 1267 1268 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1269 { 1270 struct intel_gt *gt = guc_to_gt(guc); 1271 u32 gt_stamp_lo, gt_stamp_hi; 1272 u64 gpm_ts; 1273 1274 lockdep_assert_held(&guc->timestamp.lock); 1275 1276 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1277 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1278 MISC_STATUS1) >> guc->timestamp.shift; 1279 gt_stamp_lo = lower_32_bits(gpm_ts); 1280 *now = ktime_get(); 1281 1282 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1283 gt_stamp_hi++; 1284 1285 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1286 } 1287 1288 /* 1289 * Unlike the execlist mode of submission total and active times are in terms of 1290 * gt clocks. The *now parameter is retained to return the cpu time at which the 1291 * busyness was sampled. 1292 */ 1293 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1294 { 1295 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1296 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1297 struct intel_gt *gt = engine->gt; 1298 struct intel_guc *guc = >->uc.guc; 1299 u64 total, gt_stamp_saved; 1300 unsigned long flags; 1301 u32 reset_count; 1302 bool in_reset; 1303 1304 spin_lock_irqsave(&guc->timestamp.lock, flags); 1305 1306 /* 1307 * If a reset happened, we risk reading partially updated engine 1308 * busyness from GuC, so we just use the driver stored copy of busyness. 1309 * Synchronize with gt reset using reset_count and the 1310 * I915_RESET_BACKOFF flag. 
Note that reset flow updates the reset_count 1311 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1312 * usable by checking the flag afterwards. 1313 */ 1314 reset_count = i915_reset_count(gpu_error); 1315 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1316 1317 *now = ktime_get(); 1318 1319 /* 1320 * The active busyness depends on start_gt_clk and gt_stamp. 1321 * gt_stamp is updated by i915 only when gt is awake and the 1322 * start_gt_clk is derived from GuC state. To get a consistent 1323 * view of activity, we query the GuC state only if gt is awake. 1324 */ 1325 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1326 stats_saved = *stats; 1327 gt_stamp_saved = guc->timestamp.gt_stamp; 1328 /* 1329 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1330 * start_gt_clk' calculation below for active engines. 1331 */ 1332 guc_update_engine_gt_clks(engine); 1333 guc_update_pm_timestamp(guc, now); 1334 intel_gt_pm_put_async(gt); 1335 if (i915_reset_count(gpu_error) != reset_count) { 1336 *stats = stats_saved; 1337 guc->timestamp.gt_stamp = gt_stamp_saved; 1338 } 1339 } 1340 1341 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1342 if (stats->running) { 1343 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1344 1345 total += intel_gt_clock_interval_to_ns(gt, clk); 1346 } 1347 1348 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1349 1350 return ns_to_ktime(total); 1351 } 1352 1353 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1354 { 1355 struct intel_gt *gt = guc_to_gt(guc); 1356 struct intel_engine_cs *engine; 1357 enum intel_engine_id id; 1358 unsigned long flags; 1359 ktime_t unused; 1360 1361 cancel_delayed_work_sync(&guc->timestamp.work); 1362 1363 spin_lock_irqsave(&guc->timestamp.lock, flags); 1364 1365 guc_update_pm_timestamp(guc, &unused); 1366 for_each_engine(engine, gt, id) { 1367 guc_update_engine_gt_clks(engine); 1368 engine->stats.guc.prev_total = 0; 1369 } 1370 1371 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1372 } 1373 1374 static void __update_guc_busyness_stats(struct intel_guc *guc) 1375 { 1376 struct intel_gt *gt = guc_to_gt(guc); 1377 struct intel_engine_cs *engine; 1378 enum intel_engine_id id; 1379 unsigned long flags; 1380 ktime_t unused; 1381 1382 guc->timestamp.last_stat_jiffies = jiffies; 1383 1384 spin_lock_irqsave(&guc->timestamp.lock, flags); 1385 1386 guc_update_pm_timestamp(guc, &unused); 1387 for_each_engine(engine, gt, id) 1388 guc_update_engine_gt_clks(engine); 1389 1390 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1391 } 1392 1393 static void guc_timestamp_ping(struct work_struct *wrk) 1394 { 1395 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1396 timestamp.work.work); 1397 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1398 struct intel_gt *gt = guc_to_gt(guc); 1399 intel_wakeref_t wakeref; 1400 int srcu, ret; 1401 1402 /* 1403 * Synchronize with gt reset to make sure the worker does not 1404 * corrupt the engine/guc stats. 
1405 */ 1406 ret = intel_gt_reset_trylock(gt, &srcu); 1407 if (ret) 1408 return; 1409 1410 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1411 __update_guc_busyness_stats(guc); 1412 1413 intel_gt_reset_unlock(gt, srcu); 1414 1415 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1416 guc->timestamp.ping_delay); 1417 } 1418 1419 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1420 { 1421 u32 offset = intel_guc_engine_usage_offset(guc); 1422 u32 action[] = { 1423 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1424 offset, 1425 0, 1426 }; 1427 1428 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1429 } 1430 1431 static void guc_init_engine_stats(struct intel_guc *guc) 1432 { 1433 struct intel_gt *gt = guc_to_gt(guc); 1434 intel_wakeref_t wakeref; 1435 1436 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1437 guc->timestamp.ping_delay); 1438 1439 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 1440 int ret = guc_action_enable_usage_stats(guc); 1441 1442 if (ret) 1443 drm_err(>->i915->drm, 1444 "Failed to enable usage stats: %d!\n", ret); 1445 } 1446 } 1447 1448 void intel_guc_busyness_park(struct intel_gt *gt) 1449 { 1450 struct intel_guc *guc = >->uc.guc; 1451 1452 if (!guc_submission_initialized(guc)) 1453 return; 1454 1455 /* 1456 * There is a race with suspend flow where the worker runs after suspend 1457 * and causes an unclaimed register access warning. Cancel the worker 1458 * synchronously here. 1459 */ 1460 cancel_delayed_work_sync(&guc->timestamp.work); 1461 1462 /* 1463 * Before parking, we should sample engine busyness stats if we need to. 1464 * We can skip it if we are less than half a ping from the last time we 1465 * sampled the busyness stats. 1466 */ 1467 if (guc->timestamp.last_stat_jiffies && 1468 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1469 (guc->timestamp.ping_delay / 2))) 1470 return; 1471 1472 __update_guc_busyness_stats(guc); 1473 } 1474 1475 void intel_guc_busyness_unpark(struct intel_gt *gt) 1476 { 1477 struct intel_guc *guc = >->uc.guc; 1478 unsigned long flags; 1479 ktime_t unused; 1480 1481 if (!guc_submission_initialized(guc)) 1482 return; 1483 1484 spin_lock_irqsave(&guc->timestamp.lock, flags); 1485 guc_update_pm_timestamp(guc, &unused); 1486 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1487 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1488 guc->timestamp.ping_delay); 1489 } 1490 1491 static inline bool 1492 submission_disabled(struct intel_guc *guc) 1493 { 1494 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1495 1496 return unlikely(!sched_engine || 1497 !__tasklet_is_enabled(&sched_engine->tasklet) || 1498 intel_gt_is_wedged(guc_to_gt(guc))); 1499 } 1500 1501 static void disable_submission(struct intel_guc *guc) 1502 { 1503 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1504 1505 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1506 GEM_BUG_ON(!guc->ct.enabled); 1507 __tasklet_disable_sync_once(&sched_engine->tasklet); 1508 sched_engine->tasklet.callback = NULL; 1509 } 1510 } 1511 1512 static void enable_submission(struct intel_guc *guc) 1513 { 1514 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1515 unsigned long flags; 1516 1517 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1518 sched_engine->tasklet.callback = guc_submission_tasklet; 1519 wmb(); /* Make sure callback visible */ 1520 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1521 __tasklet_enable(&sched_engine->tasklet)) { 1522 
GEM_BUG_ON(!guc->ct.enabled); 1523 1524 /* And kick in case we missed a new request submission. */ 1525 tasklet_hi_schedule(&sched_engine->tasklet); 1526 } 1527 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1528 } 1529 1530 static void guc_flush_submissions(struct intel_guc *guc) 1531 { 1532 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1533 unsigned long flags; 1534 1535 spin_lock_irqsave(&sched_engine->lock, flags); 1536 spin_unlock_irqrestore(&sched_engine->lock, flags); 1537 } 1538 1539 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1540 1541 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1542 { 1543 if (unlikely(!guc_submission_initialized(guc))) { 1544 /* Reset called during driver load? GuC not yet initialised! */ 1545 return; 1546 } 1547 1548 intel_gt_park_heartbeats(guc_to_gt(guc)); 1549 disable_submission(guc); 1550 guc->interrupts.disable(guc); 1551 __reset_guc_busyness_stats(guc); 1552 1553 /* Flush IRQ handler */ 1554 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1555 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1556 1557 guc_flush_submissions(guc); 1558 guc_flush_destroyed_contexts(guc); 1559 flush_work(&guc->ct.requests.worker); 1560 1561 scrub_guc_desc_for_outstanding_g2h(guc); 1562 } 1563 1564 static struct intel_engine_cs * 1565 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1566 { 1567 struct intel_engine_cs *engine; 1568 intel_engine_mask_t tmp, mask = ve->mask; 1569 unsigned int num_siblings = 0; 1570 1571 for_each_engine_masked(engine, ve->gt, mask, tmp) 1572 if (num_siblings++ == sibling) 1573 return engine; 1574 1575 return NULL; 1576 } 1577 1578 static inline struct intel_engine_cs * 1579 __context_to_physical_engine(struct intel_context *ce) 1580 { 1581 struct intel_engine_cs *engine = ce->engine; 1582 1583 if (intel_engine_is_virtual(engine)) 1584 engine = guc_virtual_get_sibling(engine, 0); 1585 1586 return engine; 1587 } 1588 1589 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1590 { 1591 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1592 1593 if (!intel_context_is_schedulable(ce)) 1594 return; 1595 1596 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1597 1598 /* 1599 * We want a simple context + ring to execute the breadcrumb update. 1600 * We cannot rely on the context being intact across the GPU hang, 1601 * so clear it and rebuild just what we need for the breadcrumb. 1602 * All pending requests for this context will be zapped, and any 1603 * future request will be after userspace has had the opportunity 1604 * to recreate its own state. 1605 */ 1606 if (scrub) 1607 lrc_init_regs(ce, engine, true); 1608 1609 /* Rerun the request; its payload has been neutered (if guilty). 
*/ 1610 lrc_update_regs(ce, engine, head); 1611 } 1612 1613 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1614 { 1615 if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) 1616 return; 1617 1618 intel_engine_stop_cs(engine); 1619 1620 /* 1621 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need 1622 * to wait for any pending mi force wakeups 1623 */ 1624 intel_engine_wait_for_pending_mi_fw(engine); 1625 } 1626 1627 static void guc_reset_nop(struct intel_engine_cs *engine) 1628 { 1629 } 1630 1631 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1632 { 1633 } 1634 1635 static void 1636 __unwind_incomplete_requests(struct intel_context *ce) 1637 { 1638 struct i915_request *rq, *rn; 1639 struct list_head *pl; 1640 int prio = I915_PRIORITY_INVALID; 1641 struct i915_sched_engine * const sched_engine = 1642 ce->engine->sched_engine; 1643 unsigned long flags; 1644 1645 spin_lock_irqsave(&sched_engine->lock, flags); 1646 spin_lock(&ce->guc_state.lock); 1647 list_for_each_entry_safe_reverse(rq, rn, 1648 &ce->guc_state.requests, 1649 sched.link) { 1650 if (i915_request_completed(rq)) 1651 continue; 1652 1653 list_del_init(&rq->sched.link); 1654 __i915_request_unsubmit(rq); 1655 1656 /* Push the request back into the queue for later resubmission. */ 1657 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1658 if (rq_prio(rq) != prio) { 1659 prio = rq_prio(rq); 1660 pl = i915_sched_lookup_priolist(sched_engine, prio); 1661 } 1662 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1663 1664 list_add(&rq->sched.link, pl); 1665 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1666 } 1667 spin_unlock(&ce->guc_state.lock); 1668 spin_unlock_irqrestore(&sched_engine->lock, flags); 1669 } 1670 1671 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1672 { 1673 bool guilty; 1674 struct i915_request *rq; 1675 unsigned long flags; 1676 u32 head; 1677 int i, number_children = ce->parallel.number_children; 1678 struct intel_context *parent = ce; 1679 1680 GEM_BUG_ON(intel_context_is_child(ce)); 1681 1682 intel_context_get(ce); 1683 1684 /* 1685 * GuC will implicitly mark the context as non-schedulable when it sends 1686 * the reset notification. Make sure our state reflects this change. The 1687 * context will be marked enabled on resubmission. 
1688 */ 1689 spin_lock_irqsave(&ce->guc_state.lock, flags); 1690 clr_context_enabled(ce); 1691 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1692 1693 /* 1694 * For each context in the relationship find the hanging request 1695 * resetting each context / request as needed 1696 */ 1697 for (i = 0; i < number_children + 1; ++i) { 1698 if (!intel_context_is_pinned(ce)) 1699 goto next_context; 1700 1701 guilty = false; 1702 rq = intel_context_find_active_request(ce); 1703 if (!rq) { 1704 head = ce->ring->tail; 1705 goto out_replay; 1706 } 1707 1708 if (i915_request_started(rq)) 1709 guilty = stalled & ce->engine->mask; 1710 1711 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1712 head = intel_ring_wrap(ce->ring, rq->head); 1713 1714 __i915_request_reset(rq, guilty); 1715 out_replay: 1716 guc_reset_state(ce, head, guilty); 1717 next_context: 1718 if (i != number_children) 1719 ce = list_next_entry(ce, parallel.child_link); 1720 } 1721 1722 __unwind_incomplete_requests(parent); 1723 intel_context_put(parent); 1724 } 1725 1726 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1727 { 1728 struct intel_context *ce; 1729 unsigned long index; 1730 unsigned long flags; 1731 1732 if (unlikely(!guc_submission_initialized(guc))) { 1733 /* Reset called during driver load? GuC not yet initialised! */ 1734 return; 1735 } 1736 1737 xa_lock_irqsave(&guc->context_lookup, flags); 1738 xa_for_each(&guc->context_lookup, index, ce) { 1739 if (!kref_get_unless_zero(&ce->ref)) 1740 continue; 1741 1742 xa_unlock(&guc->context_lookup); 1743 1744 if (intel_context_is_pinned(ce) && 1745 !intel_context_is_child(ce)) 1746 __guc_reset_context(ce, stalled); 1747 1748 intel_context_put(ce); 1749 1750 xa_lock(&guc->context_lookup); 1751 } 1752 xa_unlock_irqrestore(&guc->context_lookup, flags); 1753 1754 /* GuC is blown away, drop all references to contexts */ 1755 xa_destroy(&guc->context_lookup); 1756 } 1757 1758 static void guc_cancel_context_requests(struct intel_context *ce) 1759 { 1760 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1761 struct i915_request *rq; 1762 unsigned long flags; 1763 1764 /* Mark all executing requests as skipped. */ 1765 spin_lock_irqsave(&sched_engine->lock, flags); 1766 spin_lock(&ce->guc_state.lock); 1767 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1768 i915_request_put(i915_request_mark_eio(rq)); 1769 spin_unlock(&ce->guc_state.lock); 1770 spin_unlock_irqrestore(&sched_engine->lock, flags); 1771 } 1772 1773 static void 1774 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1775 { 1776 struct i915_request *rq, *rn; 1777 struct rb_node *rb; 1778 unsigned long flags; 1779 1780 /* Can be called during boot if GuC fails to load */ 1781 if (!sched_engine) 1782 return; 1783 1784 /* 1785 * Before we call engine->cancel_requests(), we should have exclusive 1786 * access to the submission state. This is arranged for us by the 1787 * caller disabling the interrupt generation, the tasklet and other 1788 * threads that may then access the same state, giving us a free hand 1789 * to reset state. However, we still need to let lockdep be aware that 1790 * we know this state may be accessed in hardirq context, so we 1791 * disable the irq around this manipulation and we want to keep 1792 * the spinlock focused on its duties and not accidentally conflate 1793 * coverage to the submission's irq state. 
(Similarly, although we 1794 * shouldn't need to disable irq around the manipulation of the 1795 * submission's irq state, we also wish to remind ourselves that 1796 * it is irq state.) 1797 */ 1798 spin_lock_irqsave(&sched_engine->lock, flags); 1799 1800 /* Flush the queued requests to the timeline list (for retiring). */ 1801 while ((rb = rb_first_cached(&sched_engine->queue))) { 1802 struct i915_priolist *p = to_priolist(rb); 1803 1804 priolist_for_each_request_consume(rq, rn, p) { 1805 list_del_init(&rq->sched.link); 1806 1807 __i915_request_submit(rq); 1808 1809 i915_request_put(i915_request_mark_eio(rq)); 1810 } 1811 1812 rb_erase_cached(&p->node, &sched_engine->queue); 1813 i915_priolist_free(p); 1814 } 1815 1816 /* Remaining _unready_ requests will be nop'ed when submitted */ 1817 1818 sched_engine->queue_priority_hint = INT_MIN; 1819 sched_engine->queue = RB_ROOT_CACHED; 1820 1821 spin_unlock_irqrestore(&sched_engine->lock, flags); 1822 } 1823 1824 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1825 { 1826 struct intel_context *ce; 1827 unsigned long index; 1828 unsigned long flags; 1829 1830 xa_lock_irqsave(&guc->context_lookup, flags); 1831 xa_for_each(&guc->context_lookup, index, ce) { 1832 if (!kref_get_unless_zero(&ce->ref)) 1833 continue; 1834 1835 xa_unlock(&guc->context_lookup); 1836 1837 if (intel_context_is_pinned(ce) && 1838 !intel_context_is_child(ce)) 1839 guc_cancel_context_requests(ce); 1840 1841 intel_context_put(ce); 1842 1843 xa_lock(&guc->context_lookup); 1844 } 1845 xa_unlock_irqrestore(&guc->context_lookup, flags); 1846 1847 guc_cancel_sched_engine_requests(guc->sched_engine); 1848 1849 /* GuC is blown away, drop all references to contexts */ 1850 xa_destroy(&guc->context_lookup); 1851 } 1852 1853 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1854 { 1855 /* Reset called during driver load or during wedge? */ 1856 if (unlikely(!guc_submission_initialized(guc) || 1857 intel_gt_is_wedged(guc_to_gt(guc)))) { 1858 return; 1859 } 1860 1861 /* 1862 * Technically possible for either of these values to be non-zero here, 1863 * but very unlikely + harmless. Regardless let's add a warn so we can 1864 * see in CI if this happens frequently / a precursor to taking down the 1865 * machine. 1866 */ 1867 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1868 atomic_set(&guc->outstanding_submission_g2h, 0); 1869 1870 intel_guc_global_policies_update(guc); 1871 enable_submission(guc); 1872 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1873 } 1874 1875 static void destroyed_worker_func(struct work_struct *w); 1876 static void reset_fail_worker_func(struct work_struct *w); 1877 1878 /* 1879 * Set up the memory resources to be shared with the GuC (via the GGTT) 1880 * at firmware loading time. 
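 *
 * For GuC firmware older than v70 this covers the v69 LRC descriptor pool; in all cases the bitmap used to allocate multi-LRC guc_ids and the GuC timestamp ping interval / shift are also set up here.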
1881 */ 1882 int intel_guc_submission_init(struct intel_guc *guc) 1883 { 1884 struct intel_gt *gt = guc_to_gt(guc); 1885 int ret; 1886 1887 if (guc->submission_initialized) 1888 return 0; 1889 1890 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { 1891 ret = guc_lrc_desc_pool_create_v69(guc); 1892 if (ret) 1893 return ret; 1894 } 1895 1896 guc->submission_state.guc_ids_bitmap = 1897 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1898 if (!guc->submission_state.guc_ids_bitmap) { 1899 ret = -ENOMEM; 1900 goto destroy_pool; 1901 } 1902 1903 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1904 guc->timestamp.shift = gpm_timestamp_shift(gt); 1905 guc->submission_initialized = true; 1906 1907 return 0; 1908 1909 destroy_pool: 1910 guc_lrc_desc_pool_destroy_v69(guc); 1911 1912 return ret; 1913 } 1914 1915 void intel_guc_submission_fini(struct intel_guc *guc) 1916 { 1917 if (!guc->submission_initialized) 1918 return; 1919 1920 guc_flush_destroyed_contexts(guc); 1921 guc_lrc_desc_pool_destroy_v69(guc); 1922 i915_sched_engine_put(guc->sched_engine); 1923 bitmap_free(guc->submission_state.guc_ids_bitmap); 1924 guc->submission_initialized = false; 1925 } 1926 1927 static inline void queue_request(struct i915_sched_engine *sched_engine, 1928 struct i915_request *rq, 1929 int prio) 1930 { 1931 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1932 list_add_tail(&rq->sched.link, 1933 i915_sched_lookup_priolist(sched_engine, prio)); 1934 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1935 tasklet_hi_schedule(&sched_engine->tasklet); 1936 } 1937 1938 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1939 struct i915_request *rq) 1940 { 1941 int ret = 0; 1942 1943 __i915_request_submit(rq); 1944 1945 trace_i915_request_in(rq, 0); 1946 1947 if (is_multi_lrc_rq(rq)) { 1948 if (multi_lrc_submit(rq)) { 1949 ret = guc_wq_item_append(guc, rq); 1950 if (!ret) 1951 ret = guc_add_request(guc, rq); 1952 } 1953 } else { 1954 guc_set_lrc_tail(rq); 1955 ret = guc_add_request(guc, rq); 1956 } 1957 1958 if (unlikely(ret == -EPIPE)) 1959 disable_submission(guc); 1960 1961 return ret; 1962 } 1963 1964 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1965 { 1966 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1967 struct intel_context *ce = request_to_scheduling_context(rq); 1968 1969 return submission_disabled(guc) || guc->stalled_request || 1970 !i915_sched_engine_is_empty(sched_engine) || 1971 !ctx_id_mapped(guc, ce->guc_id.id); 1972 } 1973 1974 static void guc_submit_request(struct i915_request *rq) 1975 { 1976 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1977 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1978 unsigned long flags; 1979 1980 /* Will be called from irq-context when using foreign fences. 
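 * This is why the irqsave variant is used when taking sched_engine->lock below.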
*/ 1981 spin_lock_irqsave(&sched_engine->lock, flags); 1982 1983 if (need_tasklet(guc, rq)) 1984 queue_request(sched_engine, rq, rq_prio(rq)); 1985 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1986 tasklet_hi_schedule(&sched_engine->tasklet); 1987 1988 spin_unlock_irqrestore(&sched_engine->lock, flags); 1989 } 1990 1991 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1992 { 1993 int ret; 1994 1995 GEM_BUG_ON(intel_context_is_child(ce)); 1996 1997 if (intel_context_is_parent(ce)) 1998 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1999 NUMBER_MULTI_LRC_GUC_ID(guc), 2000 order_base_2(ce->parallel.number_children 2001 + 1)); 2002 else 2003 ret = ida_simple_get(&guc->submission_state.guc_ids, 2004 NUMBER_MULTI_LRC_GUC_ID(guc), 2005 guc->submission_state.num_guc_ids, 2006 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2007 __GFP_NOWARN); 2008 if (unlikely(ret < 0)) 2009 return ret; 2010 2011 if (!intel_context_is_parent(ce)) 2012 ++guc->submission_state.guc_ids_in_use; 2013 2014 ce->guc_id.id = ret; 2015 return 0; 2016 } 2017 2018 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2019 { 2020 GEM_BUG_ON(intel_context_is_child(ce)); 2021 2022 if (!context_guc_id_invalid(ce)) { 2023 if (intel_context_is_parent(ce)) { 2024 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2025 ce->guc_id.id, 2026 order_base_2(ce->parallel.number_children 2027 + 1)); 2028 } else { 2029 --guc->submission_state.guc_ids_in_use; 2030 ida_simple_remove(&guc->submission_state.guc_ids, 2031 ce->guc_id.id); 2032 } 2033 clr_ctx_id_mapping(guc, ce->guc_id.id); 2034 set_context_guc_id_invalid(ce); 2035 } 2036 if (!list_empty(&ce->guc_id.link)) 2037 list_del_init(&ce->guc_id.link); 2038 } 2039 2040 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2041 { 2042 unsigned long flags; 2043 2044 spin_lock_irqsave(&guc->submission_state.lock, flags); 2045 __release_guc_id(guc, ce); 2046 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2047 } 2048 2049 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2050 { 2051 struct intel_context *cn; 2052 2053 lockdep_assert_held(&guc->submission_state.lock); 2054 GEM_BUG_ON(intel_context_is_child(ce)); 2055 GEM_BUG_ON(intel_context_is_parent(ce)); 2056 2057 if (!list_empty(&guc->submission_state.guc_id_list)) { 2058 cn = list_first_entry(&guc->submission_state.guc_id_list, 2059 struct intel_context, 2060 guc_id.link); 2061 2062 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2063 GEM_BUG_ON(context_guc_id_invalid(cn)); 2064 GEM_BUG_ON(intel_context_is_child(cn)); 2065 GEM_BUG_ON(intel_context_is_parent(cn)); 2066 2067 list_del_init(&cn->guc_id.link); 2068 ce->guc_id.id = cn->guc_id.id; 2069 2070 spin_lock(&cn->guc_state.lock); 2071 clr_context_registered(cn); 2072 spin_unlock(&cn->guc_state.lock); 2073 2074 set_context_guc_id_invalid(cn); 2075 2076 #ifdef CONFIG_DRM_I915_SELFTEST 2077 guc->number_guc_id_stolen++; 2078 #endif 2079 2080 return 0; 2081 } else { 2082 return -EAGAIN; 2083 } 2084 } 2085 2086 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2087 { 2088 int ret; 2089 2090 lockdep_assert_held(&guc->submission_state.lock); 2091 GEM_BUG_ON(intel_context_is_child(ce)); 2092 2093 ret = new_guc_id(guc, ce); 2094 if (unlikely(ret < 0)) { 2095 if (intel_context_is_parent(ce)) 2096 return -ENOSPC; 2097 2098 ret = steal_guc_id(guc, ce); 2099 if (ret < 0) 2100 return ret; 2101 } 2102 2103 if (intel_context_is_parent(ce)) { 2104 struct 
intel_context *child; 2105 int i = 1; 2106 2107 for_each_child(ce, child) 2108 child->guc_id.id = ce->guc_id.id + i++; 2109 } 2110 2111 return 0; 2112 } 2113 2114 #define PIN_GUC_ID_TRIES 4 2115 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2116 { 2117 int ret = 0; 2118 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2119 2120 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2121 2122 try_again: 2123 spin_lock_irqsave(&guc->submission_state.lock, flags); 2124 2125 might_lock(&ce->guc_state.lock); 2126 2127 if (context_guc_id_invalid(ce)) { 2128 ret = assign_guc_id(guc, ce); 2129 if (ret) 2130 goto out_unlock; 2131 ret = 1; /* Indicates newly assigned guc_id */ 2132 } 2133 if (!list_empty(&ce->guc_id.link)) 2134 list_del_init(&ce->guc_id.link); 2135 atomic_inc(&ce->guc_id.ref); 2136 2137 out_unlock: 2138 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2139 2140 /* 2141 * -EAGAIN indicates no guc_ids are available, let's retire any 2142 * outstanding requests to see if that frees up a guc_id. If the first 2143 * retire didn't help, insert a sleep with the timeslice duration before 2144 * attempting to retire more requests. Double the sleep period each 2145 * subsequent pass before finally giving up. The sleep period has a max of 2146 * 100ms and a minimum of 1ms. 2147 */ 2148 if (ret == -EAGAIN && --tries) { 2149 if (PIN_GUC_ID_TRIES - tries > 1) { 2150 unsigned int timeslice_shifted = 2151 ce->engine->props.timeslice_duration_ms << 2152 (PIN_GUC_ID_TRIES - tries - 2); 2153 unsigned int max = min_t(unsigned int, 100, 2154 timeslice_shifted); 2155 2156 msleep(max_t(unsigned int, max, 1)); 2157 } 2158 intel_gt_retire_requests(guc_to_gt(guc)); 2159 goto try_again; 2160 } 2161 2162 return ret; 2163 } 2164 2165 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2166 { 2167 unsigned long flags; 2168 2169 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2170 GEM_BUG_ON(intel_context_is_child(ce)); 2171 2172 if (unlikely(context_guc_id_invalid(ce) || 2173 intel_context_is_parent(ce))) 2174 return; 2175 2176 spin_lock_irqsave(&guc->submission_state.lock, flags); 2177 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2178 !atomic_read(&ce->guc_id.ref)) 2179 list_add_tail(&ce->guc_id.link, 2180 &guc->submission_state.guc_id_list); 2181 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2182 } 2183 2184 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2185 struct intel_context *ce, 2186 u32 guc_id, 2187 u32 offset, 2188 bool loop) 2189 { 2190 struct intel_context *child; 2191 u32 action[4 + MAX_ENGINE_INSTANCE]; 2192 int len = 0; 2193 2194 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2195 2196 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2197 action[len++] = guc_id; 2198 action[len++] = ce->parallel.number_children + 1; 2199 action[len++] = offset; 2200 for_each_child(ce, child) { 2201 offset += sizeof(struct guc_lrc_desc_v69); 2202 action[len++] = offset; 2203 } 2204 2205 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2206 } 2207 2208 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2209 struct intel_context *ce, 2210 struct guc_ctxt_registration_info *info, 2211 bool loop) 2212 { 2213 struct intel_context *child; 2214 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2215 int len = 0; 2216 u32 next_id; 2217 2218 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2219 2220 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
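/* Fixed part of the registration payload: the info fields below, followed by one lo/hi LRCA pair per child context, which is why action[] is sized 13 + (MAX_ENGINE_INSTANCE * 2) dwords. */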
2221 action[len++] = info->flags; 2222 action[len++] = info->context_idx; 2223 action[len++] = info->engine_class; 2224 action[len++] = info->engine_submit_mask; 2225 action[len++] = info->wq_desc_lo; 2226 action[len++] = info->wq_desc_hi; 2227 action[len++] = info->wq_base_lo; 2228 action[len++] = info->wq_base_hi; 2229 action[len++] = info->wq_size; 2230 action[len++] = ce->parallel.number_children + 1; 2231 action[len++] = info->hwlrca_lo; 2232 action[len++] = info->hwlrca_hi; 2233 2234 next_id = info->context_idx + 1; 2235 for_each_child(ce, child) { 2236 GEM_BUG_ON(next_id++ != child->guc_id.id); 2237 2238 /* 2239 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2240 * only supports 32 bit currently. 2241 */ 2242 action[len++] = lower_32_bits(child->lrc.lrca); 2243 action[len++] = upper_32_bits(child->lrc.lrca); 2244 } 2245 2246 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2247 2248 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2249 } 2250 2251 static int __guc_action_register_context_v69(struct intel_guc *guc, 2252 u32 guc_id, 2253 u32 offset, 2254 bool loop) 2255 { 2256 u32 action[] = { 2257 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2258 guc_id, 2259 offset, 2260 }; 2261 2262 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2263 0, loop); 2264 } 2265 2266 static int __guc_action_register_context_v70(struct intel_guc *guc, 2267 struct guc_ctxt_registration_info *info, 2268 bool loop) 2269 { 2270 u32 action[] = { 2271 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2272 info->flags, 2273 info->context_idx, 2274 info->engine_class, 2275 info->engine_submit_mask, 2276 info->wq_desc_lo, 2277 info->wq_desc_hi, 2278 info->wq_base_lo, 2279 info->wq_base_hi, 2280 info->wq_size, 2281 info->hwlrca_lo, 2282 info->hwlrca_hi, 2283 }; 2284 2285 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2286 0, loop); 2287 } 2288 2289 static void prepare_context_registration_info_v69(struct intel_context *ce); 2290 static void prepare_context_registration_info_v70(struct intel_context *ce, 2291 struct guc_ctxt_registration_info *info); 2292 2293 static int 2294 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2295 { 2296 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2297 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2298 2299 prepare_context_registration_info_v69(ce); 2300 2301 if (intel_context_is_parent(ce)) 2302 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2303 offset, loop); 2304 else 2305 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2306 offset, loop); 2307 } 2308 2309 static int 2310 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2311 { 2312 struct guc_ctxt_registration_info info; 2313 2314 prepare_context_registration_info_v70(ce, &info); 2315 2316 if (intel_context_is_parent(ce)) 2317 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2318 else 2319 return __guc_action_register_context_v70(guc, &info, loop); 2320 } 2321 2322 static int register_context(struct intel_context *ce, bool loop) 2323 { 2324 struct intel_guc *guc = ce_to_guc(ce); 2325 int ret; 2326 2327 GEM_BUG_ON(intel_context_is_child(ce)); 2328 trace_intel_context_register(ce); 2329 2330 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2331 ret = register_context_v70(guc, ce, loop); 2332 else 2333 ret = register_context_v69(guc, ce, loop); 2334 2335 if (likely(!ret)) { 2336 unsigned long flags; 2337 2338 spin_lock_irqsave(&ce->guc_state.lock, flags); 2339 
set_context_registered(ce); 2340 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2341 2342 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) 2343 guc_context_policy_init_v70(ce, loop); 2344 } 2345 2346 return ret; 2347 } 2348 2349 static int __guc_action_deregister_context(struct intel_guc *guc, 2350 u32 guc_id) 2351 { 2352 u32 action[] = { 2353 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2354 guc_id, 2355 }; 2356 2357 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2358 G2H_LEN_DW_DEREGISTER_CONTEXT, 2359 true); 2360 } 2361 2362 static int deregister_context(struct intel_context *ce, u32 guc_id) 2363 { 2364 struct intel_guc *guc = ce_to_guc(ce); 2365 2366 GEM_BUG_ON(intel_context_is_child(ce)); 2367 trace_intel_context_deregister(ce); 2368 2369 return __guc_action_deregister_context(guc, guc_id); 2370 } 2371 2372 static inline void clear_children_join_go_memory(struct intel_context *ce) 2373 { 2374 struct parent_scratch *ps = __get_parent_scratch(ce); 2375 int i; 2376 2377 ps->go.semaphore = 0; 2378 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2379 ps->join[i].semaphore = 0; 2380 } 2381 2382 static inline u32 get_children_go_value(struct intel_context *ce) 2383 { 2384 return __get_parent_scratch(ce)->go.semaphore; 2385 } 2386 2387 static inline u32 get_children_join_value(struct intel_context *ce, 2388 u8 child_index) 2389 { 2390 return __get_parent_scratch(ce)->join[child_index].semaphore; 2391 } 2392 2393 struct context_policy { 2394 u32 count; 2395 struct guc_update_context_policy h2g; 2396 }; 2397 2398 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2399 { 2400 size_t bytes = sizeof(policy->h2g.header) + 2401 (sizeof(policy->h2g.klv[0]) * policy->count); 2402 2403 return bytes / sizeof(u32); 2404 } 2405 2406 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2407 { 2408 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2409 policy->h2g.header.ctx_id = guc_id; 2410 policy->count = 0; 2411 } 2412 2413 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2414 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2415 { \ 2416 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2417 policy->h2g.klv[policy->count].kl = \ 2418 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2419 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2420 policy->h2g.klv[policy->count].value = data; \ 2421 policy->count++; \ 2422 } 2423 2424 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2425 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2426 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2427 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2428 2429 #undef MAKE_CONTEXT_POLICY_ADD 2430 2431 static int __guc_context_set_context_policies(struct intel_guc *guc, 2432 struct context_policy *policy, 2433 bool loop) 2434 { 2435 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2436 __guc_context_policy_action_size(policy), 2437 0, loop); 2438 } 2439 2440 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2441 { 2442 struct intel_engine_cs *engine = ce->engine; 2443 struct intel_guc *guc = &engine->gt->uc.guc; 2444 struct context_policy policy; 2445 u32 execution_quantum; 2446 u32 preemption_timeout; 2447 unsigned long flags; 2448 int ret; 2449 2450 /* NB: For both of these, zero means disabled. 
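 * The ms-based engine properties are scaled to microsecond units below (ms * 1000); e.g. a 5 ms timeslice becomes an execution quantum of 5000 us.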
*/ 2451 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2452 execution_quantum)); 2453 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2454 preemption_timeout)); 2455 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2456 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2457 2458 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2459 2460 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2461 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2462 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2463 2464 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2465 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2466 2467 ret = __guc_context_set_context_policies(guc, &policy, loop); 2468 2469 spin_lock_irqsave(&ce->guc_state.lock, flags); 2470 if (ret != 0) 2471 set_context_policy_required(ce); 2472 else 2473 clr_context_policy_required(ce); 2474 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2475 2476 return ret; 2477 } 2478 2479 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2480 struct guc_lrc_desc_v69 *desc) 2481 { 2482 desc->policy_flags = 0; 2483 2484 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2485 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2486 2487 /* NB: For both of these, zero means disabled. */ 2488 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2489 desc->execution_quantum)); 2490 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2491 desc->preemption_timeout)); 2492 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2493 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2494 } 2495 2496 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2497 { 2498 /* 2499 * This matches the mapping we do in map_i915_prio_to_guc_prio() 2500 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2501 */ 2502 switch (prio) { 2503 default: 2504 MISSING_CASE(prio); 2505 fallthrough; 2506 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2507 return GEN12_CTX_PRIORITY_NORMAL; 2508 case GUC_CLIENT_PRIORITY_NORMAL: 2509 return GEN12_CTX_PRIORITY_LOW; 2510 case GUC_CLIENT_PRIORITY_HIGH: 2511 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2512 return GEN12_CTX_PRIORITY_HIGH; 2513 } 2514 } 2515 2516 static void prepare_context_registration_info_v69(struct intel_context *ce) 2517 { 2518 struct intel_engine_cs *engine = ce->engine; 2519 struct intel_guc *guc = &engine->gt->uc.guc; 2520 u32 ctx_id = ce->guc_id.id; 2521 struct guc_lrc_desc_v69 *desc; 2522 struct intel_context *child; 2523 2524 GEM_BUG_ON(!engine->mask); 2525 2526 /* 2527 * Ensure LRC + CT vmas are in the same region as the write barrier is 2528 * done based on the CT vma region. 2529 */ 2530 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2531 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2532 2533 desc = __get_lrc_desc_v69(guc, ctx_id); 2534 desc->engine_class = engine_class_to_guc_class(engine->class); 2535 desc->engine_submit_mask = engine->logical_mask; 2536 desc->hw_context_desc = ce->lrc.lrca; 2537 desc->priority = ce->guc_state.prio; 2538 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2539 guc_context_policy_init_v69(engine, desc); 2540 2541 /* 2542 * If context is a parent, we need to register a process descriptor 2543 * describing a work queue and register all child contexts.
2544 */ 2545 if (intel_context_is_parent(ce)) { 2546 struct guc_process_desc_v69 *pdesc; 2547 2548 ce->parallel.guc.wqi_tail = 0; 2549 ce->parallel.guc.wqi_head = 0; 2550 2551 desc->process_desc = i915_ggtt_offset(ce->state) + 2552 __get_parent_scratch_offset(ce); 2553 desc->wq_addr = i915_ggtt_offset(ce->state) + 2554 __get_wq_offset(ce); 2555 desc->wq_size = WQ_SIZE; 2556 2557 pdesc = __get_process_desc_v69(ce); 2558 memset(pdesc, 0, sizeof(*(pdesc))); 2559 pdesc->stage_id = ce->guc_id.id; 2560 pdesc->wq_base_addr = desc->wq_addr; 2561 pdesc->wq_size_bytes = desc->wq_size; 2562 pdesc->wq_status = WQ_STATUS_ACTIVE; 2563 2564 ce->parallel.guc.wq_head = &pdesc->head; 2565 ce->parallel.guc.wq_tail = &pdesc->tail; 2566 ce->parallel.guc.wq_status = &pdesc->wq_status; 2567 2568 for_each_child(ce, child) { 2569 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2570 2571 desc->engine_class = 2572 engine_class_to_guc_class(engine->class); 2573 desc->hw_context_desc = child->lrc.lrca; 2574 desc->priority = ce->guc_state.prio; 2575 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2576 guc_context_policy_init_v69(engine, desc); 2577 } 2578 2579 clear_children_join_go_memory(ce); 2580 } 2581 } 2582 2583 static void prepare_context_registration_info_v70(struct intel_context *ce, 2584 struct guc_ctxt_registration_info *info) 2585 { 2586 struct intel_engine_cs *engine = ce->engine; 2587 struct intel_guc *guc = &engine->gt->uc.guc; 2588 u32 ctx_id = ce->guc_id.id; 2589 2590 GEM_BUG_ON(!engine->mask); 2591 2592 /* 2593 * Ensure LRC + CT vmas are in the same region as the write barrier is 2594 * done based on the CT vma region. 2595 */ 2596 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2597 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2598 2599 memset(info, 0, sizeof(*info)); 2600 info->context_idx = ctx_id; 2601 info->engine_class = engine_class_to_guc_class(engine->class); 2602 info->engine_submit_mask = engine->logical_mask; 2603 /* 2604 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2605 * only supports 32 bit currently. 2606 */ 2607 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2608 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2609 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2610 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2611 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2612 2613 /* 2614 * If context is a parent, we need to register a process descriptor 2615 * describing a work queue and register all child contexts.
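 * For v70+ the workqueue descriptor and base are passed as lo/hi GGTT offsets in the registration info, and the children's LRCAs are appended to the multi-LRC register action itself (see __guc_action_register_multi_lrc_v70()) rather than via per-child descriptors.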
2616 */ 2617 if (intel_context_is_parent(ce)) { 2618 struct guc_sched_wq_desc *wq_desc; 2619 u64 wq_desc_offset, wq_base_offset; 2620 2621 ce->parallel.guc.wqi_tail = 0; 2622 ce->parallel.guc.wqi_head = 0; 2623 2624 wq_desc_offset = i915_ggtt_offset(ce->state) + 2625 __get_parent_scratch_offset(ce); 2626 wq_base_offset = i915_ggtt_offset(ce->state) + 2627 __get_wq_offset(ce); 2628 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2629 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2630 info->wq_base_lo = lower_32_bits(wq_base_offset); 2631 info->wq_base_hi = upper_32_bits(wq_base_offset); 2632 info->wq_size = WQ_SIZE; 2633 2634 wq_desc = __get_wq_desc_v70(ce); 2635 memset(wq_desc, 0, sizeof(*wq_desc)); 2636 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2637 2638 ce->parallel.guc.wq_head = &wq_desc->head; 2639 ce->parallel.guc.wq_tail = &wq_desc->tail; 2640 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2641 2642 clear_children_join_go_memory(ce); 2643 } 2644 } 2645 2646 static int try_context_registration(struct intel_context *ce, bool loop) 2647 { 2648 struct intel_engine_cs *engine = ce->engine; 2649 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2650 struct intel_guc *guc = &engine->gt->uc.guc; 2651 intel_wakeref_t wakeref; 2652 u32 ctx_id = ce->guc_id.id; 2653 bool context_registered; 2654 int ret = 0; 2655 2656 GEM_BUG_ON(!sched_state_is_init(ce)); 2657 2658 context_registered = ctx_id_mapped(guc, ctx_id); 2659 2660 clr_ctx_id_mapping(guc, ctx_id); 2661 set_ctx_id_mapping(guc, ctx_id, ce); 2662 2663 /* 2664 * The context_lookup xarray is used to determine if the hardware 2665 * context is currently registered. There are two cases in which it 2666 * could be registered: either the guc_id has been stolen from another 2667 * context, or the lrc descriptor address of this context has changed. In 2668 * either case the context needs to be deregistered with the GuC before 2669 * registering this context. 2670 */ 2671 if (context_registered) { 2672 bool disabled; 2673 unsigned long flags; 2674 2675 trace_intel_context_steal_guc_id(ce); 2676 GEM_BUG_ON(!loop); 2677 2678 /* Seal race with Reset */ 2679 spin_lock_irqsave(&ce->guc_state.lock, flags); 2680 disabled = submission_disabled(guc); 2681 if (likely(!disabled)) { 2682 set_context_wait_for_deregister_to_register(ce); 2683 intel_context_get(ce); 2684 } 2685 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2686 if (unlikely(disabled)) { 2687 clr_ctx_id_mapping(guc, ctx_id); 2688 return 0; /* Will get registered later */ 2689 } 2690 2691 /* 2692 * If stealing the guc_id, this ce has the same guc_id as the 2693 * context whose guc_id was stolen.
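 * Deregister that stale mapping first; the actual registration of this context happens later, once the deregister-done G2H arrives (the context was marked wait_for_deregister_to_register above).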
*/ 2695 with_intel_runtime_pm(runtime_pm, wakeref) 2696 ret = deregister_context(ce, ce->guc_id.id); 2697 if (unlikely(ret == -ENODEV)) 2698 ret = 0; /* Will get registered later */ 2699 } else { 2700 with_intel_runtime_pm(runtime_pm, wakeref) 2701 ret = register_context(ce, loop); 2702 if (unlikely(ret == -EBUSY)) { 2703 clr_ctx_id_mapping(guc, ctx_id); 2704 } else if (unlikely(ret == -ENODEV)) { 2705 clr_ctx_id_mapping(guc, ctx_id); 2706 ret = 0; /* Will get registered later */ 2707 } 2708 } 2709 2710 return ret; 2711 } 2712 2713 static int __guc_context_pre_pin(struct intel_context *ce, 2714 struct intel_engine_cs *engine, 2715 struct i915_gem_ww_ctx *ww, 2716 void **vaddr) 2717 { 2718 return lrc_pre_pin(ce, engine, ww, vaddr); 2719 } 2720 2721 static int __guc_context_pin(struct intel_context *ce, 2722 struct intel_engine_cs *engine, 2723 void *vaddr) 2724 { 2725 if (i915_ggtt_offset(ce->state) != 2726 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2727 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2728 2729 /* 2730 * GuC context gets pinned in guc_request_alloc. See that function for 2731 * explanation of why. 2732 */ 2733 2734 return lrc_pin(ce, engine, vaddr); 2735 } 2736 2737 static int guc_context_pre_pin(struct intel_context *ce, 2738 struct i915_gem_ww_ctx *ww, 2739 void **vaddr) 2740 { 2741 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2742 } 2743 2744 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2745 { 2746 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2747 2748 if (likely(!ret && !intel_context_is_barrier(ce))) 2749 intel_engine_pm_get(ce->engine); 2750 2751 return ret; 2752 } 2753 2754 static void guc_context_unpin(struct intel_context *ce) 2755 { 2756 struct intel_guc *guc = ce_to_guc(ce); 2757 2758 unpin_guc_id(guc, ce); 2759 lrc_unpin(ce); 2760 2761 if (likely(!intel_context_is_barrier(ce))) 2762 intel_engine_pm_put_async(ce->engine); 2763 } 2764 2765 static void guc_context_post_unpin(struct intel_context *ce) 2766 { 2767 lrc_post_unpin(ce); 2768 } 2769 2770 static void __guc_context_sched_enable(struct intel_guc *guc, 2771 struct intel_context *ce) 2772 { 2773 u32 action[] = { 2774 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2775 ce->guc_id.id, 2776 GUC_CONTEXT_ENABLE 2777 }; 2778 2779 trace_intel_context_sched_enable(ce); 2780 2781 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2782 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2783 } 2784 2785 static void __guc_context_sched_disable(struct intel_guc *guc, 2786 struct intel_context *ce, 2787 u16 guc_id) 2788 { 2789 u32 action[] = { 2790 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2791 guc_id, /* ce->guc_id.id not stable */ 2792 GUC_CONTEXT_DISABLE 2793 }; 2794 2795 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2796 2797 GEM_BUG_ON(intel_context_is_child(ce)); 2798 trace_intel_context_sched_disable(ce); 2799 2800 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2801 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2802 } 2803 2804 static void guc_blocked_fence_complete(struct intel_context *ce) 2805 { 2806 lockdep_assert_held(&ce->guc_state.lock); 2807 2808 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2809 i915_sw_fence_complete(&ce->guc_state.blocked); 2810 } 2811 2812 static void guc_blocked_fence_reinit(struct intel_context *ce) 2813 { 2814 lockdep_assert_held(&ce->guc_state.lock); 2815 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2816 2817 /* 2818 * This fence is always complete unless a pending schedule disable is 2819 * outstanding.
We arm the fence here and complete it when we receive 2820 * the pending schedule disable complete message. 2821 */ 2822 i915_sw_fence_fini(&ce->guc_state.blocked); 2823 i915_sw_fence_reinit(&ce->guc_state.blocked); 2824 i915_sw_fence_await(&ce->guc_state.blocked); 2825 i915_sw_fence_commit(&ce->guc_state.blocked); 2826 } 2827 2828 static u16 prep_context_pending_disable(struct intel_context *ce) 2829 { 2830 lockdep_assert_held(&ce->guc_state.lock); 2831 2832 set_context_pending_disable(ce); 2833 clr_context_enabled(ce); 2834 guc_blocked_fence_reinit(ce); 2835 intel_context_get(ce); 2836 2837 return ce->guc_id.id; 2838 } 2839 2840 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2841 { 2842 struct intel_guc *guc = ce_to_guc(ce); 2843 unsigned long flags; 2844 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2845 intel_wakeref_t wakeref; 2846 u16 guc_id; 2847 bool enabled; 2848 2849 GEM_BUG_ON(intel_context_is_child(ce)); 2850 2851 spin_lock_irqsave(&ce->guc_state.lock, flags); 2852 2853 incr_context_blocked(ce); 2854 2855 enabled = context_enabled(ce); 2856 if (unlikely(!enabled || submission_disabled(guc))) { 2857 if (enabled) 2858 clr_context_enabled(ce); 2859 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2860 return &ce->guc_state.blocked; 2861 } 2862 2863 /* 2864 * We add +2 here as the schedule disable complete CTB handler calls 2865 * intel_context_sched_disable_unpin (-2 to pin_count). 2866 */ 2867 atomic_add(2, &ce->pin_count); 2868 2869 guc_id = prep_context_pending_disable(ce); 2870 2871 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2872 2873 with_intel_runtime_pm(runtime_pm, wakeref) 2874 __guc_context_sched_disable(guc, ce, guc_id); 2875 2876 return &ce->guc_state.blocked; 2877 } 2878 2879 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2880 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2881 #define SCHED_STATE_NO_UNBLOCK \ 2882 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2883 SCHED_STATE_PENDING_DISABLE | \ 2884 SCHED_STATE_BANNED) 2885 2886 static bool context_cant_unblock(struct intel_context *ce) 2887 { 2888 lockdep_assert_held(&ce->guc_state.lock); 2889 2890 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2891 context_guc_id_invalid(ce) || 2892 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2893 !intel_context_is_pinned(ce); 2894 } 2895 2896 static void guc_context_unblock(struct intel_context *ce) 2897 { 2898 struct intel_guc *guc = ce_to_guc(ce); 2899 unsigned long flags; 2900 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2901 intel_wakeref_t wakeref; 2902 bool enable; 2903 2904 GEM_BUG_ON(context_enabled(ce)); 2905 GEM_BUG_ON(intel_context_is_child(ce)); 2906 2907 spin_lock_irqsave(&ce->guc_state.lock, flags); 2908 2909 if (unlikely(submission_disabled(guc) || 2910 context_cant_unblock(ce))) { 2911 enable = false; 2912 } else { 2913 enable = true; 2914 set_context_pending_enable(ce); 2915 set_context_enabled(ce); 2916 intel_context_get(ce); 2917 } 2918 2919 decr_context_blocked(ce); 2920 2921 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2922 2923 if (enable) { 2924 with_intel_runtime_pm(runtime_pm, wakeref) 2925 __guc_context_sched_enable(guc, ce); 2926 } 2927 } 2928 2929 static void guc_context_cancel_request(struct intel_context *ce, 2930 struct i915_request *rq) 2931 { 2932 struct intel_context *block_context = 2933 request_to_scheduling_context(rq); 2934 2935 if (i915_sw_fence_signaled(&rq->submit)) { 2936 struct i915_sw_fence *fence; 2937 2938 intel_context_get(ce); 2939 fence = 
guc_context_block(block_context); 2940 i915_sw_fence_wait(fence); 2941 if (!i915_request_completed(rq)) { 2942 __i915_request_skip(rq); 2943 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2944 true); 2945 } 2946 2947 guc_context_unblock(block_context); 2948 intel_context_put(ce); 2949 } 2950 } 2951 2952 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2953 u16 guc_id, 2954 u32 preemption_timeout) 2955 { 2956 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 2957 struct context_policy policy; 2958 2959 __guc_context_policy_start_klv(&policy, guc_id); 2960 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2961 __guc_context_set_context_policies(guc, &policy, true); 2962 } else { 2963 u32 action[] = { 2964 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 2965 guc_id, 2966 preemption_timeout 2967 }; 2968 2969 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2970 } 2971 } 2972 2973 static void 2974 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 2975 unsigned int preempt_timeout_ms) 2976 { 2977 struct intel_guc *guc = ce_to_guc(ce); 2978 struct intel_runtime_pm *runtime_pm = 2979 &ce->engine->gt->i915->runtime_pm; 2980 intel_wakeref_t wakeref; 2981 unsigned long flags; 2982 2983 GEM_BUG_ON(intel_context_is_child(ce)); 2984 2985 guc_flush_submissions(guc); 2986 2987 spin_lock_irqsave(&ce->guc_state.lock, flags); 2988 set_context_banned(ce); 2989 2990 if (submission_disabled(guc) || 2991 (!context_enabled(ce) && !context_pending_disable(ce))) { 2992 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2993 2994 guc_cancel_context_requests(ce); 2995 intel_engine_signal_breadcrumbs(ce->engine); 2996 } else if (!context_pending_disable(ce)) { 2997 u16 guc_id; 2998 2999 /* 3000 * We add +2 here as the schedule disable complete CTB handler 3001 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3002 */ 3003 atomic_add(2, &ce->pin_count); 3004 3005 guc_id = prep_context_pending_disable(ce); 3006 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3007 3008 /* 3009 * In addition to disabling scheduling, set the preemption 3010 * timeout to the minimum value (1 us) so the banned context 3011 * gets kicked off the HW ASAP. 
3012 */ 3013 with_intel_runtime_pm(runtime_pm, wakeref) { 3014 __guc_context_set_preemption_timeout(guc, guc_id, 3015 preempt_timeout_ms); 3016 __guc_context_sched_disable(guc, ce, guc_id); 3017 } 3018 } else { 3019 if (!context_guc_id_invalid(ce)) 3020 with_intel_runtime_pm(runtime_pm, wakeref) 3021 __guc_context_set_preemption_timeout(guc, 3022 ce->guc_id.id, 3023 preempt_timeout_ms); 3024 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3025 } 3026 } 3027 3028 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3029 unsigned long flags) 3030 __releases(ce->guc_state.lock) 3031 { 3032 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3033 intel_wakeref_t wakeref; 3034 u16 guc_id; 3035 3036 lockdep_assert_held(&ce->guc_state.lock); 3037 guc_id = prep_context_pending_disable(ce); 3038 3039 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3040 3041 with_intel_runtime_pm(runtime_pm, wakeref) 3042 __guc_context_sched_disable(guc, ce, guc_id); 3043 } 3044 3045 static bool bypass_sched_disable(struct intel_guc *guc, 3046 struct intel_context *ce) 3047 { 3048 lockdep_assert_held(&ce->guc_state.lock); 3049 GEM_BUG_ON(intel_context_is_child(ce)); 3050 3051 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3052 !ctx_id_mapped(guc, ce->guc_id.id)) { 3053 clr_context_enabled(ce); 3054 return true; 3055 } 3056 3057 return !context_enabled(ce); 3058 } 3059 3060 static void __delay_sched_disable(struct work_struct *wrk) 3061 { 3062 struct intel_context *ce = 3063 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3064 struct intel_guc *guc = ce_to_guc(ce); 3065 unsigned long flags; 3066 3067 spin_lock_irqsave(&ce->guc_state.lock, flags); 3068 3069 if (bypass_sched_disable(guc, ce)) { 3070 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3071 intel_context_sched_disable_unpin(ce); 3072 } else { 3073 do_sched_disable(guc, ce, flags); 3074 } 3075 } 3076 3077 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3078 { 3079 /* 3080 * Parent contexts are perma-pinned; if we are unpinning, do the 3081 * schedule disable immediately. 3082 */ 3083 if (intel_context_is_parent(ce)) 3084 return true; 3085 3086 /* 3087 * If we are beyond the threshold of available guc_ids, do the schedule disable immediately.
3088 */ 3089 return guc->submission_state.guc_ids_in_use > 3090 guc->submission_state.sched_disable_gucid_threshold; 3091 } 3092 3093 static void guc_context_sched_disable(struct intel_context *ce) 3094 { 3095 struct intel_guc *guc = ce_to_guc(ce); 3096 u64 delay = guc->submission_state.sched_disable_delay_ms; 3097 unsigned long flags; 3098 3099 spin_lock_irqsave(&ce->guc_state.lock, flags); 3100 3101 if (bypass_sched_disable(guc, ce)) { 3102 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3103 intel_context_sched_disable_unpin(ce); 3104 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3105 delay) { 3106 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3107 mod_delayed_work(system_unbound_wq, 3108 &ce->guc_state.sched_disable_delay_work, 3109 msecs_to_jiffies(delay)); 3110 } else { 3111 do_sched_disable(guc, ce, flags); 3112 } 3113 } 3114 3115 static void guc_context_close(struct intel_context *ce) 3116 { 3117 unsigned long flags; 3118 3119 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3120 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3121 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3122 3123 spin_lock_irqsave(&ce->guc_state.lock, flags); 3124 set_context_close_done(ce); 3125 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3126 } 3127 3128 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3129 { 3130 struct intel_guc *guc = ce_to_guc(ce); 3131 struct intel_gt *gt = guc_to_gt(guc); 3132 unsigned long flags; 3133 bool disabled; 3134 3135 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3136 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3137 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3138 GEM_BUG_ON(context_enabled(ce)); 3139 3140 /* Seal race with Reset */ 3141 spin_lock_irqsave(&ce->guc_state.lock, flags); 3142 disabled = submission_disabled(guc); 3143 if (likely(!disabled)) { 3144 __intel_gt_pm_get(gt); 3145 set_context_destroyed(ce); 3146 clr_context_registered(ce); 3147 } 3148 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3149 if (unlikely(disabled)) { 3150 release_guc_id(guc, ce); 3151 __guc_context_destroy(ce); 3152 return; 3153 } 3154 3155 deregister_context(ce, ce->guc_id.id); 3156 } 3157 3158 static void __guc_context_destroy(struct intel_context *ce) 3159 { 3160 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3161 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3162 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3163 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3164 3165 lrc_fini(ce); 3166 intel_context_fini(ce); 3167 3168 if (intel_engine_is_virtual(ce->engine)) { 3169 struct guc_virtual_engine *ve = 3170 container_of(ce, typeof(*ve), context); 3171 3172 if (ve->base.breadcrumbs) 3173 intel_breadcrumbs_put(ve->base.breadcrumbs); 3174 3175 kfree(ve); 3176 } else { 3177 intel_context_free(ce); 3178 } 3179 } 3180 3181 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3182 { 3183 struct intel_context *ce; 3184 unsigned long flags; 3185 3186 GEM_BUG_ON(!submission_disabled(guc) && 3187 guc_submission_initialized(guc)); 3188 3189 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3190 spin_lock_irqsave(&guc->submission_state.lock, flags); 3191 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3192 struct intel_context, 3193 destroyed_link); 3194 if (ce) 3195 list_del_init(&ce->destroyed_link); 3196 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3197 3198 if (!ce) 3199 break; 
3200 3201 release_guc_id(guc, ce); 3202 __guc_context_destroy(ce); 3203 } 3204 } 3205 3206 static void deregister_destroyed_contexts(struct intel_guc *guc) 3207 { 3208 struct intel_context *ce; 3209 unsigned long flags; 3210 3211 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3212 spin_lock_irqsave(&guc->submission_state.lock, flags); 3213 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3214 struct intel_context, 3215 destroyed_link); 3216 if (ce) 3217 list_del_init(&ce->destroyed_link); 3218 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3219 3220 if (!ce) 3221 break; 3222 3223 guc_lrc_desc_unpin(ce); 3224 } 3225 } 3226 3227 static void destroyed_worker_func(struct work_struct *w) 3228 { 3229 struct intel_guc *guc = container_of(w, struct intel_guc, 3230 submission_state.destroyed_worker); 3231 struct intel_gt *gt = guc_to_gt(guc); 3232 int tmp; 3233 3234 with_intel_gt_pm(gt, tmp) 3235 deregister_destroyed_contexts(guc); 3236 } 3237 3238 static void guc_context_destroy(struct kref *kref) 3239 { 3240 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3241 struct intel_guc *guc = ce_to_guc(ce); 3242 unsigned long flags; 3243 bool destroy; 3244 3245 /* 3246 * If the guc_id is invalid this context has been stolen and we can free 3247 * it immediately. Also can be freed immediately if the context is not 3248 * registered with the GuC or the GuC is in the middle of a reset. 3249 */ 3250 spin_lock_irqsave(&guc->submission_state.lock, flags); 3251 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3252 !ctx_id_mapped(guc, ce->guc_id.id); 3253 if (likely(!destroy)) { 3254 if (!list_empty(&ce->guc_id.link)) 3255 list_del_init(&ce->guc_id.link); 3256 list_add_tail(&ce->destroyed_link, 3257 &guc->submission_state.destroyed_contexts); 3258 } else { 3259 __release_guc_id(guc, ce); 3260 } 3261 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3262 if (unlikely(destroy)) { 3263 __guc_context_destroy(ce); 3264 return; 3265 } 3266 3267 /* 3268 * We use a worker to issue the H2G to deregister the context as we can 3269 * take the GT PM for the first time which isn't allowed from an atomic 3270 * context. 
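 * The worker (destroyed_worker_func()) holds a GT PM wakeref via with_intel_gt_pm() around the deregistration.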
3271 */ 3272 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3273 } 3274 3275 static int guc_context_alloc(struct intel_context *ce) 3276 { 3277 return lrc_alloc(ce, ce->engine); 3278 } 3279 3280 static void __guc_context_set_prio(struct intel_guc *guc, 3281 struct intel_context *ce) 3282 { 3283 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { 3284 struct context_policy policy; 3285 3286 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3287 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3288 __guc_context_set_context_policies(guc, &policy, true); 3289 } else { 3290 u32 action[] = { 3291 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3292 ce->guc_id.id, 3293 ce->guc_state.prio, 3294 }; 3295 3296 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3297 } 3298 } 3299 3300 static void guc_context_set_prio(struct intel_guc *guc, 3301 struct intel_context *ce, 3302 u8 prio) 3303 { 3304 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3305 prio > GUC_CLIENT_PRIORITY_NORMAL); 3306 lockdep_assert_held(&ce->guc_state.lock); 3307 3308 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3309 !context_registered(ce)) { 3310 ce->guc_state.prio = prio; 3311 return; 3312 } 3313 3314 ce->guc_state.prio = prio; 3315 __guc_context_set_prio(guc, ce); 3316 3317 trace_intel_context_set_prio(ce); 3318 } 3319 3320 static inline u8 map_i915_prio_to_guc_prio(int prio) 3321 { 3322 if (prio == I915_PRIORITY_NORMAL) 3323 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3324 else if (prio < I915_PRIORITY_NORMAL) 3325 return GUC_CLIENT_PRIORITY_NORMAL; 3326 else if (prio < I915_PRIORITY_DISPLAY) 3327 return GUC_CLIENT_PRIORITY_HIGH; 3328 else 3329 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3330 } 3331 3332 static inline void add_context_inflight_prio(struct intel_context *ce, 3333 u8 guc_prio) 3334 { 3335 lockdep_assert_held(&ce->guc_state.lock); 3336 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3337 3338 ++ce->guc_state.prio_count[guc_prio]; 3339 3340 /* Overflow protection */ 3341 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3342 } 3343 3344 static inline void sub_context_inflight_prio(struct intel_context *ce, 3345 u8 guc_prio) 3346 { 3347 lockdep_assert_held(&ce->guc_state.lock); 3348 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3349 3350 /* Underflow protection */ 3351 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3352 3353 --ce->guc_state.prio_count[guc_prio]; 3354 } 3355 3356 static inline void update_context_prio(struct intel_context *ce) 3357 { 3358 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3359 int i; 3360 3361 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3362 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3363 3364 lockdep_assert_held(&ce->guc_state.lock); 3365 3366 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3367 if (ce->guc_state.prio_count[i]) { 3368 guc_context_set_prio(guc, ce, i); 3369 break; 3370 } 3371 } 3372 } 3373 3374 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3375 { 3376 /* Lower value is higher priority */ 3377 return new_guc_prio < old_guc_prio; 3378 } 3379 3380 static void add_to_context(struct i915_request *rq) 3381 { 3382 struct intel_context *ce = request_to_scheduling_context(rq); 3383 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3384 3385 GEM_BUG_ON(intel_context_is_child(ce)); 3386 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3387 3388 spin_lock(&ce->guc_state.lock); 3389 
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3390 3391 if (rq->guc_prio == GUC_PRIO_INIT) { 3392 rq->guc_prio = new_guc_prio; 3393 add_context_inflight_prio(ce, rq->guc_prio); 3394 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3395 sub_context_inflight_prio(ce, rq->guc_prio); 3396 rq->guc_prio = new_guc_prio; 3397 add_context_inflight_prio(ce, rq->guc_prio); 3398 } 3399 update_context_prio(ce); 3400 3401 spin_unlock(&ce->guc_state.lock); 3402 } 3403 3404 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3405 { 3406 lockdep_assert_held(&ce->guc_state.lock); 3407 3408 if (rq->guc_prio != GUC_PRIO_INIT && 3409 rq->guc_prio != GUC_PRIO_FINI) { 3410 sub_context_inflight_prio(ce, rq->guc_prio); 3411 update_context_prio(ce); 3412 } 3413 rq->guc_prio = GUC_PRIO_FINI; 3414 } 3415 3416 static void remove_from_context(struct i915_request *rq) 3417 { 3418 struct intel_context *ce = request_to_scheduling_context(rq); 3419 3420 GEM_BUG_ON(intel_context_is_child(ce)); 3421 3422 spin_lock_irq(&ce->guc_state.lock); 3423 3424 list_del_init(&rq->sched.link); 3425 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3426 3427 /* Prevent further __await_execution() registering a cb, then flush */ 3428 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3429 3430 guc_prio_fini(rq, ce); 3431 3432 spin_unlock_irq(&ce->guc_state.lock); 3433 3434 atomic_dec(&ce->guc_id.ref); 3435 i915_request_notify_execute_cb_imm(rq); 3436 } 3437 3438 static const struct intel_context_ops guc_context_ops = { 3439 .alloc = guc_context_alloc, 3440 3441 .close = guc_context_close, 3442 3443 .pre_pin = guc_context_pre_pin, 3444 .pin = guc_context_pin, 3445 .unpin = guc_context_unpin, 3446 .post_unpin = guc_context_post_unpin, 3447 3448 .revoke = guc_context_revoke, 3449 3450 .cancel_request = guc_context_cancel_request, 3451 3452 .enter = intel_context_enter_engine, 3453 .exit = intel_context_exit_engine, 3454 3455 .sched_disable = guc_context_sched_disable, 3456 3457 .reset = lrc_reset, 3458 .destroy = guc_context_destroy, 3459 3460 .create_virtual = guc_create_virtual, 3461 .create_parallel = guc_create_parallel, 3462 }; 3463 3464 static void submit_work_cb(struct irq_work *wrk) 3465 { 3466 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3467 3468 might_lock(&rq->engine->sched_engine->lock); 3469 i915_sw_fence_complete(&rq->submit); 3470 } 3471 3472 static void __guc_signal_context_fence(struct intel_context *ce) 3473 { 3474 struct i915_request *rq, *rn; 3475 3476 lockdep_assert_held(&ce->guc_state.lock); 3477 3478 if (!list_empty(&ce->guc_state.fences)) 3479 trace_intel_context_fence_release(ce); 3480 3481 /* 3482 * Use an IRQ to ensure locking order of sched_engine->lock -> 3483 * ce->guc_state.lock is preserved. 
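 * Completing rq->submit directly here could re-enter submission and try to take sched_engine->lock while ce->guc_state.lock is already held, inverting that order (see the might_lock() annotation in submit_work_cb()); irq_work defers the completion instead.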
3484 */ 3485 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3486 guc_fence_link) { 3487 list_del(&rq->guc_fence_link); 3488 irq_work_queue(&rq->submit_work); 3489 } 3490 3491 INIT_LIST_HEAD(&ce->guc_state.fences); 3492 } 3493 3494 static void guc_signal_context_fence(struct intel_context *ce) 3495 { 3496 unsigned long flags; 3497 3498 GEM_BUG_ON(intel_context_is_child(ce)); 3499 3500 spin_lock_irqsave(&ce->guc_state.lock, flags); 3501 clr_context_wait_for_deregister_to_register(ce); 3502 __guc_signal_context_fence(ce); 3503 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3504 } 3505 3506 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3507 { 3508 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3509 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3510 !submission_disabled(ce_to_guc(ce)); 3511 } 3512 3513 static void guc_context_init(struct intel_context *ce) 3514 { 3515 const struct i915_gem_context *ctx; 3516 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3517 3518 rcu_read_lock(); 3519 ctx = rcu_dereference(ce->gem_context); 3520 if (ctx) 3521 prio = ctx->sched.priority; 3522 rcu_read_unlock(); 3523 3524 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3525 3526 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3527 __delay_sched_disable); 3528 3529 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3530 } 3531 3532 static int guc_request_alloc(struct i915_request *rq) 3533 { 3534 struct intel_context *ce = request_to_scheduling_context(rq); 3535 struct intel_guc *guc = ce_to_guc(ce); 3536 unsigned long flags; 3537 int ret; 3538 3539 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3540 3541 /* 3542 * Flush enough space to reduce the likelihood of waiting after 3543 * we start building the request - in which case we will just 3544 * have to repeat work. 3545 */ 3546 rq->reserved_space += GUC_REQUEST_SIZE; 3547 3548 /* 3549 * Note that after this point, we have committed to using 3550 * this request as it is being used to both track the 3551 * state of engine initialisation and liveness of the 3552 * golden renderstate above. Think twice before you try 3553 * to cancel/unwind this request now. 3554 */ 3555 3556 /* Unconditionally invalidate GPU caches and TLBs. */ 3557 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3558 if (ret) 3559 return ret; 3560 3561 rq->reserved_space -= GUC_REQUEST_SIZE; 3562 3563 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3564 guc_context_init(ce); 3565 3566 /* 3567 * If the context gets closed while the execbuf is ongoing, the context 3568 * close code will race with the below code to cancel the delayed work. 3569 * If the context close wins the race and cancels the work, it will 3570 * immediately call the sched disable (see guc_context_close), so there 3571 * is a chance we can get past this check while the sched_disable code 3572 * is being executed. To make sure that code completes before we check 3573 * the status further down, we wait for the close process to complete. 3574 * Else, this code path could send a request down thinking that the 3575 * context is still in a schedule-enable mode while the GuC ends up 3576 * dropping the request completely because the disable did go from the 3577 * context_close path right to GuC just prior. In the event the CT is 3578 * full, we could potentially need to wait up to 1.5 seconds. 
3579 */ 3580 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3581 intel_context_sched_disable_unpin(ce); 3582 else if (intel_context_is_closed(ce)) 3583 if (wait_for(context_close_done(ce), 1500)) 3584 drm_warn(&guc_to_gt(guc)->i915->drm, 3585 "timed out waiting on context sched close before realloc\n"); 3586 /* 3587 * Call pin_guc_id here rather than in the pinning step as with 3588 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3589 * guc_id and creating horrible race conditions. This is especially bad 3590 * when guc_id are being stolen due to over subscription. By the time 3591 * this function is reached, it is guaranteed that the guc_id will be 3592 * persistent until the generated request is retired. Thus, sealing these 3593 * race conditions. It is still safe to fail here if guc_id are 3594 * exhausted and return -EAGAIN to the user indicating that they can try 3595 * again in the future. 3596 * 3597 * There is no need for a lock here as the timeline mutex ensures at 3598 * most one context can be executing this code path at once. The 3599 * guc_id_ref is incremented once for every request in flight and 3600 * decremented on each retire. When it is zero, a lock around the 3601 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3602 */ 3603 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3604 goto out; 3605 3606 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3607 if (unlikely(ret < 0)) 3608 return ret; 3609 if (context_needs_register(ce, !!ret)) { 3610 ret = try_context_registration(ce, true); 3611 if (unlikely(ret)) { /* unwind */ 3612 if (ret == -EPIPE) { 3613 disable_submission(guc); 3614 goto out; /* GPU will be reset */ 3615 } 3616 atomic_dec(&ce->guc_id.ref); 3617 unpin_guc_id(guc, ce); 3618 return ret; 3619 } 3620 } 3621 3622 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3623 3624 out: 3625 /* 3626 * We block all requests on this context if a G2H is pending for a 3627 * schedule disable or context deregistration as the GuC will fail a 3628 * schedule enable or context registration if either G2H is pending 3629 * respectfully. Once a G2H returns, the fence is released that is 3630 * blocking these requests (see guc_signal_context_fence). 
3631 */ 3632 spin_lock_irqsave(&ce->guc_state.lock, flags); 3633 if (context_wait_for_deregister_to_register(ce) || 3634 context_pending_disable(ce)) { 3635 init_irq_work(&rq->submit_work, submit_work_cb); 3636 i915_sw_fence_await(&rq->submit); 3637 3638 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3639 } 3640 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3641 3642 return 0; 3643 } 3644 3645 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3646 struct i915_gem_ww_ctx *ww, 3647 void **vaddr) 3648 { 3649 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3650 3651 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3652 } 3653 3654 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3655 { 3656 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3657 int ret = __guc_context_pin(ce, engine, vaddr); 3658 intel_engine_mask_t tmp, mask = ce->engine->mask; 3659 3660 if (likely(!ret)) 3661 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3662 intel_engine_pm_get(engine); 3663 3664 return ret; 3665 } 3666 3667 static void guc_virtual_context_unpin(struct intel_context *ce) 3668 { 3669 intel_engine_mask_t tmp, mask = ce->engine->mask; 3670 struct intel_engine_cs *engine; 3671 struct intel_guc *guc = ce_to_guc(ce); 3672 3673 GEM_BUG_ON(context_enabled(ce)); 3674 GEM_BUG_ON(intel_context_is_barrier(ce)); 3675 3676 unpin_guc_id(guc, ce); 3677 lrc_unpin(ce); 3678 3679 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3680 intel_engine_pm_put_async(engine); 3681 } 3682 3683 static void guc_virtual_context_enter(struct intel_context *ce) 3684 { 3685 intel_engine_mask_t tmp, mask = ce->engine->mask; 3686 struct intel_engine_cs *engine; 3687 3688 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3689 intel_engine_pm_get(engine); 3690 3691 intel_timeline_enter(ce->timeline); 3692 } 3693 3694 static void guc_virtual_context_exit(struct intel_context *ce) 3695 { 3696 intel_engine_mask_t tmp, mask = ce->engine->mask; 3697 struct intel_engine_cs *engine; 3698 3699 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3700 intel_engine_pm_put(engine); 3701 3702 intel_timeline_exit(ce->timeline); 3703 } 3704 3705 static int guc_virtual_context_alloc(struct intel_context *ce) 3706 { 3707 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3708 3709 return lrc_alloc(ce, engine); 3710 } 3711 3712 static const struct intel_context_ops virtual_guc_context_ops = { 3713 .alloc = guc_virtual_context_alloc, 3714 3715 .close = guc_context_close, 3716 3717 .pre_pin = guc_virtual_context_pre_pin, 3718 .pin = guc_virtual_context_pin, 3719 .unpin = guc_virtual_context_unpin, 3720 .post_unpin = guc_context_post_unpin, 3721 3722 .revoke = guc_context_revoke, 3723 3724 .cancel_request = guc_context_cancel_request, 3725 3726 .enter = guc_virtual_context_enter, 3727 .exit = guc_virtual_context_exit, 3728 3729 .sched_disable = guc_context_sched_disable, 3730 3731 .destroy = guc_context_destroy, 3732 3733 .get_sibling = guc_virtual_get_sibling, 3734 }; 3735 3736 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3737 { 3738 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3739 struct intel_guc *guc = ce_to_guc(ce); 3740 int ret; 3741 3742 GEM_BUG_ON(!intel_context_is_parent(ce)); 3743 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3744 3745 ret = pin_guc_id(guc, ce); 3746 if (unlikely(ret < 0)) 3747 return ret; 3748 3749 return 
__guc_context_pin(ce, engine, vaddr); 3750 } 3751 3752 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3753 { 3754 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3755 3756 GEM_BUG_ON(!intel_context_is_child(ce)); 3757 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3758 3759 __intel_context_pin(ce->parallel.parent); 3760 return __guc_context_pin(ce, engine, vaddr); 3761 } 3762 3763 static void guc_parent_context_unpin(struct intel_context *ce) 3764 { 3765 struct intel_guc *guc = ce_to_guc(ce); 3766 3767 GEM_BUG_ON(context_enabled(ce)); 3768 GEM_BUG_ON(intel_context_is_barrier(ce)); 3769 GEM_BUG_ON(!intel_context_is_parent(ce)); 3770 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3771 3772 unpin_guc_id(guc, ce); 3773 lrc_unpin(ce); 3774 } 3775 3776 static void guc_child_context_unpin(struct intel_context *ce) 3777 { 3778 GEM_BUG_ON(context_enabled(ce)); 3779 GEM_BUG_ON(intel_context_is_barrier(ce)); 3780 GEM_BUG_ON(!intel_context_is_child(ce)); 3781 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3782 3783 lrc_unpin(ce); 3784 } 3785 3786 static void guc_child_context_post_unpin(struct intel_context *ce) 3787 { 3788 GEM_BUG_ON(!intel_context_is_child(ce)); 3789 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3790 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3791 3792 lrc_post_unpin(ce); 3793 intel_context_unpin(ce->parallel.parent); 3794 } 3795 3796 static void guc_child_context_destroy(struct kref *kref) 3797 { 3798 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3799 3800 __guc_context_destroy(ce); 3801 } 3802 3803 static const struct intel_context_ops virtual_parent_context_ops = { 3804 .alloc = guc_virtual_context_alloc, 3805 3806 .close = guc_context_close, 3807 3808 .pre_pin = guc_context_pre_pin, 3809 .pin = guc_parent_context_pin, 3810 .unpin = guc_parent_context_unpin, 3811 .post_unpin = guc_context_post_unpin, 3812 3813 .revoke = guc_context_revoke, 3814 3815 .cancel_request = guc_context_cancel_request, 3816 3817 .enter = guc_virtual_context_enter, 3818 .exit = guc_virtual_context_exit, 3819 3820 .sched_disable = guc_context_sched_disable, 3821 3822 .destroy = guc_context_destroy, 3823 3824 .get_sibling = guc_virtual_get_sibling, 3825 }; 3826 3827 static const struct intel_context_ops virtual_child_context_ops = { 3828 .alloc = guc_virtual_context_alloc, 3829 3830 .pre_pin = guc_context_pre_pin, 3831 .pin = guc_child_context_pin, 3832 .unpin = guc_child_context_unpin, 3833 .post_unpin = guc_child_context_post_unpin, 3834 3835 .cancel_request = guc_context_cancel_request, 3836 3837 .enter = guc_virtual_context_enter, 3838 .exit = guc_virtual_context_exit, 3839 3840 .destroy = guc_child_context_destroy, 3841 3842 .get_sibling = guc_virtual_get_sibling, 3843 }; 3844 3845 /* 3846 * The below override of the breadcrumbs is enabled when the user configures a 3847 * context for parallel submission (multi-lrc, parent-child). 3848 * 3849 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3850 * safely preempt all the hw contexts configured for parallel submission 3851 * between each BB. The contract between the i915 and GuC is if the parent 3852 * context can be preempted, all the children can be preempted, and the GuC will 3853 * always try to preempt the parent before the children. A handshake between the 3854 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3855 * creating a window to preempt between each set of BBs. 
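 *
 * Roughly, the emitted handshake per submission looks like:
 *
 *   parent BB start: wait for every child join semaphore (PARENT_GO_BB),
 *                    disable arbitration, write the go semaphore
 *                    (CHILD_GO_BB), jump to the batch
 *   child BB start:  write its join semaphore, wait for the go semaphore,
 *                    disable arbitration, jump to the batch
 *   parent fini:     wait for every child join (PARENT_GO_FINI_BREADCRUMB),
 *                    enable arbitration, write the go semaphore
 *                    (CHILD_GO_FINI_BREADCRUMB), then seqno write + interrupt
 *   child fini:      enable arbitration, write its join semaphore, wait for
 *                    the go semaphore, then seqno write + interrupt
 *
 * Arbitration is therefore enabled on all the contexts only in the window
 * between one set of BBs completing and the next set starting, which is where
 * the GuC can preempt.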
3856 */ 3857 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3858 u64 offset, u32 len, 3859 const unsigned int flags); 3860 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3861 u64 offset, u32 len, 3862 const unsigned int flags); 3863 static u32 * 3864 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3865 u32 *cs); 3866 static u32 * 3867 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3868 u32 *cs); 3869 3870 static struct intel_context * 3871 guc_create_parallel(struct intel_engine_cs **engines, 3872 unsigned int num_siblings, 3873 unsigned int width) 3874 { 3875 struct intel_engine_cs **siblings = NULL; 3876 struct intel_context *parent = NULL, *ce, *err; 3877 int i, j; 3878 3879 siblings = kmalloc_array(num_siblings, 3880 sizeof(*siblings), 3881 GFP_KERNEL); 3882 if (!siblings) 3883 return ERR_PTR(-ENOMEM); 3884 3885 for (i = 0; i < width; ++i) { 3886 for (j = 0; j < num_siblings; ++j) 3887 siblings[j] = engines[i * num_siblings + j]; 3888 3889 ce = intel_engine_create_virtual(siblings, num_siblings, 3890 FORCE_VIRTUAL); 3891 if (IS_ERR(ce)) { 3892 err = ERR_CAST(ce); 3893 goto unwind; 3894 } 3895 3896 if (i == 0) { 3897 parent = ce; 3898 parent->ops = &virtual_parent_context_ops; 3899 } else { 3900 ce->ops = &virtual_child_context_ops; 3901 intel_context_bind_parent_child(parent, ce); 3902 } 3903 } 3904 3905 parent->parallel.fence_context = dma_fence_context_alloc(1); 3906 3907 parent->engine->emit_bb_start = 3908 emit_bb_start_parent_no_preempt_mid_batch; 3909 parent->engine->emit_fini_breadcrumb = 3910 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3911 parent->engine->emit_fini_breadcrumb_dw = 3912 12 + 4 * parent->parallel.number_children; 3913 for_each_child(parent, ce) { 3914 ce->engine->emit_bb_start = 3915 emit_bb_start_child_no_preempt_mid_batch; 3916 ce->engine->emit_fini_breadcrumb = 3917 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3918 ce->engine->emit_fini_breadcrumb_dw = 16; 3919 } 3920 3921 kfree(siblings); 3922 return parent; 3923 3924 unwind: 3925 if (parent) 3926 intel_context_put(parent); 3927 kfree(siblings); 3928 return err; 3929 } 3930 3931 static bool 3932 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3933 { 3934 struct intel_engine_cs *sibling; 3935 intel_engine_mask_t tmp, mask = b->engine_mask; 3936 bool result = false; 3937 3938 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3939 result |= intel_engine_irq_enable(sibling); 3940 3941 return result; 3942 } 3943 3944 static void 3945 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3946 { 3947 struct intel_engine_cs *sibling; 3948 intel_engine_mask_t tmp, mask = b->engine_mask; 3949 3950 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3951 intel_engine_irq_disable(sibling); 3952 } 3953 3954 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3955 { 3956 int i; 3957 3958 /* 3959 * In GuC submission mode we do not know which physical engine a request 3960 * will be scheduled on, this creates a problem because the breadcrumb 3961 * interrupt is per physical engine. To work around this we attach 3962 * requests and direct all breadcrumb interrupts to the first instance 3963 * of an engine per class. In addition all breadcrumb interrupts are 3964 * enabled / disabled across an engine class in unison. 
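	 *
	 * For example, if an engine class has instances 0-3, all four end up
	 * sharing instance 0's intel_breadcrumbs: each engine's mask is OR'ed
	 * into breadcrumbs->engine_mask and the irq_enable / irq_disable hooks
	 * installed below fan the interrupt enabling out to every engine in
	 * that mask.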
3965 */ 3966 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3967 struct intel_engine_cs *sibling = 3968 engine->gt->engine_class[engine->class][i]; 3969 3970 if (sibling) { 3971 if (engine->breadcrumbs != sibling->breadcrumbs) { 3972 intel_breadcrumbs_put(engine->breadcrumbs); 3973 engine->breadcrumbs = 3974 intel_breadcrumbs_get(sibling->breadcrumbs); 3975 } 3976 break; 3977 } 3978 } 3979 3980 if (engine->breadcrumbs) { 3981 engine->breadcrumbs->engine_mask |= engine->mask; 3982 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3983 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3984 } 3985 } 3986 3987 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3988 int prio) 3989 { 3990 struct intel_context *ce = request_to_scheduling_context(rq); 3991 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3992 3993 /* Short circuit function */ 3994 if (prio < I915_PRIORITY_NORMAL || 3995 rq->guc_prio == GUC_PRIO_FINI || 3996 (rq->guc_prio != GUC_PRIO_INIT && 3997 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3998 return; 3999 4000 spin_lock(&ce->guc_state.lock); 4001 if (rq->guc_prio != GUC_PRIO_FINI) { 4002 if (rq->guc_prio != GUC_PRIO_INIT) 4003 sub_context_inflight_prio(ce, rq->guc_prio); 4004 rq->guc_prio = new_guc_prio; 4005 add_context_inflight_prio(ce, rq->guc_prio); 4006 update_context_prio(ce); 4007 } 4008 spin_unlock(&ce->guc_state.lock); 4009 } 4010 4011 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4012 { 4013 struct intel_context *ce = request_to_scheduling_context(rq); 4014 4015 spin_lock(&ce->guc_state.lock); 4016 guc_prio_fini(rq, ce); 4017 spin_unlock(&ce->guc_state.lock); 4018 } 4019 4020 static void sanitize_hwsp(struct intel_engine_cs *engine) 4021 { 4022 struct intel_timeline *tl; 4023 4024 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4025 intel_timeline_reset_seqno(tl); 4026 } 4027 4028 static void guc_sanitize(struct intel_engine_cs *engine) 4029 { 4030 /* 4031 * Poison residual state on resume, in case the suspend didn't! 4032 * 4033 * We have to assume that across suspend/resume (or other loss 4034 * of control) that the contents of our pinned buffers has been 4035 * lost, replaced by garbage. Since this doesn't always happen, 4036 * let's poison such state so that we more quickly spot when 4037 * we falsely assume it has been preserved. 4038 */ 4039 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4040 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4041 4042 /* 4043 * The kernel_context HWSP is stored in the status_page. As above, 4044 * that may be lost on resume/initialisation, and so we need to 4045 * reset the value in the HWSP. 
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void setup_hwsp(struct intel_engine_cs *engine)
{
	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

	ENGINE_WRITE_FW(engine,
			RING_HWS_PGA,
			i915_ggtt_offset(engine->status_page.vma));
}

static void start_engine(struct intel_engine_cs *engine)
{
	ENGINE_WRITE_FW(engine,
			RING_MODE_GEN7,
			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
	ENGINE_POSTING_READ(engine, RING_MI_MODE);
}

static int guc_resume(struct intel_engine_cs *engine)
{
	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);

	intel_mocs_init_engine(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	setup_hwsp(engine);
	start_engine(engine);

	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
		xehp_enable_ccs_engines(engine);

	return 0;
}

static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
{
	return !sched_engine->tasklet.callback;
}

static void guc_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = guc_submit_request;
}

static inline void guc_kernel_context_pin(struct intel_guc *guc,
					  struct intel_context *ce)
{
	/*
	 * Note: we purposefully do not check the returns below because
	 * the registration can only fail if a reset is just starting.
	 * This is called at the end of reset so presumably another reset
	 * isn't happening and even if it did, this code would be run again.
	 */

	if (context_guc_id_invalid(ce))
		pin_guc_id(guc, ce);

	try_context_registration(ce, true);
}

static inline void guc_init_lrc_mapping(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* make sure all descriptors are clean... */
	xa_destroy(&guc->context_lookup);

	/*
	 * A reset might have occurred while we had a pending stalled request,
	 * so make sure we clean that up.
	 */
	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;

	/*
	 * Some contexts might have been pinned before we enabled GuC
	 * submission, so we need to add them to the GuC bookkeeping.
	 * Also, after a reset of the GuC we want to make sure that the
	 * information shared with GuC is properly reset. The kernel LRCs are
	 * not attached to the gem_context, so they need to be added separately.
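	 *
	 * The contexts re-registered below are the per-engine pinned kernel
	 * contexts (e.g. the kernel_context used for idle barriers and
	 * heartbeat requests), which live on engine->pinned_contexts_list.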
4139 */ 4140 for_each_engine(engine, gt, id) { 4141 struct intel_context *ce; 4142 4143 list_for_each_entry(ce, &engine->pinned_contexts_list, 4144 pinned_contexts_link) 4145 guc_kernel_context_pin(guc, ce); 4146 } 4147 } 4148 4149 static void guc_release(struct intel_engine_cs *engine) 4150 { 4151 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4152 4153 intel_engine_cleanup_common(engine); 4154 lrc_fini_wa_ctx(engine); 4155 } 4156 4157 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4158 { 4159 struct intel_engine_cs *e; 4160 intel_engine_mask_t tmp, mask = engine->mask; 4161 4162 for_each_engine_masked(e, engine->gt, mask, tmp) 4163 e->serial++; 4164 } 4165 4166 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4167 { 4168 /* Default vfuncs which can be overridden by each engine. */ 4169 4170 engine->resume = guc_resume; 4171 4172 engine->cops = &guc_context_ops; 4173 engine->request_alloc = guc_request_alloc; 4174 engine->add_active_request = add_to_context; 4175 engine->remove_active_request = remove_from_context; 4176 4177 engine->sched_engine->schedule = i915_schedule; 4178 4179 engine->reset.prepare = guc_engine_reset_prepare; 4180 engine->reset.rewind = guc_rewind_nop; 4181 engine->reset.cancel = guc_reset_nop; 4182 engine->reset.finish = guc_reset_nop; 4183 4184 engine->emit_flush = gen8_emit_flush_xcs; 4185 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4186 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4187 if (GRAPHICS_VER(engine->i915) >= 12) { 4188 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4189 engine->emit_flush = gen12_emit_flush_xcs; 4190 } 4191 engine->set_default_submission = guc_set_default_submission; 4192 engine->busyness = guc_engine_busyness; 4193 4194 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4195 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4196 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4197 4198 /* Wa_14014475959:dg2 */ 4199 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 4200 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4201 4202 /* 4203 * TODO: GuC supports timeslicing and semaphores as well, but they're 4204 * handled by the firmware so some minor tweaks are required before 4205 * enabling. 
4206 * 4207 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4208 */ 4209 4210 engine->emit_bb_start = gen8_emit_bb_start; 4211 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4212 engine->emit_bb_start = xehp_emit_bb_start; 4213 } 4214 4215 static void rcs_submission_override(struct intel_engine_cs *engine) 4216 { 4217 switch (GRAPHICS_VER(engine->i915)) { 4218 case 12: 4219 engine->emit_flush = gen12_emit_flush_rcs; 4220 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4221 break; 4222 case 11: 4223 engine->emit_flush = gen11_emit_flush_rcs; 4224 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4225 break; 4226 default: 4227 engine->emit_flush = gen8_emit_flush_rcs; 4228 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4229 break; 4230 } 4231 } 4232 4233 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4234 { 4235 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4236 intel_engine_set_irq_handler(engine, cs_irq_handler); 4237 } 4238 4239 static void guc_sched_engine_destroy(struct kref *kref) 4240 { 4241 struct i915_sched_engine *sched_engine = 4242 container_of(kref, typeof(*sched_engine), ref); 4243 struct intel_guc *guc = sched_engine->private_data; 4244 4245 guc->sched_engine = NULL; 4246 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4247 kfree(sched_engine); 4248 } 4249 4250 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4251 { 4252 struct drm_i915_private *i915 = engine->i915; 4253 struct intel_guc *guc = &engine->gt->uc.guc; 4254 4255 /* 4256 * The setup relies on several assumptions (e.g. irqs always enabled) 4257 * that are only valid on gen11+ 4258 */ 4259 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4260 4261 if (!guc->sched_engine) { 4262 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4263 if (!guc->sched_engine) 4264 return -ENOMEM; 4265 4266 guc->sched_engine->schedule = i915_schedule; 4267 guc->sched_engine->disabled = guc_sched_engine_disabled; 4268 guc->sched_engine->private_data = guc; 4269 guc->sched_engine->destroy = guc_sched_engine_destroy; 4270 guc->sched_engine->bump_inflight_request_prio = 4271 guc_bump_inflight_request_prio; 4272 guc->sched_engine->retire_inflight_request_prio = 4273 guc_retire_inflight_request_prio; 4274 tasklet_setup(&guc->sched_engine->tasklet, 4275 guc_submission_tasklet); 4276 } 4277 i915_sched_engine_put(engine->sched_engine); 4278 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4279 4280 guc_default_vfuncs(engine); 4281 guc_default_irqs(engine); 4282 guc_init_breadcrumbs(engine); 4283 4284 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4285 rcs_submission_override(engine); 4286 4287 lrc_init_wa_ctx(engine); 4288 4289 /* Finally, take ownership and responsibility for cleanup! 
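	 *
	 * engine->sanitize (guc_sanitize) scrubs stale HWSP / context state
	 * when control of the engine is regained (e.g. on resume), while
	 * engine->release (guc_release) runs on engine teardown.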
*/ 4290 engine->sanitize = guc_sanitize; 4291 engine->release = guc_release; 4292 4293 return 0; 4294 } 4295 4296 struct scheduling_policy { 4297 /* internal data */ 4298 u32 max_words, num_words; 4299 u32 count; 4300 /* API data */ 4301 struct guc_update_scheduling_policy h2g; 4302 }; 4303 4304 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4305 { 4306 u32 *start = (void *)&policy->h2g; 4307 u32 *end = policy->h2g.data + policy->num_words; 4308 size_t delta = end - start; 4309 4310 return delta; 4311 } 4312 4313 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4314 { 4315 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4316 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4317 policy->num_words = 0; 4318 policy->count = 0; 4319 4320 return policy; 4321 } 4322 4323 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4324 u32 action, u32 *data, u32 len) 4325 { 4326 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4327 4328 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4329 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4330 FIELD_PREP(GUC_KLV_0_LEN, len); 4331 memcpy(klv_ptr, data, sizeof(u32) * len); 4332 policy->num_words += 1 + len; 4333 policy->count++; 4334 } 4335 4336 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4337 struct scheduling_policy *policy) 4338 { 4339 int ret; 4340 4341 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4342 __guc_scheduling_policy_action_size(policy)); 4343 if (ret < 0) 4344 return ret; 4345 4346 if (ret != policy->count) { 4347 drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!", 4348 ret, policy->count); 4349 if (ret > policy->count) 4350 return -EPROTO; 4351 } 4352 4353 return 0; 4354 } 4355 4356 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4357 { 4358 struct scheduling_policy policy; 4359 struct intel_gt *gt = guc_to_gt(guc); 4360 intel_wakeref_t wakeref; 4361 int ret = 0; 4362 4363 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) 4364 return 0; 4365 4366 __guc_scheduling_policy_start_klv(&policy); 4367 4368 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4369 u32 yield[] = { 4370 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4371 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4372 }; 4373 4374 __guc_scheduling_policy_add_klv(&policy, 4375 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4376 yield, ARRAY_SIZE(yield)); 4377 4378 ret = __guc_action_set_scheduling_policies(guc, &policy); 4379 if (ret) 4380 i915_probe_error(gt->i915, 4381 "Failed to configure global scheduling policies: %pe!\n", 4382 ERR_PTR(ret)); 4383 } 4384 4385 return ret; 4386 } 4387 4388 void intel_guc_submission_enable(struct intel_guc *guc) 4389 { 4390 struct intel_gt *gt = guc_to_gt(guc); 4391 4392 /* Enable and route to GuC */ 4393 if (GRAPHICS_VER(gt->i915) >= 12) 4394 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 4395 GUC_SEM_INTR_ROUTE_TO_GUC | 4396 GUC_SEM_INTR_ENABLE_ALL); 4397 4398 guc_init_lrc_mapping(guc); 4399 guc_init_engine_stats(guc); 4400 guc_init_global_schedule_policy(guc); 4401 } 4402 4403 void intel_guc_submission_disable(struct intel_guc *guc) 4404 { 4405 struct intel_gt *gt = guc_to_gt(guc); 4406 4407 /* Note: By the time we're here, GuC may have already been reset */ 4408 4409 /* Disable and route to host */ 4410 if (GRAPHICS_VER(gt->i915) >= 12) 4411 intel_uncore_write(gt->uncore, 
				   GEN12_GUC_SEM_INTR_ENABLES, 0x0);
}

static bool __guc_submission_supported(struct intel_guc *guc)
{
	/* GuC submission is unavailable for pre-Gen11 */
	return intel_guc_is_supported(guc) &&
	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
}

static bool __guc_submission_selected(struct intel_guc *guc)
{
	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;

	if (!intel_guc_submission_is_supported(guc))
		return false;

	return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
}

int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
{
	return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
}

/*
 * This default value of 33 milliseconds (plus 1 millisecond of round-up)
 * ensures that 30fps or higher workloads are able to enjoy the latency
 * reduction when delaying the schedule-disable operation. This matches the
 * 30fps game-render + encode (real world) workload this knob was tested
 * against.
 */
#define SCHED_DISABLE_DELAY_MS	34

/*
 * A threshold of 75% is a reasonable starting point considering that real
 * world apps generally don't get anywhere near this.
 */
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
	(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)

void intel_guc_submission_init_early(struct intel_guc *guc)
{
	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);

	spin_lock_init(&guc->submission_state.lock);
	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
	ida_init(&guc->submission_state.guc_ids);
	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
	INIT_WORK(&guc->submission_state.destroyed_worker,
		  destroyed_worker_func);
	INIT_WORK(&guc->submission_state.reset_fail_worker,
		  reset_fail_worker_func);

	spin_lock_init(&guc->timestamp.lock);
	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);

	guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
	guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
	guc->submission_state.sched_disable_gucid_threshold =
		NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
	guc->submission_supported = __guc_submission_supported(guc);
	guc->submission_selected = __guc_submission_selected(guc);
}

static inline struct intel_context *
g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
{
	struct intel_context *ce;

	if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Invalid ctx_id %u\n", ctx_id);
		return NULL;
	}

	ce = __get_context(guc, ctx_id);
	if (unlikely(!ce)) {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Context is NULL, ctx_id %u\n", ctx_id);
		return NULL;
	}

	if (unlikely(intel_context_is_child(ce))) {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Context is child, ctx_id %u\n", ctx_id);
		return NULL;
	}

	return ce;
}

int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
					  const u32 *msg,
					  u32 len)
{
	struct intel_context *ce;
	u32 ctx_id;

	if (unlikely(len < 1)) {
		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
		return -EPROTO;
	}
	ctx_id = msg[0];

	ce = g2h_context_lookup(guc, ctx_id);
	if (unlikely(!ce))
		return -EPROTO;

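	/*
	 * A deregister-done G2H carries only the guc_id that was just
	 * unregistered. Two cases are handled below: either this context was
	 * waiting for the previous registration of its guc_id to be torn down
	 * and can now be registered itself, or the context was being destroyed
	 * and its guc_id and backing memory can now be released.
	 */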
	trace_intel_context_deregister_done(ce);

#ifdef CONFIG_DRM_I915_SELFTEST
	if (unlikely(ce->drop_deregister)) {
		ce->drop_deregister = false;
		return 0;
	}
#endif

	if (context_wait_for_deregister_to_register(ce)) {
		struct intel_runtime_pm *runtime_pm =
			&ce->engine->gt->i915->runtime_pm;
		intel_wakeref_t wakeref;

		/*
		 * Previous owner of this guc_id has been deregistered, now
		 * safe to register this context.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref)
			register_context(ce, true);
		guc_signal_context_fence(ce);
		intel_context_put(ce);
	} else if (context_destroyed(ce)) {
		/* Context has been destroyed */
		intel_gt_pm_put_async(guc_to_gt(guc));
		release_guc_id(guc, ce);
		__guc_context_destroy(ce);
	}

	decr_outstanding_submission_g2h(guc);

	return 0;
}

int intel_guc_sched_done_process_msg(struct intel_guc *guc,
				     const u32 *msg,
				     u32 len)
{
	struct intel_context *ce;
	unsigned long flags;
	u32 ctx_id;

	if (unlikely(len < 2)) {
		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
		return -EPROTO;
	}
	ctx_id = msg[0];

	ce = g2h_context_lookup(guc, ctx_id);
	if (unlikely(!ce))
		return -EPROTO;

	if (unlikely(context_destroyed(ce) ||
		     (!context_pending_enable(ce) &&
		      !context_pending_disable(ce)))) {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Bad context sched_state 0x%x, ctx_id %u\n",
			ce->guc_state.sched_state, ctx_id);
		return -EPROTO;
	}

	trace_intel_context_sched_done(ce);

	if (context_pending_enable(ce)) {
#ifdef CONFIG_DRM_I915_SELFTEST
		if (unlikely(ce->drop_schedule_enable)) {
			ce->drop_schedule_enable = false;
			return 0;
		}
#endif

		spin_lock_irqsave(&ce->guc_state.lock, flags);
		clr_context_pending_enable(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	} else if (context_pending_disable(ce)) {
		bool banned;

#ifdef CONFIG_DRM_I915_SELFTEST
		if (unlikely(ce->drop_schedule_disable)) {
			ce->drop_schedule_disable = false;
			return 0;
		}
#endif

		/*
		 * Unpin must be done before __guc_signal_context_fence,
		 * otherwise a race exists between the requests getting
		 * submitted + retired before this unpin completes, resulting
		 * in the pin_count going to zero while the context is still
		 * enabled.
4609 */ 4610 intel_context_sched_disable_unpin(ce); 4611 4612 spin_lock_irqsave(&ce->guc_state.lock, flags); 4613 banned = context_banned(ce); 4614 clr_context_banned(ce); 4615 clr_context_pending_disable(ce); 4616 __guc_signal_context_fence(ce); 4617 guc_blocked_fence_complete(ce); 4618 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4619 4620 if (banned) { 4621 guc_cancel_context_requests(ce); 4622 intel_engine_signal_breadcrumbs(ce->engine); 4623 } 4624 } 4625 4626 decr_outstanding_submission_g2h(guc); 4627 intel_context_put(ce); 4628 4629 return 0; 4630 } 4631 4632 static void capture_error_state(struct intel_guc *guc, 4633 struct intel_context *ce) 4634 { 4635 struct intel_gt *gt = guc_to_gt(guc); 4636 struct drm_i915_private *i915 = gt->i915; 4637 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4638 intel_wakeref_t wakeref; 4639 4640 intel_engine_set_hung_context(engine, ce); 4641 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4642 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4643 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4644 } 4645 4646 static void guc_context_replay(struct intel_context *ce) 4647 { 4648 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4649 4650 __guc_reset_context(ce, ce->engine->mask); 4651 tasklet_hi_schedule(&sched_engine->tasklet); 4652 } 4653 4654 static void guc_handle_context_reset(struct intel_guc *guc, 4655 struct intel_context *ce) 4656 { 4657 trace_intel_context_reset(ce); 4658 4659 if (likely(intel_context_is_schedulable(ce))) { 4660 capture_error_state(guc, ce); 4661 guc_context_replay(ce); 4662 } else { 4663 drm_info(&guc_to_gt(guc)->i915->drm, 4664 "Ignoring context reset notification of exiting context 0x%04X on %s", 4665 ce->guc_id.id, ce->engine->name); 4666 } 4667 } 4668 4669 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4670 const u32 *msg, u32 len) 4671 { 4672 struct intel_context *ce; 4673 unsigned long flags; 4674 int ctx_id; 4675 4676 if (unlikely(len != 1)) { 4677 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4678 return -EPROTO; 4679 } 4680 4681 ctx_id = msg[0]; 4682 4683 /* 4684 * The context lookup uses the xarray but lookups only require an RCU lock 4685 * not the full spinlock. So take the lock explicitly and keep it until the 4686 * context has been reference count locked to ensure it can't be destroyed 4687 * asynchronously until the reset is done. 
4688 */ 4689 xa_lock_irqsave(&guc->context_lookup, flags); 4690 ce = g2h_context_lookup(guc, ctx_id); 4691 if (ce) 4692 intel_context_get(ce); 4693 xa_unlock_irqrestore(&guc->context_lookup, flags); 4694 4695 if (unlikely(!ce)) 4696 return -EPROTO; 4697 4698 guc_handle_context_reset(guc, ce); 4699 intel_context_put(ce); 4700 4701 return 0; 4702 } 4703 4704 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4705 const u32 *msg, u32 len) 4706 { 4707 u32 status; 4708 4709 if (unlikely(len != 1)) { 4710 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4711 return -EPROTO; 4712 } 4713 4714 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4715 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4716 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4717 4718 intel_guc_capture_process(guc); 4719 4720 return 0; 4721 } 4722 4723 struct intel_engine_cs * 4724 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4725 { 4726 struct intel_gt *gt = guc_to_gt(guc); 4727 u8 engine_class = guc_class_to_engine_class(guc_class); 4728 4729 /* Class index is checked in class converter */ 4730 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4731 4732 return gt->engine_class[engine_class][instance]; 4733 } 4734 4735 static void reset_fail_worker_func(struct work_struct *w) 4736 { 4737 struct intel_guc *guc = container_of(w, struct intel_guc, 4738 submission_state.reset_fail_worker); 4739 struct intel_gt *gt = guc_to_gt(guc); 4740 intel_engine_mask_t reset_fail_mask; 4741 unsigned long flags; 4742 4743 spin_lock_irqsave(&guc->submission_state.lock, flags); 4744 reset_fail_mask = guc->submission_state.reset_fail_mask; 4745 guc->submission_state.reset_fail_mask = 0; 4746 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4747 4748 if (likely(reset_fail_mask)) 4749 intel_gt_handle_error(gt, reset_fail_mask, 4750 I915_ERROR_CAPTURE, 4751 "GuC failed to reset engine mask=0x%x\n", 4752 reset_fail_mask); 4753 } 4754 4755 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4756 const u32 *msg, u32 len) 4757 { 4758 struct intel_engine_cs *engine; 4759 struct intel_gt *gt = guc_to_gt(guc); 4760 u8 guc_class, instance; 4761 u32 reason; 4762 unsigned long flags; 4763 4764 if (unlikely(len != 3)) { 4765 drm_err(>->i915->drm, "Invalid length %u", len); 4766 return -EPROTO; 4767 } 4768 4769 guc_class = msg[0]; 4770 instance = msg[1]; 4771 reason = msg[2]; 4772 4773 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4774 if (unlikely(!engine)) { 4775 drm_err(>->i915->drm, 4776 "Invalid engine %d:%d", guc_class, instance); 4777 return -EPROTO; 4778 } 4779 4780 /* 4781 * This is an unexpected failure of a hardware feature. So, log a real 4782 * error message not just the informational that comes with the reset. 4783 */ 4784 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4785 guc_class, instance, engine->name, reason); 4786 4787 spin_lock_irqsave(&guc->submission_state.lock, flags); 4788 guc->submission_state.reset_fail_mask |= engine->mask; 4789 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4790 4791 /* 4792 * A GT reset flushes this worker queue (G2H handler) so we must use 4793 * another worker to trigger a GT reset. 
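	 * reset_fail_worker_func() snapshots and clears
	 * submission_state.reset_fail_mask under the submission lock and then
	 * hands the failed engines to intel_gt_handle_error().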
4794 */ 4795 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4796 4797 return 0; 4798 } 4799 4800 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4801 { 4802 struct intel_guc *guc = &engine->gt->uc.guc; 4803 struct intel_context *ce; 4804 struct i915_request *rq; 4805 unsigned long index; 4806 unsigned long flags; 4807 4808 /* Reset called during driver load? GuC not yet initialised! */ 4809 if (unlikely(!guc_submission_initialized(guc))) 4810 return; 4811 4812 xa_lock_irqsave(&guc->context_lookup, flags); 4813 xa_for_each(&guc->context_lookup, index, ce) { 4814 if (!kref_get_unless_zero(&ce->ref)) 4815 continue; 4816 4817 xa_unlock(&guc->context_lookup); 4818 4819 if (!intel_context_is_pinned(ce)) 4820 goto next; 4821 4822 if (intel_engine_is_virtual(ce->engine)) { 4823 if (!(ce->engine->mask & engine->mask)) 4824 goto next; 4825 } else { 4826 if (ce->engine != engine) 4827 goto next; 4828 } 4829 4830 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4831 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4832 continue; 4833 4834 intel_engine_set_hung_context(engine, ce); 4835 4836 /* Can only cope with one hang at a time... */ 4837 intel_context_put(ce); 4838 xa_lock(&guc->context_lookup); 4839 goto done; 4840 } 4841 next: 4842 intel_context_put(ce); 4843 xa_lock(&guc->context_lookup); 4844 } 4845 done: 4846 xa_unlock_irqrestore(&guc->context_lookup, flags); 4847 } 4848 4849 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4850 struct i915_request *hung_rq, 4851 struct drm_printer *m) 4852 { 4853 struct intel_guc *guc = &engine->gt->uc.guc; 4854 struct intel_context *ce; 4855 unsigned long index; 4856 unsigned long flags; 4857 4858 /* Reset called during driver load? GuC not yet initialised! 
*/ 4859 if (unlikely(!guc_submission_initialized(guc))) 4860 return; 4861 4862 xa_lock_irqsave(&guc->context_lookup, flags); 4863 xa_for_each(&guc->context_lookup, index, ce) { 4864 if (!kref_get_unless_zero(&ce->ref)) 4865 continue; 4866 4867 xa_unlock(&guc->context_lookup); 4868 4869 if (!intel_context_is_pinned(ce)) 4870 goto next; 4871 4872 if (intel_engine_is_virtual(ce->engine)) { 4873 if (!(ce->engine->mask & engine->mask)) 4874 goto next; 4875 } else { 4876 if (ce->engine != engine) 4877 goto next; 4878 } 4879 4880 spin_lock(&ce->guc_state.lock); 4881 intel_engine_dump_active_requests(&ce->guc_state.requests, 4882 hung_rq, m); 4883 spin_unlock(&ce->guc_state.lock); 4884 4885 next: 4886 intel_context_put(ce); 4887 xa_lock(&guc->context_lookup); 4888 } 4889 xa_unlock_irqrestore(&guc->context_lookup, flags); 4890 } 4891 4892 void intel_guc_submission_print_info(struct intel_guc *guc, 4893 struct drm_printer *p) 4894 { 4895 struct i915_sched_engine *sched_engine = guc->sched_engine; 4896 struct rb_node *rb; 4897 unsigned long flags; 4898 4899 if (!sched_engine) 4900 return; 4901 4902 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4903 atomic_read(&guc->outstanding_submission_g2h)); 4904 drm_printf(p, "GuC tasklet count: %u\n\n", 4905 atomic_read(&sched_engine->tasklet.count)); 4906 4907 spin_lock_irqsave(&sched_engine->lock, flags); 4908 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4909 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4910 struct i915_priolist *pl = to_priolist(rb); 4911 struct i915_request *rq; 4912 4913 priolist_for_each_request(rq, pl) 4914 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4915 rq->context->guc_id.id, 4916 rq->fence.seqno); 4917 } 4918 spin_unlock_irqrestore(&sched_engine->lock, flags); 4919 drm_printf(p, "\n"); 4920 } 4921 4922 static inline void guc_log_context_priority(struct drm_printer *p, 4923 struct intel_context *ce) 4924 { 4925 int i; 4926 4927 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4928 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4929 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4930 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4931 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4932 i, ce->guc_state.prio_count[i]); 4933 } 4934 drm_printf(p, "\n"); 4935 } 4936 4937 static inline void guc_log_context(struct drm_printer *p, 4938 struct intel_context *ce) 4939 { 4940 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4941 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4942 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4943 ce->ring->head, 4944 ce->lrc_reg_state[CTX_RING_HEAD]); 4945 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4946 ce->ring->tail, 4947 ce->lrc_reg_state[CTX_RING_TAIL]); 4948 drm_printf(p, "\t\tContext Pin Count: %u\n", 4949 atomic_read(&ce->pin_count)); 4950 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4951 atomic_read(&ce->guc_id.ref)); 4952 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4953 ce->guc_state.sched_state); 4954 } 4955 4956 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4957 struct drm_printer *p) 4958 { 4959 struct intel_context *ce; 4960 unsigned long index; 4961 unsigned long flags; 4962 4963 xa_lock_irqsave(&guc->context_lookup, flags); 4964 xa_for_each(&guc->context_lookup, index, ce) { 4965 GEM_BUG_ON(intel_context_is_child(ce)); 4966 4967 guc_log_context(p, ce); 4968 guc_log_context_priority(p, ce); 4969 4970 if (intel_context_is_parent(ce)) { 4971 struct 
intel_context *child; 4972 4973 drm_printf(p, "\t\tNumber children: %u\n", 4974 ce->parallel.number_children); 4975 4976 if (ce->parallel.guc.wq_status) { 4977 drm_printf(p, "\t\tWQI Head: %u\n", 4978 READ_ONCE(*ce->parallel.guc.wq_head)); 4979 drm_printf(p, "\t\tWQI Tail: %u\n", 4980 READ_ONCE(*ce->parallel.guc.wq_tail)); 4981 drm_printf(p, "\t\tWQI Status: %u\n\n", 4982 READ_ONCE(*ce->parallel.guc.wq_status)); 4983 } 4984 4985 if (ce->engine->emit_bb_start == 4986 emit_bb_start_parent_no_preempt_mid_batch) { 4987 u8 i; 4988 4989 drm_printf(p, "\t\tChildren Go: %u\n\n", 4990 get_children_go_value(ce)); 4991 for (i = 0; i < ce->parallel.number_children; ++i) 4992 drm_printf(p, "\t\tChildren Join: %u\n", 4993 get_children_join_value(ce, i)); 4994 } 4995 4996 for_each_child(ce, child) 4997 guc_log_context(p, child); 4998 } 4999 } 5000 xa_unlock_irqrestore(&guc->context_lookup, flags); 5001 } 5002 5003 static inline u32 get_children_go_addr(struct intel_context *ce) 5004 { 5005 GEM_BUG_ON(!intel_context_is_parent(ce)); 5006 5007 return i915_ggtt_offset(ce->state) + 5008 __get_parent_scratch_offset(ce) + 5009 offsetof(struct parent_scratch, go.semaphore); 5010 } 5011 5012 static inline u32 get_children_join_addr(struct intel_context *ce, 5013 u8 child_index) 5014 { 5015 GEM_BUG_ON(!intel_context_is_parent(ce)); 5016 5017 return i915_ggtt_offset(ce->state) + 5018 __get_parent_scratch_offset(ce) + 5019 offsetof(struct parent_scratch, join[child_index].semaphore); 5020 } 5021 5022 #define PARENT_GO_BB 1 5023 #define PARENT_GO_FINI_BREADCRUMB 0 5024 #define CHILD_GO_BB 1 5025 #define CHILD_GO_FINI_BREADCRUMB 0 5026 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5027 u64 offset, u32 len, 5028 const unsigned int flags) 5029 { 5030 struct intel_context *ce = rq->context; 5031 u32 *cs; 5032 u8 i; 5033 5034 GEM_BUG_ON(!intel_context_is_parent(ce)); 5035 5036 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5037 if (IS_ERR(cs)) 5038 return PTR_ERR(cs); 5039 5040 /* Wait on children */ 5041 for (i = 0; i < ce->parallel.number_children; ++i) { 5042 *cs++ = (MI_SEMAPHORE_WAIT | 5043 MI_SEMAPHORE_GLOBAL_GTT | 5044 MI_SEMAPHORE_POLL | 5045 MI_SEMAPHORE_SAD_EQ_SDD); 5046 *cs++ = PARENT_GO_BB; 5047 *cs++ = get_children_join_addr(ce, i); 5048 *cs++ = 0; 5049 } 5050 5051 /* Turn off preemption */ 5052 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5053 *cs++ = MI_NOOP; 5054 5055 /* Tell children go */ 5056 cs = gen8_emit_ggtt_write(cs, 5057 CHILD_GO_BB, 5058 get_children_go_addr(ce), 5059 0); 5060 5061 /* Jump to batch */ 5062 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5063 (flags & I915_DISPATCH_SECURE ? 
		0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
						    u64 offset, u32 len,
						    const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);
	u32 *cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_BB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_BB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Jump to batch */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						   u32 *cs)
{
	struct intel_context *ce = rq->context;
	u8 i;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	/* Wait on children */
	for (i = 0; i < ce->parallel.number_children; ++i) {
		*cs++ = (MI_SEMAPHORE_WAIT |
			 MI_SEMAPHORE_GLOBAL_GTT |
			 MI_SEMAPHORE_POLL |
			 MI_SEMAPHORE_SAD_EQ_SDD);
		*cs++ = PARENT_GO_FINI_BREADCRUMB;
		*cs++ = get_children_join_addr(ce, i);
		*cs++ = 0;
	}

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Tell children go */
	cs = gen8_emit_ggtt_write(cs,
				  CHILD_GO_FINI_BREADCRUMB,
				  get_children_go_addr(ce),
				  0);

	return cs;
}

/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumb seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but the 2nd hit an error, only 1 would be seen by the GuC
 * backend). Simply skip the handshake, but still emit the breadcrumb seqno,
 * if an error has occurred on any of the requests in submission / relationship.
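 *
 * Note that the seqno write, MI_USER_INTERRUPT and padding MI_NOOP emitted
 * after the (possibly skipped) handshake are always present; NON_SKIP_LEN
 * below covers exactly those dwords so the total emission length still
 * matches engine->emit_fini_breadcrumb_dw (see the GEM_BUG_ONs).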
 */
static inline bool skip_handshake(struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}

#define NON_SKIP_LEN	6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						 u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						  u32 *cs)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);

	GEM_BUG_ON(!intel_context_is_child(ce));

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_FINI_BREADCRUMB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_FINI_BREADCRUMB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	return cs;
}

static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
5257 */ 5258 memset(cs, 0, sizeof(u32) * 5259 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5260 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5261 } else { 5262 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5263 } 5264 5265 /* Emit fini breadcrumb */ 5266 before_fini_breadcrumb_user_interrupt_cs = cs; 5267 cs = gen8_emit_ggtt_write(cs, 5268 rq->fence.seqno, 5269 i915_request_active_timeline(rq)->hwsp_offset, 5270 0); 5271 5272 /* User interrupt */ 5273 *cs++ = MI_USER_INTERRUPT; 5274 *cs++ = MI_NOOP; 5275 5276 /* Ensure our math for skip + emit is correct */ 5277 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5278 cs); 5279 GEM_BUG_ON(start_fini_breadcrumb_cs + 5280 ce->engine->emit_fini_breadcrumb_dw != cs); 5281 5282 rq->tail = intel_ring_offset(rq, cs); 5283 5284 return cs; 5285 } 5286 5287 #undef NON_SKIP_LEN 5288 5289 static struct intel_context * 5290 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5291 unsigned long flags) 5292 { 5293 struct guc_virtual_engine *ve; 5294 struct intel_guc *guc; 5295 unsigned int n; 5296 int err; 5297 5298 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5299 if (!ve) 5300 return ERR_PTR(-ENOMEM); 5301 5302 guc = &siblings[0]->gt->uc.guc; 5303 5304 ve->base.i915 = siblings[0]->i915; 5305 ve->base.gt = siblings[0]->gt; 5306 ve->base.uncore = siblings[0]->uncore; 5307 ve->base.id = -1; 5308 5309 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5310 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5311 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5312 ve->base.saturated = ALL_ENGINES; 5313 5314 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5315 5316 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5317 5318 ve->base.cops = &virtual_guc_context_ops; 5319 ve->base.request_alloc = guc_request_alloc; 5320 ve->base.bump_serial = virtual_guc_bump_serial; 5321 5322 ve->base.submit_request = guc_submit_request; 5323 5324 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5325 5326 intel_context_init(&ve->context, &ve->base); 5327 5328 for (n = 0; n < count; n++) { 5329 struct intel_engine_cs *sibling = siblings[n]; 5330 5331 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5332 if (sibling->mask & ve->base.mask) { 5333 DRM_DEBUG("duplicate %s entry in load balancer\n", 5334 sibling->name); 5335 err = -EINVAL; 5336 goto err_put; 5337 } 5338 5339 ve->base.mask |= sibling->mask; 5340 ve->base.logical_mask |= sibling->logical_mask; 5341 5342 if (n != 0 && ve->base.class != sibling->class) { 5343 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5344 sibling->class, ve->base.class); 5345 err = -EINVAL; 5346 goto err_put; 5347 } else if (n == 0) { 5348 ve->base.class = sibling->class; 5349 ve->base.uabi_class = sibling->uabi_class; 5350 snprintf(ve->base.name, sizeof(ve->base.name), 5351 "v%dx%d", ve->base.class, count); 5352 ve->base.context_size = sibling->context_size; 5353 5354 ve->base.add_active_request = 5355 sibling->add_active_request; 5356 ve->base.remove_active_request = 5357 sibling->remove_active_request; 5358 ve->base.emit_bb_start = sibling->emit_bb_start; 5359 ve->base.emit_flush = sibling->emit_flush; 5360 ve->base.emit_init_breadcrumb = 5361 sibling->emit_init_breadcrumb; 5362 ve->base.emit_fini_breadcrumb = 5363 sibling->emit_fini_breadcrumb; 5364 ve->base.emit_fini_breadcrumb_dw = 5365 sibling->emit_fini_breadcrumb_dw; 5366 ve->base.breadcrumbs = 5367 intel_breadcrumbs_get(sibling->breadcrumbs); 5368 5369 
ve->base.flags |= sibling->flags; 5370 5371 ve->base.props.timeslice_duration_ms = 5372 sibling->props.timeslice_duration_ms; 5373 ve->base.props.preempt_timeout_ms = 5374 sibling->props.preempt_timeout_ms; 5375 } 5376 } 5377 5378 return &ve->context; 5379 5380 err_put: 5381 intel_context_put(&ve->context); 5382 return ERR_PTR(err); 5383 } 5384 5385 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5386 { 5387 struct intel_engine_cs *engine; 5388 intel_engine_mask_t tmp, mask = ve->mask; 5389 5390 for_each_engine_masked(engine, ve->gt, mask, tmp) 5391 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5392 return true; 5393 5394 return false; 5395 } 5396 5397 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5398 #include "selftest_guc.c" 5399 #include "selftest_guc_multi_lrc.c" 5400 #include "selftest_guc_hangcheck.c" 5401 #endif 5402