// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and the GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow-up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned.
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k guc_ids are
 * available. A simple ida is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned.
 * If a guc_id can't be found we punt this problem to the user as we believe
 * this is near impossible to hit during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 scheduling
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and the list of
 * destroyed contexts.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects the list of inflight
 * requests on the context and the priority management state. The lock is
 * individual to each context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. A minimum of 32 guc_ids is
 * reserved for multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state. All
 * modifications of the state require ce->guc_state.lock to be held.
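 *
 * As a purely illustrative sketch (not a new code path), a caller that wants
 * to change one of these flags is expected to follow the pattern used
 * throughout this file, e.g. in __guc_reset_context() below:
 *
 *	spin_lock_irqsave(&ce->guc_state.lock, flags);
 *	clr_context_enabled(ce);
 *	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 *
 * The read-only helpers (e.g. context_enabled()) may be called without the
 * lock, in which case the value returned is only a snapshot.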
157 */ 158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 159 #define SCHED_STATE_DESTROYED BIT(1) 160 #define SCHED_STATE_PENDING_DISABLE BIT(2) 161 #define SCHED_STATE_BANNED BIT(3) 162 #define SCHED_STATE_ENABLED BIT(4) 163 #define SCHED_STATE_PENDING_ENABLE BIT(5) 164 #define SCHED_STATE_REGISTERED BIT(6) 165 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 166 #define SCHED_STATE_BLOCKED_SHIFT 8 167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 169 170 static inline void init_sched_state(struct intel_context *ce) 171 { 172 lockdep_assert_held(&ce->guc_state.lock); 173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 174 } 175 176 __maybe_unused 177 static bool sched_state_is_init(struct intel_context *ce) 178 { 179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */ 180 return !(ce->guc_state.sched_state & 181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 182 } 183 184 static inline bool 185 context_wait_for_deregister_to_register(struct intel_context *ce) 186 { 187 return ce->guc_state.sched_state & 188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 189 } 190 191 static inline void 192 set_context_wait_for_deregister_to_register(struct intel_context *ce) 193 { 194 lockdep_assert_held(&ce->guc_state.lock); 195 ce->guc_state.sched_state |= 196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 197 } 198 199 static inline void 200 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 201 { 202 lockdep_assert_held(&ce->guc_state.lock); 203 ce->guc_state.sched_state &= 204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 205 } 206 207 static inline bool 208 context_destroyed(struct intel_context *ce) 209 { 210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 211 } 212 213 static inline void 214 set_context_destroyed(struct intel_context *ce) 215 { 216 lockdep_assert_held(&ce->guc_state.lock); 217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 218 } 219 220 static inline bool context_pending_disable(struct intel_context *ce) 221 { 222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 223 } 224 225 static inline void set_context_pending_disable(struct intel_context *ce) 226 { 227 lockdep_assert_held(&ce->guc_state.lock); 228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 229 } 230 231 static inline void clr_context_pending_disable(struct intel_context *ce) 232 { 233 lockdep_assert_held(&ce->guc_state.lock); 234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 235 } 236 237 static inline bool context_banned(struct intel_context *ce) 238 { 239 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 240 } 241 242 static inline void set_context_banned(struct intel_context *ce) 243 { 244 lockdep_assert_held(&ce->guc_state.lock); 245 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 246 } 247 248 static inline void clr_context_banned(struct intel_context *ce) 249 { 250 lockdep_assert_held(&ce->guc_state.lock); 251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 252 } 253 254 static inline bool context_enabled(struct intel_context *ce) 255 { 256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 257 } 258 259 static inline void set_context_enabled(struct intel_context *ce) 260 { 261 lockdep_assert_held(&ce->guc_state.lock); 262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 263 } 264 265 static inline void clr_context_enabled(struct intel_context *ce) 266 { 267 lockdep_assert_held(&ce->guc_state.lock); 268 
ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 269 } 270 271 static inline bool context_pending_enable(struct intel_context *ce) 272 { 273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 274 } 275 276 static inline void set_context_pending_enable(struct intel_context *ce) 277 { 278 lockdep_assert_held(&ce->guc_state.lock); 279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 280 } 281 282 static inline void clr_context_pending_enable(struct intel_context *ce) 283 { 284 lockdep_assert_held(&ce->guc_state.lock); 285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 286 } 287 288 static inline bool context_registered(struct intel_context *ce) 289 { 290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 291 } 292 293 static inline void set_context_registered(struct intel_context *ce) 294 { 295 lockdep_assert_held(&ce->guc_state.lock); 296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 297 } 298 299 static inline void clr_context_registered(struct intel_context *ce) 300 { 301 lockdep_assert_held(&ce->guc_state.lock); 302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 303 } 304 305 static inline bool context_policy_required(struct intel_context *ce) 306 { 307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 308 } 309 310 static inline void set_context_policy_required(struct intel_context *ce) 311 { 312 lockdep_assert_held(&ce->guc_state.lock); 313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 314 } 315 316 static inline void clr_context_policy_required(struct intel_context *ce) 317 { 318 lockdep_assert_held(&ce->guc_state.lock); 319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 320 } 321 322 static inline u32 context_blocked(struct intel_context *ce) 323 { 324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 325 SCHED_STATE_BLOCKED_SHIFT; 326 } 327 328 static inline void incr_context_blocked(struct intel_context *ce) 329 { 330 lockdep_assert_held(&ce->guc_state.lock); 331 332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 333 334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 335 } 336 337 static inline void decr_context_blocked(struct intel_context *ce) 338 { 339 lockdep_assert_held(&ce->guc_state.lock); 340 341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 342 343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 344 } 345 346 static inline bool context_has_committed_requests(struct intel_context *ce) 347 { 348 return !!ce->guc_state.number_committed_requests; 349 } 350 351 static inline void incr_context_committed_requests(struct intel_context *ce) 352 { 353 lockdep_assert_held(&ce->guc_state.lock); 354 ++ce->guc_state.number_committed_requests; 355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 356 } 357 358 static inline void decr_context_committed_requests(struct intel_context *ce) 359 { 360 lockdep_assert_held(&ce->guc_state.lock); 361 --ce->guc_state.number_committed_requests; 362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 363 } 364 365 static struct intel_context * 366 request_to_scheduling_context(struct i915_request *rq) 367 { 368 return intel_context_to_parent(rq->context); 369 } 370 371 static inline bool context_guc_id_invalid(struct intel_context *ce) 372 { 373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 374 } 375 376 static inline void set_context_guc_id_invalid(struct intel_context *ce) 377 { 378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 379 } 380 381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 382 
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is as follows:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in the work queue. We cache the head pointer in the
	 * intel_context structure in order to reduce the number of accesses to
	 * shared GPU memory, which may be across a PCIe bus.
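	 *
	 * Descriptive note: CIRC_SPACE() from <linux/circ_buf.h> returns how
	 * many bytes can be written without the producer catching up with the
	 * consumer; here wqi_tail is the driver's producer index and wqi_head
	 * mirrors the GuC's consumer index. If the cached head suggests there
	 * is not enough room, the head is re-read once from shared memory
	 * before giving up.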
484 */ 485 #define AVAILABLE_SPACE \ 486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 487 if (wqi_size > AVAILABLE_SPACE) { 488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 489 490 if (wqi_size > AVAILABLE_SPACE) 491 return NULL; 492 } 493 #undef AVAILABLE_SPACE 494 495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 496 } 497 498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 499 { 500 struct intel_context *ce = xa_load(&guc->context_lookup, id); 501 502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 503 504 return ce; 505 } 506 507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 508 { 509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 510 511 if (!base) 512 return NULL; 513 514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 515 516 return &base[index]; 517 } 518 519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 520 { 521 u32 size; 522 int ret; 523 524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 525 GUC_MAX_CONTEXT_ID); 526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 527 (void **)&guc->lrc_desc_pool_vaddr_v69); 528 if (ret) 529 return ret; 530 531 return 0; 532 } 533 534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 535 { 536 if (!guc->lrc_desc_pool_vaddr_v69) 537 return; 538 539 guc->lrc_desc_pool_vaddr_v69 = NULL; 540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 541 } 542 543 static inline bool guc_submission_initialized(struct intel_guc *guc) 544 { 545 return guc->submission_initialized; 546 } 547 548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 549 { 550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 551 552 if (desc) 553 memset(desc, 0, sizeof(*desc)); 554 } 555 556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 557 { 558 return __get_context(guc, id); 559 } 560 561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 562 struct intel_context *ce) 563 { 564 unsigned long flags; 565 566 /* 567 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 568 * lower level functions directly. 569 */ 570 xa_lock_irqsave(&guc->context_lookup, flags); 571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 572 xa_unlock_irqrestore(&guc->context_lookup, flags); 573 } 574 575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 576 { 577 unsigned long flags; 578 579 if (unlikely(!guc_submission_initialized(guc))) 580 return; 581 582 _reset_lrc_desc_v69(guc, id); 583 584 /* 585 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 586 * the lower level functions directly. 587 */ 588 xa_lock_irqsave(&guc->context_lookup, flags); 589 __xa_erase(&guc->context_lookup, id); 590 xa_unlock_irqrestore(&guc->context_lookup, flags); 591 } 592 593 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 594 { 595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 596 wake_up_all(&guc->ct.wq); 597 } 598 599 static int guc_submission_send_busy_loop(struct intel_guc *guc, 600 const u32 *action, 601 u32 len, 602 u32 g2h_len_dw, 603 bool loop) 604 { 605 /* 606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 607 * so we don't handle the case where we don't get a reply because we 608 * aborted the send due to the channel being busy. 
609 */ 610 GEM_BUG_ON(g2h_len_dw && !loop); 611 612 if (g2h_len_dw) 613 atomic_inc(&guc->outstanding_submission_g2h); 614 615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 616 } 617 618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 619 atomic_t *wait_var, 620 bool interruptible, 621 long timeout) 622 { 623 const int state = interruptible ? 624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 625 DEFINE_WAIT(wait); 626 627 might_sleep(); 628 GEM_BUG_ON(timeout < 0); 629 630 if (!atomic_read(wait_var)) 631 return 0; 632 633 if (!timeout) 634 return -ETIME; 635 636 for (;;) { 637 prepare_to_wait(&guc->ct.wq, &wait, state); 638 639 if (!atomic_read(wait_var)) 640 break; 641 642 if (signal_pending_state(state, current)) { 643 timeout = -EINTR; 644 break; 645 } 646 647 if (!timeout) { 648 timeout = -ETIME; 649 break; 650 } 651 652 timeout = io_schedule_timeout(timeout); 653 } 654 finish_wait(&guc->ct.wq, &wait); 655 656 return (timeout < 0) ? timeout : 0; 657 } 658 659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 660 { 661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 662 return 0; 663 664 return intel_guc_wait_for_pending_msg(guc, 665 &guc->outstanding_submission_g2h, 666 true, timeout); 667 } 668 669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 670 static int try_context_registration(struct intel_context *ce, bool loop); 671 672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 673 { 674 int err = 0; 675 struct intel_context *ce = request_to_scheduling_context(rq); 676 u32 action[3]; 677 int len = 0; 678 u32 g2h_len_dw = 0; 679 bool enabled; 680 681 lockdep_assert_held(&rq->engine->sched_engine->lock); 682 683 /* 684 * Corner case where requests were sitting in the priority list or a 685 * request resubmitted after the context was banned. 686 */ 687 if (unlikely(intel_context_is_banned(ce))) { 688 i915_request_put(i915_request_mark_eio(rq)); 689 intel_engine_signal_breadcrumbs(ce->engine); 690 return 0; 691 } 692 693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 694 GEM_BUG_ON(context_guc_id_invalid(ce)); 695 696 if (context_policy_required(ce)) { 697 err = guc_context_policy_init_v70(ce, false); 698 if (err) 699 return err; 700 } 701 702 spin_lock(&ce->guc_state.lock); 703 704 /* 705 * The request / context will be run on the hardware when scheduling 706 * gets enabled in the unblock. For multi-lrc we still submit the 707 * context to move the LRC tails. 708 */ 709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 710 goto out; 711 712 enabled = context_enabled(ce) || context_blocked(ce); 713 714 if (!enabled) { 715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 716 action[len++] = ce->guc_id.id; 717 action[len++] = GUC_CONTEXT_ENABLE; 718 set_context_pending_enable(ce); 719 intel_context_get(ce); 720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 721 } else { 722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 723 action[len++] = ce->guc_id.id; 724 } 725 726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 727 if (!enabled && !err) { 728 trace_intel_context_sched_enable(ce); 729 atomic_inc(&guc->outstanding_submission_g2h); 730 set_context_enabled(ce); 731 732 /* 733 * Without multi-lrc KMD does the submission step (moving the 734 * lrc tail) so enabling scheduling is sufficient to submit the 735 * context. 
This isn't the case in multi-lrc submission as the 736 * GuC needs to move the tails, hence the need for another H2G 737 * to submit a multi-lrc context after enabling scheduling. 738 */ 739 if (intel_context_is_parent(ce)) { 740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 741 err = intel_guc_send_nb(guc, action, len - 1, 0); 742 } 743 } else if (!enabled) { 744 clr_context_pending_enable(ce); 745 intel_context_put(ce); 746 } 747 if (likely(!err)) 748 trace_i915_request_guc_submit(rq); 749 750 out: 751 spin_unlock(&ce->guc_state.lock); 752 return err; 753 } 754 755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 756 { 757 int ret = __guc_add_request(guc, rq); 758 759 if (unlikely(ret == -EBUSY)) { 760 guc->stalled_request = rq; 761 guc->submission_stall_reason = STALL_ADD_REQUEST; 762 } 763 764 return ret; 765 } 766 767 static inline void guc_set_lrc_tail(struct i915_request *rq) 768 { 769 rq->context->lrc_reg_state[CTX_RING_TAIL] = 770 intel_ring_set_tail(rq->ring, rq->tail); 771 } 772 773 static inline int rq_prio(const struct i915_request *rq) 774 { 775 return rq->sched.attr.priority; 776 } 777 778 static bool is_multi_lrc_rq(struct i915_request *rq) 779 { 780 return intel_context_is_parallel(rq->context); 781 } 782 783 static bool can_merge_rq(struct i915_request *rq, 784 struct i915_request *last) 785 { 786 return request_to_scheduling_context(rq) == 787 request_to_scheduling_context(last); 788 } 789 790 static u32 wq_space_until_wrap(struct intel_context *ce) 791 { 792 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 793 } 794 795 static void write_wqi(struct intel_context *ce, u32 wqi_size) 796 { 797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 798 799 /* 800 * Ensure WQI are visible before updating tail 801 */ 802 intel_guc_write_barrier(ce_to_guc(ce)); 803 804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 805 (WQ_SIZE - 1); 806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 807 } 808 809 static int guc_wq_noop_append(struct intel_context *ce) 810 { 811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 813 814 if (!wqi) 815 return -EBUSY; 816 817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 818 819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 820 FIELD_PREP(WQ_LEN_MASK, len_dw); 821 ce->parallel.guc.wqi_tail = 0; 822 823 return 0; 824 } 825 826 static int __guc_wq_item_append(struct i915_request *rq) 827 { 828 struct intel_context *ce = request_to_scheduling_context(rq); 829 struct intel_context *child; 830 unsigned int wqi_size = (ce->parallel.number_children + 4) * 831 sizeof(u32); 832 u32 *wqi; 833 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 834 int ret; 835 836 /* Ensure context is in correct state updating work queue */ 837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 838 GEM_BUG_ON(context_guc_id_invalid(ce)); 839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 841 842 /* Insert NOOP if this work queue item will wrap the tail pointer. 
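	 * A NOOP item just encodes WQ_TYPE_NOOP plus a length covering the
	 * remaining space up to the end of the work queue, so the GuC skips it
	 * and the real item can be written contiguously from offset 0 again.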
*/ 843 if (wqi_size > wq_space_until_wrap(ce)) { 844 ret = guc_wq_noop_append(ce); 845 if (ret) 846 return ret; 847 } 848 849 wqi = get_wq_pointer(ce, wqi_size); 850 if (!wqi) 851 return -EBUSY; 852 853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 854 855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 856 FIELD_PREP(WQ_LEN_MASK, len_dw); 857 *wqi++ = ce->lrc.lrca; 858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 860 *wqi++ = 0; /* fence_id */ 861 for_each_child(ce, child) 862 *wqi++ = child->ring->tail / sizeof(u64); 863 864 write_wqi(ce, wqi_size); 865 866 return 0; 867 } 868 869 static int guc_wq_item_append(struct intel_guc *guc, 870 struct i915_request *rq) 871 { 872 struct intel_context *ce = request_to_scheduling_context(rq); 873 int ret = 0; 874 875 if (likely(!intel_context_is_banned(ce))) { 876 ret = __guc_wq_item_append(rq); 877 878 if (unlikely(ret == -EBUSY)) { 879 guc->stalled_request = rq; 880 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 881 } 882 } 883 884 return ret; 885 } 886 887 static bool multi_lrc_submit(struct i915_request *rq) 888 { 889 struct intel_context *ce = request_to_scheduling_context(rq); 890 891 intel_ring_set_tail(rq->ring, rq->tail); 892 893 /* 894 * We expect the front end (execbuf IOCTL) to set this flag on the last 895 * request generated from a multi-BB submission. This indicates to the 896 * backend (GuC interface) that we should submit this context thus 897 * submitting all the requests generated in parallel. 898 */ 899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 900 intel_context_is_banned(ce); 901 } 902 903 static int guc_dequeue_one_context(struct intel_guc *guc) 904 { 905 struct i915_sched_engine * const sched_engine = guc->sched_engine; 906 struct i915_request *last = NULL; 907 bool submit = false; 908 struct rb_node *rb; 909 int ret; 910 911 lockdep_assert_held(&sched_engine->lock); 912 913 if (guc->stalled_request) { 914 submit = true; 915 last = guc->stalled_request; 916 917 switch (guc->submission_stall_reason) { 918 case STALL_REGISTER_CONTEXT: 919 goto register_context; 920 case STALL_MOVE_LRC_TAIL: 921 goto move_lrc_tail; 922 case STALL_ADD_REQUEST: 923 goto add_request; 924 default: 925 MISSING_CASE(guc->submission_stall_reason); 926 } 927 } 928 929 while ((rb = rb_first_cached(&sched_engine->queue))) { 930 struct i915_priolist *p = to_priolist(rb); 931 struct i915_request *rq, *rn; 932 933 priolist_for_each_request_consume(rq, rn, p) { 934 if (last && !can_merge_rq(rq, last)) 935 goto register_context; 936 937 list_del_init(&rq->sched.link); 938 939 __i915_request_submit(rq); 940 941 trace_i915_request_in(rq, 0); 942 last = rq; 943 944 if (is_multi_lrc_rq(rq)) { 945 /* 946 * We need to coalesce all multi-lrc requests in 947 * a relationship into a single H2G. We are 948 * guaranteed that all of these requests will be 949 * submitted sequentially. 
950 */ 951 if (multi_lrc_submit(rq)) { 952 submit = true; 953 goto register_context; 954 } 955 } else { 956 submit = true; 957 } 958 } 959 960 rb_erase_cached(&p->node, &sched_engine->queue); 961 i915_priolist_free(p); 962 } 963 964 register_context: 965 if (submit) { 966 struct intel_context *ce = request_to_scheduling_context(last); 967 968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 969 !intel_context_is_banned(ce))) { 970 ret = try_context_registration(ce, false); 971 if (unlikely(ret == -EPIPE)) { 972 goto deadlk; 973 } else if (ret == -EBUSY) { 974 guc->stalled_request = last; 975 guc->submission_stall_reason = 976 STALL_REGISTER_CONTEXT; 977 goto schedule_tasklet; 978 } else if (ret != 0) { 979 GEM_WARN_ON(ret); /* Unexpected */ 980 goto deadlk; 981 } 982 } 983 984 move_lrc_tail: 985 if (is_multi_lrc_rq(last)) { 986 ret = guc_wq_item_append(guc, last); 987 if (ret == -EBUSY) { 988 goto schedule_tasklet; 989 } else if (ret != 0) { 990 GEM_WARN_ON(ret); /* Unexpected */ 991 goto deadlk; 992 } 993 } else { 994 guc_set_lrc_tail(last); 995 } 996 997 add_request: 998 ret = guc_add_request(guc, last); 999 if (unlikely(ret == -EPIPE)) { 1000 goto deadlk; 1001 } else if (ret == -EBUSY) { 1002 goto schedule_tasklet; 1003 } else if (ret != 0) { 1004 GEM_WARN_ON(ret); /* Unexpected */ 1005 goto deadlk; 1006 } 1007 } 1008 1009 guc->stalled_request = NULL; 1010 guc->submission_stall_reason = STALL_NONE; 1011 return submit; 1012 1013 deadlk: 1014 sched_engine->tasklet.callback = NULL; 1015 tasklet_disable_nosync(&sched_engine->tasklet); 1016 return false; 1017 1018 schedule_tasklet: 1019 tasklet_schedule(&sched_engine->tasklet); 1020 return false; 1021 } 1022 1023 static void guc_submission_tasklet(struct tasklet_struct *t) 1024 { 1025 struct i915_sched_engine *sched_engine = 1026 from_tasklet(sched_engine, t, tasklet); 1027 unsigned long flags; 1028 bool loop; 1029 1030 spin_lock_irqsave(&sched_engine->lock, flags); 1031 1032 do { 1033 loop = guc_dequeue_one_context(sched_engine->private_data); 1034 } while (loop); 1035 1036 i915_sched_engine_reset_on_empty(sched_engine); 1037 1038 spin_unlock_irqrestore(&sched_engine->lock, flags); 1039 } 1040 1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1042 { 1043 if (iir & GT_RENDER_USER_INTERRUPT) 1044 intel_engine_signal_breadcrumbs(engine); 1045 } 1046 1047 static void __guc_context_destroy(struct intel_context *ce); 1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1049 static void guc_signal_context_fence(struct intel_context *ce); 1050 static void guc_cancel_context_requests(struct intel_context *ce); 1051 static void guc_blocked_fence_complete(struct intel_context *ce); 1052 1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1054 { 1055 struct intel_context *ce; 1056 unsigned long index, flags; 1057 bool pending_disable, pending_enable, deregister, destroyed, banned; 1058 1059 xa_lock_irqsave(&guc->context_lookup, flags); 1060 xa_for_each(&guc->context_lookup, index, ce) { 1061 /* 1062 * Corner case where the ref count on the object is zero but and 1063 * deregister G2H was lost. In this case we don't touch the ref 1064 * count and finish the destroy of the context. 
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with the above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs the execution start time, 'out' adds the in -> out delta
 * to the total. i915/kmd accesses 'start', 'total' and 'context id' from
 * memory shared with the GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if the context
 * id is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from the GUCPMTIMESTAMP reg and are in the gt clock
 * domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, this implementation needs to account for overflows and
 * extend the GuC provided values to 64 bits before returning busyness to the
 * user. In order to do that, a worker runs periodically with a period of 1/8th
 * of the time it takes for the timestamp to wrap (i.e. once every ~27 seconds
 * for a gt clock frequency of 19.2 MHz).
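 *
 * As a rough sketch of the sampling rule above (illustrative pseudo-code, not
 * a separate implementation; see guc_engine_busyness() for the real thing):
 *
 *	if (context_id != ~0 && start != 0)
 *		busy_gt_clks = total + (gt_stamp - start);
 *	else
 *		busy_gt_clks = total;
 *
 * with the result converted from gt clocks to ns before being reported.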
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper (MSB) bytes of the last_in value when
 * this race occurs. Two kinds of corruption have been seen: the upper 8 bits
 * read as zero, or the upper 24 bits read as zero. Since the corrupted values
 * are still plausible non-zero values, it is non-trivial to detect them.
 * Instead we read the values multiple times until they are consistent. In test
 * runs, 3 attempts result in consistent values. The upper bound is set to 6
 * attempts and may need to be tuned if new occurrences are seen.
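 *
 * If the values still have not settled after the sixth read, the last sample
 * is used as-is.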
1197 */ 1198 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1199 u32 *last_in, u32 *id, u32 *total) 1200 { 1201 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1202 int i = 0; 1203 1204 do { 1205 *last_in = record_read(&rec_map, last_switch_in_stamp); 1206 *id = record_read(&rec_map, current_context_index); 1207 *total = record_read(&rec_map, total_runtime); 1208 1209 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1210 record_read(&rec_map, current_context_index) == *id && 1211 record_read(&rec_map, total_runtime) == *total) 1212 break; 1213 } while (++i < 6); 1214 } 1215 1216 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1217 { 1218 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1219 struct intel_guc *guc = &engine->gt->uc.guc; 1220 u32 last_switch, ctx_id, total; 1221 1222 lockdep_assert_held(&guc->timestamp.lock); 1223 1224 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1225 1226 stats->running = ctx_id != ~0U && last_switch; 1227 if (stats->running) 1228 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1229 1230 /* 1231 * Instead of adjusting the total for overflow, just add the 1232 * difference from previous sample stats->total_gt_clks 1233 */ 1234 if (total && total != ~0U) { 1235 stats->total_gt_clks += (u32)(total - stats->prev_total); 1236 stats->prev_total = total; 1237 } 1238 } 1239 1240 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1241 { 1242 intel_wakeref_t wakeref; 1243 u32 reg, shift; 1244 1245 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1246 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1247 1248 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1249 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1250 1251 return 3 - shift; 1252 } 1253 1254 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1255 { 1256 struct intel_gt *gt = guc_to_gt(guc); 1257 u32 gt_stamp_lo, gt_stamp_hi; 1258 u64 gpm_ts; 1259 1260 lockdep_assert_held(&guc->timestamp.lock); 1261 1262 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1263 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1264 MISC_STATUS1) >> guc->timestamp.shift; 1265 gt_stamp_lo = lower_32_bits(gpm_ts); 1266 *now = ktime_get(); 1267 1268 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1269 gt_stamp_hi++; 1270 1271 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1272 } 1273 1274 /* 1275 * Unlike the execlist mode of submission total and active times are in terms of 1276 * gt clocks. The *now parameter is retained to return the cpu time at which the 1277 * busyness was sampled. 1278 */ 1279 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1280 { 1281 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1282 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1283 struct intel_gt *gt = engine->gt; 1284 struct intel_guc *guc = >->uc.guc; 1285 u64 total, gt_stamp_saved; 1286 unsigned long flags; 1287 u32 reset_count; 1288 bool in_reset; 1289 1290 spin_lock_irqsave(&guc->timestamp.lock, flags); 1291 1292 /* 1293 * If a reset happened, we risk reading partially updated engine 1294 * busyness from GuC, so we just use the driver stored copy of busyness. 1295 * Synchronize with gt reset using reset_count and the 1296 * I915_RESET_BACKOFF flag. 
Note that reset flow updates the reset_count 1297 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1298 * usable by checking the flag afterwards. 1299 */ 1300 reset_count = i915_reset_count(gpu_error); 1301 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1302 1303 *now = ktime_get(); 1304 1305 /* 1306 * The active busyness depends on start_gt_clk and gt_stamp. 1307 * gt_stamp is updated by i915 only when gt is awake and the 1308 * start_gt_clk is derived from GuC state. To get a consistent 1309 * view of activity, we query the GuC state only if gt is awake. 1310 */ 1311 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1312 stats_saved = *stats; 1313 gt_stamp_saved = guc->timestamp.gt_stamp; 1314 /* 1315 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1316 * start_gt_clk' calculation below for active engines. 1317 */ 1318 guc_update_engine_gt_clks(engine); 1319 guc_update_pm_timestamp(guc, now); 1320 intel_gt_pm_put_async(gt); 1321 if (i915_reset_count(gpu_error) != reset_count) { 1322 *stats = stats_saved; 1323 guc->timestamp.gt_stamp = gt_stamp_saved; 1324 } 1325 } 1326 1327 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1328 if (stats->running) { 1329 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1330 1331 total += intel_gt_clock_interval_to_ns(gt, clk); 1332 } 1333 1334 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1335 1336 return ns_to_ktime(total); 1337 } 1338 1339 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1340 { 1341 struct intel_gt *gt = guc_to_gt(guc); 1342 struct intel_engine_cs *engine; 1343 enum intel_engine_id id; 1344 unsigned long flags; 1345 ktime_t unused; 1346 1347 cancel_delayed_work_sync(&guc->timestamp.work); 1348 1349 spin_lock_irqsave(&guc->timestamp.lock, flags); 1350 1351 guc_update_pm_timestamp(guc, &unused); 1352 for_each_engine(engine, gt, id) { 1353 guc_update_engine_gt_clks(engine); 1354 engine->stats.guc.prev_total = 0; 1355 } 1356 1357 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1358 } 1359 1360 static void __update_guc_busyness_stats(struct intel_guc *guc) 1361 { 1362 struct intel_gt *gt = guc_to_gt(guc); 1363 struct intel_engine_cs *engine; 1364 enum intel_engine_id id; 1365 unsigned long flags; 1366 ktime_t unused; 1367 1368 spin_lock_irqsave(&guc->timestamp.lock, flags); 1369 1370 guc_update_pm_timestamp(guc, &unused); 1371 for_each_engine(engine, gt, id) 1372 guc_update_engine_gt_clks(engine); 1373 1374 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1375 } 1376 1377 static void guc_timestamp_ping(struct work_struct *wrk) 1378 { 1379 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1380 timestamp.work.work); 1381 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1382 struct intel_gt *gt = guc_to_gt(guc); 1383 intel_wakeref_t wakeref; 1384 int srcu, ret; 1385 1386 /* 1387 * Synchronize with gt reset to make sure the worker does not 1388 * corrupt the engine/guc stats. 
1389 */ 1390 ret = intel_gt_reset_trylock(gt, &srcu); 1391 if (ret) 1392 return; 1393 1394 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1395 __update_guc_busyness_stats(guc); 1396 1397 intel_gt_reset_unlock(gt, srcu); 1398 1399 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1400 guc->timestamp.ping_delay); 1401 } 1402 1403 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1404 { 1405 u32 offset = intel_guc_engine_usage_offset(guc); 1406 u32 action[] = { 1407 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1408 offset, 1409 0, 1410 }; 1411 1412 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1413 } 1414 1415 static void guc_init_engine_stats(struct intel_guc *guc) 1416 { 1417 struct intel_gt *gt = guc_to_gt(guc); 1418 intel_wakeref_t wakeref; 1419 1420 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1421 guc->timestamp.ping_delay); 1422 1423 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 1424 int ret = guc_action_enable_usage_stats(guc); 1425 1426 if (ret) 1427 drm_err(>->i915->drm, 1428 "Failed to enable usage stats: %d!\n", ret); 1429 } 1430 } 1431 1432 void intel_guc_busyness_park(struct intel_gt *gt) 1433 { 1434 struct intel_guc *guc = >->uc.guc; 1435 1436 if (!guc_submission_initialized(guc)) 1437 return; 1438 1439 cancel_delayed_work(&guc->timestamp.work); 1440 __update_guc_busyness_stats(guc); 1441 } 1442 1443 void intel_guc_busyness_unpark(struct intel_gt *gt) 1444 { 1445 struct intel_guc *guc = >->uc.guc; 1446 unsigned long flags; 1447 ktime_t unused; 1448 1449 if (!guc_submission_initialized(guc)) 1450 return; 1451 1452 spin_lock_irqsave(&guc->timestamp.lock, flags); 1453 guc_update_pm_timestamp(guc, &unused); 1454 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1455 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1456 guc->timestamp.ping_delay); 1457 } 1458 1459 static inline bool 1460 submission_disabled(struct intel_guc *guc) 1461 { 1462 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1463 1464 return unlikely(!sched_engine || 1465 !__tasklet_is_enabled(&sched_engine->tasklet) || 1466 intel_gt_is_wedged(guc_to_gt(guc))); 1467 } 1468 1469 static void disable_submission(struct intel_guc *guc) 1470 { 1471 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1472 1473 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1474 GEM_BUG_ON(!guc->ct.enabled); 1475 __tasklet_disable_sync_once(&sched_engine->tasklet); 1476 sched_engine->tasklet.callback = NULL; 1477 } 1478 } 1479 1480 static void enable_submission(struct intel_guc *guc) 1481 { 1482 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1483 unsigned long flags; 1484 1485 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1486 sched_engine->tasklet.callback = guc_submission_tasklet; 1487 wmb(); /* Make sure callback visible */ 1488 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1489 __tasklet_enable(&sched_engine->tasklet)) { 1490 GEM_BUG_ON(!guc->ct.enabled); 1491 1492 /* And kick in case we missed a new request submission. 
*/ 1493 tasklet_hi_schedule(&sched_engine->tasklet); 1494 } 1495 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1496 } 1497 1498 static void guc_flush_submissions(struct intel_guc *guc) 1499 { 1500 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1501 unsigned long flags; 1502 1503 spin_lock_irqsave(&sched_engine->lock, flags); 1504 spin_unlock_irqrestore(&sched_engine->lock, flags); 1505 } 1506 1507 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1508 1509 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1510 { 1511 if (unlikely(!guc_submission_initialized(guc))) { 1512 /* Reset called during driver load? GuC not yet initialised! */ 1513 return; 1514 } 1515 1516 intel_gt_park_heartbeats(guc_to_gt(guc)); 1517 disable_submission(guc); 1518 guc->interrupts.disable(guc); 1519 __reset_guc_busyness_stats(guc); 1520 1521 /* Flush IRQ handler */ 1522 spin_lock_irq(&guc_to_gt(guc)->irq_lock); 1523 spin_unlock_irq(&guc_to_gt(guc)->irq_lock); 1524 1525 guc_flush_submissions(guc); 1526 guc_flush_destroyed_contexts(guc); 1527 flush_work(&guc->ct.requests.worker); 1528 1529 scrub_guc_desc_for_outstanding_g2h(guc); 1530 } 1531 1532 static struct intel_engine_cs * 1533 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1534 { 1535 struct intel_engine_cs *engine; 1536 intel_engine_mask_t tmp, mask = ve->mask; 1537 unsigned int num_siblings = 0; 1538 1539 for_each_engine_masked(engine, ve->gt, mask, tmp) 1540 if (num_siblings++ == sibling) 1541 return engine; 1542 1543 return NULL; 1544 } 1545 1546 static inline struct intel_engine_cs * 1547 __context_to_physical_engine(struct intel_context *ce) 1548 { 1549 struct intel_engine_cs *engine = ce->engine; 1550 1551 if (intel_engine_is_virtual(engine)) 1552 engine = guc_virtual_get_sibling(engine, 0); 1553 1554 return engine; 1555 } 1556 1557 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1558 { 1559 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1560 1561 if (intel_context_is_banned(ce)) 1562 return; 1563 1564 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1565 1566 /* 1567 * We want a simple context + ring to execute the breadcrumb update. 1568 * We cannot rely on the context being intact across the GPU hang, 1569 * so clear it and rebuild just what we need for the breadcrumb. 1570 * All pending requests for this context will be zapped, and any 1571 * future request will be after userspace has had the opportunity 1572 * to recreate its own state. 1573 */ 1574 if (scrub) 1575 lrc_init_regs(ce, engine, true); 1576 1577 /* Rerun the request; its payload has been neutered (if guilty). 
*/ 1578 lrc_update_regs(ce, engine, head); 1579 } 1580 1581 static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) 1582 { 1583 static const i915_reg_t _reg[I915_NUM_ENGINES] = { 1584 [RCS0] = MSG_IDLE_CS, 1585 [BCS0] = MSG_IDLE_BCS, 1586 [VCS0] = MSG_IDLE_VCS0, 1587 [VCS1] = MSG_IDLE_VCS1, 1588 [VCS2] = MSG_IDLE_VCS2, 1589 [VCS3] = MSG_IDLE_VCS3, 1590 [VCS4] = MSG_IDLE_VCS4, 1591 [VCS5] = MSG_IDLE_VCS5, 1592 [VCS6] = MSG_IDLE_VCS6, 1593 [VCS7] = MSG_IDLE_VCS7, 1594 [VECS0] = MSG_IDLE_VECS0, 1595 [VECS1] = MSG_IDLE_VECS1, 1596 [VECS2] = MSG_IDLE_VECS2, 1597 [VECS3] = MSG_IDLE_VECS3, 1598 [CCS0] = MSG_IDLE_CS, 1599 [CCS1] = MSG_IDLE_CS, 1600 [CCS2] = MSG_IDLE_CS, 1601 [CCS3] = MSG_IDLE_CS, 1602 }; 1603 u32 val; 1604 1605 if (!_reg[engine->id].reg) 1606 return 0; 1607 1608 val = intel_uncore_read(engine->uncore, _reg[engine->id]); 1609 1610 /* bits[29:25] & bits[13:9] >> shift */ 1611 return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; 1612 } 1613 1614 static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) 1615 { 1616 int ret; 1617 1618 /* Ensure GPM receives fw up/down after CS is stopped */ 1619 udelay(1); 1620 1621 /* Wait for forcewake request to complete in GPM */ 1622 ret = __intel_wait_for_register_fw(gt->uncore, 1623 GEN9_PWRGT_DOMAIN_STATUS, 1624 fw_mask, fw_mask, 5000, 0, NULL); 1625 1626 /* Ensure CS receives fw ack from GPM */ 1627 udelay(1); 1628 1629 if (ret) 1630 GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); 1631 } 1632 1633 /* 1634 * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any 1635 * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The 1636 * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the 1637 * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we 1638 * are concerned only with the gt reset here, we use a logical OR of pending 1639 * forcewakeups from all reset domains and then wait for them to complete by 1640 * querying PWRGT_DOMAIN_STATUS. 1641 */ 1642 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1643 { 1644 u32 fw_pending; 1645 1646 if (GRAPHICS_VER(engine->i915) != 12) 1647 return; 1648 1649 /* 1650 * Wa_22011802037 1651 * TODO: Occasionally trying to stop the cs times out, but does not 1652 * adversely affect functionality. The timeout is set as a config 1653 * parameter that defaults to 100ms. Assuming that this timeout is 1654 * sufficient for any pending MI_FORCEWAKEs to complete, ignore the 1655 * timeout returned here until it is root caused. 
1656 */ 1657 intel_engine_stop_cs(engine); 1658 1659 fw_pending = __cs_pending_mi_force_wakes(engine); 1660 if (fw_pending) 1661 __gpm_wait_for_fw_complete(engine->gt, fw_pending); 1662 } 1663 1664 static void guc_reset_nop(struct intel_engine_cs *engine) 1665 { 1666 } 1667 1668 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1669 { 1670 } 1671 1672 static void 1673 __unwind_incomplete_requests(struct intel_context *ce) 1674 { 1675 struct i915_request *rq, *rn; 1676 struct list_head *pl; 1677 int prio = I915_PRIORITY_INVALID; 1678 struct i915_sched_engine * const sched_engine = 1679 ce->engine->sched_engine; 1680 unsigned long flags; 1681 1682 spin_lock_irqsave(&sched_engine->lock, flags); 1683 spin_lock(&ce->guc_state.lock); 1684 list_for_each_entry_safe_reverse(rq, rn, 1685 &ce->guc_state.requests, 1686 sched.link) { 1687 if (i915_request_completed(rq)) 1688 continue; 1689 1690 list_del_init(&rq->sched.link); 1691 __i915_request_unsubmit(rq); 1692 1693 /* Push the request back into the queue for later resubmission. */ 1694 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1695 if (rq_prio(rq) != prio) { 1696 prio = rq_prio(rq); 1697 pl = i915_sched_lookup_priolist(sched_engine, prio); 1698 } 1699 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1700 1701 list_add(&rq->sched.link, pl); 1702 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1703 } 1704 spin_unlock(&ce->guc_state.lock); 1705 spin_unlock_irqrestore(&sched_engine->lock, flags); 1706 } 1707 1708 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1709 { 1710 bool guilty; 1711 struct i915_request *rq; 1712 unsigned long flags; 1713 u32 head; 1714 int i, number_children = ce->parallel.number_children; 1715 struct intel_context *parent = ce; 1716 1717 GEM_BUG_ON(intel_context_is_child(ce)); 1718 1719 intel_context_get(ce); 1720 1721 /* 1722 * GuC will implicitly mark the context as non-schedulable when it sends 1723 * the reset notification. Make sure our state reflects this change. The 1724 * context will be marked enabled on resubmission. 1725 */ 1726 spin_lock_irqsave(&ce->guc_state.lock, flags); 1727 clr_context_enabled(ce); 1728 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1729 1730 /* 1731 * For each context in the relationship find the hanging request 1732 * resetting each context / request as needed 1733 */ 1734 for (i = 0; i < number_children + 1; ++i) { 1735 if (!intel_context_is_pinned(ce)) 1736 goto next_context; 1737 1738 guilty = false; 1739 rq = intel_context_find_active_request(ce); 1740 if (!rq) { 1741 head = ce->ring->tail; 1742 goto out_replay; 1743 } 1744 1745 if (i915_request_started(rq)) 1746 guilty = stalled & ce->engine->mask; 1747 1748 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1749 head = intel_ring_wrap(ce->ring, rq->head); 1750 1751 __i915_request_reset(rq, guilty); 1752 out_replay: 1753 guc_reset_state(ce, head, guilty); 1754 next_context: 1755 if (i != number_children) 1756 ce = list_next_entry(ce, parallel.child_link); 1757 } 1758 1759 __unwind_incomplete_requests(parent); 1760 intel_context_put(parent); 1761 } 1762 1763 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1764 { 1765 struct intel_context *ce; 1766 unsigned long index; 1767 unsigned long flags; 1768 1769 if (unlikely(!guc_submission_initialized(guc))) { 1770 /* Reset called during driver load? GuC not yet initialised! 
*/ 1771 return; 1772 } 1773 1774 xa_lock_irqsave(&guc->context_lookup, flags); 1775 xa_for_each(&guc->context_lookup, index, ce) { 1776 if (!kref_get_unless_zero(&ce->ref)) 1777 continue; 1778 1779 xa_unlock(&guc->context_lookup); 1780 1781 if (intel_context_is_pinned(ce) && 1782 !intel_context_is_child(ce)) 1783 __guc_reset_context(ce, stalled); 1784 1785 intel_context_put(ce); 1786 1787 xa_lock(&guc->context_lookup); 1788 } 1789 xa_unlock_irqrestore(&guc->context_lookup, flags); 1790 1791 /* GuC is blown away, drop all references to contexts */ 1792 xa_destroy(&guc->context_lookup); 1793 } 1794 1795 static void guc_cancel_context_requests(struct intel_context *ce) 1796 { 1797 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1798 struct i915_request *rq; 1799 unsigned long flags; 1800 1801 /* Mark all executing requests as skipped. */ 1802 spin_lock_irqsave(&sched_engine->lock, flags); 1803 spin_lock(&ce->guc_state.lock); 1804 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1805 i915_request_put(i915_request_mark_eio(rq)); 1806 spin_unlock(&ce->guc_state.lock); 1807 spin_unlock_irqrestore(&sched_engine->lock, flags); 1808 } 1809 1810 static void 1811 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1812 { 1813 struct i915_request *rq, *rn; 1814 struct rb_node *rb; 1815 unsigned long flags; 1816 1817 /* Can be called during boot if GuC fails to load */ 1818 if (!sched_engine) 1819 return; 1820 1821 /* 1822 * Before we call engine->cancel_requests(), we should have exclusive 1823 * access to the submission state. This is arranged for us by the 1824 * caller disabling the interrupt generation, the tasklet and other 1825 * threads that may then access the same state, giving us a free hand 1826 * to reset state. However, we still need to let lockdep be aware that 1827 * we know this state may be accessed in hardirq context, so we 1828 * disable the irq around this manipulation and we want to keep 1829 * the spinlock focused on its duties and not accidentally conflate 1830 * coverage to the submission's irq state. (Similarly, although we 1831 * shouldn't need to disable irq around the manipulation of the 1832 * submission's irq state, we also wish to remind ourselves that 1833 * it is irq state.) 1834 */ 1835 spin_lock_irqsave(&sched_engine->lock, flags); 1836 1837 /* Flush the queued requests to the timeline list (for retiring). 
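	 * Each queued request is submitted and then immediately marked with
	 * -EIO, allowing it to be retired as a cancelled request.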
*/ 1838 while ((rb = rb_first_cached(&sched_engine->queue))) { 1839 struct i915_priolist *p = to_priolist(rb); 1840 1841 priolist_for_each_request_consume(rq, rn, p) { 1842 list_del_init(&rq->sched.link); 1843 1844 __i915_request_submit(rq); 1845 1846 i915_request_put(i915_request_mark_eio(rq)); 1847 } 1848 1849 rb_erase_cached(&p->node, &sched_engine->queue); 1850 i915_priolist_free(p); 1851 } 1852 1853 /* Remaining _unready_ requests will be nop'ed when submitted */ 1854 1855 sched_engine->queue_priority_hint = INT_MIN; 1856 sched_engine->queue = RB_ROOT_CACHED; 1857 1858 spin_unlock_irqrestore(&sched_engine->lock, flags); 1859 } 1860 1861 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1862 { 1863 struct intel_context *ce; 1864 unsigned long index; 1865 unsigned long flags; 1866 1867 xa_lock_irqsave(&guc->context_lookup, flags); 1868 xa_for_each(&guc->context_lookup, index, ce) { 1869 if (!kref_get_unless_zero(&ce->ref)) 1870 continue; 1871 1872 xa_unlock(&guc->context_lookup); 1873 1874 if (intel_context_is_pinned(ce) && 1875 !intel_context_is_child(ce)) 1876 guc_cancel_context_requests(ce); 1877 1878 intel_context_put(ce); 1879 1880 xa_lock(&guc->context_lookup); 1881 } 1882 xa_unlock_irqrestore(&guc->context_lookup, flags); 1883 1884 guc_cancel_sched_engine_requests(guc->sched_engine); 1885 1886 /* GuC is blown away, drop all references to contexts */ 1887 xa_destroy(&guc->context_lookup); 1888 } 1889 1890 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1891 { 1892 /* Reset called during driver load or during wedge? */ 1893 if (unlikely(!guc_submission_initialized(guc) || 1894 intel_gt_is_wedged(guc_to_gt(guc)))) { 1895 return; 1896 } 1897 1898 /* 1899 * Technically possible for either of these values to be non-zero here, 1900 * but very unlikely + harmless. Regardless let's add a warn so we can 1901 * see in CI if this happens frequently / a precursor to taking down the 1902 * machine. 1903 */ 1904 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1905 atomic_set(&guc->outstanding_submission_g2h, 0); 1906 1907 intel_guc_global_policies_update(guc); 1908 enable_submission(guc); 1909 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1910 } 1911 1912 static void destroyed_worker_func(struct work_struct *w); 1913 static void reset_fail_worker_func(struct work_struct *w); 1914 1915 /* 1916 * Set up the memory resources to be shared with the GuC (via the GGTT) 1917 * at firmware loading time. 
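 *
 * For GuC firmware v69 this also creates the LRC descriptor pool that is
 * shared with the GuC through the GGTT; the guc_id bitmap used for multi-lrc
 * guc_id allocation is set up here as well.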
1918 */ 1919 int intel_guc_submission_init(struct intel_guc *guc) 1920 { 1921 struct intel_gt *gt = guc_to_gt(guc); 1922 int ret; 1923 1924 if (guc->submission_initialized) 1925 return 0; 1926 1927 if (guc->fw.major_ver_found < 70) { 1928 ret = guc_lrc_desc_pool_create_v69(guc); 1929 if (ret) 1930 return ret; 1931 } 1932 1933 guc->submission_state.guc_ids_bitmap = 1934 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1935 if (!guc->submission_state.guc_ids_bitmap) { 1936 ret = -ENOMEM; 1937 goto destroy_pool; 1938 } 1939 1940 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1941 guc->timestamp.shift = gpm_timestamp_shift(gt); 1942 guc->submission_initialized = true; 1943 1944 return 0; 1945 1946 destroy_pool: 1947 guc_lrc_desc_pool_destroy_v69(guc); 1948 1949 return ret; 1950 } 1951 1952 void intel_guc_submission_fini(struct intel_guc *guc) 1953 { 1954 if (!guc->submission_initialized) 1955 return; 1956 1957 guc_flush_destroyed_contexts(guc); 1958 guc_lrc_desc_pool_destroy_v69(guc); 1959 i915_sched_engine_put(guc->sched_engine); 1960 bitmap_free(guc->submission_state.guc_ids_bitmap); 1961 guc->submission_initialized = false; 1962 } 1963 1964 static inline void queue_request(struct i915_sched_engine *sched_engine, 1965 struct i915_request *rq, 1966 int prio) 1967 { 1968 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1969 list_add_tail(&rq->sched.link, 1970 i915_sched_lookup_priolist(sched_engine, prio)); 1971 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1972 tasklet_hi_schedule(&sched_engine->tasklet); 1973 } 1974 1975 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1976 struct i915_request *rq) 1977 { 1978 int ret = 0; 1979 1980 __i915_request_submit(rq); 1981 1982 trace_i915_request_in(rq, 0); 1983 1984 if (is_multi_lrc_rq(rq)) { 1985 if (multi_lrc_submit(rq)) { 1986 ret = guc_wq_item_append(guc, rq); 1987 if (!ret) 1988 ret = guc_add_request(guc, rq); 1989 } 1990 } else { 1991 guc_set_lrc_tail(rq); 1992 ret = guc_add_request(guc, rq); 1993 } 1994 1995 if (unlikely(ret == -EPIPE)) 1996 disable_submission(guc); 1997 1998 return ret; 1999 } 2000 2001 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2002 { 2003 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2004 struct intel_context *ce = request_to_scheduling_context(rq); 2005 2006 return submission_disabled(guc) || guc->stalled_request || 2007 !i915_sched_engine_is_empty(sched_engine) || 2008 !ctx_id_mapped(guc, ce->guc_id.id); 2009 } 2010 2011 static void guc_submit_request(struct i915_request *rq) 2012 { 2013 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2014 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2015 unsigned long flags; 2016 2017 /* Will be called from irq-context when using foreign fences. 
*/ 2018 spin_lock_irqsave(&sched_engine->lock, flags); 2019 2020 if (need_tasklet(guc, rq)) 2021 queue_request(sched_engine, rq, rq_prio(rq)); 2022 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2023 tasklet_hi_schedule(&sched_engine->tasklet); 2024 2025 spin_unlock_irqrestore(&sched_engine->lock, flags); 2026 } 2027 2028 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2029 { 2030 int ret; 2031 2032 GEM_BUG_ON(intel_context_is_child(ce)); 2033 2034 if (intel_context_is_parent(ce)) 2035 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2036 NUMBER_MULTI_LRC_GUC_ID(guc), 2037 order_base_2(ce->parallel.number_children 2038 + 1)); 2039 else 2040 ret = ida_simple_get(&guc->submission_state.guc_ids, 2041 NUMBER_MULTI_LRC_GUC_ID(guc), 2042 guc->submission_state.num_guc_ids, 2043 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2044 __GFP_NOWARN); 2045 if (unlikely(ret < 0)) 2046 return ret; 2047 2048 ce->guc_id.id = ret; 2049 return 0; 2050 } 2051 2052 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2053 { 2054 GEM_BUG_ON(intel_context_is_child(ce)); 2055 2056 if (!context_guc_id_invalid(ce)) { 2057 if (intel_context_is_parent(ce)) 2058 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2059 ce->guc_id.id, 2060 order_base_2(ce->parallel.number_children 2061 + 1)); 2062 else 2063 ida_simple_remove(&guc->submission_state.guc_ids, 2064 ce->guc_id.id); 2065 clr_ctx_id_mapping(guc, ce->guc_id.id); 2066 set_context_guc_id_invalid(ce); 2067 } 2068 if (!list_empty(&ce->guc_id.link)) 2069 list_del_init(&ce->guc_id.link); 2070 } 2071 2072 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2073 { 2074 unsigned long flags; 2075 2076 spin_lock_irqsave(&guc->submission_state.lock, flags); 2077 __release_guc_id(guc, ce); 2078 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2079 } 2080 2081 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2082 { 2083 struct intel_context *cn; 2084 2085 lockdep_assert_held(&guc->submission_state.lock); 2086 GEM_BUG_ON(intel_context_is_child(ce)); 2087 GEM_BUG_ON(intel_context_is_parent(ce)); 2088 2089 if (!list_empty(&guc->submission_state.guc_id_list)) { 2090 cn = list_first_entry(&guc->submission_state.guc_id_list, 2091 struct intel_context, 2092 guc_id.link); 2093 2094 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2095 GEM_BUG_ON(context_guc_id_invalid(cn)); 2096 GEM_BUG_ON(intel_context_is_child(cn)); 2097 GEM_BUG_ON(intel_context_is_parent(cn)); 2098 2099 list_del_init(&cn->guc_id.link); 2100 ce->guc_id.id = cn->guc_id.id; 2101 2102 spin_lock(&cn->guc_state.lock); 2103 clr_context_registered(cn); 2104 spin_unlock(&cn->guc_state.lock); 2105 2106 set_context_guc_id_invalid(cn); 2107 2108 #ifdef CONFIG_DRM_I915_SELFTEST 2109 guc->number_guc_id_stolen++; 2110 #endif 2111 2112 return 0; 2113 } else { 2114 return -EAGAIN; 2115 } 2116 } 2117 2118 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2119 { 2120 int ret; 2121 2122 lockdep_assert_held(&guc->submission_state.lock); 2123 GEM_BUG_ON(intel_context_is_child(ce)); 2124 2125 ret = new_guc_id(guc, ce); 2126 if (unlikely(ret < 0)) { 2127 if (intel_context_is_parent(ce)) 2128 return -ENOSPC; 2129 2130 ret = steal_guc_id(guc, ce); 2131 if (ret < 0) 2132 return ret; 2133 } 2134 2135 if (intel_context_is_parent(ce)) { 2136 struct intel_context *child; 2137 int i = 1; 2138 2139 for_each_child(ce, child) 2140 child->guc_id.id = ce->guc_id.id + i++; 2141 } 2142 2143 return 0; 2144 } 
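/*
 * Pin a guc_id for use by a context's requests. pin_guc_id() returns 0 if an
 * already assigned guc_id was pinned, 1 if a new guc_id was assigned (the
 * caller must then (re)register the context with the GuC), or a negative
 * error code. On -EAGAIN it retries up to PIN_GUC_ID_TRIES times, retiring
 * outstanding requests (with an increasing back-off) between attempts in the
 * hope of freeing up a guc_id.
 */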
2145 2146 #define PIN_GUC_ID_TRIES 4 2147 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2148 { 2149 int ret = 0; 2150 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2151 2152 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2153 2154 try_again: 2155 spin_lock_irqsave(&guc->submission_state.lock, flags); 2156 2157 might_lock(&ce->guc_state.lock); 2158 2159 if (context_guc_id_invalid(ce)) { 2160 ret = assign_guc_id(guc, ce); 2161 if (ret) 2162 goto out_unlock; 2163 ret = 1; /* Indidcates newly assigned guc_id */ 2164 } 2165 if (!list_empty(&ce->guc_id.link)) 2166 list_del_init(&ce->guc_id.link); 2167 atomic_inc(&ce->guc_id.ref); 2168 2169 out_unlock: 2170 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2171 2172 /* 2173 * -EAGAIN indicates no guc_id are available, let's retire any 2174 * outstanding requests to see if that frees up a guc_id. If the first 2175 * retire didn't help, insert a sleep with the timeslice duration before 2176 * attempting to retire more requests. Double the sleep period each 2177 * subsequent pass before finally giving up. The sleep period has max of 2178 * 100ms and minimum of 1ms. 2179 */ 2180 if (ret == -EAGAIN && --tries) { 2181 if (PIN_GUC_ID_TRIES - tries > 1) { 2182 unsigned int timeslice_shifted = 2183 ce->engine->props.timeslice_duration_ms << 2184 (PIN_GUC_ID_TRIES - tries - 2); 2185 unsigned int max = min_t(unsigned int, 100, 2186 timeslice_shifted); 2187 2188 msleep(max_t(unsigned int, max, 1)); 2189 } 2190 intel_gt_retire_requests(guc_to_gt(guc)); 2191 goto try_again; 2192 } 2193 2194 return ret; 2195 } 2196 2197 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2198 { 2199 unsigned long flags; 2200 2201 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2202 GEM_BUG_ON(intel_context_is_child(ce)); 2203 2204 if (unlikely(context_guc_id_invalid(ce) || 2205 intel_context_is_parent(ce))) 2206 return; 2207 2208 spin_lock_irqsave(&guc->submission_state.lock, flags); 2209 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2210 !atomic_read(&ce->guc_id.ref)) 2211 list_add_tail(&ce->guc_id.link, 2212 &guc->submission_state.guc_id_list); 2213 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2214 } 2215 2216 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2217 struct intel_context *ce, 2218 u32 guc_id, 2219 u32 offset, 2220 bool loop) 2221 { 2222 struct intel_context *child; 2223 u32 action[4 + MAX_ENGINE_INSTANCE]; 2224 int len = 0; 2225 2226 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2227 2228 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2229 action[len++] = guc_id; 2230 action[len++] = ce->parallel.number_children + 1; 2231 action[len++] = offset; 2232 for_each_child(ce, child) { 2233 offset += sizeof(struct guc_lrc_desc_v69); 2234 action[len++] = offset; 2235 } 2236 2237 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2238 } 2239 2240 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2241 struct intel_context *ce, 2242 struct guc_ctxt_registration_info *info, 2243 bool loop) 2244 { 2245 struct intel_context *child; 2246 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2247 int len = 0; 2248 u32 next_id; 2249 2250 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2251 2252 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2253 action[len++] = info->flags; 2254 action[len++] = info->context_idx; 2255 action[len++] = info->engine_class; 2256 action[len++] = 
info->engine_submit_mask; 2257 action[len++] = info->wq_desc_lo; 2258 action[len++] = info->wq_desc_hi; 2259 action[len++] = info->wq_base_lo; 2260 action[len++] = info->wq_base_hi; 2261 action[len++] = info->wq_size; 2262 action[len++] = ce->parallel.number_children + 1; 2263 action[len++] = info->hwlrca_lo; 2264 action[len++] = info->hwlrca_hi; 2265 2266 next_id = info->context_idx + 1; 2267 for_each_child(ce, child) { 2268 GEM_BUG_ON(next_id++ != child->guc_id.id); 2269 2270 /* 2271 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2272 * only supports 32 bit currently. 2273 */ 2274 action[len++] = lower_32_bits(child->lrc.lrca); 2275 action[len++] = upper_32_bits(child->lrc.lrca); 2276 } 2277 2278 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2279 2280 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2281 } 2282 2283 static int __guc_action_register_context_v69(struct intel_guc *guc, 2284 u32 guc_id, 2285 u32 offset, 2286 bool loop) 2287 { 2288 u32 action[] = { 2289 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2290 guc_id, 2291 offset, 2292 }; 2293 2294 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2295 0, loop); 2296 } 2297 2298 static int __guc_action_register_context_v70(struct intel_guc *guc, 2299 struct guc_ctxt_registration_info *info, 2300 bool loop) 2301 { 2302 u32 action[] = { 2303 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2304 info->flags, 2305 info->context_idx, 2306 info->engine_class, 2307 info->engine_submit_mask, 2308 info->wq_desc_lo, 2309 info->wq_desc_hi, 2310 info->wq_base_lo, 2311 info->wq_base_hi, 2312 info->wq_size, 2313 info->hwlrca_lo, 2314 info->hwlrca_hi, 2315 }; 2316 2317 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2318 0, loop); 2319 } 2320 2321 static void prepare_context_registration_info_v69(struct intel_context *ce); 2322 static void prepare_context_registration_info_v70(struct intel_context *ce, 2323 struct guc_ctxt_registration_info *info); 2324 2325 static int 2326 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2327 { 2328 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2329 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2330 2331 prepare_context_registration_info_v69(ce); 2332 2333 if (intel_context_is_parent(ce)) 2334 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2335 offset, loop); 2336 else 2337 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2338 offset, loop); 2339 } 2340 2341 static int 2342 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2343 { 2344 struct guc_ctxt_registration_info info; 2345 2346 prepare_context_registration_info_v70(ce, &info); 2347 2348 if (intel_context_is_parent(ce)) 2349 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2350 else 2351 return __guc_action_register_context_v70(guc, &info, loop); 2352 } 2353 2354 static int register_context(struct intel_context *ce, bool loop) 2355 { 2356 struct intel_guc *guc = ce_to_guc(ce); 2357 int ret; 2358 2359 GEM_BUG_ON(intel_context_is_child(ce)); 2360 trace_intel_context_register(ce); 2361 2362 if (guc->fw.major_ver_found >= 70) 2363 ret = register_context_v70(guc, ce, loop); 2364 else 2365 ret = register_context_v69(guc, ce, loop); 2366 2367 if (likely(!ret)) { 2368 unsigned long flags; 2369 2370 spin_lock_irqsave(&ce->guc_state.lock, flags); 2371 set_context_registered(ce); 2372 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2373 2374 if (guc->fw.major_ver_found >= 70) 2375 
guc_context_policy_init_v70(ce, loop); 2376 } 2377 2378 return ret; 2379 } 2380 2381 static int __guc_action_deregister_context(struct intel_guc *guc, 2382 u32 guc_id) 2383 { 2384 u32 action[] = { 2385 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2386 guc_id, 2387 }; 2388 2389 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2390 G2H_LEN_DW_DEREGISTER_CONTEXT, 2391 true); 2392 } 2393 2394 static int deregister_context(struct intel_context *ce, u32 guc_id) 2395 { 2396 struct intel_guc *guc = ce_to_guc(ce); 2397 2398 GEM_BUG_ON(intel_context_is_child(ce)); 2399 trace_intel_context_deregister(ce); 2400 2401 return __guc_action_deregister_context(guc, guc_id); 2402 } 2403 2404 static inline void clear_children_join_go_memory(struct intel_context *ce) 2405 { 2406 struct parent_scratch *ps = __get_parent_scratch(ce); 2407 int i; 2408 2409 ps->go.semaphore = 0; 2410 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2411 ps->join[i].semaphore = 0; 2412 } 2413 2414 static inline u32 get_children_go_value(struct intel_context *ce) 2415 { 2416 return __get_parent_scratch(ce)->go.semaphore; 2417 } 2418 2419 static inline u32 get_children_join_value(struct intel_context *ce, 2420 u8 child_index) 2421 { 2422 return __get_parent_scratch(ce)->join[child_index].semaphore; 2423 } 2424 2425 struct context_policy { 2426 u32 count; 2427 struct guc_update_context_policy h2g; 2428 }; 2429 2430 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2431 { 2432 size_t bytes = sizeof(policy->h2g.header) + 2433 (sizeof(policy->h2g.klv[0]) * policy->count); 2434 2435 return bytes / sizeof(u32); 2436 } 2437 2438 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2439 { 2440 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2441 policy->h2g.header.ctx_id = guc_id; 2442 policy->count = 0; 2443 } 2444 2445 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2446 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2447 { \ 2448 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2449 policy->h2g.klv[policy->count].kl = \ 2450 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2451 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2452 policy->h2g.klv[policy->count].value = data; \ 2453 policy->count++; \ 2454 } 2455 2456 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2457 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2458 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2459 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2460 2461 #undef MAKE_CONTEXT_POLICY_ADD 2462 2463 static int __guc_context_set_context_policies(struct intel_guc *guc, 2464 struct context_policy *policy, 2465 bool loop) 2466 { 2467 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2468 __guc_context_policy_action_size(policy), 2469 0, loop); 2470 } 2471 2472 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2473 { 2474 struct intel_engine_cs *engine = ce->engine; 2475 struct intel_guc *guc = &engine->gt->uc.guc; 2476 struct context_policy policy; 2477 u32 execution_quantum; 2478 u32 preemption_timeout; 2479 bool missing = false; 2480 unsigned long flags; 2481 int ret; 2482 2483 /* NB: For both of these, zero means disabled. 
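 * An engine property of 0 ms therefore turns timeslicing / the preemption
 * timeout off for this context.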
*/ 2484 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2485 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2486 2487 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2488 2489 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2490 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2491 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2492 2493 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2494 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2495 2496 ret = __guc_context_set_context_policies(guc, &policy, loop); 2497 missing = ret != 0; 2498 2499 if (!missing && intel_context_is_parent(ce)) { 2500 struct intel_context *child; 2501 2502 for_each_child(ce, child) { 2503 __guc_context_policy_start_klv(&policy, child->guc_id.id); 2504 2505 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2506 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2507 2508 child->guc_state.prio = ce->guc_state.prio; 2509 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2510 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2511 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2512 2513 ret = __guc_context_set_context_policies(guc, &policy, loop); 2514 if (ret) { 2515 missing = true; 2516 break; 2517 } 2518 } 2519 } 2520 2521 spin_lock_irqsave(&ce->guc_state.lock, flags); 2522 if (missing) 2523 set_context_policy_required(ce); 2524 else 2525 clr_context_policy_required(ce); 2526 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2527 2528 return ret; 2529 } 2530 2531 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2532 struct guc_lrc_desc_v69 *desc) 2533 { 2534 desc->policy_flags = 0; 2535 2536 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2537 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2538 2539 /* NB: For both of these, zero means disabled. */ 2540 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2541 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2542 } 2543 2544 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2545 { 2546 /* 2547 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2548 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2549 */ 2550 switch (prio) { 2551 default: 2552 MISSING_CASE(prio); 2553 fallthrough; 2554 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2555 return GEN12_CTX_PRIORITY_NORMAL; 2556 case GUC_CLIENT_PRIORITY_NORMAL: 2557 return GEN12_CTX_PRIORITY_LOW; 2558 case GUC_CLIENT_PRIORITY_HIGH: 2559 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2560 return GEN12_CTX_PRIORITY_HIGH; 2561 } 2562 } 2563 2564 static void prepare_context_registration_info_v69(struct intel_context *ce) 2565 { 2566 struct intel_engine_cs *engine = ce->engine; 2567 struct intel_guc *guc = &engine->gt->uc.guc; 2568 u32 ctx_id = ce->guc_id.id; 2569 struct guc_lrc_desc_v69 *desc; 2570 struct intel_context *child; 2571 2572 GEM_BUG_ON(!engine->mask); 2573 2574 /* 2575 * Ensure LRC + CT vmas are is same region as write barrier is done 2576 * based on CT vma region. 
2577 */ 2578 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2579 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2580 2581 desc = __get_lrc_desc_v69(guc, ctx_id); 2582 desc->engine_class = engine_class_to_guc_class(engine->class); 2583 desc->engine_submit_mask = engine->logical_mask; 2584 desc->hw_context_desc = ce->lrc.lrca; 2585 desc->priority = ce->guc_state.prio; 2586 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2587 guc_context_policy_init_v69(engine, desc); 2588 2589 /* 2590 * If context is a parent, we need to register a process descriptor 2591 * describing a work queue and register all child contexts. 2592 */ 2593 if (intel_context_is_parent(ce)) { 2594 struct guc_process_desc_v69 *pdesc; 2595 2596 ce->parallel.guc.wqi_tail = 0; 2597 ce->parallel.guc.wqi_head = 0; 2598 2599 desc->process_desc = i915_ggtt_offset(ce->state) + 2600 __get_parent_scratch_offset(ce); 2601 desc->wq_addr = i915_ggtt_offset(ce->state) + 2602 __get_wq_offset(ce); 2603 desc->wq_size = WQ_SIZE; 2604 2605 pdesc = __get_process_desc_v69(ce); 2606 memset(pdesc, 0, sizeof(*(pdesc))); 2607 pdesc->stage_id = ce->guc_id.id; 2608 pdesc->wq_base_addr = desc->wq_addr; 2609 pdesc->wq_size_bytes = desc->wq_size; 2610 pdesc->wq_status = WQ_STATUS_ACTIVE; 2611 2612 ce->parallel.guc.wq_head = &pdesc->head; 2613 ce->parallel.guc.wq_tail = &pdesc->tail; 2614 ce->parallel.guc.wq_status = &pdesc->wq_status; 2615 2616 for_each_child(ce, child) { 2617 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2618 2619 desc->engine_class = 2620 engine_class_to_guc_class(engine->class); 2621 desc->hw_context_desc = child->lrc.lrca; 2622 desc->priority = ce->guc_state.prio; 2623 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2624 guc_context_policy_init_v69(engine, desc); 2625 } 2626 2627 clear_children_join_go_memory(ce); 2628 } 2629 } 2630 2631 static void prepare_context_registration_info_v70(struct intel_context *ce, 2632 struct guc_ctxt_registration_info *info) 2633 { 2634 struct intel_engine_cs *engine = ce->engine; 2635 struct intel_guc *guc = &engine->gt->uc.guc; 2636 u32 ctx_id = ce->guc_id.id; 2637 2638 GEM_BUG_ON(!engine->mask); 2639 2640 /* 2641 * Ensure LRC + CT vmas are is same region as write barrier is done 2642 * based on CT vma region. 2643 */ 2644 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2645 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2646 2647 memset(info, 0, sizeof(*info)); 2648 info->context_idx = ctx_id; 2649 info->engine_class = engine_class_to_guc_class(engine->class); 2650 info->engine_submit_mask = engine->logical_mask; 2651 /* 2652 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2653 * only supports 32 bit currently. 2654 */ 2655 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2656 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2657 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2658 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2659 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2660 2661 /* 2662 * If context is a parent, we need to register a process descriptor 2663 * describing a work queue and register all child contexts. 
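 * Both the work queue descriptor and the work queue itself are stored in
 * the parent context's state object, so their GGTT offsets are derived
 * from ce->state below.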
2664 */ 2665 if (intel_context_is_parent(ce)) { 2666 struct guc_sched_wq_desc *wq_desc; 2667 u64 wq_desc_offset, wq_base_offset; 2668 2669 ce->parallel.guc.wqi_tail = 0; 2670 ce->parallel.guc.wqi_head = 0; 2671 2672 wq_desc_offset = i915_ggtt_offset(ce->state) + 2673 __get_parent_scratch_offset(ce); 2674 wq_base_offset = i915_ggtt_offset(ce->state) + 2675 __get_wq_offset(ce); 2676 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2677 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2678 info->wq_base_lo = lower_32_bits(wq_base_offset); 2679 info->wq_base_hi = upper_32_bits(wq_base_offset); 2680 info->wq_size = WQ_SIZE; 2681 2682 wq_desc = __get_wq_desc_v70(ce); 2683 memset(wq_desc, 0, sizeof(*wq_desc)); 2684 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2685 2686 ce->parallel.guc.wq_head = &wq_desc->head; 2687 ce->parallel.guc.wq_tail = &wq_desc->tail; 2688 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2689 2690 clear_children_join_go_memory(ce); 2691 } 2692 } 2693 2694 static int try_context_registration(struct intel_context *ce, bool loop) 2695 { 2696 struct intel_engine_cs *engine = ce->engine; 2697 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2698 struct intel_guc *guc = &engine->gt->uc.guc; 2699 intel_wakeref_t wakeref; 2700 u32 ctx_id = ce->guc_id.id; 2701 bool context_registered; 2702 int ret = 0; 2703 2704 GEM_BUG_ON(!sched_state_is_init(ce)); 2705 2706 context_registered = ctx_id_mapped(guc, ctx_id); 2707 2708 clr_ctx_id_mapping(guc, ctx_id); 2709 set_ctx_id_mapping(guc, ctx_id, ce); 2710 2711 /* 2712 * The context_lookup xarray is used to determine if the hardware 2713 * context is currently registered. There are two cases in which it 2714 * could be registered either the guc_id has been stolen from another 2715 * context or the lrc descriptor address of this context has changed. In 2716 * either case the context needs to be deregistered with the GuC before 2717 * registering this context. 2718 */ 2719 if (context_registered) { 2720 bool disabled; 2721 unsigned long flags; 2722 2723 trace_intel_context_steal_guc_id(ce); 2724 GEM_BUG_ON(!loop); 2725 2726 /* Seal race with Reset */ 2727 spin_lock_irqsave(&ce->guc_state.lock, flags); 2728 disabled = submission_disabled(guc); 2729 if (likely(!disabled)) { 2730 set_context_wait_for_deregister_to_register(ce); 2731 intel_context_get(ce); 2732 } 2733 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2734 if (unlikely(disabled)) { 2735 clr_ctx_id_mapping(guc, ctx_id); 2736 return 0; /* Will get registered later */ 2737 } 2738 2739 /* 2740 * If stealing the guc_id, this ce has the same guc_id as the 2741 * context whose guc_id was stolen. 
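 * The old context must be deregistered before the guc_id can be reused,
 * so issue the deregister H2G now; registration of this context completes
 * once the corresponding deregister-done G2H is received.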
2742 */ 2743 with_intel_runtime_pm(runtime_pm, wakeref) 2744 ret = deregister_context(ce, ce->guc_id.id); 2745 if (unlikely(ret == -ENODEV)) 2746 ret = 0; /* Will get registered later */ 2747 } else { 2748 with_intel_runtime_pm(runtime_pm, wakeref) 2749 ret = register_context(ce, loop); 2750 if (unlikely(ret == -EBUSY)) { 2751 clr_ctx_id_mapping(guc, ctx_id); 2752 } else if (unlikely(ret == -ENODEV)) { 2753 clr_ctx_id_mapping(guc, ctx_id); 2754 ret = 0; /* Will get registered later */ 2755 } 2756 } 2757 2758 return ret; 2759 } 2760 2761 static int __guc_context_pre_pin(struct intel_context *ce, 2762 struct intel_engine_cs *engine, 2763 struct i915_gem_ww_ctx *ww, 2764 void **vaddr) 2765 { 2766 return lrc_pre_pin(ce, engine, ww, vaddr); 2767 } 2768 2769 static int __guc_context_pin(struct intel_context *ce, 2770 struct intel_engine_cs *engine, 2771 void *vaddr) 2772 { 2773 if (i915_ggtt_offset(ce->state) != 2774 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2775 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2776 2777 /* 2778 * GuC context gets pinned in guc_request_alloc. See that function for 2779 * explaination of why. 2780 */ 2781 2782 return lrc_pin(ce, engine, vaddr); 2783 } 2784 2785 static int guc_context_pre_pin(struct intel_context *ce, 2786 struct i915_gem_ww_ctx *ww, 2787 void **vaddr) 2788 { 2789 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2790 } 2791 2792 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2793 { 2794 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2795 2796 if (likely(!ret && !intel_context_is_barrier(ce))) 2797 intel_engine_pm_get(ce->engine); 2798 2799 return ret; 2800 } 2801 2802 static void guc_context_unpin(struct intel_context *ce) 2803 { 2804 struct intel_guc *guc = ce_to_guc(ce); 2805 2806 unpin_guc_id(guc, ce); 2807 lrc_unpin(ce); 2808 2809 if (likely(!intel_context_is_barrier(ce))) 2810 intel_engine_pm_put_async(ce->engine); 2811 } 2812 2813 static void guc_context_post_unpin(struct intel_context *ce) 2814 { 2815 lrc_post_unpin(ce); 2816 } 2817 2818 static void __guc_context_sched_enable(struct intel_guc *guc, 2819 struct intel_context *ce) 2820 { 2821 u32 action[] = { 2822 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2823 ce->guc_id.id, 2824 GUC_CONTEXT_ENABLE 2825 }; 2826 2827 trace_intel_context_sched_enable(ce); 2828 2829 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2830 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2831 } 2832 2833 static void __guc_context_sched_disable(struct intel_guc *guc, 2834 struct intel_context *ce, 2835 u16 guc_id) 2836 { 2837 u32 action[] = { 2838 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2839 guc_id, /* ce->guc_id.id not stable */ 2840 GUC_CONTEXT_DISABLE 2841 }; 2842 2843 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2844 2845 GEM_BUG_ON(intel_context_is_child(ce)); 2846 trace_intel_context_sched_disable(ce); 2847 2848 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2849 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2850 } 2851 2852 static void guc_blocked_fence_complete(struct intel_context *ce) 2853 { 2854 lockdep_assert_held(&ce->guc_state.lock); 2855 2856 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2857 i915_sw_fence_complete(&ce->guc_state.blocked); 2858 } 2859 2860 static void guc_blocked_fence_reinit(struct intel_context *ce) 2861 { 2862 lockdep_assert_held(&ce->guc_state.lock); 2863 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2864 2865 /* 2866 * This fence is always complete unless a pending schedule disable is 2867 * outstanding. 
We arm the fence here and complete it when we receive 2868 * the pending schedule disable complete message. 2869 */ 2870 i915_sw_fence_fini(&ce->guc_state.blocked); 2871 i915_sw_fence_reinit(&ce->guc_state.blocked); 2872 i915_sw_fence_await(&ce->guc_state.blocked); 2873 i915_sw_fence_commit(&ce->guc_state.blocked); 2874 } 2875 2876 static u16 prep_context_pending_disable(struct intel_context *ce) 2877 { 2878 lockdep_assert_held(&ce->guc_state.lock); 2879 2880 set_context_pending_disable(ce); 2881 clr_context_enabled(ce); 2882 guc_blocked_fence_reinit(ce); 2883 intel_context_get(ce); 2884 2885 return ce->guc_id.id; 2886 } 2887 2888 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2889 { 2890 struct intel_guc *guc = ce_to_guc(ce); 2891 unsigned long flags; 2892 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2893 intel_wakeref_t wakeref; 2894 u16 guc_id; 2895 bool enabled; 2896 2897 GEM_BUG_ON(intel_context_is_child(ce)); 2898 2899 spin_lock_irqsave(&ce->guc_state.lock, flags); 2900 2901 incr_context_blocked(ce); 2902 2903 enabled = context_enabled(ce); 2904 if (unlikely(!enabled || submission_disabled(guc))) { 2905 if (enabled) 2906 clr_context_enabled(ce); 2907 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2908 return &ce->guc_state.blocked; 2909 } 2910 2911 /* 2912 * We add +2 here as the schedule disable complete CTB handler calls 2913 * intel_context_sched_disable_unpin (-2 to pin_count). 2914 */ 2915 atomic_add(2, &ce->pin_count); 2916 2917 guc_id = prep_context_pending_disable(ce); 2918 2919 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2920 2921 with_intel_runtime_pm(runtime_pm, wakeref) 2922 __guc_context_sched_disable(guc, ce, guc_id); 2923 2924 return &ce->guc_state.blocked; 2925 } 2926 2927 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2928 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2929 #define SCHED_STATE_NO_UNBLOCK \ 2930 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2931 SCHED_STATE_PENDING_DISABLE | \ 2932 SCHED_STATE_BANNED) 2933 2934 static bool context_cant_unblock(struct intel_context *ce) 2935 { 2936 lockdep_assert_held(&ce->guc_state.lock); 2937 2938 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2939 context_guc_id_invalid(ce) || 2940 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2941 !intel_context_is_pinned(ce); 2942 } 2943 2944 static void guc_context_unblock(struct intel_context *ce) 2945 { 2946 struct intel_guc *guc = ce_to_guc(ce); 2947 unsigned long flags; 2948 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2949 intel_wakeref_t wakeref; 2950 bool enable; 2951 2952 GEM_BUG_ON(context_enabled(ce)); 2953 GEM_BUG_ON(intel_context_is_child(ce)); 2954 2955 spin_lock_irqsave(&ce->guc_state.lock, flags); 2956 2957 if (unlikely(submission_disabled(guc) || 2958 context_cant_unblock(ce))) { 2959 enable = false; 2960 } else { 2961 enable = true; 2962 set_context_pending_enable(ce); 2963 set_context_enabled(ce); 2964 intel_context_get(ce); 2965 } 2966 2967 decr_context_blocked(ce); 2968 2969 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2970 2971 if (enable) { 2972 with_intel_runtime_pm(runtime_pm, wakeref) 2973 __guc_context_sched_enable(guc, ce); 2974 } 2975 } 2976 2977 static void guc_context_cancel_request(struct intel_context *ce, 2978 struct i915_request *rq) 2979 { 2980 struct intel_context *block_context = 2981 request_to_scheduling_context(rq); 2982 2983 if (i915_sw_fence_signaled(&rq->submit)) { 2984 struct i915_sw_fence *fence; 2985 2986 intel_context_get(ce); 2987 fence = 
guc_context_block(block_context); 2988 i915_sw_fence_wait(fence); 2989 if (!i915_request_completed(rq)) { 2990 __i915_request_skip(rq); 2991 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2992 true); 2993 } 2994 2995 guc_context_unblock(block_context); 2996 intel_context_put(ce); 2997 } 2998 } 2999 3000 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3001 u16 guc_id, 3002 u32 preemption_timeout) 3003 { 3004 if (guc->fw.major_ver_found >= 70) { 3005 struct context_policy policy; 3006 3007 __guc_context_policy_start_klv(&policy, guc_id); 3008 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3009 __guc_context_set_context_policies(guc, &policy, true); 3010 } else { 3011 u32 action[] = { 3012 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3013 guc_id, 3014 preemption_timeout 3015 }; 3016 3017 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3018 } 3019 } 3020 3021 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) 3022 { 3023 struct intel_guc *guc = ce_to_guc(ce); 3024 struct intel_runtime_pm *runtime_pm = 3025 &ce->engine->gt->i915->runtime_pm; 3026 intel_wakeref_t wakeref; 3027 unsigned long flags; 3028 3029 GEM_BUG_ON(intel_context_is_child(ce)); 3030 3031 guc_flush_submissions(guc); 3032 3033 spin_lock_irqsave(&ce->guc_state.lock, flags); 3034 set_context_banned(ce); 3035 3036 if (submission_disabled(guc) || 3037 (!context_enabled(ce) && !context_pending_disable(ce))) { 3038 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3039 3040 guc_cancel_context_requests(ce); 3041 intel_engine_signal_breadcrumbs(ce->engine); 3042 } else if (!context_pending_disable(ce)) { 3043 u16 guc_id; 3044 3045 /* 3046 * We add +2 here as the schedule disable complete CTB handler 3047 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3048 */ 3049 atomic_add(2, &ce->pin_count); 3050 3051 guc_id = prep_context_pending_disable(ce); 3052 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3053 3054 /* 3055 * In addition to disabling scheduling, set the preemption 3056 * timeout to the minimum value (1 us) so the banned context 3057 * gets kicked off the HW ASAP. 3058 */ 3059 with_intel_runtime_pm(runtime_pm, wakeref) { 3060 __guc_context_set_preemption_timeout(guc, guc_id, 1); 3061 __guc_context_sched_disable(guc, ce, guc_id); 3062 } 3063 } else { 3064 if (!context_guc_id_invalid(ce)) 3065 with_intel_runtime_pm(runtime_pm, wakeref) 3066 __guc_context_set_preemption_timeout(guc, 3067 ce->guc_id.id, 3068 1); 3069 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3070 } 3071 } 3072 3073 static void guc_context_sched_disable(struct intel_context *ce) 3074 { 3075 struct intel_guc *guc = ce_to_guc(ce); 3076 unsigned long flags; 3077 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3078 intel_wakeref_t wakeref; 3079 u16 guc_id; 3080 3081 GEM_BUG_ON(intel_context_is_child(ce)); 3082 3083 spin_lock_irqsave(&ce->guc_state.lock, flags); 3084 3085 /* 3086 * We have to check if the context has been disabled by another thread, 3087 * check if submssion has been disabled to seal a race with reset and 3088 * finally check if any more requests have been committed to the 3089 * context ensursing that a request doesn't slip through the 3090 * 'context_pending_disable' fence. 
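 * If any of those conditions hold, skip the schedule disable H2G and go
 * straight to intel_context_sched_disable_unpin().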
3091 */ 3092 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 3093 context_has_committed_requests(ce))) { 3094 clr_context_enabled(ce); 3095 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3096 goto unpin; 3097 } 3098 guc_id = prep_context_pending_disable(ce); 3099 3100 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3101 3102 with_intel_runtime_pm(runtime_pm, wakeref) 3103 __guc_context_sched_disable(guc, ce, guc_id); 3104 3105 return; 3106 unpin: 3107 intel_context_sched_disable_unpin(ce); 3108 } 3109 3110 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3111 { 3112 struct intel_guc *guc = ce_to_guc(ce); 3113 struct intel_gt *gt = guc_to_gt(guc); 3114 unsigned long flags; 3115 bool disabled; 3116 3117 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3118 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3119 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3120 GEM_BUG_ON(context_enabled(ce)); 3121 3122 /* Seal race with Reset */ 3123 spin_lock_irqsave(&ce->guc_state.lock, flags); 3124 disabled = submission_disabled(guc); 3125 if (likely(!disabled)) { 3126 __intel_gt_pm_get(gt); 3127 set_context_destroyed(ce); 3128 clr_context_registered(ce); 3129 } 3130 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3131 if (unlikely(disabled)) { 3132 release_guc_id(guc, ce); 3133 __guc_context_destroy(ce); 3134 return; 3135 } 3136 3137 deregister_context(ce, ce->guc_id.id); 3138 } 3139 3140 static void __guc_context_destroy(struct intel_context *ce) 3141 { 3142 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3143 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3144 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3145 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3146 GEM_BUG_ON(ce->guc_state.number_committed_requests); 3147 3148 lrc_fini(ce); 3149 intel_context_fini(ce); 3150 3151 if (intel_engine_is_virtual(ce->engine)) { 3152 struct guc_virtual_engine *ve = 3153 container_of(ce, typeof(*ve), context); 3154 3155 if (ve->base.breadcrumbs) 3156 intel_breadcrumbs_put(ve->base.breadcrumbs); 3157 3158 kfree(ve); 3159 } else { 3160 intel_context_free(ce); 3161 } 3162 } 3163 3164 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3165 { 3166 struct intel_context *ce; 3167 unsigned long flags; 3168 3169 GEM_BUG_ON(!submission_disabled(guc) && 3170 guc_submission_initialized(guc)); 3171 3172 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3173 spin_lock_irqsave(&guc->submission_state.lock, flags); 3174 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3175 struct intel_context, 3176 destroyed_link); 3177 if (ce) 3178 list_del_init(&ce->destroyed_link); 3179 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3180 3181 if (!ce) 3182 break; 3183 3184 release_guc_id(guc, ce); 3185 __guc_context_destroy(ce); 3186 } 3187 } 3188 3189 static void deregister_destroyed_contexts(struct intel_guc *guc) 3190 { 3191 struct intel_context *ce; 3192 unsigned long flags; 3193 3194 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3195 spin_lock_irqsave(&guc->submission_state.lock, flags); 3196 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3197 struct intel_context, 3198 destroyed_link); 3199 if (ce) 3200 list_del_init(&ce->destroyed_link); 3201 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3202 3203 if (!ce) 3204 break; 3205 3206 guc_lrc_desc_unpin(ce); 3207 } 3208 } 3209 3210 static void destroyed_worker_func(struct work_struct 
*w) 3211 { 3212 struct intel_guc *guc = container_of(w, struct intel_guc, 3213 submission_state.destroyed_worker); 3214 struct intel_gt *gt = guc_to_gt(guc); 3215 int tmp; 3216 3217 with_intel_gt_pm(gt, tmp) 3218 deregister_destroyed_contexts(guc); 3219 } 3220 3221 static void guc_context_destroy(struct kref *kref) 3222 { 3223 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3224 struct intel_guc *guc = ce_to_guc(ce); 3225 unsigned long flags; 3226 bool destroy; 3227 3228 /* 3229 * If the guc_id is invalid this context has been stolen and we can free 3230 * it immediately. Also can be freed immediately if the context is not 3231 * registered with the GuC or the GuC is in the middle of a reset. 3232 */ 3233 spin_lock_irqsave(&guc->submission_state.lock, flags); 3234 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3235 !ctx_id_mapped(guc, ce->guc_id.id); 3236 if (likely(!destroy)) { 3237 if (!list_empty(&ce->guc_id.link)) 3238 list_del_init(&ce->guc_id.link); 3239 list_add_tail(&ce->destroyed_link, 3240 &guc->submission_state.destroyed_contexts); 3241 } else { 3242 __release_guc_id(guc, ce); 3243 } 3244 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3245 if (unlikely(destroy)) { 3246 __guc_context_destroy(ce); 3247 return; 3248 } 3249 3250 /* 3251 * We use a worker to issue the H2G to deregister the context as we can 3252 * take the GT PM for the first time which isn't allowed from an atomic 3253 * context. 3254 */ 3255 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3256 } 3257 3258 static int guc_context_alloc(struct intel_context *ce) 3259 { 3260 return lrc_alloc(ce, ce->engine); 3261 } 3262 3263 static void __guc_context_set_prio(struct intel_guc *guc, 3264 struct intel_context *ce) 3265 { 3266 if (guc->fw.major_ver_found >= 70) { 3267 struct context_policy policy; 3268 3269 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3270 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3271 __guc_context_set_context_policies(guc, &policy, true); 3272 } else { 3273 u32 action[] = { 3274 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3275 ce->guc_id.id, 3276 ce->guc_state.prio, 3277 }; 3278 3279 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3280 } 3281 } 3282 3283 static void guc_context_set_prio(struct intel_guc *guc, 3284 struct intel_context *ce, 3285 u8 prio) 3286 { 3287 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3288 prio > GUC_CLIENT_PRIORITY_NORMAL); 3289 lockdep_assert_held(&ce->guc_state.lock); 3290 3291 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3292 !context_registered(ce)) { 3293 ce->guc_state.prio = prio; 3294 return; 3295 } 3296 3297 ce->guc_state.prio = prio; 3298 __guc_context_set_prio(guc, ce); 3299 3300 trace_intel_context_set_prio(ce); 3301 } 3302 3303 static inline u8 map_i915_prio_to_guc_prio(int prio) 3304 { 3305 if (prio == I915_PRIORITY_NORMAL) 3306 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3307 else if (prio < I915_PRIORITY_NORMAL) 3308 return GUC_CLIENT_PRIORITY_NORMAL; 3309 else if (prio < I915_PRIORITY_DISPLAY) 3310 return GUC_CLIENT_PRIORITY_HIGH; 3311 else 3312 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3313 } 3314 3315 static inline void add_context_inflight_prio(struct intel_context *ce, 3316 u8 guc_prio) 3317 { 3318 lockdep_assert_held(&ce->guc_state.lock); 3319 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3320 3321 ++ce->guc_state.prio_count[guc_prio]; 3322 3323 /* Overflow protection */ 3324 
GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3325 } 3326 3327 static inline void sub_context_inflight_prio(struct intel_context *ce, 3328 u8 guc_prio) 3329 { 3330 lockdep_assert_held(&ce->guc_state.lock); 3331 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3332 3333 /* Underflow protection */ 3334 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3335 3336 --ce->guc_state.prio_count[guc_prio]; 3337 } 3338 3339 static inline void update_context_prio(struct intel_context *ce) 3340 { 3341 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3342 int i; 3343 3344 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3345 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3346 3347 lockdep_assert_held(&ce->guc_state.lock); 3348 3349 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3350 if (ce->guc_state.prio_count[i]) { 3351 guc_context_set_prio(guc, ce, i); 3352 break; 3353 } 3354 } 3355 } 3356 3357 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3358 { 3359 /* Lower value is higher priority */ 3360 return new_guc_prio < old_guc_prio; 3361 } 3362 3363 static void add_to_context(struct i915_request *rq) 3364 { 3365 struct intel_context *ce = request_to_scheduling_context(rq); 3366 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3367 3368 GEM_BUG_ON(intel_context_is_child(ce)); 3369 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3370 3371 spin_lock(&ce->guc_state.lock); 3372 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3373 3374 if (rq->guc_prio == GUC_PRIO_INIT) { 3375 rq->guc_prio = new_guc_prio; 3376 add_context_inflight_prio(ce, rq->guc_prio); 3377 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3378 sub_context_inflight_prio(ce, rq->guc_prio); 3379 rq->guc_prio = new_guc_prio; 3380 add_context_inflight_prio(ce, rq->guc_prio); 3381 } 3382 update_context_prio(ce); 3383 3384 spin_unlock(&ce->guc_state.lock); 3385 } 3386 3387 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3388 { 3389 lockdep_assert_held(&ce->guc_state.lock); 3390 3391 if (rq->guc_prio != GUC_PRIO_INIT && 3392 rq->guc_prio != GUC_PRIO_FINI) { 3393 sub_context_inflight_prio(ce, rq->guc_prio); 3394 update_context_prio(ce); 3395 } 3396 rq->guc_prio = GUC_PRIO_FINI; 3397 } 3398 3399 static void remove_from_context(struct i915_request *rq) 3400 { 3401 struct intel_context *ce = request_to_scheduling_context(rq); 3402 3403 GEM_BUG_ON(intel_context_is_child(ce)); 3404 3405 spin_lock_irq(&ce->guc_state.lock); 3406 3407 list_del_init(&rq->sched.link); 3408 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3409 3410 /* Prevent further __await_execution() registering a cb, then flush */ 3411 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3412 3413 guc_prio_fini(rq, ce); 3414 3415 decr_context_committed_requests(ce); 3416 3417 spin_unlock_irq(&ce->guc_state.lock); 3418 3419 atomic_dec(&ce->guc_id.ref); 3420 i915_request_notify_execute_cb_imm(rq); 3421 } 3422 3423 static const struct intel_context_ops guc_context_ops = { 3424 .alloc = guc_context_alloc, 3425 3426 .pre_pin = guc_context_pre_pin, 3427 .pin = guc_context_pin, 3428 .unpin = guc_context_unpin, 3429 .post_unpin = guc_context_post_unpin, 3430 3431 .ban = guc_context_ban, 3432 3433 .cancel_request = guc_context_cancel_request, 3434 3435 .enter = intel_context_enter_engine, 3436 .exit = intel_context_exit_engine, 3437 3438 .sched_disable = guc_context_sched_disable, 3439 3440 .reset = lrc_reset, 3441 .destroy = guc_context_destroy, 3442 3443 
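	/* Virtual engine and parallel (multi-lrc) context creation */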
.create_virtual = guc_create_virtual, 3444 .create_parallel = guc_create_parallel, 3445 }; 3446 3447 static void submit_work_cb(struct irq_work *wrk) 3448 { 3449 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3450 3451 might_lock(&rq->engine->sched_engine->lock); 3452 i915_sw_fence_complete(&rq->submit); 3453 } 3454 3455 static void __guc_signal_context_fence(struct intel_context *ce) 3456 { 3457 struct i915_request *rq, *rn; 3458 3459 lockdep_assert_held(&ce->guc_state.lock); 3460 3461 if (!list_empty(&ce->guc_state.fences)) 3462 trace_intel_context_fence_release(ce); 3463 3464 /* 3465 * Use an IRQ to ensure locking order of sched_engine->lock -> 3466 * ce->guc_state.lock is preserved. 3467 */ 3468 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3469 guc_fence_link) { 3470 list_del(&rq->guc_fence_link); 3471 irq_work_queue(&rq->submit_work); 3472 } 3473 3474 INIT_LIST_HEAD(&ce->guc_state.fences); 3475 } 3476 3477 static void guc_signal_context_fence(struct intel_context *ce) 3478 { 3479 unsigned long flags; 3480 3481 GEM_BUG_ON(intel_context_is_child(ce)); 3482 3483 spin_lock_irqsave(&ce->guc_state.lock, flags); 3484 clr_context_wait_for_deregister_to_register(ce); 3485 __guc_signal_context_fence(ce); 3486 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3487 } 3488 3489 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3490 { 3491 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3492 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3493 !submission_disabled(ce_to_guc(ce)); 3494 } 3495 3496 static void guc_context_init(struct intel_context *ce) 3497 { 3498 const struct i915_gem_context *ctx; 3499 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3500 3501 rcu_read_lock(); 3502 ctx = rcu_dereference(ce->gem_context); 3503 if (ctx) 3504 prio = ctx->sched.priority; 3505 rcu_read_unlock(); 3506 3507 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3508 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3509 } 3510 3511 static int guc_request_alloc(struct i915_request *rq) 3512 { 3513 struct intel_context *ce = request_to_scheduling_context(rq); 3514 struct intel_guc *guc = ce_to_guc(ce); 3515 unsigned long flags; 3516 int ret; 3517 3518 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3519 3520 /* 3521 * Flush enough space to reduce the likelihood of waiting after 3522 * we start building the request - in which case we will just 3523 * have to repeat work. 3524 */ 3525 rq->reserved_space += GUC_REQUEST_SIZE; 3526 3527 /* 3528 * Note that after this point, we have committed to using 3529 * this request as it is being used to both track the 3530 * state of engine initialisation and liveness of the 3531 * golden renderstate above. Think twice before you try 3532 * to cancel/unwind this request now. 3533 */ 3534 3535 /* Unconditionally invalidate GPU caches and TLBs. */ 3536 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3537 if (ret) 3538 return ret; 3539 3540 rq->reserved_space -= GUC_REQUEST_SIZE; 3541 3542 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3543 guc_context_init(ce); 3544 3545 /* 3546 * Call pin_guc_id here rather than in the pinning step as with 3547 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3548 * guc_id and creating horrible race conditions. This is especially bad 3549 * when guc_id are being stolen due to over subscription. By the time 3550 * this function is reached, it is guaranteed that the guc_id will be 3551 * persistent until the generated request is retired. 
Thus, sealing these 3552 * race conditions. It is still safe to fail here if guc_id are 3553 * exhausted and return -EAGAIN to the user indicating that they can try 3554 * again in the future. 3555 * 3556 * There is no need for a lock here as the timeline mutex ensures at 3557 * most one context can be executing this code path at once. The 3558 * guc_id_ref is incremented once for every request in flight and 3559 * decremented on each retire. When it is zero, a lock around the 3560 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3561 */ 3562 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3563 goto out; 3564 3565 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3566 if (unlikely(ret < 0)) 3567 return ret; 3568 if (context_needs_register(ce, !!ret)) { 3569 ret = try_context_registration(ce, true); 3570 if (unlikely(ret)) { /* unwind */ 3571 if (ret == -EPIPE) { 3572 disable_submission(guc); 3573 goto out; /* GPU will be reset */ 3574 } 3575 atomic_dec(&ce->guc_id.ref); 3576 unpin_guc_id(guc, ce); 3577 return ret; 3578 } 3579 } 3580 3581 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3582 3583 out: 3584 /* 3585 * We block all requests on this context if a G2H is pending for a 3586 * schedule disable or context deregistration as the GuC will fail a 3587 * schedule enable or context registration if either G2H is pending 3588 * respectfully. Once a G2H returns, the fence is released that is 3589 * blocking these requests (see guc_signal_context_fence). 3590 */ 3591 spin_lock_irqsave(&ce->guc_state.lock, flags); 3592 if (context_wait_for_deregister_to_register(ce) || 3593 context_pending_disable(ce)) { 3594 init_irq_work(&rq->submit_work, submit_work_cb); 3595 i915_sw_fence_await(&rq->submit); 3596 3597 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3598 } 3599 incr_context_committed_requests(ce); 3600 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3601 3602 return 0; 3603 } 3604 3605 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3606 struct i915_gem_ww_ctx *ww, 3607 void **vaddr) 3608 { 3609 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3610 3611 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3612 } 3613 3614 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3615 { 3616 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3617 int ret = __guc_context_pin(ce, engine, vaddr); 3618 intel_engine_mask_t tmp, mask = ce->engine->mask; 3619 3620 if (likely(!ret)) 3621 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3622 intel_engine_pm_get(engine); 3623 3624 return ret; 3625 } 3626 3627 static void guc_virtual_context_unpin(struct intel_context *ce) 3628 { 3629 intel_engine_mask_t tmp, mask = ce->engine->mask; 3630 struct intel_engine_cs *engine; 3631 struct intel_guc *guc = ce_to_guc(ce); 3632 3633 GEM_BUG_ON(context_enabled(ce)); 3634 GEM_BUG_ON(intel_context_is_barrier(ce)); 3635 3636 unpin_guc_id(guc, ce); 3637 lrc_unpin(ce); 3638 3639 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3640 intel_engine_pm_put_async(engine); 3641 } 3642 3643 static void guc_virtual_context_enter(struct intel_context *ce) 3644 { 3645 intel_engine_mask_t tmp, mask = ce->engine->mask; 3646 struct intel_engine_cs *engine; 3647 3648 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3649 intel_engine_pm_get(engine); 3650 3651 intel_timeline_enter(ce->timeline); 3652 } 3653 3654 static void guc_virtual_context_exit(struct intel_context *ce) 3655 { 3656 
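	/*
	 * Mirror guc_virtual_context_enter(): drop the PM reference taken on
	 * each physical engine backing this virtual engine and exit the
	 * timeline.
	 */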
intel_engine_mask_t tmp, mask = ce->engine->mask; 3657 struct intel_engine_cs *engine; 3658 3659 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3660 intel_engine_pm_put(engine); 3661 3662 intel_timeline_exit(ce->timeline); 3663 } 3664 3665 static int guc_virtual_context_alloc(struct intel_context *ce) 3666 { 3667 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3668 3669 return lrc_alloc(ce, engine); 3670 } 3671 3672 static const struct intel_context_ops virtual_guc_context_ops = { 3673 .alloc = guc_virtual_context_alloc, 3674 3675 .pre_pin = guc_virtual_context_pre_pin, 3676 .pin = guc_virtual_context_pin, 3677 .unpin = guc_virtual_context_unpin, 3678 .post_unpin = guc_context_post_unpin, 3679 3680 .ban = guc_context_ban, 3681 3682 .cancel_request = guc_context_cancel_request, 3683 3684 .enter = guc_virtual_context_enter, 3685 .exit = guc_virtual_context_exit, 3686 3687 .sched_disable = guc_context_sched_disable, 3688 3689 .destroy = guc_context_destroy, 3690 3691 .get_sibling = guc_virtual_get_sibling, 3692 }; 3693 3694 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3695 { 3696 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3697 struct intel_guc *guc = ce_to_guc(ce); 3698 int ret; 3699 3700 GEM_BUG_ON(!intel_context_is_parent(ce)); 3701 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3702 3703 ret = pin_guc_id(guc, ce); 3704 if (unlikely(ret < 0)) 3705 return ret; 3706 3707 return __guc_context_pin(ce, engine, vaddr); 3708 } 3709 3710 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3711 { 3712 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3713 3714 GEM_BUG_ON(!intel_context_is_child(ce)); 3715 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3716 3717 __intel_context_pin(ce->parallel.parent); 3718 return __guc_context_pin(ce, engine, vaddr); 3719 } 3720 3721 static void guc_parent_context_unpin(struct intel_context *ce) 3722 { 3723 struct intel_guc *guc = ce_to_guc(ce); 3724 3725 GEM_BUG_ON(context_enabled(ce)); 3726 GEM_BUG_ON(intel_context_is_barrier(ce)); 3727 GEM_BUG_ON(!intel_context_is_parent(ce)); 3728 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3729 3730 unpin_guc_id(guc, ce); 3731 lrc_unpin(ce); 3732 } 3733 3734 static void guc_child_context_unpin(struct intel_context *ce) 3735 { 3736 GEM_BUG_ON(context_enabled(ce)); 3737 GEM_BUG_ON(intel_context_is_barrier(ce)); 3738 GEM_BUG_ON(!intel_context_is_child(ce)); 3739 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3740 3741 lrc_unpin(ce); 3742 } 3743 3744 static void guc_child_context_post_unpin(struct intel_context *ce) 3745 { 3746 GEM_BUG_ON(!intel_context_is_child(ce)); 3747 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3748 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3749 3750 lrc_post_unpin(ce); 3751 intel_context_unpin(ce->parallel.parent); 3752 } 3753 3754 static void guc_child_context_destroy(struct kref *kref) 3755 { 3756 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3757 3758 __guc_context_destroy(ce); 3759 } 3760 3761 static const struct intel_context_ops virtual_parent_context_ops = { 3762 .alloc = guc_virtual_context_alloc, 3763 3764 .pre_pin = guc_context_pre_pin, 3765 .pin = guc_parent_context_pin, 3766 .unpin = guc_parent_context_unpin, 3767 .post_unpin = guc_context_post_unpin, 3768 3769 .ban = guc_context_ban, 3770 3771 .cancel_request = guc_context_cancel_request, 3772 3773 .enter = guc_virtual_context_enter, 3774 .exit = 
guc_virtual_context_exit, 3775 3776 .sched_disable = guc_context_sched_disable, 3777 3778 .destroy = guc_context_destroy, 3779 3780 .get_sibling = guc_virtual_get_sibling, 3781 }; 3782 3783 static const struct intel_context_ops virtual_child_context_ops = { 3784 .alloc = guc_virtual_context_alloc, 3785 3786 .pre_pin = guc_context_pre_pin, 3787 .pin = guc_child_context_pin, 3788 .unpin = guc_child_context_unpin, 3789 .post_unpin = guc_child_context_post_unpin, 3790 3791 .cancel_request = guc_context_cancel_request, 3792 3793 .enter = guc_virtual_context_enter, 3794 .exit = guc_virtual_context_exit, 3795 3796 .destroy = guc_child_context_destroy, 3797 3798 .get_sibling = guc_virtual_get_sibling, 3799 }; 3800 3801 /* 3802 * The below override of the breadcrumbs is enabled when the user configures a 3803 * context for parallel submission (multi-lrc, parent-child). 3804 * 3805 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3806 * safely preempt all the hw contexts configured for parallel submission 3807 * between each BB. The contract between the i915 and GuC is if the parent 3808 * context can be preempted, all the children can be preempted, and the GuC will 3809 * always try to preempt the parent before the children. A handshake between the 3810 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3811 * creating a window to preempt between each set of BBs. 3812 */ 3813 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3814 u64 offset, u32 len, 3815 const unsigned int flags); 3816 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3817 u64 offset, u32 len, 3818 const unsigned int flags); 3819 static u32 * 3820 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3821 u32 *cs); 3822 static u32 * 3823 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3824 u32 *cs); 3825 3826 static struct intel_context * 3827 guc_create_parallel(struct intel_engine_cs **engines, 3828 unsigned int num_siblings, 3829 unsigned int width) 3830 { 3831 struct intel_engine_cs **siblings = NULL; 3832 struct intel_context *parent = NULL, *ce, *err; 3833 int i, j; 3834 3835 siblings = kmalloc_array(num_siblings, 3836 sizeof(*siblings), 3837 GFP_KERNEL); 3838 if (!siblings) 3839 return ERR_PTR(-ENOMEM); 3840 3841 for (i = 0; i < width; ++i) { 3842 for (j = 0; j < num_siblings; ++j) 3843 siblings[j] = engines[i * num_siblings + j]; 3844 3845 ce = intel_engine_create_virtual(siblings, num_siblings, 3846 FORCE_VIRTUAL); 3847 if (IS_ERR(ce)) { 3848 err = ERR_CAST(ce); 3849 goto unwind; 3850 } 3851 3852 if (i == 0) { 3853 parent = ce; 3854 parent->ops = &virtual_parent_context_ops; 3855 } else { 3856 ce->ops = &virtual_child_context_ops; 3857 intel_context_bind_parent_child(parent, ce); 3858 } 3859 } 3860 3861 parent->parallel.fence_context = dma_fence_context_alloc(1); 3862 3863 parent->engine->emit_bb_start = 3864 emit_bb_start_parent_no_preempt_mid_batch; 3865 parent->engine->emit_fini_breadcrumb = 3866 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3867 parent->engine->emit_fini_breadcrumb_dw = 3868 12 + 4 * parent->parallel.number_children; 3869 for_each_child(parent, ce) { 3870 ce->engine->emit_bb_start = 3871 emit_bb_start_child_no_preempt_mid_batch; 3872 ce->engine->emit_fini_breadcrumb = 3873 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3874 ce->engine->emit_fini_breadcrumb_dw = 16; 3875 } 3876 3877 kfree(siblings); 3878 return parent; 3879 3880 unwind: 3881 if 
(parent) 3882 intel_context_put(parent); 3883 kfree(siblings); 3884 return err; 3885 } 3886 3887 static bool 3888 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3889 { 3890 struct intel_engine_cs *sibling; 3891 intel_engine_mask_t tmp, mask = b->engine_mask; 3892 bool result = false; 3893 3894 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3895 result |= intel_engine_irq_enable(sibling); 3896 3897 return result; 3898 } 3899 3900 static void 3901 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3902 { 3903 struct intel_engine_cs *sibling; 3904 intel_engine_mask_t tmp, mask = b->engine_mask; 3905 3906 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3907 intel_engine_irq_disable(sibling); 3908 } 3909 3910 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3911 { 3912 int i; 3913 3914 /* 3915 * In GuC submission mode we do not know which physical engine a request 3916 * will be scheduled on, this creates a problem because the breadcrumb 3917 * interrupt is per physical engine. To work around this we attach 3918 * requests and direct all breadcrumb interrupts to the first instance 3919 * of an engine per class. In addition all breadcrumb interrupts are 3920 * enabled / disabled across an engine class in unison. 3921 */ 3922 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3923 struct intel_engine_cs *sibling = 3924 engine->gt->engine_class[engine->class][i]; 3925 3926 if (sibling) { 3927 if (engine->breadcrumbs != sibling->breadcrumbs) { 3928 intel_breadcrumbs_put(engine->breadcrumbs); 3929 engine->breadcrumbs = 3930 intel_breadcrumbs_get(sibling->breadcrumbs); 3931 } 3932 break; 3933 } 3934 } 3935 3936 if (engine->breadcrumbs) { 3937 engine->breadcrumbs->engine_mask |= engine->mask; 3938 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3939 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3940 } 3941 } 3942 3943 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3944 int prio) 3945 { 3946 struct intel_context *ce = request_to_scheduling_context(rq); 3947 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3948 3949 /* Short circuit function */ 3950 if (prio < I915_PRIORITY_NORMAL || 3951 rq->guc_prio == GUC_PRIO_FINI || 3952 (rq->guc_prio != GUC_PRIO_INIT && 3953 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3954 return; 3955 3956 spin_lock(&ce->guc_state.lock); 3957 if (rq->guc_prio != GUC_PRIO_FINI) { 3958 if (rq->guc_prio != GUC_PRIO_INIT) 3959 sub_context_inflight_prio(ce, rq->guc_prio); 3960 rq->guc_prio = new_guc_prio; 3961 add_context_inflight_prio(ce, rq->guc_prio); 3962 update_context_prio(ce); 3963 } 3964 spin_unlock(&ce->guc_state.lock); 3965 } 3966 3967 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3968 { 3969 struct intel_context *ce = request_to_scheduling_context(rq); 3970 3971 spin_lock(&ce->guc_state.lock); 3972 guc_prio_fini(rq, ce); 3973 spin_unlock(&ce->guc_state.lock); 3974 } 3975 3976 static void sanitize_hwsp(struct intel_engine_cs *engine) 3977 { 3978 struct intel_timeline *tl; 3979 3980 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3981 intel_timeline_reset_seqno(tl); 3982 } 3983 3984 static void guc_sanitize(struct intel_engine_cs *engine) 3985 { 3986 /* 3987 * Poison residual state on resume, in case the suspend didn't! 3988 * 3989 * We have to assume that across suspend/resume (or other loss 3990 * of control) that the contents of our pinned buffers has been 3991 * lost, replaced by garbage. 
Since this doesn't always happen, 3992 * let's poison such state so that we more quickly spot when 3993 * we falsely assume it has been preserved. 3994 */ 3995 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3996 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3997 3998 /* 3999 * The kernel_context HWSP is stored in the status_page. As above, 4000 * that may be lost on resume/initialisation, and so we need to 4001 * reset the value in the HWSP. 4002 */ 4003 sanitize_hwsp(engine); 4004 4005 /* And scrub the dirty cachelines for the HWSP */ 4006 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4007 4008 intel_engine_reset_pinned_contexts(engine); 4009 } 4010 4011 static void setup_hwsp(struct intel_engine_cs *engine) 4012 { 4013 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4014 4015 ENGINE_WRITE_FW(engine, 4016 RING_HWS_PGA, 4017 i915_ggtt_offset(engine->status_page.vma)); 4018 } 4019 4020 static void start_engine(struct intel_engine_cs *engine) 4021 { 4022 ENGINE_WRITE_FW(engine, 4023 RING_MODE_GEN7, 4024 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4025 4026 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4027 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4028 } 4029 4030 static int guc_resume(struct intel_engine_cs *engine) 4031 { 4032 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4033 4034 intel_mocs_init_engine(engine); 4035 4036 intel_breadcrumbs_reset(engine->breadcrumbs); 4037 4038 setup_hwsp(engine); 4039 start_engine(engine); 4040 4041 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4042 xehp_enable_ccs_engines(engine); 4043 4044 return 0; 4045 } 4046 4047 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4048 { 4049 return !sched_engine->tasklet.callback; 4050 } 4051 4052 static void guc_set_default_submission(struct intel_engine_cs *engine) 4053 { 4054 engine->submit_request = guc_submit_request; 4055 } 4056 4057 static inline void guc_kernel_context_pin(struct intel_guc *guc, 4058 struct intel_context *ce) 4059 { 4060 /* 4061 * Note: we purposefully do not check the returns below because 4062 * the registration can only fail if a reset is just starting. 4063 * This is called at the end of reset so presumably another reset 4064 * isn't happening and even if it did this code would be run again. 4065 */ 4066 4067 if (context_guc_id_invalid(ce)) 4068 pin_guc_id(guc, ce); 4069 4070 try_context_registration(ce, true); 4071 } 4072 4073 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 4074 { 4075 struct intel_gt *gt = guc_to_gt(guc); 4076 struct intel_engine_cs *engine; 4077 enum intel_engine_id id; 4078 4079 /* make sure all descriptors are clean... */ 4080 xa_destroy(&guc->context_lookup); 4081 4082 /* 4083 * Some contexts might have been pinned before we enabled GuC 4084 * submission, so we need to add them to the GuC bookkeeping. 4085 * Also, after a reset of the GuC we want to make sure that the 4086 * information shared with GuC is properly reset. The kernel LRCs are 4087 * not attached to the gem_context, so they need to be added separately.
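 * For example, each engine's kernel_context sits on engine->pinned_contexts_list and is re-registered with the GuC by the loop below.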
4088 */ 4089 for_each_engine(engine, gt, id) { 4090 struct intel_context *ce; 4091 4092 list_for_each_entry(ce, &engine->pinned_contexts_list, 4093 pinned_contexts_link) 4094 guc_kernel_context_pin(guc, ce); 4095 } 4096 } 4097 4098 static void guc_release(struct intel_engine_cs *engine) 4099 { 4100 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4101 4102 intel_engine_cleanup_common(engine); 4103 lrc_fini_wa_ctx(engine); 4104 } 4105 4106 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4107 { 4108 struct intel_engine_cs *e; 4109 intel_engine_mask_t tmp, mask = engine->mask; 4110 4111 for_each_engine_masked(e, engine->gt, mask, tmp) 4112 e->serial++; 4113 } 4114 4115 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4116 { 4117 /* Default vfuncs which can be overridden by each engine. */ 4118 4119 engine->resume = guc_resume; 4120 4121 engine->cops = &guc_context_ops; 4122 engine->request_alloc = guc_request_alloc; 4123 engine->add_active_request = add_to_context; 4124 engine->remove_active_request = remove_from_context; 4125 4126 engine->sched_engine->schedule = i915_schedule; 4127 4128 engine->reset.prepare = guc_engine_reset_prepare; 4129 engine->reset.rewind = guc_rewind_nop; 4130 engine->reset.cancel = guc_reset_nop; 4131 engine->reset.finish = guc_reset_nop; 4132 4133 engine->emit_flush = gen8_emit_flush_xcs; 4134 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4135 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4136 if (GRAPHICS_VER(engine->i915) >= 12) { 4137 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4138 engine->emit_flush = gen12_emit_flush_xcs; 4139 } 4140 engine->set_default_submission = guc_set_default_submission; 4141 engine->busyness = guc_engine_busyness; 4142 4143 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4144 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4145 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4146 4147 /* Wa_14014475959:dg2 */ 4148 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 4149 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4150 4151 /* 4152 * TODO: GuC supports timeslicing and semaphores as well, but they're 4153 * handled by the firmware so some minor tweaks are required before 4154 * enabling. 
4155 * 4156 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4157 */ 4158 4159 engine->emit_bb_start = gen8_emit_bb_start; 4160 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4161 engine->emit_bb_start = gen125_emit_bb_start; 4162 } 4163 4164 static void rcs_submission_override(struct intel_engine_cs *engine) 4165 { 4166 switch (GRAPHICS_VER(engine->i915)) { 4167 case 12: 4168 engine->emit_flush = gen12_emit_flush_rcs; 4169 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4170 break; 4171 case 11: 4172 engine->emit_flush = gen11_emit_flush_rcs; 4173 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4174 break; 4175 default: 4176 engine->emit_flush = gen8_emit_flush_rcs; 4177 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4178 break; 4179 } 4180 } 4181 4182 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4183 { 4184 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4185 intel_engine_set_irq_handler(engine, cs_irq_handler); 4186 } 4187 4188 static void guc_sched_engine_destroy(struct kref *kref) 4189 { 4190 struct i915_sched_engine *sched_engine = 4191 container_of(kref, typeof(*sched_engine), ref); 4192 struct intel_guc *guc = sched_engine->private_data; 4193 4194 guc->sched_engine = NULL; 4195 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4196 kfree(sched_engine); 4197 } 4198 4199 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4200 { 4201 struct drm_i915_private *i915 = engine->i915; 4202 struct intel_guc *guc = &engine->gt->uc.guc; 4203 4204 /* 4205 * The setup relies on several assumptions (e.g. irqs always enabled) 4206 * that are only valid on gen11+ 4207 */ 4208 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4209 4210 if (!guc->sched_engine) { 4211 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4212 if (!guc->sched_engine) 4213 return -ENOMEM; 4214 4215 guc->sched_engine->schedule = i915_schedule; 4216 guc->sched_engine->disabled = guc_sched_engine_disabled; 4217 guc->sched_engine->private_data = guc; 4218 guc->sched_engine->destroy = guc_sched_engine_destroy; 4219 guc->sched_engine->bump_inflight_request_prio = 4220 guc_bump_inflight_request_prio; 4221 guc->sched_engine->retire_inflight_request_prio = 4222 guc_retire_inflight_request_prio; 4223 tasklet_setup(&guc->sched_engine->tasklet, 4224 guc_submission_tasklet); 4225 } 4226 i915_sched_engine_put(engine->sched_engine); 4227 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4228 4229 guc_default_vfuncs(engine); 4230 guc_default_irqs(engine); 4231 guc_init_breadcrumbs(engine); 4232 4233 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4234 rcs_submission_override(engine); 4235 4236 lrc_init_wa_ctx(engine); 4237 4238 /* Finally, take ownership and responsibility for cleanup! 
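 * These hooks are installed last, after every step that can fail, so an early error return above leaves the engine's sanitize/release hooks untouched.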
*/ 4239 engine->sanitize = guc_sanitize; 4240 engine->release = guc_release; 4241 4242 return 0; 4243 } 4244 4245 void intel_guc_submission_enable(struct intel_guc *guc) 4246 { 4247 guc_init_lrc_mapping(guc); 4248 guc_init_engine_stats(guc); 4249 } 4250 4251 void intel_guc_submission_disable(struct intel_guc *guc) 4252 { 4253 /* Note: By the time we're here, GuC may have already been reset */ 4254 } 4255 4256 static bool __guc_submission_supported(struct intel_guc *guc) 4257 { 4258 /* GuC submission is unavailable for pre-Gen11 */ 4259 return intel_guc_is_supported(guc) && 4260 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4261 } 4262 4263 static bool __guc_submission_selected(struct intel_guc *guc) 4264 { 4265 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4266 4267 if (!intel_guc_submission_is_supported(guc)) 4268 return false; 4269 4270 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4271 } 4272 4273 void intel_guc_submission_init_early(struct intel_guc *guc) 4274 { 4275 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4276 4277 spin_lock_init(&guc->submission_state.lock); 4278 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4279 ida_init(&guc->submission_state.guc_ids); 4280 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4281 INIT_WORK(&guc->submission_state.destroyed_worker, 4282 destroyed_worker_func); 4283 INIT_WORK(&guc->submission_state.reset_fail_worker, 4284 reset_fail_worker_func); 4285 4286 spin_lock_init(&guc->timestamp.lock); 4287 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4288 4289 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4290 guc->submission_supported = __guc_submission_supported(guc); 4291 guc->submission_selected = __guc_submission_selected(guc); 4292 } 4293 4294 static inline struct intel_context * 4295 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4296 { 4297 struct intel_context *ce; 4298 4299 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4300 drm_err(&guc_to_gt(guc)->i915->drm, 4301 "Invalid ctx_id %u\n", ctx_id); 4302 return NULL; 4303 } 4304 4305 ce = __get_context(guc, ctx_id); 4306 if (unlikely(!ce)) { 4307 drm_err(&guc_to_gt(guc)->i915->drm, 4308 "Context is NULL, ctx_id %u\n", ctx_id); 4309 return NULL; 4310 } 4311 4312 if (unlikely(intel_context_is_child(ce))) { 4313 drm_err(&guc_to_gt(guc)->i915->drm, 4314 "Context is child, ctx_id %u\n", ctx_id); 4315 return NULL; 4316 } 4317 4318 return ce; 4319 } 4320 4321 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4322 const u32 *msg, 4323 u32 len) 4324 { 4325 struct intel_context *ce; 4326 u32 ctx_id; 4327 4328 if (unlikely(len < 1)) { 4329 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4330 return -EPROTO; 4331 } 4332 ctx_id = msg[0]; 4333 4334 ce = g2h_context_lookup(guc, ctx_id); 4335 if (unlikely(!ce)) 4336 return -EPROTO; 4337 4338 trace_intel_context_deregister_done(ce); 4339 4340 #ifdef CONFIG_DRM_I915_SELFTEST 4341 if (unlikely(ce->drop_deregister)) { 4342 ce->drop_deregister = false; 4343 return 0; 4344 } 4345 #endif 4346 4347 if (context_wait_for_deregister_to_register(ce)) { 4348 struct intel_runtime_pm *runtime_pm = 4349 &ce->engine->gt->i915->runtime_pm; 4350 intel_wakeref_t wakeref; 4351 4352 /* 4353 * Previous owner of this guc_id has been deregistered, now safe to 4354 * register this context.
4355 */ 4356 with_intel_runtime_pm(runtime_pm, wakeref) 4357 register_context(ce, true); 4358 guc_signal_context_fence(ce); 4359 intel_context_put(ce); 4360 } else if (context_destroyed(ce)) { 4361 /* Context has been destroyed */ 4362 intel_gt_pm_put_async(guc_to_gt(guc)); 4363 release_guc_id(guc, ce); 4364 __guc_context_destroy(ce); 4365 } 4366 4367 decr_outstanding_submission_g2h(guc); 4368 4369 return 0; 4370 } 4371 4372 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4373 const u32 *msg, 4374 u32 len) 4375 { 4376 struct intel_context *ce; 4377 unsigned long flags; 4378 u32 ctx_id; 4379 4380 if (unlikely(len < 2)) { 4381 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4382 return -EPROTO; 4383 } 4384 ctx_id = msg[0]; 4385 4386 ce = g2h_context_lookup(guc, ctx_id); 4387 if (unlikely(!ce)) 4388 return -EPROTO; 4389 4390 if (unlikely(context_destroyed(ce) || 4391 (!context_pending_enable(ce) && 4392 !context_pending_disable(ce)))) { 4393 drm_err(&guc_to_gt(guc)->i915->drm, 4394 "Bad context sched_state 0x%x, ctx_id %u\n", 4395 ce->guc_state.sched_state, ctx_id); 4396 return -EPROTO; 4397 } 4398 4399 trace_intel_context_sched_done(ce); 4400 4401 if (context_pending_enable(ce)) { 4402 #ifdef CONFIG_DRM_I915_SELFTEST 4403 if (unlikely(ce->drop_schedule_enable)) { 4404 ce->drop_schedule_enable = false; 4405 return 0; 4406 } 4407 #endif 4408 4409 spin_lock_irqsave(&ce->guc_state.lock, flags); 4410 clr_context_pending_enable(ce); 4411 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4412 } else if (context_pending_disable(ce)) { 4413 bool banned; 4414 4415 #ifdef CONFIG_DRM_I915_SELFTEST 4416 if (unlikely(ce->drop_schedule_disable)) { 4417 ce->drop_schedule_disable = false; 4418 return 0; 4419 } 4420 #endif 4421 4422 /* 4423 * Unpin must be done before __guc_signal_context_fence, 4424 * otherwise a race exists between the requests getting 4425 * submitted + retired before this unpin completes resulting in 4426 * the pin_count going to zero and the context still being 4427 * enabled. 
4428 */ 4429 intel_context_sched_disable_unpin(ce); 4430 4431 spin_lock_irqsave(&ce->guc_state.lock, flags); 4432 banned = context_banned(ce); 4433 clr_context_banned(ce); 4434 clr_context_pending_disable(ce); 4435 __guc_signal_context_fence(ce); 4436 guc_blocked_fence_complete(ce); 4437 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4438 4439 if (banned) { 4440 guc_cancel_context_requests(ce); 4441 intel_engine_signal_breadcrumbs(ce->engine); 4442 } 4443 } 4444 4445 decr_outstanding_submission_g2h(guc); 4446 intel_context_put(ce); 4447 4448 return 0; 4449 } 4450 4451 static void capture_error_state(struct intel_guc *guc, 4452 struct intel_context *ce) 4453 { 4454 struct intel_gt *gt = guc_to_gt(guc); 4455 struct drm_i915_private *i915 = gt->i915; 4456 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4457 intel_wakeref_t wakeref; 4458 4459 intel_engine_set_hung_context(engine, ce); 4460 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4461 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4462 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4463 } 4464 4465 static void guc_context_replay(struct intel_context *ce) 4466 { 4467 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4468 4469 __guc_reset_context(ce, ce->engine->mask); 4470 tasklet_hi_schedule(&sched_engine->tasklet); 4471 } 4472 4473 static void guc_handle_context_reset(struct intel_guc *guc, 4474 struct intel_context *ce) 4475 { 4476 trace_intel_context_reset(ce); 4477 4478 if (likely(!intel_context_is_banned(ce))) { 4479 capture_error_state(guc, ce); 4480 guc_context_replay(ce); 4481 } else { 4482 drm_info(&guc_to_gt(guc)->i915->drm, 4483 "Ignoring context reset notification of banned context 0x%04X on %s", 4484 ce->guc_id.id, ce->engine->name); 4485 } 4486 } 4487 4488 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4489 const u32 *msg, u32 len) 4490 { 4491 struct intel_context *ce; 4492 unsigned long flags; 4493 int ctx_id; 4494 4495 if (unlikely(len != 1)) { 4496 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4497 return -EPROTO; 4498 } 4499 4500 ctx_id = msg[0]; 4501 4502 /* 4503 * The context lookup uses the xarray but lookups only require an RCU lock 4504 * not the full spinlock. So take the lock explicitly and keep it until the 4505 * context has been reference count locked to ensure it can't be destroyed 4506 * asynchronously until the reset is done. 
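 * The same pattern is used by intel_guc_find_hung_context() and intel_guc_dump_active_requests() below: take the xa_lock, grab a reference to the context under the lock, then drop the lock before doing the real work.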
4507 */ 4508 xa_lock_irqsave(&guc->context_lookup, flags); 4509 ce = g2h_context_lookup(guc, ctx_id); 4510 if (ce) 4511 intel_context_get(ce); 4512 xa_unlock_irqrestore(&guc->context_lookup, flags); 4513 4514 if (unlikely(!ce)) 4515 return -EPROTO; 4516 4517 guc_handle_context_reset(guc, ce); 4518 intel_context_put(ce); 4519 4520 return 0; 4521 } 4522 4523 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4524 const u32 *msg, u32 len) 4525 { 4526 u32 status; 4527 4528 if (unlikely(len != 1)) { 4529 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4530 return -EPROTO; 4531 } 4532 4533 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4534 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4535 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4536 4537 intel_guc_capture_process(guc); 4538 4539 return 0; 4540 } 4541 4542 struct intel_engine_cs * 4543 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4544 { 4545 struct intel_gt *gt = guc_to_gt(guc); 4546 u8 engine_class = guc_class_to_engine_class(guc_class); 4547 4548 /* Class index is checked in class converter */ 4549 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4550 4551 return gt->engine_class[engine_class][instance]; 4552 } 4553 4554 static void reset_fail_worker_func(struct work_struct *w) 4555 { 4556 struct intel_guc *guc = container_of(w, struct intel_guc, 4557 submission_state.reset_fail_worker); 4558 struct intel_gt *gt = guc_to_gt(guc); 4559 intel_engine_mask_t reset_fail_mask; 4560 unsigned long flags; 4561 4562 spin_lock_irqsave(&guc->submission_state.lock, flags); 4563 reset_fail_mask = guc->submission_state.reset_fail_mask; 4564 guc->submission_state.reset_fail_mask = 0; 4565 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4566 4567 if (likely(reset_fail_mask)) 4568 intel_gt_handle_error(gt, reset_fail_mask, 4569 I915_ERROR_CAPTURE, 4570 "GuC failed to reset engine mask=0x%x\n", 4571 reset_fail_mask); 4572 } 4573 4574 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4575 const u32 *msg, u32 len) 4576 { 4577 struct intel_engine_cs *engine; 4578 struct intel_gt *gt = guc_to_gt(guc); 4579 u8 guc_class, instance; 4580 u32 reason; 4581 unsigned long flags; 4582 4583 if (unlikely(len != 3)) { 4584 drm_err(>->i915->drm, "Invalid length %u", len); 4585 return -EPROTO; 4586 } 4587 4588 guc_class = msg[0]; 4589 instance = msg[1]; 4590 reason = msg[2]; 4591 4592 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4593 if (unlikely(!engine)) { 4594 drm_err(>->i915->drm, 4595 "Invalid engine %d:%d", guc_class, instance); 4596 return -EPROTO; 4597 } 4598 4599 /* 4600 * This is an unexpected failure of a hardware feature. So, log a real 4601 * error message not just the informational that comes with the reset. 4602 */ 4603 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4604 guc_class, instance, engine->name, reason); 4605 4606 spin_lock_irqsave(&guc->submission_state.lock, flags); 4607 guc->submission_state.reset_fail_mask |= engine->mask; 4608 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4609 4610 /* 4611 * A GT reset flushes this worker queue (G2H handler) so we must use 4612 * another worker to trigger a GT reset. 
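 * reset_fail_worker_func() above collects submission_state.reset_fail_mask under the submission lock and calls intel_gt_handle_error() from the system_unbound_wq.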
4613 */ 4614 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4615 4616 return 0; 4617 } 4618 4619 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4620 { 4621 struct intel_guc *guc = &engine->gt->uc.guc; 4622 struct intel_context *ce; 4623 struct i915_request *rq; 4624 unsigned long index; 4625 unsigned long flags; 4626 4627 /* Reset called during driver load? GuC not yet initialised! */ 4628 if (unlikely(!guc_submission_initialized(guc))) 4629 return; 4630 4631 xa_lock_irqsave(&guc->context_lookup, flags); 4632 xa_for_each(&guc->context_lookup, index, ce) { 4633 if (!kref_get_unless_zero(&ce->ref)) 4634 continue; 4635 4636 xa_unlock(&guc->context_lookup); 4637 4638 if (!intel_context_is_pinned(ce)) 4639 goto next; 4640 4641 if (intel_engine_is_virtual(ce->engine)) { 4642 if (!(ce->engine->mask & engine->mask)) 4643 goto next; 4644 } else { 4645 if (ce->engine != engine) 4646 goto next; 4647 } 4648 4649 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4650 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4651 continue; 4652 4653 intel_engine_set_hung_context(engine, ce); 4654 4655 /* Can only cope with one hang at a time... */ 4656 intel_context_put(ce); 4657 xa_lock(&guc->context_lookup); 4658 goto done; 4659 } 4660 next: 4661 intel_context_put(ce); 4662 xa_lock(&guc->context_lookup); 4663 } 4664 done: 4665 xa_unlock_irqrestore(&guc->context_lookup, flags); 4666 } 4667 4668 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4669 struct i915_request *hung_rq, 4670 struct drm_printer *m) 4671 { 4672 struct intel_guc *guc = &engine->gt->uc.guc; 4673 struct intel_context *ce; 4674 unsigned long index; 4675 unsigned long flags; 4676 4677 /* Reset called during driver load? GuC not yet initialised! 
*/ 4678 if (unlikely(!guc_submission_initialized(guc))) 4679 return; 4680 4681 xa_lock_irqsave(&guc->context_lookup, flags); 4682 xa_for_each(&guc->context_lookup, index, ce) { 4683 if (!kref_get_unless_zero(&ce->ref)) 4684 continue; 4685 4686 xa_unlock(&guc->context_lookup); 4687 4688 if (!intel_context_is_pinned(ce)) 4689 goto next; 4690 4691 if (intel_engine_is_virtual(ce->engine)) { 4692 if (!(ce->engine->mask & engine->mask)) 4693 goto next; 4694 } else { 4695 if (ce->engine != engine) 4696 goto next; 4697 } 4698 4699 spin_lock(&ce->guc_state.lock); 4700 intel_engine_dump_active_requests(&ce->guc_state.requests, 4701 hung_rq, m); 4702 spin_unlock(&ce->guc_state.lock); 4703 4704 next: 4705 intel_context_put(ce); 4706 xa_lock(&guc->context_lookup); 4707 } 4708 xa_unlock_irqrestore(&guc->context_lookup, flags); 4709 } 4710 4711 void intel_guc_submission_print_info(struct intel_guc *guc, 4712 struct drm_printer *p) 4713 { 4714 struct i915_sched_engine *sched_engine = guc->sched_engine; 4715 struct rb_node *rb; 4716 unsigned long flags; 4717 4718 if (!sched_engine) 4719 return; 4720 4721 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4722 atomic_read(&guc->outstanding_submission_g2h)); 4723 drm_printf(p, "GuC tasklet count: %u\n\n", 4724 atomic_read(&sched_engine->tasklet.count)); 4725 4726 spin_lock_irqsave(&sched_engine->lock, flags); 4727 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4728 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4729 struct i915_priolist *pl = to_priolist(rb); 4730 struct i915_request *rq; 4731 4732 priolist_for_each_request(rq, pl) 4733 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4734 rq->context->guc_id.id, 4735 rq->fence.seqno); 4736 } 4737 spin_unlock_irqrestore(&sched_engine->lock, flags); 4738 drm_printf(p, "\n"); 4739 } 4740 4741 static inline void guc_log_context_priority(struct drm_printer *p, 4742 struct intel_context *ce) 4743 { 4744 int i; 4745 4746 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4747 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4748 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4749 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4750 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4751 i, ce->guc_state.prio_count[i]); 4752 } 4753 drm_printf(p, "\n"); 4754 } 4755 4756 static inline void guc_log_context(struct drm_printer *p, 4757 struct intel_context *ce) 4758 { 4759 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4760 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4761 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4762 ce->ring->head, 4763 ce->lrc_reg_state[CTX_RING_HEAD]); 4764 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4765 ce->ring->tail, 4766 ce->lrc_reg_state[CTX_RING_TAIL]); 4767 drm_printf(p, "\t\tContext Pin Count: %u\n", 4768 atomic_read(&ce->pin_count)); 4769 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4770 atomic_read(&ce->guc_id.ref)); 4771 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4772 ce->guc_state.sched_state); 4773 } 4774 4775 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4776 struct drm_printer *p) 4777 { 4778 struct intel_context *ce; 4779 unsigned long index; 4780 unsigned long flags; 4781 4782 xa_lock_irqsave(&guc->context_lookup, flags); 4783 xa_for_each(&guc->context_lookup, index, ce) { 4784 GEM_BUG_ON(intel_context_is_child(ce)); 4785 4786 guc_log_context(p, ce); 4787 guc_log_context_priority(p, ce); 4788 4789 if (intel_context_is_parent(ce)) { 4790 struct 
intel_context *child; 4791 4792 drm_printf(p, "\t\tNumber children: %u\n", 4793 ce->parallel.number_children); 4794 4795 if (ce->parallel.guc.wq_status) { 4796 drm_printf(p, "\t\tWQI Head: %u\n", 4797 READ_ONCE(*ce->parallel.guc.wq_head)); 4798 drm_printf(p, "\t\tWQI Tail: %u\n", 4799 READ_ONCE(*ce->parallel.guc.wq_tail)); 4800 drm_printf(p, "\t\tWQI Status: %u\n\n", 4801 READ_ONCE(*ce->parallel.guc.wq_status)); 4802 } 4803 4804 if (ce->engine->emit_bb_start == 4805 emit_bb_start_parent_no_preempt_mid_batch) { 4806 u8 i; 4807 4808 drm_printf(p, "\t\tChildren Go: %u\n\n", 4809 get_children_go_value(ce)); 4810 for (i = 0; i < ce->parallel.number_children; ++i) 4811 drm_printf(p, "\t\tChildren Join: %u\n", 4812 get_children_join_value(ce, i)); 4813 } 4814 4815 for_each_child(ce, child) 4816 guc_log_context(p, child); 4817 } 4818 } 4819 xa_unlock_irqrestore(&guc->context_lookup, flags); 4820 } 4821 4822 static inline u32 get_children_go_addr(struct intel_context *ce) 4823 { 4824 GEM_BUG_ON(!intel_context_is_parent(ce)); 4825 4826 return i915_ggtt_offset(ce->state) + 4827 __get_parent_scratch_offset(ce) + 4828 offsetof(struct parent_scratch, go.semaphore); 4829 } 4830 4831 static inline u32 get_children_join_addr(struct intel_context *ce, 4832 u8 child_index) 4833 { 4834 GEM_BUG_ON(!intel_context_is_parent(ce)); 4835 4836 return i915_ggtt_offset(ce->state) + 4837 __get_parent_scratch_offset(ce) + 4838 offsetof(struct parent_scratch, join[child_index].semaphore); 4839 } 4840 4841 #define PARENT_GO_BB 1 4842 #define PARENT_GO_FINI_BREADCRUMB 0 4843 #define CHILD_GO_BB 1 4844 #define CHILD_GO_FINI_BREADCRUMB 0 4845 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4846 u64 offset, u32 len, 4847 const unsigned int flags) 4848 { 4849 struct intel_context *ce = rq->context; 4850 u32 *cs; 4851 u8 i; 4852 4853 GEM_BUG_ON(!intel_context_is_parent(ce)); 4854 4855 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4856 if (IS_ERR(cs)) 4857 return PTR_ERR(cs); 4858 4859 /* Wait on children */ 4860 for (i = 0; i < ce->parallel.number_children; ++i) { 4861 *cs++ = (MI_SEMAPHORE_WAIT | 4862 MI_SEMAPHORE_GLOBAL_GTT | 4863 MI_SEMAPHORE_POLL | 4864 MI_SEMAPHORE_SAD_EQ_SDD); 4865 *cs++ = PARENT_GO_BB; 4866 *cs++ = get_children_join_addr(ce, i); 4867 *cs++ = 0; 4868 } 4869 4870 /* Turn off preemption */ 4871 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4872 *cs++ = MI_NOOP; 4873 4874 /* Tell children go */ 4875 cs = gen8_emit_ggtt_write(cs, 4876 CHILD_GO_BB, 4877 get_children_go_addr(ce), 4878 0); 4879 4880 /* Jump to batch */ 4881 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4882 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 4883 *cs++ = lower_32_bits(offset); 4884 *cs++ = upper_32_bits(offset); 4885 *cs++ = MI_NOOP; 4886 4887 intel_ring_advance(rq, cs); 4888 4889 return 0; 4890 } 4891 4892 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4893 u64 offset, u32 len, 4894 const unsigned int flags) 4895 { 4896 struct intel_context *ce = rq->context; 4897 struct intel_context *parent = intel_context_to_parent(ce); 4898 u32 *cs; 4899 4900 GEM_BUG_ON(!intel_context_is_child(ce)); 4901 4902 cs = intel_ring_begin(rq, 12); 4903 if (IS_ERR(cs)) 4904 return PTR_ERR(cs); 4905 4906 /* Signal parent */ 4907 cs = gen8_emit_ggtt_write(cs, 4908 PARENT_GO_BB, 4909 get_children_join_addr(parent, 4910 ce->parallel.child_index), 4911 0); 4912 4913 /* Wait on parent for go */ 4914 *cs++ = (MI_SEMAPHORE_WAIT | 4915 MI_SEMAPHORE_GLOBAL_GTT | 4916 MI_SEMAPHORE_POLL | 4917 MI_SEMAPHORE_SAD_EQ_SDD); 4918 *cs++ = CHILD_GO_BB; 4919 *cs++ = get_children_go_addr(parent); 4920 *cs++ = 0; 4921 4922 /* Turn off preemption */ 4923 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4924 4925 /* Jump to batch */ 4926 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4927 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4928 *cs++ = lower_32_bits(offset); 4929 *cs++ = upper_32_bits(offset); 4930 4931 intel_ring_advance(rq, cs); 4932 4933 return 0; 4934 } 4935 4936 static u32 * 4937 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4938 u32 *cs) 4939 { 4940 struct intel_context *ce = rq->context; 4941 u8 i; 4942 4943 GEM_BUG_ON(!intel_context_is_parent(ce)); 4944 4945 /* Wait on children */ 4946 for (i = 0; i < ce->parallel.number_children; ++i) { 4947 *cs++ = (MI_SEMAPHORE_WAIT | 4948 MI_SEMAPHORE_GLOBAL_GTT | 4949 MI_SEMAPHORE_POLL | 4950 MI_SEMAPHORE_SAD_EQ_SDD); 4951 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4952 *cs++ = get_children_join_addr(ce, i); 4953 *cs++ = 0; 4954 } 4955 4956 /* Turn on preemption */ 4957 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4958 *cs++ = MI_NOOP; 4959 4960 /* Tell children go */ 4961 cs = gen8_emit_ggtt_write(cs, 4962 CHILD_GO_FINI_BREADCRUMB, 4963 get_children_go_addr(ce), 4964 0); 4965 4966 return cs; 4967 } 4968 4969 /* 4970 * If this is true, a submission of multi-lrc requests had an error and the 4971 * requests need to be skipped. The front end (execbuf IOCTL) should've called 4972 * i915_request_skip which squashes the BB but we still need to emit the fini 4973 * breadcrumb seqno write. At this point we don't know how many of the 4974 * requests in the multi-lrc submission were generated so we can't do the 4975 * handshake between the parent and children (e.g. if 4 requests should be 4976 * generated but the 2nd hit an error only 1 would be seen by the GuC backend). 4977 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 4978 * has occurred on any of the requests in submission / relationship.
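 * Zeroing the skipped dwords below is safe because MI_NOOP is encoded as 0, so the hardware simply no-ops through the elided handshake.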
4979 */ 4980 static inline bool skip_handshake(struct i915_request *rq) 4981 { 4982 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4983 } 4984 4985 #define NON_SKIP_LEN 6 4986 static u32 * 4987 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4988 u32 *cs) 4989 { 4990 struct intel_context *ce = rq->context; 4991 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4992 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4993 4994 GEM_BUG_ON(!intel_context_is_parent(ce)); 4995 4996 if (unlikely(skip_handshake(rq))) { 4997 /* 4998 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 4999 * the NON_SKIP_LEN comes from the length of the emits below. 5000 */ 5001 memset(cs, 0, sizeof(u32) * 5002 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5003 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5004 } else { 5005 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5006 } 5007 5008 /* Emit fini breadcrumb */ 5009 before_fini_breadcrumb_user_interrupt_cs = cs; 5010 cs = gen8_emit_ggtt_write(cs, 5011 rq->fence.seqno, 5012 i915_request_active_timeline(rq)->hwsp_offset, 5013 0); 5014 5015 /* User interrupt */ 5016 *cs++ = MI_USER_INTERRUPT; 5017 *cs++ = MI_NOOP; 5018 5019 /* Ensure our math for skip + emit is correct */ 5020 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5021 cs); 5022 GEM_BUG_ON(start_fini_breadcrumb_cs + 5023 ce->engine->emit_fini_breadcrumb_dw != cs); 5024 5025 rq->tail = intel_ring_offset(rq, cs); 5026 5027 return cs; 5028 } 5029 5030 static u32 * 5031 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5032 u32 *cs) 5033 { 5034 struct intel_context *ce = rq->context; 5035 struct intel_context *parent = intel_context_to_parent(ce); 5036 5037 GEM_BUG_ON(!intel_context_is_child(ce)); 5038 5039 /* Turn on preemption */ 5040 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5041 *cs++ = MI_NOOP; 5042 5043 /* Signal parent */ 5044 cs = gen8_emit_ggtt_write(cs, 5045 PARENT_GO_FINI_BREADCRUMB, 5046 get_children_join_addr(parent, 5047 ce->parallel.child_index), 5048 0); 5049 5050 /* Wait on parent for go */ 5051 *cs++ = (MI_SEMAPHORE_WAIT | 5052 MI_SEMAPHORE_GLOBAL_GTT | 5053 MI_SEMAPHORE_POLL | 5054 MI_SEMAPHORE_SAD_EQ_SDD); 5055 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5056 *cs++ = get_children_go_addr(parent); 5057 *cs++ = 0; 5058 5059 return cs; 5060 } 5061 5062 static u32 * 5063 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5064 u32 *cs) 5065 { 5066 struct intel_context *ce = rq->context; 5067 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5068 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5069 5070 GEM_BUG_ON(!intel_context_is_child(ce)); 5071 5072 if (unlikely(skip_handshake(rq))) { 5073 /* 5074 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5075 * the NON_SKIP_LEN comes from the length of the emits below.
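 * (NON_SKIP_LEN is 6 dwords: the 4-dword seqno write emitted by gen8_emit_ggtt_write() plus MI_USER_INTERRUPT and MI_NOOP, as checked by the GEM_BUG_ONs at the end of this function.)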
5076 */ 5077 memset(cs, 0, sizeof(u32) * 5078 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5079 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5080 } else { 5081 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5082 } 5083 5084 /* Emit fini breadcrumb */ 5085 before_fini_breadcrumb_user_interrupt_cs = cs; 5086 cs = gen8_emit_ggtt_write(cs, 5087 rq->fence.seqno, 5088 i915_request_active_timeline(rq)->hwsp_offset, 5089 0); 5090 5091 /* User interrupt */ 5092 *cs++ = MI_USER_INTERRUPT; 5093 *cs++ = MI_NOOP; 5094 5095 /* Ensure our math for skip + emit is correct */ 5096 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5097 cs); 5098 GEM_BUG_ON(start_fini_breadcrumb_cs + 5099 ce->engine->emit_fini_breadcrumb_dw != cs); 5100 5101 rq->tail = intel_ring_offset(rq, cs); 5102 5103 return cs; 5104 } 5105 5106 #undef NON_SKIP_LEN 5107 5108 static struct intel_context * 5109 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5110 unsigned long flags) 5111 { 5112 struct guc_virtual_engine *ve; 5113 struct intel_guc *guc; 5114 unsigned int n; 5115 int err; 5116 5117 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5118 if (!ve) 5119 return ERR_PTR(-ENOMEM); 5120 5121 guc = &siblings[0]->gt->uc.guc; 5122 5123 ve->base.i915 = siblings[0]->i915; 5124 ve->base.gt = siblings[0]->gt; 5125 ve->base.uncore = siblings[0]->uncore; 5126 ve->base.id = -1; 5127 5128 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5129 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5130 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5131 ve->base.saturated = ALL_ENGINES; 5132 5133 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5134 5135 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5136 5137 ve->base.cops = &virtual_guc_context_ops; 5138 ve->base.request_alloc = guc_request_alloc; 5139 ve->base.bump_serial = virtual_guc_bump_serial; 5140 5141 ve->base.submit_request = guc_submit_request; 5142 5143 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5144 5145 intel_context_init(&ve->context, &ve->base); 5146 5147 for (n = 0; n < count; n++) { 5148 struct intel_engine_cs *sibling = siblings[n]; 5149 5150 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5151 if (sibling->mask & ve->base.mask) { 5152 DRM_DEBUG("duplicate %s entry in load balancer\n", 5153 sibling->name); 5154 err = -EINVAL; 5155 goto err_put; 5156 } 5157 5158 ve->base.mask |= sibling->mask; 5159 ve->base.logical_mask |= sibling->logical_mask; 5160 5161 if (n != 0 && ve->base.class != sibling->class) { 5162 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5163 sibling->class, ve->base.class); 5164 err = -EINVAL; 5165 goto err_put; 5166 } else if (n == 0) { 5167 ve->base.class = sibling->class; 5168 ve->base.uabi_class = sibling->uabi_class; 5169 snprintf(ve->base.name, sizeof(ve->base.name), 5170 "v%dx%d", ve->base.class, count); 5171 ve->base.context_size = sibling->context_size; 5172 5173 ve->base.add_active_request = 5174 sibling->add_active_request; 5175 ve->base.remove_active_request = 5176 sibling->remove_active_request; 5177 ve->base.emit_bb_start = sibling->emit_bb_start; 5178 ve->base.emit_flush = sibling->emit_flush; 5179 ve->base.emit_init_breadcrumb = 5180 sibling->emit_init_breadcrumb; 5181 ve->base.emit_fini_breadcrumb = 5182 sibling->emit_fini_breadcrumb; 5183 ve->base.emit_fini_breadcrumb_dw = 5184 sibling->emit_fini_breadcrumb_dw; 5185 ve->base.breadcrumbs = 5186 intel_breadcrumbs_get(sibling->breadcrumbs); 5187 5188 
ve->base.flags |= sibling->flags; 5189 5190 ve->base.props.timeslice_duration_ms = 5191 sibling->props.timeslice_duration_ms; 5192 ve->base.props.preempt_timeout_ms = 5193 sibling->props.preempt_timeout_ms; 5194 } 5195 } 5196 5197 return &ve->context; 5198 5199 err_put: 5200 intel_context_put(&ve->context); 5201 return ERR_PTR(err); 5202 } 5203 5204 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5205 { 5206 struct intel_engine_cs *engine; 5207 intel_engine_mask_t tmp, mask = ve->mask; 5208 5209 for_each_engine_masked(engine, ve->gt, mask, tmp) 5210 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5211 return true; 5212 5213 return false; 5214 } 5215 5216 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5217 #include "selftest_guc.c" 5218 #include "selftest_guc_multi_lrc.c" 5219 #endif 5220