// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned.
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)
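/*
 * Worked arithmetic for the partitioning above (an illustrative sketch, not
 * normative): assuming the full 64k guc_id space mentioned in the DOC comment,
 * i.e. num_guc_ids == 65536, the 1/16 reservation gives 65536 / 16 = 4096
 * contiguous ids for multi-lrc allocation via the bitmap, leaving the
 * remaining 61440 ids for single-lrc allocation via the ida. If num_guc_ids is
 * ever reduced, the "minimum of 32" rule noted above still applies.
 */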
/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_BLOCKED_SHIFT			8
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
	return !(ce->guc_state.sched_state &
		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}

static inline bool context_has_committed_requests(struct intel_context *ce)
{
	return !!ce->guc_state.number_committed_requests;
}

static inline void incr_context_committed_requests(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	++ce->guc_state.number_committed_requests;
	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}

static inline void decr_context_committed_requests(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	--ce->guc_state.number_committed_requests;
	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}
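/*
 * Worked example for the work queue space check used by get_wq_pointer()
 * below (an illustrative sketch, not normative): assuming a 4KiB parent
 * scratch area (PARENT_SCRATCH_SIZE == PAGE_SIZE, per the layout comment
 * above), the work queue occupies the upper half, i.e. WQ_SIZE = 2048 bytes.
 * CIRC_SPACE(wqi_tail, wqi_head, WQ_SIZE) evaluates to
 * (wqi_head - (wqi_tail + 1)) & (WQ_SIZE - 1), the number of bytes the
 * producer may still write before catching up with the GuC's head. E.g. with
 * wqi_tail == 2032 and wqi_head == 64 that is (64 - 2033) & 2047 = 79 bytes,
 * so a 24 byte multi-lrc item (parent + two children) still fits, while a
 * larger item must wait for the GuC to advance the head.
 */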
static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in the work queue. Cache a value of the head
	 * pointer in the intel_context structure in order to reduce the
	 * number of accesses to shared GPU memory which may be across a
	 * PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request was resubmitted after the context was banned.
	 */
	if (unlikely(intel_context_is_banned(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context.
		 * This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in the correct state before updating the work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret = 0;

	if (likely(!intel_context_is_banned(ce))) {
		ret = __guc_wq_item_append(rq);

		if (unlikely(ret == -EBUSY)) {
			guc->stalled_request = rq;
			guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
		}
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
		intel_context_is_banned(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     !intel_context_is_banned(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but the
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically with a
 * period of 1/8th of the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
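/*
 * Worked example of the rollover handling above (an illustrative sketch, not
 * normative): assume guc->timestamp.gt_stamp is 0x1_ffff0000, i.e.
 * gt_stamp_hi == 1 and gt_stamp_last == 0xffff0000, and the GuC reports
 * new_start == 0x00001000 because the 32 bit counter wrapped shortly after
 * gt_stamp was sampled. Then new_start < gt_stamp_last and the u32 difference
 * new_start - gt_stamp_last == 0x11000, well within POLL_TIME_CLKS, so
 * gt_stamp_hi is bumped to 2 and *prev_start becomes 0x2_00001000 rather than
 * the stale 0x1_00001000.
 */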
#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	cancel_delayed_work_sync(&guc->timestamp.work);

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
		guc_update_engine_gt_clks(engine);
		engine->stats.guc.prev_total = 0;
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc->timestamp.last_stat_jiffies = jiffies;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id)
		guc_update_engine_gt_clks(engine);

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_timestamp_ping(struct work_struct *wrk)
{
	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int srcu, ret;

	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats.
	 */
	ret = intel_gt_reset_trylock(gt, &srcu);
	if (ret)
		return;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		__update_guc_busyness_stats(guc);

	intel_gt_reset_unlock(gt, srcu);

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
		offset,
		0,
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static void guc_init_engine_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
		int ret = guc_action_enable_usage_stats(guc);

		if (ret)
			drm_err(&gt->i915->drm,
				"Failed to enable usage stats: %d!\n", ret);
	}
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;

	if (!guc_submission_initialized(guc))
		return;

	cancel_delayed_work(&guc->timestamp.work);

	/*
	 * Before parking, we should sample engine busyness stats if we need to.
	 * We can skip it if we are less than half a ping from the last time we
	 * sampled the busyness stats.
	 */
	if (guc->timestamp.last_stat_jiffies &&
	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
			(guc->timestamp.ping_delay / 2)))
		return;

	__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;
	unsigned long flags;
	ktime_t unused;

	if (!guc_submission_initialized(guc))
		return;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	guc_update_pm_timestamp(guc, &unused);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static inline bool
submission_disabled(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	return unlikely(!sched_engine ||
			!__tasklet_is_enabled(&sched_engine->tasklet) ||
			intel_gt_is_wedged(guc_to_gt(guc)));
}

static void disable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);
		__tasklet_disable_sync_once(&sched_engine->tasklet);
		sched_engine->tasklet.callback = NULL;
	}
}

static void enable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&guc->sched_engine->lock, flags);
	sched_engine->tasklet.callback = guc_submission_tasklet;
	wmb();	/* Make sure callback visible */
	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
	    __tasklet_enable(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);

		/* And kick in case we missed a new request submission. */
		tasklet_hi_schedule(&sched_engine->tasklet);
	}
	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
}

static void guc_flush_submissions(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void guc_flush_destroyed_contexts(struct intel_guc *guc);

void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	intel_gt_park_heartbeats(guc_to_gt(guc));
	disable_submission(guc);
	guc->interrupts.disable(guc);
	__reset_guc_busyness_stats(guc);

	/* Flush IRQ handler */
	spin_lock_irq(&guc_to_gt(guc)->irq_lock);
	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);

	guc_flush_submissions(guc);
	guc_flush_destroyed_contexts(guc);
	flush_work(&guc->ct.requests.worker);

	scrub_guc_desc_for_outstanding_g2h(guc);
}

static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;
	unsigned int num_siblings = 0;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (num_siblings++ == sibling)
			return engine;

	return NULL;
}

static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (intel_engine_is_virtual(engine))
		engine = guc_virtual_get_sibling(engine, 0);

	return engine;
}

static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
{
	struct intel_engine_cs *engine = __context_to_physical_engine(ce);

	if (intel_context_is_banned(ce))
		return;

	GEM_BUG_ON(!intel_context_is_pinned(ce));

	/*
	 * We want a simple context + ring to execute the breadcrumb update.
	 * We cannot rely on the context being intact across the GPU hang,
	 * so clear it and rebuild just what we need for the breadcrumb.
	 * All pending requests for this context will be zapped, and any
	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
	 */
	if (scrub)
		lrc_init_regs(ce, engine, true);

	/* Rerun the request; its payload has been neutered (if guilty). */
	lrc_update_regs(ce, engine, head);
}

static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
		return;

	intel_engine_stop_cs(engine);

	/*
	 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
	 * to wait for any pending mi force wakeups
	 */
	intel_engine_wait_for_pending_mi_fw(engine);
}

static void guc_reset_nop(struct intel_engine_cs *engine)
{
}

static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
{
}

static void
__unwind_incomplete_requests(struct intel_context *ce)
{
	struct i915_request *rq, *rn;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;
	struct i915_sched_engine * const sched_engine =
		ce->engine->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry_safe_reverse(rq, rn,
					 &ce->guc_state.requests,
					 sched.link) {
		if (i915_request_completed(rq))
			continue;

		list_del_init(&rq->sched.link);
		__i915_request_unsubmit(rq);

		/* Push the request back into the queue for later resubmission. */
		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(sched_engine, prio);
		}
		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));

		list_add(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	}
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
	bool guilty;
	struct i915_request *rq;
	unsigned long flags;
	u32 head;
	int i, number_children = ce->parallel.number_children;
	struct intel_context *parent = ce;

	GEM_BUG_ON(intel_context_is_child(ce));

	intel_context_get(ce);

	/*
	 * GuC will implicitly mark the context as non-schedulable when it sends
	 * the reset notification. Make sure our state reflects this change. The
	 * context will be marked enabled on resubmission.
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	clr_context_enabled(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	/*
	 * For each context in the relationship find the hanging request
	 * resetting each context / request as needed
	 */
	for (i = 0; i < number_children + 1; ++i) {
		if (!intel_context_is_pinned(ce))
			goto next_context;

		guilty = false;
		rq = intel_context_find_active_request(ce);
		if (!rq) {
			head = ce->ring->tail;
			goto out_replay;
		}

		if (i915_request_started(rq))
			guilty = stalled & ce->engine->mask;

		GEM_BUG_ON(i915_active_is_idle(&ce->active));
		head = intel_ring_wrap(ce->ring, rq->head);

		__i915_request_reset(rq, guilty);
out_replay:
		guc_reset_state(ce, head, guilty);
next_context:
		if (i != number_children)
			ce = list_next_entry(ce, parallel.child_link);
	}

	__unwind_incomplete_requests(parent);
	intel_context_put(parent);
}

void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		xa_unlock(&guc->context_lookup);

		if (intel_context_is_pinned(ce) &&
		    !intel_context_is_child(ce))
			__guc_reset_context(ce, stalled);

		intel_context_put(ce);

		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	/* GuC is blown away, drop all references to contexts */
	xa_destroy(&guc->context_lookup);
}

static void guc_cancel_context_requests(struct intel_context *ce)
{
	struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
	struct i915_request *rq;
	unsigned long flags;

	/* Mark all executing requests as skipped. */
	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
		i915_request_put(i915_request_mark_eio(rq));
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
{
	struct i915_request *rq, *rn;
	struct rb_node *rb;
	unsigned long flags;

	/* Can be called during boot if GuC fails to load */
	if (!sched_engine)
		return;

	/*
	 * Before we call engine->cancel_requests(), we should have exclusive
	 * access to the submission state. This is arranged for us by the
	 * caller disabling the interrupt generation, the tasklet and other
	 * threads that may then access the same state, giving us a free hand
	 * to reset state. However, we still need to let lockdep be aware that
	 * we know this state may be accessed in hardirq context, so we
	 * disable the irq around this manipulation and we want to keep
	 * the spinlock focused on its duties and not accidentally conflate
	 * coverage to the submission's irq state.
	 * (Similarly, although we shouldn't need to disable irq around the
	 * manipulation of the submission's irq state, we also wish to remind
	 * ourselves that it is irq state.)
	 */
	spin_lock_irqsave(&sched_engine->lock, flags);

	/* Flush the queued requests to the timeline list (for retiring). */
	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);

		priolist_for_each_request_consume(rq, rn, p) {
			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			i915_request_put(i915_request_mark_eio(rq));
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

	/* Remaining _unready_ requests will be nop'ed when submitted */

	sched_engine->queue_priority_hint = INT_MIN;
	sched_engine->queue = RB_ROOT_CACHED;

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

void intel_guc_submission_cancel_requests(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		xa_unlock(&guc->context_lookup);

		if (intel_context_is_pinned(ce) &&
		    !intel_context_is_child(ce))
			guc_cancel_context_requests(ce);

		intel_context_put(ce);

		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	guc_cancel_sched_engine_requests(guc->sched_engine);

	/* GuC is blown away, drop all references to contexts */
	xa_destroy(&guc->context_lookup);
}

void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
	/* Reset called during driver load or during wedge? */
	if (unlikely(!guc_submission_initialized(guc) ||
		     intel_gt_is_wedged(guc_to_gt(guc)))) {
		return;
	}

	/*
	 * Technically possible for either of these values to be non-zero here,
	 * but very unlikely + harmless. Regardless let's add a warn so we can
	 * see in CI if this happens frequently / a precursor to taking down the
	 * machine.
	 */
	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
	atomic_set(&guc->outstanding_submission_g2h, 0);

	intel_guc_global_policies_update(guc);
	enable_submission(guc);
	intel_gt_unpark_heartbeats(guc_to_gt(guc));
}

static void destroyed_worker_func(struct work_struct *w);
static void reset_fail_worker_func(struct work_struct *w);

/*
 * Set up the memory resources to be shared with the GuC (via the GGTT)
 * at firmware loading time.
1863 */ 1864 int intel_guc_submission_init(struct intel_guc *guc) 1865 { 1866 struct intel_gt *gt = guc_to_gt(guc); 1867 int ret; 1868 1869 if (guc->submission_initialized) 1870 return 0; 1871 1872 if (guc->fw.major_ver_found < 70) { 1873 ret = guc_lrc_desc_pool_create_v69(guc); 1874 if (ret) 1875 return ret; 1876 } 1877 1878 guc->submission_state.guc_ids_bitmap = 1879 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1880 if (!guc->submission_state.guc_ids_bitmap) { 1881 ret = -ENOMEM; 1882 goto destroy_pool; 1883 } 1884 1885 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1886 guc->timestamp.shift = gpm_timestamp_shift(gt); 1887 guc->submission_initialized = true; 1888 1889 return 0; 1890 1891 destroy_pool: 1892 guc_lrc_desc_pool_destroy_v69(guc); 1893 1894 return ret; 1895 } 1896 1897 void intel_guc_submission_fini(struct intel_guc *guc) 1898 { 1899 if (!guc->submission_initialized) 1900 return; 1901 1902 guc_flush_destroyed_contexts(guc); 1903 guc_lrc_desc_pool_destroy_v69(guc); 1904 i915_sched_engine_put(guc->sched_engine); 1905 bitmap_free(guc->submission_state.guc_ids_bitmap); 1906 guc->submission_initialized = false; 1907 } 1908 1909 static inline void queue_request(struct i915_sched_engine *sched_engine, 1910 struct i915_request *rq, 1911 int prio) 1912 { 1913 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1914 list_add_tail(&rq->sched.link, 1915 i915_sched_lookup_priolist(sched_engine, prio)); 1916 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1917 tasklet_hi_schedule(&sched_engine->tasklet); 1918 } 1919 1920 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1921 struct i915_request *rq) 1922 { 1923 int ret = 0; 1924 1925 __i915_request_submit(rq); 1926 1927 trace_i915_request_in(rq, 0); 1928 1929 if (is_multi_lrc_rq(rq)) { 1930 if (multi_lrc_submit(rq)) { 1931 ret = guc_wq_item_append(guc, rq); 1932 if (!ret) 1933 ret = guc_add_request(guc, rq); 1934 } 1935 } else { 1936 guc_set_lrc_tail(rq); 1937 ret = guc_add_request(guc, rq); 1938 } 1939 1940 if (unlikely(ret == -EPIPE)) 1941 disable_submission(guc); 1942 1943 return ret; 1944 } 1945 1946 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1947 { 1948 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1949 struct intel_context *ce = request_to_scheduling_context(rq); 1950 1951 return submission_disabled(guc) || guc->stalled_request || 1952 !i915_sched_engine_is_empty(sched_engine) || 1953 !ctx_id_mapped(guc, ce->guc_id.id); 1954 } 1955 1956 static void guc_submit_request(struct i915_request *rq) 1957 { 1958 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1959 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1960 unsigned long flags; 1961 1962 /* Will be called from irq-context when using foreign fences. 
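	 * Only the irqsave/irqrestore locking variants are safe here for that
	 * reason.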
*/ 1963 spin_lock_irqsave(&sched_engine->lock, flags); 1964 1965 if (need_tasklet(guc, rq)) 1966 queue_request(sched_engine, rq, rq_prio(rq)); 1967 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1968 tasklet_hi_schedule(&sched_engine->tasklet); 1969 1970 spin_unlock_irqrestore(&sched_engine->lock, flags); 1971 } 1972 1973 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1974 { 1975 int ret; 1976 1977 GEM_BUG_ON(intel_context_is_child(ce)); 1978 1979 if (intel_context_is_parent(ce)) 1980 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1981 NUMBER_MULTI_LRC_GUC_ID(guc), 1982 order_base_2(ce->parallel.number_children 1983 + 1)); 1984 else 1985 ret = ida_simple_get(&guc->submission_state.guc_ids, 1986 NUMBER_MULTI_LRC_GUC_ID(guc), 1987 guc->submission_state.num_guc_ids, 1988 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1989 __GFP_NOWARN); 1990 if (unlikely(ret < 0)) 1991 return ret; 1992 1993 ce->guc_id.id = ret; 1994 return 0; 1995 } 1996 1997 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1998 { 1999 GEM_BUG_ON(intel_context_is_child(ce)); 2000 2001 if (!context_guc_id_invalid(ce)) { 2002 if (intel_context_is_parent(ce)) 2003 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2004 ce->guc_id.id, 2005 order_base_2(ce->parallel.number_children 2006 + 1)); 2007 else 2008 ida_simple_remove(&guc->submission_state.guc_ids, 2009 ce->guc_id.id); 2010 clr_ctx_id_mapping(guc, ce->guc_id.id); 2011 set_context_guc_id_invalid(ce); 2012 } 2013 if (!list_empty(&ce->guc_id.link)) 2014 list_del_init(&ce->guc_id.link); 2015 } 2016 2017 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2018 { 2019 unsigned long flags; 2020 2021 spin_lock_irqsave(&guc->submission_state.lock, flags); 2022 __release_guc_id(guc, ce); 2023 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2024 } 2025 2026 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2027 { 2028 struct intel_context *cn; 2029 2030 lockdep_assert_held(&guc->submission_state.lock); 2031 GEM_BUG_ON(intel_context_is_child(ce)); 2032 GEM_BUG_ON(intel_context_is_parent(ce)); 2033 2034 if (!list_empty(&guc->submission_state.guc_id_list)) { 2035 cn = list_first_entry(&guc->submission_state.guc_id_list, 2036 struct intel_context, 2037 guc_id.link); 2038 2039 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2040 GEM_BUG_ON(context_guc_id_invalid(cn)); 2041 GEM_BUG_ON(intel_context_is_child(cn)); 2042 GEM_BUG_ON(intel_context_is_parent(cn)); 2043 2044 list_del_init(&cn->guc_id.link); 2045 ce->guc_id.id = cn->guc_id.id; 2046 2047 spin_lock(&cn->guc_state.lock); 2048 clr_context_registered(cn); 2049 spin_unlock(&cn->guc_state.lock); 2050 2051 set_context_guc_id_invalid(cn); 2052 2053 #ifdef CONFIG_DRM_I915_SELFTEST 2054 guc->number_guc_id_stolen++; 2055 #endif 2056 2057 return 0; 2058 } else { 2059 return -EAGAIN; 2060 } 2061 } 2062 2063 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2064 { 2065 int ret; 2066 2067 lockdep_assert_held(&guc->submission_state.lock); 2068 GEM_BUG_ON(intel_context_is_child(ce)); 2069 2070 ret = new_guc_id(guc, ce); 2071 if (unlikely(ret < 0)) { 2072 if (intel_context_is_parent(ce)) 2073 return -ENOSPC; 2074 2075 ret = steal_guc_id(guc, ce); 2076 if (ret < 0) 2077 return ret; 2078 } 2079 2080 if (intel_context_is_parent(ce)) { 2081 struct intel_context *child; 2082 int i = 1; 2083 2084 for_each_child(ce, child) 2085 child->guc_id.id = ce->guc_id.id + i++; 2086 } 2087 2088 return 0; 2089 } 
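
/*
 * Illustrative sketch (not part of the driver flow): assign_guc_id() above
 * hands a parent context a block of consecutive ids sized up to the next
 * power of two, with the children placed directly after the parent. For a
 * hypothetical parent id P with three children:
 *
 *	parent:  P	(region of order_base_2(4) == 2 from guc_ids_bitmap)
 *	child 0: P + 1
 *	child 1: P + 2
 *	child 2: P + 3
 *
 * Single-lrc contexts instead get an arbitrary id from the ida, allocated
 * above the multi-lrc partition (NUMBER_MULTI_LRC_GUC_ID).
 */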

#define PIN_GUC_ID_TRIES	4
static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
	int ret = 0;
	unsigned long flags, tries = PIN_GUC_ID_TRIES;

	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));

try_again:
	spin_lock_irqsave(&guc->submission_state.lock, flags);

	might_lock(&ce->guc_state.lock);

	if (context_guc_id_invalid(ce)) {
		ret = assign_guc_id(guc, ce);
		if (ret)
			goto out_unlock;
		ret = 1;	/* Indicates newly assigned guc_id */
	}
	if (!list_empty(&ce->guc_id.link))
		list_del_init(&ce->guc_id.link);
	atomic_inc(&ce->guc_id.ref);

out_unlock:
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	/*
	 * -EAGAIN indicates no guc_ids are available, so retire any
	 * outstanding requests to see if that frees up a guc_id. If the first
	 * retire didn't help, insert a sleep of the timeslice duration before
	 * attempting to retire more requests. Double the sleep period on each
	 * subsequent pass before finally giving up. The sleep period has a
	 * maximum of 100ms and a minimum of 1ms.
	 */
	if (ret == -EAGAIN && --tries) {
		if (PIN_GUC_ID_TRIES - tries > 1) {
			unsigned int timeslice_shifted =
				ce->engine->props.timeslice_duration_ms <<
				(PIN_GUC_ID_TRIES - tries - 2);
			unsigned int max = min_t(unsigned int, 100,
						 timeslice_shifted);

			msleep(max_t(unsigned int, max, 1));
		}
		intel_gt_retire_requests(guc_to_gt(guc));
		goto try_again;
	}

	return ret;
}

static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
	unsigned long flags;

	GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
	GEM_BUG_ON(intel_context_is_child(ce));

	if (unlikely(context_guc_id_invalid(ce) ||
		     intel_context_is_parent(ce)))
		return;

	spin_lock_irqsave(&guc->submission_state.lock, flags);
	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
	    !atomic_read(&ce->guc_id.ref))
		list_add_tail(&ce->guc_id.link,
			      &guc->submission_state.guc_id_list);
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}

static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
					       struct intel_context *ce,
					       u32 guc_id,
					       u32 offset,
					       bool loop)
{
	struct intel_context *child;
	u32 action[4 + MAX_ENGINE_INSTANCE];
	int len = 0;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = guc_id;
	action[len++] = ce->parallel.number_children + 1;
	action[len++] = offset;
	for_each_child(ce, child) {
		offset += sizeof(struct guc_lrc_desc_v69);
		action[len++] = offset;
	}

	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}

static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
					       struct intel_context *ce,
					       struct guc_ctxt_registration_info *info,
					       bool loop)
{
	struct intel_context *child;
	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
	int len = 0;
	u32 next_id;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] =
info->engine_submit_mask; 2202 action[len++] = info->wq_desc_lo; 2203 action[len++] = info->wq_desc_hi; 2204 action[len++] = info->wq_base_lo; 2205 action[len++] = info->wq_base_hi; 2206 action[len++] = info->wq_size; 2207 action[len++] = ce->parallel.number_children + 1; 2208 action[len++] = info->hwlrca_lo; 2209 action[len++] = info->hwlrca_hi; 2210 2211 next_id = info->context_idx + 1; 2212 for_each_child(ce, child) { 2213 GEM_BUG_ON(next_id++ != child->guc_id.id); 2214 2215 /* 2216 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2217 * only supports 32 bit currently. 2218 */ 2219 action[len++] = lower_32_bits(child->lrc.lrca); 2220 action[len++] = upper_32_bits(child->lrc.lrca); 2221 } 2222 2223 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2224 2225 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2226 } 2227 2228 static int __guc_action_register_context_v69(struct intel_guc *guc, 2229 u32 guc_id, 2230 u32 offset, 2231 bool loop) 2232 { 2233 u32 action[] = { 2234 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2235 guc_id, 2236 offset, 2237 }; 2238 2239 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2240 0, loop); 2241 } 2242 2243 static int __guc_action_register_context_v70(struct intel_guc *guc, 2244 struct guc_ctxt_registration_info *info, 2245 bool loop) 2246 { 2247 u32 action[] = { 2248 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2249 info->flags, 2250 info->context_idx, 2251 info->engine_class, 2252 info->engine_submit_mask, 2253 info->wq_desc_lo, 2254 info->wq_desc_hi, 2255 info->wq_base_lo, 2256 info->wq_base_hi, 2257 info->wq_size, 2258 info->hwlrca_lo, 2259 info->hwlrca_hi, 2260 }; 2261 2262 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2263 0, loop); 2264 } 2265 2266 static void prepare_context_registration_info_v69(struct intel_context *ce); 2267 static void prepare_context_registration_info_v70(struct intel_context *ce, 2268 struct guc_ctxt_registration_info *info); 2269 2270 static int 2271 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2272 { 2273 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2274 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2275 2276 prepare_context_registration_info_v69(ce); 2277 2278 if (intel_context_is_parent(ce)) 2279 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2280 offset, loop); 2281 else 2282 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2283 offset, loop); 2284 } 2285 2286 static int 2287 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2288 { 2289 struct guc_ctxt_registration_info info; 2290 2291 prepare_context_registration_info_v70(ce, &info); 2292 2293 if (intel_context_is_parent(ce)) 2294 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2295 else 2296 return __guc_action_register_context_v70(guc, &info, loop); 2297 } 2298 2299 static int register_context(struct intel_context *ce, bool loop) 2300 { 2301 struct intel_guc *guc = ce_to_guc(ce); 2302 int ret; 2303 2304 GEM_BUG_ON(intel_context_is_child(ce)); 2305 trace_intel_context_register(ce); 2306 2307 if (guc->fw.major_ver_found >= 70) 2308 ret = register_context_v70(guc, ce, loop); 2309 else 2310 ret = register_context_v69(guc, ce, loop); 2311 2312 if (likely(!ret)) { 2313 unsigned long flags; 2314 2315 spin_lock_irqsave(&ce->guc_state.lock, flags); 2316 set_context_registered(ce); 2317 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2318 2319 if (guc->fw.major_ver_found >= 70) 2320 
guc_context_policy_init_v70(ce, loop); 2321 } 2322 2323 return ret; 2324 } 2325 2326 static int __guc_action_deregister_context(struct intel_guc *guc, 2327 u32 guc_id) 2328 { 2329 u32 action[] = { 2330 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2331 guc_id, 2332 }; 2333 2334 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2335 G2H_LEN_DW_DEREGISTER_CONTEXT, 2336 true); 2337 } 2338 2339 static int deregister_context(struct intel_context *ce, u32 guc_id) 2340 { 2341 struct intel_guc *guc = ce_to_guc(ce); 2342 2343 GEM_BUG_ON(intel_context_is_child(ce)); 2344 trace_intel_context_deregister(ce); 2345 2346 return __guc_action_deregister_context(guc, guc_id); 2347 } 2348 2349 static inline void clear_children_join_go_memory(struct intel_context *ce) 2350 { 2351 struct parent_scratch *ps = __get_parent_scratch(ce); 2352 int i; 2353 2354 ps->go.semaphore = 0; 2355 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2356 ps->join[i].semaphore = 0; 2357 } 2358 2359 static inline u32 get_children_go_value(struct intel_context *ce) 2360 { 2361 return __get_parent_scratch(ce)->go.semaphore; 2362 } 2363 2364 static inline u32 get_children_join_value(struct intel_context *ce, 2365 u8 child_index) 2366 { 2367 return __get_parent_scratch(ce)->join[child_index].semaphore; 2368 } 2369 2370 struct context_policy { 2371 u32 count; 2372 struct guc_update_context_policy h2g; 2373 }; 2374 2375 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2376 { 2377 size_t bytes = sizeof(policy->h2g.header) + 2378 (sizeof(policy->h2g.klv[0]) * policy->count); 2379 2380 return bytes / sizeof(u32); 2381 } 2382 2383 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2384 { 2385 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2386 policy->h2g.header.ctx_id = guc_id; 2387 policy->count = 0; 2388 } 2389 2390 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2391 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2392 { \ 2393 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2394 policy->h2g.klv[policy->count].kl = \ 2395 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2396 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2397 policy->h2g.klv[policy->count].value = data; \ 2398 policy->count++; \ 2399 } 2400 2401 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2402 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2403 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2404 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2405 2406 #undef MAKE_CONTEXT_POLICY_ADD 2407 2408 static int __guc_context_set_context_policies(struct intel_guc *guc, 2409 struct context_policy *policy, 2410 bool loop) 2411 { 2412 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2413 __guc_context_policy_action_size(policy), 2414 0, loop); 2415 } 2416 2417 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2418 { 2419 struct intel_engine_cs *engine = ce->engine; 2420 struct intel_guc *guc = &engine->gt->uc.guc; 2421 struct context_policy policy; 2422 u32 execution_quantum; 2423 u32 preemption_timeout; 2424 unsigned long flags; 2425 int ret; 2426 2427 /* NB: For both of these, zero means disabled. 
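	 * The engine properties are stored in milliseconds and are scaled by
	 * 1000 below before being sent to the GuC, e.g. a 10 ms timeslice
	 * becomes an execution quantum of 10000.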
	 */
	execution_quantum = engine->props.timeslice_duration_ms * 1000;
	preemption_timeout = engine->props.preempt_timeout_ms * 1000;

	__guc_context_policy_start_klv(&policy, ce->guc_id.id);

	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);

	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
		__guc_context_policy_add_preempt_to_idle(&policy, 1);

	ret = __guc_context_set_context_policies(guc, &policy, loop);

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (ret != 0)
		set_context_policy_required(ce);
	else
		clr_context_policy_required(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	return ret;
}

static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
					struct guc_lrc_desc_v69 *desc)
{
	desc->policy_flags = 0;

	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;

	/* NB: For both of these, zero means disabled. */
	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}

static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
{
	/*
	 * This matches the mapping we do in map_i915_prio_to_guc_prio()
	 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL).
	 */
	switch (prio) {
	default:
		MISSING_CASE(prio);
		fallthrough;
	case GUC_CLIENT_PRIORITY_KMD_NORMAL:
		return GEN12_CTX_PRIORITY_NORMAL;
	case GUC_CLIENT_PRIORITY_NORMAL:
		return GEN12_CTX_PRIORITY_LOW;
	case GUC_CLIENT_PRIORITY_HIGH:
	case GUC_CLIENT_PRIORITY_KMD_HIGH:
		return GEN12_CTX_PRIORITY_HIGH;
	}
}
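
/*
 * Quick reference for the switch above:
 *
 *	GUC_CLIENT_PRIORITY_KMD_HIGH   -> GEN12_CTX_PRIORITY_HIGH
 *	GUC_CLIENT_PRIORITY_HIGH       -> GEN12_CTX_PRIORITY_HIGH
 *	GUC_CLIENT_PRIORITY_KMD_NORMAL -> GEN12_CTX_PRIORITY_NORMAL
 *	GUC_CLIENT_PRIORITY_NORMAL     -> GEN12_CTX_PRIORITY_LOW
 */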

static void prepare_context_registration_info_v69(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 ctx_id = ce->guc_id.id;
	struct guc_lrc_desc_v69 *desc;
	struct intel_context *child;

	GEM_BUG_ON(!engine->mask);

	/*
	 * Ensure LRC + CT vmas are in the same region as the write barrier is
	 * done based on the CT vma region.
	 */
	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
		   i915_gem_object_is_lmem(ce->ring->vma->obj));

	desc = __get_lrc_desc_v69(guc, ctx_id);
	desc->engine_class = engine_class_to_guc_class(engine->class);
	desc->engine_submit_mask = engine->logical_mask;
	desc->hw_context_desc = ce->lrc.lrca;
	desc->priority = ce->guc_state.prio;
	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
	guc_context_policy_init_v69(engine, desc);

	/*
	 * If the context is a parent, we need to register a process descriptor
	 * describing a work queue and register all child contexts.
	 */
	if (intel_context_is_parent(ce)) {
		struct guc_process_desc_v69 *pdesc;

		ce->parallel.guc.wqi_tail = 0;
		ce->parallel.guc.wqi_head = 0;

		desc->process_desc = i915_ggtt_offset(ce->state) +
			__get_parent_scratch_offset(ce);
		desc->wq_addr = i915_ggtt_offset(ce->state) +
			__get_wq_offset(ce);
		desc->wq_size = WQ_SIZE;

		pdesc = __get_process_desc_v69(ce);
		memset(pdesc, 0, sizeof(*(pdesc)));
		pdesc->stage_id = ce->guc_id.id;
		pdesc->wq_base_addr = desc->wq_addr;
		pdesc->wq_size_bytes = desc->wq_size;
		pdesc->wq_status = WQ_STATUS_ACTIVE;

		ce->parallel.guc.wq_head = &pdesc->head;
		ce->parallel.guc.wq_tail = &pdesc->tail;
		ce->parallel.guc.wq_status = &pdesc->wq_status;

		for_each_child(ce, child) {
			desc = __get_lrc_desc_v69(guc, child->guc_id.id);

			desc->engine_class =
				engine_class_to_guc_class(engine->class);
			desc->hw_context_desc = child->lrc.lrca;
			desc->priority = ce->guc_state.prio;
			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
			guc_context_policy_init_v69(engine, desc);
		}

		clear_children_join_go_memory(ce);
	}
}

static void prepare_context_registration_info_v70(struct intel_context *ce,
						   struct guc_ctxt_registration_info *info)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 ctx_id = ce->guc_id.id;

	GEM_BUG_ON(!engine->mask);

	/*
	 * Ensure LRC + CT vmas are in the same region as the write barrier is
	 * done based on the CT vma region.
	 */
	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
		   i915_gem_object_is_lmem(ce->ring->vma->obj));

	memset(info, 0, sizeof(*info));
	info->context_idx = ctx_id;
	info->engine_class = engine_class_to_guc_class(engine->class);
	info->engine_submit_mask = engine->logical_mask;
	/*
	 * NB: GuC interface supports 64 bit LRCA even though i915/HW
	 * only supports 32 bit currently.
	 */
	info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
	info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
	if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
		info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
	info->flags = CONTEXT_REGISTRATION_FLAG_KMD;

	/*
	 * If the context is a parent, we need to register a process descriptor
	 * describing a work queue and register all child contexts.
2585 */ 2586 if (intel_context_is_parent(ce)) { 2587 struct guc_sched_wq_desc *wq_desc; 2588 u64 wq_desc_offset, wq_base_offset; 2589 2590 ce->parallel.guc.wqi_tail = 0; 2591 ce->parallel.guc.wqi_head = 0; 2592 2593 wq_desc_offset = i915_ggtt_offset(ce->state) + 2594 __get_parent_scratch_offset(ce); 2595 wq_base_offset = i915_ggtt_offset(ce->state) + 2596 __get_wq_offset(ce); 2597 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2598 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2599 info->wq_base_lo = lower_32_bits(wq_base_offset); 2600 info->wq_base_hi = upper_32_bits(wq_base_offset); 2601 info->wq_size = WQ_SIZE; 2602 2603 wq_desc = __get_wq_desc_v70(ce); 2604 memset(wq_desc, 0, sizeof(*wq_desc)); 2605 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2606 2607 ce->parallel.guc.wq_head = &wq_desc->head; 2608 ce->parallel.guc.wq_tail = &wq_desc->tail; 2609 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2610 2611 clear_children_join_go_memory(ce); 2612 } 2613 } 2614 2615 static int try_context_registration(struct intel_context *ce, bool loop) 2616 { 2617 struct intel_engine_cs *engine = ce->engine; 2618 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2619 struct intel_guc *guc = &engine->gt->uc.guc; 2620 intel_wakeref_t wakeref; 2621 u32 ctx_id = ce->guc_id.id; 2622 bool context_registered; 2623 int ret = 0; 2624 2625 GEM_BUG_ON(!sched_state_is_init(ce)); 2626 2627 context_registered = ctx_id_mapped(guc, ctx_id); 2628 2629 clr_ctx_id_mapping(guc, ctx_id); 2630 set_ctx_id_mapping(guc, ctx_id, ce); 2631 2632 /* 2633 * The context_lookup xarray is used to determine if the hardware 2634 * context is currently registered. There are two cases in which it 2635 * could be registered either the guc_id has been stolen from another 2636 * context or the lrc descriptor address of this context has changed. In 2637 * either case the context needs to be deregistered with the GuC before 2638 * registering this context. 2639 */ 2640 if (context_registered) { 2641 bool disabled; 2642 unsigned long flags; 2643 2644 trace_intel_context_steal_guc_id(ce); 2645 GEM_BUG_ON(!loop); 2646 2647 /* Seal race with Reset */ 2648 spin_lock_irqsave(&ce->guc_state.lock, flags); 2649 disabled = submission_disabled(guc); 2650 if (likely(!disabled)) { 2651 set_context_wait_for_deregister_to_register(ce); 2652 intel_context_get(ce); 2653 } 2654 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2655 if (unlikely(disabled)) { 2656 clr_ctx_id_mapping(guc, ctx_id); 2657 return 0; /* Will get registered later */ 2658 } 2659 2660 /* 2661 * If stealing the guc_id, this ce has the same guc_id as the 2662 * context whose guc_id was stolen. 
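		 * The deregister H2G below flushes the old owner of that guc_id
		 * out of the GuC; this context is then registered when the
		 * corresponding deregister-complete G2H is processed.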
2663 */ 2664 with_intel_runtime_pm(runtime_pm, wakeref) 2665 ret = deregister_context(ce, ce->guc_id.id); 2666 if (unlikely(ret == -ENODEV)) 2667 ret = 0; /* Will get registered later */ 2668 } else { 2669 with_intel_runtime_pm(runtime_pm, wakeref) 2670 ret = register_context(ce, loop); 2671 if (unlikely(ret == -EBUSY)) { 2672 clr_ctx_id_mapping(guc, ctx_id); 2673 } else if (unlikely(ret == -ENODEV)) { 2674 clr_ctx_id_mapping(guc, ctx_id); 2675 ret = 0; /* Will get registered later */ 2676 } 2677 } 2678 2679 return ret; 2680 } 2681 2682 static int __guc_context_pre_pin(struct intel_context *ce, 2683 struct intel_engine_cs *engine, 2684 struct i915_gem_ww_ctx *ww, 2685 void **vaddr) 2686 { 2687 return lrc_pre_pin(ce, engine, ww, vaddr); 2688 } 2689 2690 static int __guc_context_pin(struct intel_context *ce, 2691 struct intel_engine_cs *engine, 2692 void *vaddr) 2693 { 2694 if (i915_ggtt_offset(ce->state) != 2695 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2696 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2697 2698 /* 2699 * GuC context gets pinned in guc_request_alloc. See that function for 2700 * explaination of why. 2701 */ 2702 2703 return lrc_pin(ce, engine, vaddr); 2704 } 2705 2706 static int guc_context_pre_pin(struct intel_context *ce, 2707 struct i915_gem_ww_ctx *ww, 2708 void **vaddr) 2709 { 2710 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2711 } 2712 2713 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2714 { 2715 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2716 2717 if (likely(!ret && !intel_context_is_barrier(ce))) 2718 intel_engine_pm_get(ce->engine); 2719 2720 return ret; 2721 } 2722 2723 static void guc_context_unpin(struct intel_context *ce) 2724 { 2725 struct intel_guc *guc = ce_to_guc(ce); 2726 2727 unpin_guc_id(guc, ce); 2728 lrc_unpin(ce); 2729 2730 if (likely(!intel_context_is_barrier(ce))) 2731 intel_engine_pm_put_async(ce->engine); 2732 } 2733 2734 static void guc_context_post_unpin(struct intel_context *ce) 2735 { 2736 lrc_post_unpin(ce); 2737 } 2738 2739 static void __guc_context_sched_enable(struct intel_guc *guc, 2740 struct intel_context *ce) 2741 { 2742 u32 action[] = { 2743 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2744 ce->guc_id.id, 2745 GUC_CONTEXT_ENABLE 2746 }; 2747 2748 trace_intel_context_sched_enable(ce); 2749 2750 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2751 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2752 } 2753 2754 static void __guc_context_sched_disable(struct intel_guc *guc, 2755 struct intel_context *ce, 2756 u16 guc_id) 2757 { 2758 u32 action[] = { 2759 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2760 guc_id, /* ce->guc_id.id not stable */ 2761 GUC_CONTEXT_DISABLE 2762 }; 2763 2764 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2765 2766 GEM_BUG_ON(intel_context_is_child(ce)); 2767 trace_intel_context_sched_disable(ce); 2768 2769 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2770 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2771 } 2772 2773 static void guc_blocked_fence_complete(struct intel_context *ce) 2774 { 2775 lockdep_assert_held(&ce->guc_state.lock); 2776 2777 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2778 i915_sw_fence_complete(&ce->guc_state.blocked); 2779 } 2780 2781 static void guc_blocked_fence_reinit(struct intel_context *ce) 2782 { 2783 lockdep_assert_held(&ce->guc_state.lock); 2784 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2785 2786 /* 2787 * This fence is always complete unless a pending schedule disable is 2788 * outstanding. 
We arm the fence here and complete it when we receive 2789 * the pending schedule disable complete message. 2790 */ 2791 i915_sw_fence_fini(&ce->guc_state.blocked); 2792 i915_sw_fence_reinit(&ce->guc_state.blocked); 2793 i915_sw_fence_await(&ce->guc_state.blocked); 2794 i915_sw_fence_commit(&ce->guc_state.blocked); 2795 } 2796 2797 static u16 prep_context_pending_disable(struct intel_context *ce) 2798 { 2799 lockdep_assert_held(&ce->guc_state.lock); 2800 2801 set_context_pending_disable(ce); 2802 clr_context_enabled(ce); 2803 guc_blocked_fence_reinit(ce); 2804 intel_context_get(ce); 2805 2806 return ce->guc_id.id; 2807 } 2808 2809 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2810 { 2811 struct intel_guc *guc = ce_to_guc(ce); 2812 unsigned long flags; 2813 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2814 intel_wakeref_t wakeref; 2815 u16 guc_id; 2816 bool enabled; 2817 2818 GEM_BUG_ON(intel_context_is_child(ce)); 2819 2820 spin_lock_irqsave(&ce->guc_state.lock, flags); 2821 2822 incr_context_blocked(ce); 2823 2824 enabled = context_enabled(ce); 2825 if (unlikely(!enabled || submission_disabled(guc))) { 2826 if (enabled) 2827 clr_context_enabled(ce); 2828 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2829 return &ce->guc_state.blocked; 2830 } 2831 2832 /* 2833 * We add +2 here as the schedule disable complete CTB handler calls 2834 * intel_context_sched_disable_unpin (-2 to pin_count). 2835 */ 2836 atomic_add(2, &ce->pin_count); 2837 2838 guc_id = prep_context_pending_disable(ce); 2839 2840 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2841 2842 with_intel_runtime_pm(runtime_pm, wakeref) 2843 __guc_context_sched_disable(guc, ce, guc_id); 2844 2845 return &ce->guc_state.blocked; 2846 } 2847 2848 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2849 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2850 #define SCHED_STATE_NO_UNBLOCK \ 2851 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2852 SCHED_STATE_PENDING_DISABLE | \ 2853 SCHED_STATE_BANNED) 2854 2855 static bool context_cant_unblock(struct intel_context *ce) 2856 { 2857 lockdep_assert_held(&ce->guc_state.lock); 2858 2859 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2860 context_guc_id_invalid(ce) || 2861 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2862 !intel_context_is_pinned(ce); 2863 } 2864 2865 static void guc_context_unblock(struct intel_context *ce) 2866 { 2867 struct intel_guc *guc = ce_to_guc(ce); 2868 unsigned long flags; 2869 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2870 intel_wakeref_t wakeref; 2871 bool enable; 2872 2873 GEM_BUG_ON(context_enabled(ce)); 2874 GEM_BUG_ON(intel_context_is_child(ce)); 2875 2876 spin_lock_irqsave(&ce->guc_state.lock, flags); 2877 2878 if (unlikely(submission_disabled(guc) || 2879 context_cant_unblock(ce))) { 2880 enable = false; 2881 } else { 2882 enable = true; 2883 set_context_pending_enable(ce); 2884 set_context_enabled(ce); 2885 intel_context_get(ce); 2886 } 2887 2888 decr_context_blocked(ce); 2889 2890 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2891 2892 if (enable) { 2893 with_intel_runtime_pm(runtime_pm, wakeref) 2894 __guc_context_sched_enable(guc, ce); 2895 } 2896 } 2897 2898 static void guc_context_cancel_request(struct intel_context *ce, 2899 struct i915_request *rq) 2900 { 2901 struct intel_context *block_context = 2902 request_to_scheduling_context(rq); 2903 2904 if (i915_sw_fence_signaled(&rq->submit)) { 2905 struct i915_sw_fence *fence; 2906 2907 intel_context_get(ce); 2908 fence = 
guc_context_block(block_context);
		i915_sw_fence_wait(fence);
		if (!i915_request_completed(rq)) {
			__i915_request_skip(rq);
			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
					true);
		}

		guc_context_unblock(block_context);
		intel_context_put(ce);
	}
}

static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
						  u16 guc_id,
						  u32 preemption_timeout)
{
	if (guc->fw.major_ver_found >= 70) {
		struct context_policy policy;

		__guc_context_policy_start_klv(&policy, guc_id);
		__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
		__guc_context_set_context_policies(guc, &policy, true);
	} else {
		u32 action[] = {
			INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
			guc_id,
			preemption_timeout
		};

		intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
	}
}

static void
guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
		   unsigned int preempt_timeout_ms)
{
	struct intel_guc *guc = ce_to_guc(ce);
	struct intel_runtime_pm *runtime_pm =
		&ce->engine->gt->i915->runtime_pm;
	intel_wakeref_t wakeref;
	unsigned long flags;

	GEM_BUG_ON(intel_context_is_child(ce));

	guc_flush_submissions(guc);

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	set_context_banned(ce);

	if (submission_disabled(guc) ||
	    (!context_enabled(ce) && !context_pending_disable(ce))) {
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		guc_cancel_context_requests(ce);
		intel_engine_signal_breadcrumbs(ce->engine);
	} else if (!context_pending_disable(ce)) {
		u16 guc_id;

		/*
		 * We add +2 here as the schedule disable complete CTB handler
		 * calls intel_context_sched_disable_unpin (-2 to pin_count).
		 */
		atomic_add(2, &ce->pin_count);

		guc_id = prep_context_pending_disable(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		/*
		 * In addition to disabling scheduling, set the preemption
		 * timeout to the minimum value (1 us) so the banned context
		 * gets kicked off the HW ASAP.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref) {
			__guc_context_set_preemption_timeout(guc, guc_id,
							     preempt_timeout_ms);
			__guc_context_sched_disable(guc, ce, guc_id);
		}
	} else {
		if (!context_guc_id_invalid(ce))
			with_intel_runtime_pm(runtime_pm, wakeref)
				__guc_context_set_preemption_timeout(guc,
								     ce->guc_id.id,
								     preempt_timeout_ms);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	}
}

static void guc_context_sched_disable(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long flags;
	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
	intel_wakeref_t wakeref;
	u16 guc_id;

	GEM_BUG_ON(intel_context_is_child(ce));

	spin_lock_irqsave(&ce->guc_state.lock, flags);

	/*
	 * We have to check if the context has been disabled by another thread,
	 * check if submission has been disabled to seal a race with reset and
	 * finally check if any more requests have been committed to the
	 * context, ensuring that a request doesn't slip through the
	 * 'context_pending_disable' fence.
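	 * If any of those checks fail we bail out through the unpin label
	 * below instead of sending a schedule disable H2G.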
3015 */ 3016 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 3017 context_has_committed_requests(ce))) { 3018 clr_context_enabled(ce); 3019 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3020 goto unpin; 3021 } 3022 guc_id = prep_context_pending_disable(ce); 3023 3024 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3025 3026 with_intel_runtime_pm(runtime_pm, wakeref) 3027 __guc_context_sched_disable(guc, ce, guc_id); 3028 3029 return; 3030 unpin: 3031 intel_context_sched_disable_unpin(ce); 3032 } 3033 3034 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3035 { 3036 struct intel_guc *guc = ce_to_guc(ce); 3037 struct intel_gt *gt = guc_to_gt(guc); 3038 unsigned long flags; 3039 bool disabled; 3040 3041 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3042 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3043 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3044 GEM_BUG_ON(context_enabled(ce)); 3045 3046 /* Seal race with Reset */ 3047 spin_lock_irqsave(&ce->guc_state.lock, flags); 3048 disabled = submission_disabled(guc); 3049 if (likely(!disabled)) { 3050 __intel_gt_pm_get(gt); 3051 set_context_destroyed(ce); 3052 clr_context_registered(ce); 3053 } 3054 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3055 if (unlikely(disabled)) { 3056 release_guc_id(guc, ce); 3057 __guc_context_destroy(ce); 3058 return; 3059 } 3060 3061 deregister_context(ce, ce->guc_id.id); 3062 } 3063 3064 static void __guc_context_destroy(struct intel_context *ce) 3065 { 3066 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3067 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3068 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3069 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3070 GEM_BUG_ON(ce->guc_state.number_committed_requests); 3071 3072 lrc_fini(ce); 3073 intel_context_fini(ce); 3074 3075 if (intel_engine_is_virtual(ce->engine)) { 3076 struct guc_virtual_engine *ve = 3077 container_of(ce, typeof(*ve), context); 3078 3079 if (ve->base.breadcrumbs) 3080 intel_breadcrumbs_put(ve->base.breadcrumbs); 3081 3082 kfree(ve); 3083 } else { 3084 intel_context_free(ce); 3085 } 3086 } 3087 3088 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3089 { 3090 struct intel_context *ce; 3091 unsigned long flags; 3092 3093 GEM_BUG_ON(!submission_disabled(guc) && 3094 guc_submission_initialized(guc)); 3095 3096 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3097 spin_lock_irqsave(&guc->submission_state.lock, flags); 3098 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3099 struct intel_context, 3100 destroyed_link); 3101 if (ce) 3102 list_del_init(&ce->destroyed_link); 3103 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3104 3105 if (!ce) 3106 break; 3107 3108 release_guc_id(guc, ce); 3109 __guc_context_destroy(ce); 3110 } 3111 } 3112 3113 static void deregister_destroyed_contexts(struct intel_guc *guc) 3114 { 3115 struct intel_context *ce; 3116 unsigned long flags; 3117 3118 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3119 spin_lock_irqsave(&guc->submission_state.lock, flags); 3120 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3121 struct intel_context, 3122 destroyed_link); 3123 if (ce) 3124 list_del_init(&ce->destroyed_link); 3125 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3126 3127 if (!ce) 3128 break; 3129 3130 guc_lrc_desc_unpin(ce); 3131 } 3132 } 3133 3134 static void destroyed_worker_func(struct work_struct 
*w) 3135 { 3136 struct intel_guc *guc = container_of(w, struct intel_guc, 3137 submission_state.destroyed_worker); 3138 struct intel_gt *gt = guc_to_gt(guc); 3139 int tmp; 3140 3141 with_intel_gt_pm(gt, tmp) 3142 deregister_destroyed_contexts(guc); 3143 } 3144 3145 static void guc_context_destroy(struct kref *kref) 3146 { 3147 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3148 struct intel_guc *guc = ce_to_guc(ce); 3149 unsigned long flags; 3150 bool destroy; 3151 3152 /* 3153 * If the guc_id is invalid this context has been stolen and we can free 3154 * it immediately. Also can be freed immediately if the context is not 3155 * registered with the GuC or the GuC is in the middle of a reset. 3156 */ 3157 spin_lock_irqsave(&guc->submission_state.lock, flags); 3158 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3159 !ctx_id_mapped(guc, ce->guc_id.id); 3160 if (likely(!destroy)) { 3161 if (!list_empty(&ce->guc_id.link)) 3162 list_del_init(&ce->guc_id.link); 3163 list_add_tail(&ce->destroyed_link, 3164 &guc->submission_state.destroyed_contexts); 3165 } else { 3166 __release_guc_id(guc, ce); 3167 } 3168 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3169 if (unlikely(destroy)) { 3170 __guc_context_destroy(ce); 3171 return; 3172 } 3173 3174 /* 3175 * We use a worker to issue the H2G to deregister the context as we can 3176 * take the GT PM for the first time which isn't allowed from an atomic 3177 * context. 3178 */ 3179 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3180 } 3181 3182 static int guc_context_alloc(struct intel_context *ce) 3183 { 3184 return lrc_alloc(ce, ce->engine); 3185 } 3186 3187 static void __guc_context_set_prio(struct intel_guc *guc, 3188 struct intel_context *ce) 3189 { 3190 if (guc->fw.major_ver_found >= 70) { 3191 struct context_policy policy; 3192 3193 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3194 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3195 __guc_context_set_context_policies(guc, &policy, true); 3196 } else { 3197 u32 action[] = { 3198 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3199 ce->guc_id.id, 3200 ce->guc_state.prio, 3201 }; 3202 3203 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3204 } 3205 } 3206 3207 static void guc_context_set_prio(struct intel_guc *guc, 3208 struct intel_context *ce, 3209 u8 prio) 3210 { 3211 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3212 prio > GUC_CLIENT_PRIORITY_NORMAL); 3213 lockdep_assert_held(&ce->guc_state.lock); 3214 3215 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3216 !context_registered(ce)) { 3217 ce->guc_state.prio = prio; 3218 return; 3219 } 3220 3221 ce->guc_state.prio = prio; 3222 __guc_context_set_prio(guc, ce); 3223 3224 trace_intel_context_set_prio(ce); 3225 } 3226 3227 static inline u8 map_i915_prio_to_guc_prio(int prio) 3228 { 3229 if (prio == I915_PRIORITY_NORMAL) 3230 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3231 else if (prio < I915_PRIORITY_NORMAL) 3232 return GUC_CLIENT_PRIORITY_NORMAL; 3233 else if (prio < I915_PRIORITY_DISPLAY) 3234 return GUC_CLIENT_PRIORITY_HIGH; 3235 else 3236 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3237 } 3238 3239 static inline void add_context_inflight_prio(struct intel_context *ce, 3240 u8 guc_prio) 3241 { 3242 lockdep_assert_held(&ce->guc_state.lock); 3243 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3244 3245 ++ce->guc_state.prio_count[guc_prio]; 3246 3247 /* Overflow protection */ 3248 
GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3249 } 3250 3251 static inline void sub_context_inflight_prio(struct intel_context *ce, 3252 u8 guc_prio) 3253 { 3254 lockdep_assert_held(&ce->guc_state.lock); 3255 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3256 3257 /* Underflow protection */ 3258 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3259 3260 --ce->guc_state.prio_count[guc_prio]; 3261 } 3262 3263 static inline void update_context_prio(struct intel_context *ce) 3264 { 3265 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3266 int i; 3267 3268 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3269 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3270 3271 lockdep_assert_held(&ce->guc_state.lock); 3272 3273 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3274 if (ce->guc_state.prio_count[i]) { 3275 guc_context_set_prio(guc, ce, i); 3276 break; 3277 } 3278 } 3279 } 3280 3281 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3282 { 3283 /* Lower value is higher priority */ 3284 return new_guc_prio < old_guc_prio; 3285 } 3286 3287 static void add_to_context(struct i915_request *rq) 3288 { 3289 struct intel_context *ce = request_to_scheduling_context(rq); 3290 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3291 3292 GEM_BUG_ON(intel_context_is_child(ce)); 3293 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3294 3295 spin_lock(&ce->guc_state.lock); 3296 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3297 3298 if (rq->guc_prio == GUC_PRIO_INIT) { 3299 rq->guc_prio = new_guc_prio; 3300 add_context_inflight_prio(ce, rq->guc_prio); 3301 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3302 sub_context_inflight_prio(ce, rq->guc_prio); 3303 rq->guc_prio = new_guc_prio; 3304 add_context_inflight_prio(ce, rq->guc_prio); 3305 } 3306 update_context_prio(ce); 3307 3308 spin_unlock(&ce->guc_state.lock); 3309 } 3310 3311 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3312 { 3313 lockdep_assert_held(&ce->guc_state.lock); 3314 3315 if (rq->guc_prio != GUC_PRIO_INIT && 3316 rq->guc_prio != GUC_PRIO_FINI) { 3317 sub_context_inflight_prio(ce, rq->guc_prio); 3318 update_context_prio(ce); 3319 } 3320 rq->guc_prio = GUC_PRIO_FINI; 3321 } 3322 3323 static void remove_from_context(struct i915_request *rq) 3324 { 3325 struct intel_context *ce = request_to_scheduling_context(rq); 3326 3327 GEM_BUG_ON(intel_context_is_child(ce)); 3328 3329 spin_lock_irq(&ce->guc_state.lock); 3330 3331 list_del_init(&rq->sched.link); 3332 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3333 3334 /* Prevent further __await_execution() registering a cb, then flush */ 3335 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3336 3337 guc_prio_fini(rq, ce); 3338 3339 decr_context_committed_requests(ce); 3340 3341 spin_unlock_irq(&ce->guc_state.lock); 3342 3343 atomic_dec(&ce->guc_id.ref); 3344 i915_request_notify_execute_cb_imm(rq); 3345 } 3346 3347 static const struct intel_context_ops guc_context_ops = { 3348 .alloc = guc_context_alloc, 3349 3350 .pre_pin = guc_context_pre_pin, 3351 .pin = guc_context_pin, 3352 .unpin = guc_context_unpin, 3353 .post_unpin = guc_context_post_unpin, 3354 3355 .revoke = guc_context_revoke, 3356 3357 .cancel_request = guc_context_cancel_request, 3358 3359 .enter = intel_context_enter_engine, 3360 .exit = intel_context_exit_engine, 3361 3362 .sched_disable = guc_context_sched_disable, 3363 3364 .reset = lrc_reset, 3365 .destroy = guc_context_destroy, 3366 
3367 .create_virtual = guc_create_virtual, 3368 .create_parallel = guc_create_parallel, 3369 }; 3370 3371 static void submit_work_cb(struct irq_work *wrk) 3372 { 3373 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3374 3375 might_lock(&rq->engine->sched_engine->lock); 3376 i915_sw_fence_complete(&rq->submit); 3377 } 3378 3379 static void __guc_signal_context_fence(struct intel_context *ce) 3380 { 3381 struct i915_request *rq, *rn; 3382 3383 lockdep_assert_held(&ce->guc_state.lock); 3384 3385 if (!list_empty(&ce->guc_state.fences)) 3386 trace_intel_context_fence_release(ce); 3387 3388 /* 3389 * Use an IRQ to ensure locking order of sched_engine->lock -> 3390 * ce->guc_state.lock is preserved. 3391 */ 3392 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3393 guc_fence_link) { 3394 list_del(&rq->guc_fence_link); 3395 irq_work_queue(&rq->submit_work); 3396 } 3397 3398 INIT_LIST_HEAD(&ce->guc_state.fences); 3399 } 3400 3401 static void guc_signal_context_fence(struct intel_context *ce) 3402 { 3403 unsigned long flags; 3404 3405 GEM_BUG_ON(intel_context_is_child(ce)); 3406 3407 spin_lock_irqsave(&ce->guc_state.lock, flags); 3408 clr_context_wait_for_deregister_to_register(ce); 3409 __guc_signal_context_fence(ce); 3410 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3411 } 3412 3413 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3414 { 3415 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3416 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3417 !submission_disabled(ce_to_guc(ce)); 3418 } 3419 3420 static void guc_context_init(struct intel_context *ce) 3421 { 3422 const struct i915_gem_context *ctx; 3423 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3424 3425 rcu_read_lock(); 3426 ctx = rcu_dereference(ce->gem_context); 3427 if (ctx) 3428 prio = ctx->sched.priority; 3429 rcu_read_unlock(); 3430 3431 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3432 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3433 } 3434 3435 static int guc_request_alloc(struct i915_request *rq) 3436 { 3437 struct intel_context *ce = request_to_scheduling_context(rq); 3438 struct intel_guc *guc = ce_to_guc(ce); 3439 unsigned long flags; 3440 int ret; 3441 3442 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3443 3444 /* 3445 * Flush enough space to reduce the likelihood of waiting after 3446 * we start building the request - in which case we will just 3447 * have to repeat work. 3448 */ 3449 rq->reserved_space += GUC_REQUEST_SIZE; 3450 3451 /* 3452 * Note that after this point, we have committed to using 3453 * this request as it is being used to both track the 3454 * state of engine initialisation and liveness of the 3455 * golden renderstate above. Think twice before you try 3456 * to cancel/unwind this request now. 3457 */ 3458 3459 /* Unconditionally invalidate GPU caches and TLBs. */ 3460 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3461 if (ret) 3462 return ret; 3463 3464 rq->reserved_space -= GUC_REQUEST_SIZE; 3465 3466 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3467 guc_context_init(ce); 3468 3469 /* 3470 * Call pin_guc_id here rather than in the pinning step as with 3471 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3472 * guc_id and creating horrible race conditions. This is especially bad 3473 * when guc_id are being stolen due to over subscription. By the time 3474 * this function is reached, it is guaranteed that the guc_id will be 3475 * persistent until the generated request is retired. 
Thus, sealing these
	 * race conditions. It is still safe to fail here if guc_ids are
	 * exhausted and return -EAGAIN to the user indicating that they can
	 * try again in the future.
	 *
	 * There is no need for a lock here as the timeline mutex ensures at
	 * most one context can be executing this code path at once. The
	 * guc_id_ref is incremented once for every request in flight and
	 * decremented on each retire. When it is zero, a lock around the
	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
	 */
	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
		goto out;

	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
	if (unlikely(ret < 0))
		return ret;
	if (context_needs_register(ce, !!ret)) {
		ret = try_context_registration(ce, true);
		if (unlikely(ret)) {	/* unwind */
			if (ret == -EPIPE) {
				disable_submission(guc);
				goto out;	/* GPU will be reset */
			}
			atomic_dec(&ce->guc_id.ref);
			unpin_guc_id(guc, ce);
			return ret;
		}
	}

	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

out:
	/*
	 * We block all requests on this context if a G2H is pending for a
	 * schedule disable or context deregistration as the GuC will fail a
	 * schedule enable or context registration if either G2H is pending
	 * respectively. Once a G2H returns, the fence is released that is
	 * blocking these requests (see guc_signal_context_fence).
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (context_wait_for_deregister_to_register(ce) ||
	    context_pending_disable(ce)) {
		init_irq_work(&rq->submit_work, submit_work_cb);
		i915_sw_fence_await(&rq->submit);

		list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
	}
	incr_context_committed_requests(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	return 0;
}

static int guc_virtual_context_pre_pin(struct intel_context *ce,
				       struct i915_gem_ww_ctx *ww,
				       void **vaddr)
{
	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);

	return __guc_context_pre_pin(ce, engine, ww, vaddr);
}

static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
{
	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
	int ret = __guc_context_pin(ce, engine, vaddr);
	intel_engine_mask_t tmp, mask = ce->engine->mask;

	if (likely(!ret))
		for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
			intel_engine_pm_get(engine);

	return ret;
}

static void guc_virtual_context_unpin(struct intel_context *ce)
{
	intel_engine_mask_t tmp, mask = ce->engine->mask;
	struct intel_engine_cs *engine;
	struct intel_guc *guc = ce_to_guc(ce);

	GEM_BUG_ON(context_enabled(ce));
	GEM_BUG_ON(intel_context_is_barrier(ce));

	unpin_guc_id(guc, ce);
	lrc_unpin(ce);

	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
		intel_engine_pm_put_async(engine);
}

static void guc_virtual_context_enter(struct intel_context *ce)
{
	intel_engine_mask_t tmp, mask = ce->engine->mask;
	struct intel_engine_cs *engine;

	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
		intel_engine_pm_get(engine);

	intel_timeline_enter(ce->timeline);
}

static void guc_virtual_context_exit(struct intel_context *ce)
{
intel_engine_mask_t tmp, mask = ce->engine->mask; 3581 struct intel_engine_cs *engine; 3582 3583 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3584 intel_engine_pm_put(engine); 3585 3586 intel_timeline_exit(ce->timeline); 3587 } 3588 3589 static int guc_virtual_context_alloc(struct intel_context *ce) 3590 { 3591 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3592 3593 return lrc_alloc(ce, engine); 3594 } 3595 3596 static const struct intel_context_ops virtual_guc_context_ops = { 3597 .alloc = guc_virtual_context_alloc, 3598 3599 .pre_pin = guc_virtual_context_pre_pin, 3600 .pin = guc_virtual_context_pin, 3601 .unpin = guc_virtual_context_unpin, 3602 .post_unpin = guc_context_post_unpin, 3603 3604 .revoke = guc_context_revoke, 3605 3606 .cancel_request = guc_context_cancel_request, 3607 3608 .enter = guc_virtual_context_enter, 3609 .exit = guc_virtual_context_exit, 3610 3611 .sched_disable = guc_context_sched_disable, 3612 3613 .destroy = guc_context_destroy, 3614 3615 .get_sibling = guc_virtual_get_sibling, 3616 }; 3617 3618 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3619 { 3620 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3621 struct intel_guc *guc = ce_to_guc(ce); 3622 int ret; 3623 3624 GEM_BUG_ON(!intel_context_is_parent(ce)); 3625 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3626 3627 ret = pin_guc_id(guc, ce); 3628 if (unlikely(ret < 0)) 3629 return ret; 3630 3631 return __guc_context_pin(ce, engine, vaddr); 3632 } 3633 3634 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3635 { 3636 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3637 3638 GEM_BUG_ON(!intel_context_is_child(ce)); 3639 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3640 3641 __intel_context_pin(ce->parallel.parent); 3642 return __guc_context_pin(ce, engine, vaddr); 3643 } 3644 3645 static void guc_parent_context_unpin(struct intel_context *ce) 3646 { 3647 struct intel_guc *guc = ce_to_guc(ce); 3648 3649 GEM_BUG_ON(context_enabled(ce)); 3650 GEM_BUG_ON(intel_context_is_barrier(ce)); 3651 GEM_BUG_ON(!intel_context_is_parent(ce)); 3652 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3653 3654 unpin_guc_id(guc, ce); 3655 lrc_unpin(ce); 3656 } 3657 3658 static void guc_child_context_unpin(struct intel_context *ce) 3659 { 3660 GEM_BUG_ON(context_enabled(ce)); 3661 GEM_BUG_ON(intel_context_is_barrier(ce)); 3662 GEM_BUG_ON(!intel_context_is_child(ce)); 3663 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3664 3665 lrc_unpin(ce); 3666 } 3667 3668 static void guc_child_context_post_unpin(struct intel_context *ce) 3669 { 3670 GEM_BUG_ON(!intel_context_is_child(ce)); 3671 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3672 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3673 3674 lrc_post_unpin(ce); 3675 intel_context_unpin(ce->parallel.parent); 3676 } 3677 3678 static void guc_child_context_destroy(struct kref *kref) 3679 { 3680 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3681 3682 __guc_context_destroy(ce); 3683 } 3684 3685 static const struct intel_context_ops virtual_parent_context_ops = { 3686 .alloc = guc_virtual_context_alloc, 3687 3688 .pre_pin = guc_context_pre_pin, 3689 .pin = guc_parent_context_pin, 3690 .unpin = guc_parent_context_unpin, 3691 .post_unpin = guc_context_post_unpin, 3692 3693 .revoke = guc_context_revoke, 3694 3695 .cancel_request = guc_context_cancel_request, 3696 3697 .enter = guc_virtual_context_enter, 3698 
.exit = guc_virtual_context_exit, 3699 3700 .sched_disable = guc_context_sched_disable, 3701 3702 .destroy = guc_context_destroy, 3703 3704 .get_sibling = guc_virtual_get_sibling, 3705 }; 3706 3707 static const struct intel_context_ops virtual_child_context_ops = { 3708 .alloc = guc_virtual_context_alloc, 3709 3710 .pre_pin = guc_context_pre_pin, 3711 .pin = guc_child_context_pin, 3712 .unpin = guc_child_context_unpin, 3713 .post_unpin = guc_child_context_post_unpin, 3714 3715 .cancel_request = guc_context_cancel_request, 3716 3717 .enter = guc_virtual_context_enter, 3718 .exit = guc_virtual_context_exit, 3719 3720 .destroy = guc_child_context_destroy, 3721 3722 .get_sibling = guc_virtual_get_sibling, 3723 }; 3724 3725 /* 3726 * The below override of the breadcrumbs is enabled when the user configures a 3727 * context for parallel submission (multi-lrc, parent-child). 3728 * 3729 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3730 * safely preempt all the hw contexts configured for parallel submission 3731 * between each BB. The contract between the i915 and GuC is if the parent 3732 * context can be preempted, all the children can be preempted, and the GuC will 3733 * always try to preempt the parent before the children. A handshake between the 3734 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3735 * creating a window to preempt between each set of BBs. 3736 */ 3737 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3738 u64 offset, u32 len, 3739 const unsigned int flags); 3740 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3741 u64 offset, u32 len, 3742 const unsigned int flags); 3743 static u32 * 3744 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3745 u32 *cs); 3746 static u32 * 3747 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3748 u32 *cs); 3749 3750 static struct intel_context * 3751 guc_create_parallel(struct intel_engine_cs **engines, 3752 unsigned int num_siblings, 3753 unsigned int width) 3754 { 3755 struct intel_engine_cs **siblings = NULL; 3756 struct intel_context *parent = NULL, *ce, *err; 3757 int i, j; 3758 3759 siblings = kmalloc_array(num_siblings, 3760 sizeof(*siblings), 3761 GFP_KERNEL); 3762 if (!siblings) 3763 return ERR_PTR(-ENOMEM); 3764 3765 for (i = 0; i < width; ++i) { 3766 for (j = 0; j < num_siblings; ++j) 3767 siblings[j] = engines[i * num_siblings + j]; 3768 3769 ce = intel_engine_create_virtual(siblings, num_siblings, 3770 FORCE_VIRTUAL); 3771 if (IS_ERR(ce)) { 3772 err = ERR_CAST(ce); 3773 goto unwind; 3774 } 3775 3776 if (i == 0) { 3777 parent = ce; 3778 parent->ops = &virtual_parent_context_ops; 3779 } else { 3780 ce->ops = &virtual_child_context_ops; 3781 intel_context_bind_parent_child(parent, ce); 3782 } 3783 } 3784 3785 parent->parallel.fence_context = dma_fence_context_alloc(1); 3786 3787 parent->engine->emit_bb_start = 3788 emit_bb_start_parent_no_preempt_mid_batch; 3789 parent->engine->emit_fini_breadcrumb = 3790 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3791 parent->engine->emit_fini_breadcrumb_dw = 3792 12 + 4 * parent->parallel.number_children; 3793 for_each_child(parent, ce) { 3794 ce->engine->emit_bb_start = 3795 emit_bb_start_child_no_preempt_mid_batch; 3796 ce->engine->emit_fini_breadcrumb = 3797 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3798 ce->engine->emit_fini_breadcrumb_dw = 16; 3799 } 3800 3801 kfree(siblings); 3802 return parent; 3803 3804 unwind: 
3805 if (parent) 3806 intel_context_put(parent); 3807 kfree(siblings); 3808 return err; 3809 } 3810 3811 static bool 3812 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3813 { 3814 struct intel_engine_cs *sibling; 3815 intel_engine_mask_t tmp, mask = b->engine_mask; 3816 bool result = false; 3817 3818 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3819 result |= intel_engine_irq_enable(sibling); 3820 3821 return result; 3822 } 3823 3824 static void 3825 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3826 { 3827 struct intel_engine_cs *sibling; 3828 intel_engine_mask_t tmp, mask = b->engine_mask; 3829 3830 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3831 intel_engine_irq_disable(sibling); 3832 } 3833 3834 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3835 { 3836 int i; 3837 3838 /* 3839 * In GuC submission mode we do not know which physical engine a request 3840 * will be scheduled on, this creates a problem because the breadcrumb 3841 * interrupt is per physical engine. To work around this we attach 3842 * requests and direct all breadcrumb interrupts to the first instance 3843 * of an engine per class. In addition all breadcrumb interrupts are 3844 * enabled / disabled across an engine class in unison. 3845 */ 3846 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3847 struct intel_engine_cs *sibling = 3848 engine->gt->engine_class[engine->class][i]; 3849 3850 if (sibling) { 3851 if (engine->breadcrumbs != sibling->breadcrumbs) { 3852 intel_breadcrumbs_put(engine->breadcrumbs); 3853 engine->breadcrumbs = 3854 intel_breadcrumbs_get(sibling->breadcrumbs); 3855 } 3856 break; 3857 } 3858 } 3859 3860 if (engine->breadcrumbs) { 3861 engine->breadcrumbs->engine_mask |= engine->mask; 3862 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3863 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3864 } 3865 } 3866 3867 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3868 int prio) 3869 { 3870 struct intel_context *ce = request_to_scheduling_context(rq); 3871 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3872 3873 /* Short circuit function */ 3874 if (prio < I915_PRIORITY_NORMAL || 3875 rq->guc_prio == GUC_PRIO_FINI || 3876 (rq->guc_prio != GUC_PRIO_INIT && 3877 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3878 return; 3879 3880 spin_lock(&ce->guc_state.lock); 3881 if (rq->guc_prio != GUC_PRIO_FINI) { 3882 if (rq->guc_prio != GUC_PRIO_INIT) 3883 sub_context_inflight_prio(ce, rq->guc_prio); 3884 rq->guc_prio = new_guc_prio; 3885 add_context_inflight_prio(ce, rq->guc_prio); 3886 update_context_prio(ce); 3887 } 3888 spin_unlock(&ce->guc_state.lock); 3889 } 3890 3891 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3892 { 3893 struct intel_context *ce = request_to_scheduling_context(rq); 3894 3895 spin_lock(&ce->guc_state.lock); 3896 guc_prio_fini(rq, ce); 3897 spin_unlock(&ce->guc_state.lock); 3898 } 3899 3900 static void sanitize_hwsp(struct intel_engine_cs *engine) 3901 { 3902 struct intel_timeline *tl; 3903 3904 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3905 intel_timeline_reset_seqno(tl); 3906 } 3907 3908 static void guc_sanitize(struct intel_engine_cs *engine) 3909 { 3910 /* 3911 * Poison residual state on resume, in case the suspend didn't! 3912 * 3913 * We have to assume that across suspend/resume (or other loss 3914 * of control) that the contents of our pinned buffers has been 3915 * lost, replaced by garbage. 
Since this doesn't always happen, 3916 * let's poison such state so that we more quickly spot when 3917 * we falsely assume it has been preserved. 3918 */ 3919 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3920 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3921 3922 /* 3923 * The kernel_context HWSP is stored in the status_page. As above, 3924 * that may be lost on resume/initialisation, and so we need to 3925 * reset the value in the HWSP. 3926 */ 3927 sanitize_hwsp(engine); 3928 3929 /* And scrub the dirty cachelines for the HWSP */ 3930 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 3931 3932 intel_engine_reset_pinned_contexts(engine); 3933 } 3934 3935 static void setup_hwsp(struct intel_engine_cs *engine) 3936 { 3937 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 3938 3939 ENGINE_WRITE_FW(engine, 3940 RING_HWS_PGA, 3941 i915_ggtt_offset(engine->status_page.vma)); 3942 } 3943 3944 static void start_engine(struct intel_engine_cs *engine) 3945 { 3946 ENGINE_WRITE_FW(engine, 3947 RING_MODE_GEN7, 3948 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 3949 3950 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 3951 ENGINE_POSTING_READ(engine, RING_MI_MODE); 3952 } 3953 3954 static int guc_resume(struct intel_engine_cs *engine) 3955 { 3956 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 3957 3958 intel_mocs_init_engine(engine); 3959 3960 intel_breadcrumbs_reset(engine->breadcrumbs); 3961 3962 setup_hwsp(engine); 3963 start_engine(engine); 3964 3965 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 3966 xehp_enable_ccs_engines(engine); 3967 3968 return 0; 3969 } 3970 3971 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 3972 { 3973 return !sched_engine->tasklet.callback; 3974 } 3975 3976 static void guc_set_default_submission(struct intel_engine_cs *engine) 3977 { 3978 engine->submit_request = guc_submit_request; 3979 } 3980 3981 static inline void guc_kernel_context_pin(struct intel_guc *guc, 3982 struct intel_context *ce) 3983 { 3984 /* 3985 * Note: we purposefully do not check the returns below because 3986 * the registration can only fail if a reset is just starting. 3987 * This is called at the end of reset so presumably another reset 3988 * isn't happening and, even if it did, this code would be run again. 3989 */ 3990 3991 if (context_guc_id_invalid(ce)) 3992 pin_guc_id(guc, ce); 3993 3994 try_context_registration(ce, true); 3995 } 3996 3997 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 3998 { 3999 struct intel_gt *gt = guc_to_gt(guc); 4000 struct intel_engine_cs *engine; 4001 enum intel_engine_id id; 4002 4003 /* make sure all descriptors are clean... */ 4004 xa_destroy(&guc->context_lookup); 4005 4006 /* 4007 * Some contexts might have been pinned before we enabled GuC 4008 * submission, so we need to add them to the GuC bookkeeping. 4009 * Also, after a reset of the GuC we want to make sure that the 4010 * information shared with GuC is properly reset. The kernel LRCs are 4011 * not attached to the gem_context, so they need to be added separately.
4012 */ 4013 for_each_engine(engine, gt, id) { 4014 struct intel_context *ce; 4015 4016 list_for_each_entry(ce, &engine->pinned_contexts_list, 4017 pinned_contexts_link) 4018 guc_kernel_context_pin(guc, ce); 4019 } 4020 } 4021 4022 static void guc_release(struct intel_engine_cs *engine) 4023 { 4024 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4025 4026 intel_engine_cleanup_common(engine); 4027 lrc_fini_wa_ctx(engine); 4028 } 4029 4030 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4031 { 4032 struct intel_engine_cs *e; 4033 intel_engine_mask_t tmp, mask = engine->mask; 4034 4035 for_each_engine_masked(e, engine->gt, mask, tmp) 4036 e->serial++; 4037 } 4038 4039 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4040 { 4041 /* Default vfuncs which can be overridden by each engine. */ 4042 4043 engine->resume = guc_resume; 4044 4045 engine->cops = &guc_context_ops; 4046 engine->request_alloc = guc_request_alloc; 4047 engine->add_active_request = add_to_context; 4048 engine->remove_active_request = remove_from_context; 4049 4050 engine->sched_engine->schedule = i915_schedule; 4051 4052 engine->reset.prepare = guc_engine_reset_prepare; 4053 engine->reset.rewind = guc_rewind_nop; 4054 engine->reset.cancel = guc_reset_nop; 4055 engine->reset.finish = guc_reset_nop; 4056 4057 engine->emit_flush = gen8_emit_flush_xcs; 4058 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4059 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4060 if (GRAPHICS_VER(engine->i915) >= 12) { 4061 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4062 engine->emit_flush = gen12_emit_flush_xcs; 4063 } 4064 engine->set_default_submission = guc_set_default_submission; 4065 engine->busyness = guc_engine_busyness; 4066 4067 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4068 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4069 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4070 4071 /* Wa_14014475959:dg2 */ 4072 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 4073 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4074 4075 /* 4076 * TODO: GuC supports timeslicing and semaphores as well, but they're 4077 * handled by the firmware so some minor tweaks are required before 4078 * enabling. 
4079 * 4080 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4081 */ 4082 4083 engine->emit_bb_start = gen8_emit_bb_start; 4084 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4085 engine->emit_bb_start = gen125_emit_bb_start; 4086 } 4087 4088 static void rcs_submission_override(struct intel_engine_cs *engine) 4089 { 4090 switch (GRAPHICS_VER(engine->i915)) { 4091 case 12: 4092 engine->emit_flush = gen12_emit_flush_rcs; 4093 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4094 break; 4095 case 11: 4096 engine->emit_flush = gen11_emit_flush_rcs; 4097 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4098 break; 4099 default: 4100 engine->emit_flush = gen8_emit_flush_rcs; 4101 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4102 break; 4103 } 4104 } 4105 4106 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4107 { 4108 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4109 intel_engine_set_irq_handler(engine, cs_irq_handler); 4110 } 4111 4112 static void guc_sched_engine_destroy(struct kref *kref) 4113 { 4114 struct i915_sched_engine *sched_engine = 4115 container_of(kref, typeof(*sched_engine), ref); 4116 struct intel_guc *guc = sched_engine->private_data; 4117 4118 guc->sched_engine = NULL; 4119 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4120 kfree(sched_engine); 4121 } 4122 4123 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4124 { 4125 struct drm_i915_private *i915 = engine->i915; 4126 struct intel_guc *guc = &engine->gt->uc.guc; 4127 4128 /* 4129 * The setup relies on several assumptions (e.g. irqs always enabled) 4130 * that are only valid on gen11+ 4131 */ 4132 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4133 4134 if (!guc->sched_engine) { 4135 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4136 if (!guc->sched_engine) 4137 return -ENOMEM; 4138 4139 guc->sched_engine->schedule = i915_schedule; 4140 guc->sched_engine->disabled = guc_sched_engine_disabled; 4141 guc->sched_engine->private_data = guc; 4142 guc->sched_engine->destroy = guc_sched_engine_destroy; 4143 guc->sched_engine->bump_inflight_request_prio = 4144 guc_bump_inflight_request_prio; 4145 guc->sched_engine->retire_inflight_request_prio = 4146 guc_retire_inflight_request_prio; 4147 tasklet_setup(&guc->sched_engine->tasklet, 4148 guc_submission_tasklet); 4149 } 4150 i915_sched_engine_put(engine->sched_engine); 4151 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4152 4153 guc_default_vfuncs(engine); 4154 guc_default_irqs(engine); 4155 guc_init_breadcrumbs(engine); 4156 4157 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4158 rcs_submission_override(engine); 4159 4160 lrc_init_wa_ctx(engine); 4161 4162 /* Finally, take ownership and responsibility for cleanup! 
*/ 4163 engine->sanitize = guc_sanitize; 4164 engine->release = guc_release; 4165 4166 return 0; 4167 } 4168 4169 void intel_guc_submission_enable(struct intel_guc *guc) 4170 { 4171 struct intel_gt *gt = guc_to_gt(guc); 4172 4173 /* Enable and route to GuC */ 4174 if (GRAPHICS_VER(gt->i915) >= 12) 4175 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 4176 GUC_SEM_INTR_ROUTE_TO_GUC | 4177 GUC_SEM_INTR_ENABLE_ALL); 4178 4179 guc_init_lrc_mapping(guc); 4180 guc_init_engine_stats(guc); 4181 } 4182 4183 void intel_guc_submission_disable(struct intel_guc *guc) 4184 { 4185 struct intel_gt *gt = guc_to_gt(guc); 4186 4187 /* Note: By the time we're here, GuC may have already been reset */ 4188 4189 /* Disable and route to host */ 4190 if (GRAPHICS_VER(gt->i915) >= 12) 4191 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0); 4192 } 4193 4194 static bool __guc_submission_supported(struct intel_guc *guc) 4195 { 4196 /* GuC submission is unavailable for pre-Gen11 */ 4197 return intel_guc_is_supported(guc) && 4198 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4199 } 4200 4201 static bool __guc_submission_selected(struct intel_guc *guc) 4202 { 4203 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4204 4205 if (!intel_guc_submission_is_supported(guc)) 4206 return false; 4207 4208 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4209 } 4210 4211 void intel_guc_submission_init_early(struct intel_guc *guc) 4212 { 4213 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4214 4215 spin_lock_init(&guc->submission_state.lock); 4216 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4217 ida_init(&guc->submission_state.guc_ids); 4218 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4219 INIT_WORK(&guc->submission_state.destroyed_worker, 4220 destroyed_worker_func); 4221 INIT_WORK(&guc->submission_state.reset_fail_worker, 4222 reset_fail_worker_func); 4223 4224 spin_lock_init(&guc->timestamp.lock); 4225 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4226 4227 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4228 guc->submission_supported = __guc_submission_supported(guc); 4229 guc->submission_selected = __guc_submission_selected(guc); 4230 } 4231 4232 static inline struct intel_context * 4233 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4234 { 4235 struct intel_context *ce; 4236 4237 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4238 drm_err(&guc_to_gt(guc)->i915->drm, 4239 "Invalid ctx_id %u\n", ctx_id); 4240 return NULL; 4241 } 4242 4243 ce = __get_context(guc, ctx_id); 4244 if (unlikely(!ce)) { 4245 drm_err(&guc_to_gt(guc)->i915->drm, 4246 "Context is NULL, ctx_id %u\n", ctx_id); 4247 return NULL; 4248 } 4249 4250 if (unlikely(intel_context_is_child(ce))) { 4251 drm_err(&guc_to_gt(guc)->i915->drm, 4252 "Context is child, ctx_id %u\n", ctx_id); 4253 return NULL; 4254 } 4255 4256 return ce; 4257 } 4258 4259 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4260 const u32 *msg, 4261 u32 len) 4262 { 4263 struct intel_context *ce; 4264 u32 ctx_id; 4265 4266 if (unlikely(len < 1)) { 4267 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4268 return -EPROTO; 4269 } 4270 ctx_id = msg[0]; 4271 4272 ce = g2h_context_lookup(guc, ctx_id); 4273 if (unlikely(!ce)) 4274 return -EPROTO; 4275 4276 trace_intel_context_deregister_done(ce); 4277 4278 #ifdef CONFIG_DRM_I915_SELFTEST 4279 if (unlikely(ce->drop_deregister)) { 4280 ce->drop_deregister = false; 4281 return 0; 4282 } 4283 #endif 4284 4285 if 
(context_wait_for_deregister_to_register(ce)) { 4286 struct intel_runtime_pm *runtime_pm = 4287 &ce->engine->gt->i915->runtime_pm; 4288 intel_wakeref_t wakeref; 4289 4290 /* 4291 * Previous owner of this guc_id has been deregistered, now safe 4292 * to register this context. 4293 */ 4294 with_intel_runtime_pm(runtime_pm, wakeref) 4295 register_context(ce, true); 4296 guc_signal_context_fence(ce); 4297 intel_context_put(ce); 4298 } else if (context_destroyed(ce)) { 4299 /* Context has been destroyed */ 4300 intel_gt_pm_put_async(guc_to_gt(guc)); 4301 release_guc_id(guc, ce); 4302 __guc_context_destroy(ce); 4303 } 4304 4305 decr_outstanding_submission_g2h(guc); 4306 4307 return 0; 4308 } 4309 4310 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4311 const u32 *msg, 4312 u32 len) 4313 { 4314 struct intel_context *ce; 4315 unsigned long flags; 4316 u32 ctx_id; 4317 4318 if (unlikely(len < 2)) { 4319 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4320 return -EPROTO; 4321 } 4322 ctx_id = msg[0]; 4323 4324 ce = g2h_context_lookup(guc, ctx_id); 4325 if (unlikely(!ce)) 4326 return -EPROTO; 4327 4328 if (unlikely(context_destroyed(ce) || 4329 (!context_pending_enable(ce) && 4330 !context_pending_disable(ce)))) { 4331 drm_err(&guc_to_gt(guc)->i915->drm, 4332 "Bad context sched_state 0x%x, ctx_id %u\n", 4333 ce->guc_state.sched_state, ctx_id); 4334 return -EPROTO; 4335 } 4336 4337 trace_intel_context_sched_done(ce); 4338 4339 if (context_pending_enable(ce)) { 4340 #ifdef CONFIG_DRM_I915_SELFTEST 4341 if (unlikely(ce->drop_schedule_enable)) { 4342 ce->drop_schedule_enable = false; 4343 return 0; 4344 } 4345 #endif 4346 4347 spin_lock_irqsave(&ce->guc_state.lock, flags); 4348 clr_context_pending_enable(ce); 4349 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4350 } else if (context_pending_disable(ce)) { 4351 bool banned; 4352 4353 #ifdef CONFIG_DRM_I915_SELFTEST 4354 if (unlikely(ce->drop_schedule_disable)) { 4355 ce->drop_schedule_disable = false; 4356 return 0; 4357 } 4358 #endif 4359 4360 /* 4361 * Unpin must be done before __guc_signal_context_fence, 4362 * otherwise a race exists between the requests getting 4363 * submitted + retired before this unpin completes, resulting in 4364 * the pin_count going to zero and the context still being 4365 * enabled.
4366 */ 4367 intel_context_sched_disable_unpin(ce); 4368 4369 spin_lock_irqsave(&ce->guc_state.lock, flags); 4370 banned = context_banned(ce); 4371 clr_context_banned(ce); 4372 clr_context_pending_disable(ce); 4373 __guc_signal_context_fence(ce); 4374 guc_blocked_fence_complete(ce); 4375 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4376 4377 if (banned) { 4378 guc_cancel_context_requests(ce); 4379 intel_engine_signal_breadcrumbs(ce->engine); 4380 } 4381 } 4382 4383 decr_outstanding_submission_g2h(guc); 4384 intel_context_put(ce); 4385 4386 return 0; 4387 } 4388 4389 static void capture_error_state(struct intel_guc *guc, 4390 struct intel_context *ce) 4391 { 4392 struct intel_gt *gt = guc_to_gt(guc); 4393 struct drm_i915_private *i915 = gt->i915; 4394 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4395 intel_wakeref_t wakeref; 4396 4397 intel_engine_set_hung_context(engine, ce); 4398 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4399 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4400 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4401 } 4402 4403 static void guc_context_replay(struct intel_context *ce) 4404 { 4405 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4406 4407 __guc_reset_context(ce, ce->engine->mask); 4408 tasklet_hi_schedule(&sched_engine->tasklet); 4409 } 4410 4411 static void guc_handle_context_reset(struct intel_guc *guc, 4412 struct intel_context *ce) 4413 { 4414 trace_intel_context_reset(ce); 4415 4416 if (likely(!intel_context_is_banned(ce))) { 4417 capture_error_state(guc, ce); 4418 guc_context_replay(ce); 4419 } else { 4420 drm_info(&guc_to_gt(guc)->i915->drm, 4421 "Ignoring context reset notification of banned context 0x%04X on %s", 4422 ce->guc_id.id, ce->engine->name); 4423 } 4424 } 4425 4426 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4427 const u32 *msg, u32 len) 4428 { 4429 struct intel_context *ce; 4430 unsigned long flags; 4431 int ctx_id; 4432 4433 if (unlikely(len != 1)) { 4434 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4435 return -EPROTO; 4436 } 4437 4438 ctx_id = msg[0]; 4439 4440 /* 4441 * The context lookup uses the xarray but lookups only require an RCU lock 4442 * not the full spinlock. So take the lock explicitly and keep it until the 4443 * context has been reference count locked to ensure it can't be destroyed 4444 * asynchronously until the reset is done. 
4445 */ 4446 xa_lock_irqsave(&guc->context_lookup, flags); 4447 ce = g2h_context_lookup(guc, ctx_id); 4448 if (ce) 4449 intel_context_get(ce); 4450 xa_unlock_irqrestore(&guc->context_lookup, flags); 4451 4452 if (unlikely(!ce)) 4453 return -EPROTO; 4454 4455 guc_handle_context_reset(guc, ce); 4456 intel_context_put(ce); 4457 4458 return 0; 4459 } 4460 4461 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4462 const u32 *msg, u32 len) 4463 { 4464 u32 status; 4465 4466 if (unlikely(len != 1)) { 4467 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4468 return -EPROTO; 4469 } 4470 4471 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4472 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4473 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4474 4475 intel_guc_capture_process(guc); 4476 4477 return 0; 4478 } 4479 4480 struct intel_engine_cs * 4481 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4482 { 4483 struct intel_gt *gt = guc_to_gt(guc); 4484 u8 engine_class = guc_class_to_engine_class(guc_class); 4485 4486 /* Class index is checked in class converter */ 4487 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4488 4489 return gt->engine_class[engine_class][instance]; 4490 } 4491 4492 static void reset_fail_worker_func(struct work_struct *w) 4493 { 4494 struct intel_guc *guc = container_of(w, struct intel_guc, 4495 submission_state.reset_fail_worker); 4496 struct intel_gt *gt = guc_to_gt(guc); 4497 intel_engine_mask_t reset_fail_mask; 4498 unsigned long flags; 4499 4500 spin_lock_irqsave(&guc->submission_state.lock, flags); 4501 reset_fail_mask = guc->submission_state.reset_fail_mask; 4502 guc->submission_state.reset_fail_mask = 0; 4503 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4504 4505 if (likely(reset_fail_mask)) 4506 intel_gt_handle_error(gt, reset_fail_mask, 4507 I915_ERROR_CAPTURE, 4508 "GuC failed to reset engine mask=0x%x\n", 4509 reset_fail_mask); 4510 } 4511 4512 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4513 const u32 *msg, u32 len) 4514 { 4515 struct intel_engine_cs *engine; 4516 struct intel_gt *gt = guc_to_gt(guc); 4517 u8 guc_class, instance; 4518 u32 reason; 4519 unsigned long flags; 4520 4521 if (unlikely(len != 3)) { 4522 drm_err(>->i915->drm, "Invalid length %u", len); 4523 return -EPROTO; 4524 } 4525 4526 guc_class = msg[0]; 4527 instance = msg[1]; 4528 reason = msg[2]; 4529 4530 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4531 if (unlikely(!engine)) { 4532 drm_err(>->i915->drm, 4533 "Invalid engine %d:%d", guc_class, instance); 4534 return -EPROTO; 4535 } 4536 4537 /* 4538 * This is an unexpected failure of a hardware feature. So, log a real 4539 * error message not just the informational that comes with the reset. 4540 */ 4541 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4542 guc_class, instance, engine->name, reason); 4543 4544 spin_lock_irqsave(&guc->submission_state.lock, flags); 4545 guc->submission_state.reset_fail_mask |= engine->mask; 4546 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4547 4548 /* 4549 * A GT reset flushes this worker queue (G2H handler) so we must use 4550 * another worker to trigger a GT reset. 
4551 */ 4552 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4553 4554 return 0; 4555 } 4556 4557 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4558 { 4559 struct intel_guc *guc = &engine->gt->uc.guc; 4560 struct intel_context *ce; 4561 struct i915_request *rq; 4562 unsigned long index; 4563 unsigned long flags; 4564 4565 /* Reset called during driver load? GuC not yet initialised! */ 4566 if (unlikely(!guc_submission_initialized(guc))) 4567 return; 4568 4569 xa_lock_irqsave(&guc->context_lookup, flags); 4570 xa_for_each(&guc->context_lookup, index, ce) { 4571 if (!kref_get_unless_zero(&ce->ref)) 4572 continue; 4573 4574 xa_unlock(&guc->context_lookup); 4575 4576 if (!intel_context_is_pinned(ce)) 4577 goto next; 4578 4579 if (intel_engine_is_virtual(ce->engine)) { 4580 if (!(ce->engine->mask & engine->mask)) 4581 goto next; 4582 } else { 4583 if (ce->engine != engine) 4584 goto next; 4585 } 4586 4587 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4588 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4589 continue; 4590 4591 intel_engine_set_hung_context(engine, ce); 4592 4593 /* Can only cope with one hang at a time... */ 4594 intel_context_put(ce); 4595 xa_lock(&guc->context_lookup); 4596 goto done; 4597 } 4598 next: 4599 intel_context_put(ce); 4600 xa_lock(&guc->context_lookup); 4601 } 4602 done: 4603 xa_unlock_irqrestore(&guc->context_lookup, flags); 4604 } 4605 4606 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4607 struct i915_request *hung_rq, 4608 struct drm_printer *m) 4609 { 4610 struct intel_guc *guc = &engine->gt->uc.guc; 4611 struct intel_context *ce; 4612 unsigned long index; 4613 unsigned long flags; 4614 4615 /* Reset called during driver load? GuC not yet initialised! 
*/ 4616 if (unlikely(!guc_submission_initialized(guc))) 4617 return; 4618 4619 xa_lock_irqsave(&guc->context_lookup, flags); 4620 xa_for_each(&guc->context_lookup, index, ce) { 4621 if (!kref_get_unless_zero(&ce->ref)) 4622 continue; 4623 4624 xa_unlock(&guc->context_lookup); 4625 4626 if (!intel_context_is_pinned(ce)) 4627 goto next; 4628 4629 if (intel_engine_is_virtual(ce->engine)) { 4630 if (!(ce->engine->mask & engine->mask)) 4631 goto next; 4632 } else { 4633 if (ce->engine != engine) 4634 goto next; 4635 } 4636 4637 spin_lock(&ce->guc_state.lock); 4638 intel_engine_dump_active_requests(&ce->guc_state.requests, 4639 hung_rq, m); 4640 spin_unlock(&ce->guc_state.lock); 4641 4642 next: 4643 intel_context_put(ce); 4644 xa_lock(&guc->context_lookup); 4645 } 4646 xa_unlock_irqrestore(&guc->context_lookup, flags); 4647 } 4648 4649 void intel_guc_submission_print_info(struct intel_guc *guc, 4650 struct drm_printer *p) 4651 { 4652 struct i915_sched_engine *sched_engine = guc->sched_engine; 4653 struct rb_node *rb; 4654 unsigned long flags; 4655 4656 if (!sched_engine) 4657 return; 4658 4659 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4660 atomic_read(&guc->outstanding_submission_g2h)); 4661 drm_printf(p, "GuC tasklet count: %u\n\n", 4662 atomic_read(&sched_engine->tasklet.count)); 4663 4664 spin_lock_irqsave(&sched_engine->lock, flags); 4665 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4666 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4667 struct i915_priolist *pl = to_priolist(rb); 4668 struct i915_request *rq; 4669 4670 priolist_for_each_request(rq, pl) 4671 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4672 rq->context->guc_id.id, 4673 rq->fence.seqno); 4674 } 4675 spin_unlock_irqrestore(&sched_engine->lock, flags); 4676 drm_printf(p, "\n"); 4677 } 4678 4679 static inline void guc_log_context_priority(struct drm_printer *p, 4680 struct intel_context *ce) 4681 { 4682 int i; 4683 4684 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4685 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4686 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4687 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4688 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4689 i, ce->guc_state.prio_count[i]); 4690 } 4691 drm_printf(p, "\n"); 4692 } 4693 4694 static inline void guc_log_context(struct drm_printer *p, 4695 struct intel_context *ce) 4696 { 4697 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4698 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4699 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4700 ce->ring->head, 4701 ce->lrc_reg_state[CTX_RING_HEAD]); 4702 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4703 ce->ring->tail, 4704 ce->lrc_reg_state[CTX_RING_TAIL]); 4705 drm_printf(p, "\t\tContext Pin Count: %u\n", 4706 atomic_read(&ce->pin_count)); 4707 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4708 atomic_read(&ce->guc_id.ref)); 4709 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4710 ce->guc_state.sched_state); 4711 } 4712 4713 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4714 struct drm_printer *p) 4715 { 4716 struct intel_context *ce; 4717 unsigned long index; 4718 unsigned long flags; 4719 4720 xa_lock_irqsave(&guc->context_lookup, flags); 4721 xa_for_each(&guc->context_lookup, index, ce) { 4722 GEM_BUG_ON(intel_context_is_child(ce)); 4723 4724 guc_log_context(p, ce); 4725 guc_log_context_priority(p, ce); 4726 4727 if (intel_context_is_parent(ce)) { 4728 struct 
intel_context *child; 4729 4730 drm_printf(p, "\t\tNumber children: %u\n", 4731 ce->parallel.number_children); 4732 4733 if (ce->parallel.guc.wq_status) { 4734 drm_printf(p, "\t\tWQI Head: %u\n", 4735 READ_ONCE(*ce->parallel.guc.wq_head)); 4736 drm_printf(p, "\t\tWQI Tail: %u\n", 4737 READ_ONCE(*ce->parallel.guc.wq_tail)); 4738 drm_printf(p, "\t\tWQI Status: %u\n\n", 4739 READ_ONCE(*ce->parallel.guc.wq_status)); 4740 } 4741 4742 if (ce->engine->emit_bb_start == 4743 emit_bb_start_parent_no_preempt_mid_batch) { 4744 u8 i; 4745 4746 drm_printf(p, "\t\tChildren Go: %u\n\n", 4747 get_children_go_value(ce)); 4748 for (i = 0; i < ce->parallel.number_children; ++i) 4749 drm_printf(p, "\t\tChildren Join: %u\n", 4750 get_children_join_value(ce, i)); 4751 } 4752 4753 for_each_child(ce, child) 4754 guc_log_context(p, child); 4755 } 4756 } 4757 xa_unlock_irqrestore(&guc->context_lookup, flags); 4758 } 4759 4760 static inline u32 get_children_go_addr(struct intel_context *ce) 4761 { 4762 GEM_BUG_ON(!intel_context_is_parent(ce)); 4763 4764 return i915_ggtt_offset(ce->state) + 4765 __get_parent_scratch_offset(ce) + 4766 offsetof(struct parent_scratch, go.semaphore); 4767 } 4768 4769 static inline u32 get_children_join_addr(struct intel_context *ce, 4770 u8 child_index) 4771 { 4772 GEM_BUG_ON(!intel_context_is_parent(ce)); 4773 4774 return i915_ggtt_offset(ce->state) + 4775 __get_parent_scratch_offset(ce) + 4776 offsetof(struct parent_scratch, join[child_index].semaphore); 4777 } 4778 4779 #define PARENT_GO_BB 1 4780 #define PARENT_GO_FINI_BREADCRUMB 0 4781 #define CHILD_GO_BB 1 4782 #define CHILD_GO_FINI_BREADCRUMB 0 4783 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4784 u64 offset, u32 len, 4785 const unsigned int flags) 4786 { 4787 struct intel_context *ce = rq->context; 4788 u32 *cs; 4789 u8 i; 4790 4791 GEM_BUG_ON(!intel_context_is_parent(ce)); 4792 4793 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4794 if (IS_ERR(cs)) 4795 return PTR_ERR(cs); 4796 4797 /* Wait on children */ 4798 for (i = 0; i < ce->parallel.number_children; ++i) { 4799 *cs++ = (MI_SEMAPHORE_WAIT | 4800 MI_SEMAPHORE_GLOBAL_GTT | 4801 MI_SEMAPHORE_POLL | 4802 MI_SEMAPHORE_SAD_EQ_SDD); 4803 *cs++ = PARENT_GO_BB; 4804 *cs++ = get_children_join_addr(ce, i); 4805 *cs++ = 0; 4806 } 4807 4808 /* Turn off preemption */ 4809 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4810 *cs++ = MI_NOOP; 4811 4812 /* Tell children go */ 4813 cs = gen8_emit_ggtt_write(cs, 4814 CHILD_GO_BB, 4815 get_children_go_addr(ce), 4816 0); 4817 4818 /* Jump to batch */ 4819 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4820 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 4821 *cs++ = lower_32_bits(offset); 4822 *cs++ = upper_32_bits(offset); 4823 *cs++ = MI_NOOP; 4824 4825 intel_ring_advance(rq, cs); 4826 4827 return 0; 4828 } 4829 4830 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4831 u64 offset, u32 len, 4832 const unsigned int flags) 4833 { 4834 struct intel_context *ce = rq->context; 4835 struct intel_context *parent = intel_context_to_parent(ce); 4836 u32 *cs; 4837 4838 GEM_BUG_ON(!intel_context_is_child(ce)); 4839 4840 cs = intel_ring_begin(rq, 12); 4841 if (IS_ERR(cs)) 4842 return PTR_ERR(cs); 4843 4844 /* Signal parent */ 4845 cs = gen8_emit_ggtt_write(cs, 4846 PARENT_GO_BB, 4847 get_children_join_addr(parent, 4848 ce->parallel.child_index), 4849 0); 4850 4851 /* Wait on parent for go */ 4852 *cs++ = (MI_SEMAPHORE_WAIT | 4853 MI_SEMAPHORE_GLOBAL_GTT | 4854 MI_SEMAPHORE_POLL | 4855 MI_SEMAPHORE_SAD_EQ_SDD); 4856 *cs++ = CHILD_GO_BB; 4857 *cs++ = get_children_go_addr(parent); 4858 *cs++ = 0; 4859 4860 /* Turn off preemption */ 4861 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4862 4863 /* Jump to batch */ 4864 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4865 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4866 *cs++ = lower_32_bits(offset); 4867 *cs++ = upper_32_bits(offset); 4868 4869 intel_ring_advance(rq, cs); 4870 4871 return 0; 4872 } 4873 4874 static u32 * 4875 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4876 u32 *cs) 4877 { 4878 struct intel_context *ce = rq->context; 4879 u8 i; 4880 4881 GEM_BUG_ON(!intel_context_is_parent(ce)); 4882 4883 /* Wait on children */ 4884 for (i = 0; i < ce->parallel.number_children; ++i) { 4885 *cs++ = (MI_SEMAPHORE_WAIT | 4886 MI_SEMAPHORE_GLOBAL_GTT | 4887 MI_SEMAPHORE_POLL | 4888 MI_SEMAPHORE_SAD_EQ_SDD); 4889 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4890 *cs++ = get_children_join_addr(ce, i); 4891 *cs++ = 0; 4892 } 4893 4894 /* Turn on preemption */ 4895 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4896 *cs++ = MI_NOOP; 4897 4898 /* Tell children go */ 4899 cs = gen8_emit_ggtt_write(cs, 4900 CHILD_GO_FINI_BREADCRUMB, 4901 get_children_go_addr(ce), 4902 0); 4903 4904 return cs; 4905 } 4906 4907 /* 4908 * If this is true, a submission of multi-lrc requests had an error and the 4909 * requests need to be skipped. The front end (execbuf IOCTL) should've called 4910 * i915_request_skip which squashes the BB but we still need to emit the fini 4911 * breadcrumb seqno write. At this point we don't know how many of the 4912 * requests in the multi-lrc submission were generated so we can't do the 4913 * handshake between the parent and children (e.g. if 4 requests should be 4914 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 4915 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 4916 * has occurred on any of the requests in the submission / relationship.
4917 */ 4918 static inline bool skip_handshake(struct i915_request *rq) 4919 { 4920 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4921 } 4922 4923 #define NON_SKIP_LEN 6 4924 static u32 * 4925 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4926 u32 *cs) 4927 { 4928 struct intel_context *ce = rq->context; 4929 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4930 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4931 4932 GEM_BUG_ON(!intel_context_is_parent(ce)); 4933 4934 if (unlikely(skip_handshake(rq))) { 4935 /* 4936 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4937 * the NON_SKIP_LEN comes from the length of the emits below (see the worked example at the end of this file). 4938 */ 4939 memset(cs, 0, sizeof(u32) * 4940 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4941 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4942 } else { 4943 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 4944 } 4945 4946 /* Emit fini breadcrumb */ 4947 before_fini_breadcrumb_user_interrupt_cs = cs; 4948 cs = gen8_emit_ggtt_write(cs, 4949 rq->fence.seqno, 4950 i915_request_active_timeline(rq)->hwsp_offset, 4951 0); 4952 4953 /* User interrupt */ 4954 *cs++ = MI_USER_INTERRUPT; 4955 *cs++ = MI_NOOP; 4956 4957 /* Ensure our math for skip + emit is correct */ 4958 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4959 cs); 4960 GEM_BUG_ON(start_fini_breadcrumb_cs + 4961 ce->engine->emit_fini_breadcrumb_dw != cs); 4962 4963 rq->tail = intel_ring_offset(rq, cs); 4964 4965 return cs; 4966 } 4967 4968 static u32 * 4969 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4970 u32 *cs) 4971 { 4972 struct intel_context *ce = rq->context; 4973 struct intel_context *parent = intel_context_to_parent(ce); 4974 4975 GEM_BUG_ON(!intel_context_is_child(ce)); 4976 4977 /* Turn on preemption */ 4978 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4979 *cs++ = MI_NOOP; 4980 4981 /* Signal parent */ 4982 cs = gen8_emit_ggtt_write(cs, 4983 PARENT_GO_FINI_BREADCRUMB, 4984 get_children_join_addr(parent, 4985 ce->parallel.child_index), 4986 0); 4987 4988 /* Wait on parent for go */ 4989 *cs++ = (MI_SEMAPHORE_WAIT | 4990 MI_SEMAPHORE_GLOBAL_GTT | 4991 MI_SEMAPHORE_POLL | 4992 MI_SEMAPHORE_SAD_EQ_SDD); 4993 *cs++ = CHILD_GO_FINI_BREADCRUMB; 4994 *cs++ = get_children_go_addr(parent); 4995 *cs++ = 0; 4996 4997 return cs; 4998 } 4999 5000 static u32 * 5001 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5002 u32 *cs) 5003 { 5004 struct intel_context *ce = rq->context; 5005 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5006 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5007 5008 GEM_BUG_ON(!intel_context_is_child(ce)); 5009 5010 if (unlikely(skip_handshake(rq))) { 5011 /* 5012 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch; 5013 * the NON_SKIP_LEN comes from the length of the emits below.
5014 */ 5015 memset(cs, 0, sizeof(u32) * 5016 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5017 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5018 } else { 5019 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5020 } 5021 5022 /* Emit fini breadcrumb */ 5023 before_fini_breadcrumb_user_interrupt_cs = cs; 5024 cs = gen8_emit_ggtt_write(cs, 5025 rq->fence.seqno, 5026 i915_request_active_timeline(rq)->hwsp_offset, 5027 0); 5028 5029 /* User interrupt */ 5030 *cs++ = MI_USER_INTERRUPT; 5031 *cs++ = MI_NOOP; 5032 5033 /* Ensure our math for skip + emit is correct */ 5034 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5035 cs); 5036 GEM_BUG_ON(start_fini_breadcrumb_cs + 5037 ce->engine->emit_fini_breadcrumb_dw != cs); 5038 5039 rq->tail = intel_ring_offset(rq, cs); 5040 5041 return cs; 5042 } 5043 5044 #undef NON_SKIP_LEN 5045 5046 static struct intel_context * 5047 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5048 unsigned long flags) 5049 { 5050 struct guc_virtual_engine *ve; 5051 struct intel_guc *guc; 5052 unsigned int n; 5053 int err; 5054 5055 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5056 if (!ve) 5057 return ERR_PTR(-ENOMEM); 5058 5059 guc = &siblings[0]->gt->uc.guc; 5060 5061 ve->base.i915 = siblings[0]->i915; 5062 ve->base.gt = siblings[0]->gt; 5063 ve->base.uncore = siblings[0]->uncore; 5064 ve->base.id = -1; 5065 5066 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5067 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5068 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5069 ve->base.saturated = ALL_ENGINES; 5070 5071 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5072 5073 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5074 5075 ve->base.cops = &virtual_guc_context_ops; 5076 ve->base.request_alloc = guc_request_alloc; 5077 ve->base.bump_serial = virtual_guc_bump_serial; 5078 5079 ve->base.submit_request = guc_submit_request; 5080 5081 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5082 5083 intel_context_init(&ve->context, &ve->base); 5084 5085 for (n = 0; n < count; n++) { 5086 struct intel_engine_cs *sibling = siblings[n]; 5087 5088 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5089 if (sibling->mask & ve->base.mask) { 5090 DRM_DEBUG("duplicate %s entry in load balancer\n", 5091 sibling->name); 5092 err = -EINVAL; 5093 goto err_put; 5094 } 5095 5096 ve->base.mask |= sibling->mask; 5097 ve->base.logical_mask |= sibling->logical_mask; 5098 5099 if (n != 0 && ve->base.class != sibling->class) { 5100 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 5101 sibling->class, ve->base.class); 5102 err = -EINVAL; 5103 goto err_put; 5104 } else if (n == 0) { 5105 ve->base.class = sibling->class; 5106 ve->base.uabi_class = sibling->uabi_class; 5107 snprintf(ve->base.name, sizeof(ve->base.name), 5108 "v%dx%d", ve->base.class, count); 5109 ve->base.context_size = sibling->context_size; 5110 5111 ve->base.add_active_request = 5112 sibling->add_active_request; 5113 ve->base.remove_active_request = 5114 sibling->remove_active_request; 5115 ve->base.emit_bb_start = sibling->emit_bb_start; 5116 ve->base.emit_flush = sibling->emit_flush; 5117 ve->base.emit_init_breadcrumb = 5118 sibling->emit_init_breadcrumb; 5119 ve->base.emit_fini_breadcrumb = 5120 sibling->emit_fini_breadcrumb; 5121 ve->base.emit_fini_breadcrumb_dw = 5122 sibling->emit_fini_breadcrumb_dw; 5123 ve->base.breadcrumbs = 5124 intel_breadcrumbs_get(sibling->breadcrumbs); 5125 5126 
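/* Note: the remaining per-engine properties below (flags, timeslice duration, preemption timeout) are likewise inherited from the first sibling only. */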
ve->base.flags |= sibling->flags; 5127 5128 ve->base.props.timeslice_duration_ms = 5129 sibling->props.timeslice_duration_ms; 5130 ve->base.props.preempt_timeout_ms = 5131 sibling->props.preempt_timeout_ms; 5132 } 5133 } 5134 5135 return &ve->context; 5136 5137 err_put: 5138 intel_context_put(&ve->context); 5139 return ERR_PTR(err); 5140 } 5141 5142 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5143 { 5144 struct intel_engine_cs *engine; 5145 intel_engine_mask_t tmp, mask = ve->mask; 5146 5147 for_each_engine_masked(engine, ve->gt, mask, tmp) 5148 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5149 return true; 5150 5151 return false; 5152 } 5153 5154 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5155 #include "selftest_guc.c" 5156 #include "selftest_guc_multi_lrc.c" 5157 #include "selftest_guc_hangcheck.c" 5158 #endif 5159
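/*
 * Illustrative sketch only; never built (guarded by "#if 0"). It restates the
 * fini breadcrumb dword accounting used by the parallel submission paths
 * above. The helper names below are hypothetical and exist only for this
 * example.
 *
 * Parent fini breadcrumb (emit_fini_breadcrumb_dw = 12 + 4 * number_children):
 *   4 dwords per child - MI_SEMAPHORE_WAIT on join[child]
 *   2 dwords           - MI_ARB_ON_OFF | MI_ARB_ENABLE, MI_NOOP
 *   4 dwords           - GGTT write of CHILD_GO_FINI_BREADCRUMB (children go)
 *   6 dwords           - seqno GGTT write + MI_USER_INTERRUPT + MI_NOOP
 *                        (the NON_SKIP_LEN portion)
 *
 * Child fini breadcrumb (emit_fini_breadcrumb_dw = 16):
 *   2 + 4 + 4 dwords   - arb enable, signal parent, wait on parent for go
 *   6 dwords           - seqno GGTT write + MI_USER_INTERRUPT + MI_NOOP
 *                        (the NON_SKIP_LEN portion)
 *
 * On the skip_handshake() path everything except the final NON_SKIP_LEN
 * dwords is overwritten with zeros (MI_NOOPs), which is what the
 * "emit_fini_breadcrumb_dw - NON_SKIP_LEN" memsets above rely on.
 */
#if 0
static u32 example_parent_fini_breadcrumb_dw(u8 number_children)
{
	/* e.g. a parent with two children reserves 12 + 4 * 2 = 20 dwords */
	return 12 + 4 * number_children;
}

static u32 example_child_fini_breadcrumb_dw(void)
{
	/* handshake (10 dwords) + seqno write and user interrupt (6 dwords) */
	return 16;
}
#endif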