// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after it
 * processes the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and the GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned.
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. A simple
 * ida is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned.
 * If a guc_id can't be found, we punt this problem to the user as we believe
 * this is near impossible to hit during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc, hence the need to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
156 */ 157 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 158 #define SCHED_STATE_DESTROYED BIT(1) 159 #define SCHED_STATE_PENDING_DISABLE BIT(2) 160 #define SCHED_STATE_BANNED BIT(3) 161 #define SCHED_STATE_ENABLED BIT(4) 162 #define SCHED_STATE_PENDING_ENABLE BIT(5) 163 #define SCHED_STATE_REGISTERED BIT(6) 164 #define SCHED_STATE_BLOCKED_SHIFT 7 165 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 166 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 167 168 static inline void init_sched_state(struct intel_context *ce) 169 { 170 lockdep_assert_held(&ce->guc_state.lock); 171 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 172 } 173 174 __maybe_unused 175 static bool sched_state_is_init(struct intel_context *ce) 176 { 177 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */ 178 return !(ce->guc_state.sched_state & 179 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 180 } 181 182 static inline bool 183 context_wait_for_deregister_to_register(struct intel_context *ce) 184 { 185 return ce->guc_state.sched_state & 186 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 187 } 188 189 static inline void 190 set_context_wait_for_deregister_to_register(struct intel_context *ce) 191 { 192 lockdep_assert_held(&ce->guc_state.lock); 193 ce->guc_state.sched_state |= 194 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 195 } 196 197 static inline void 198 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 199 { 200 lockdep_assert_held(&ce->guc_state.lock); 201 ce->guc_state.sched_state &= 202 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 203 } 204 205 static inline bool 206 context_destroyed(struct intel_context *ce) 207 { 208 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 209 } 210 211 static inline void 212 set_context_destroyed(struct intel_context *ce) 213 { 214 lockdep_assert_held(&ce->guc_state.lock); 215 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 216 } 217 218 static inline bool context_pending_disable(struct intel_context *ce) 219 { 220 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 221 } 222 223 static inline void set_context_pending_disable(struct intel_context *ce) 224 { 225 lockdep_assert_held(&ce->guc_state.lock); 226 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 227 } 228 229 static inline void clr_context_pending_disable(struct intel_context *ce) 230 { 231 lockdep_assert_held(&ce->guc_state.lock); 232 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 233 } 234 235 static inline bool context_banned(struct intel_context *ce) 236 { 237 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 238 } 239 240 static inline void set_context_banned(struct intel_context *ce) 241 { 242 lockdep_assert_held(&ce->guc_state.lock); 243 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 244 } 245 246 static inline void clr_context_banned(struct intel_context *ce) 247 { 248 lockdep_assert_held(&ce->guc_state.lock); 249 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 250 } 251 252 static inline bool context_enabled(struct intel_context *ce) 253 { 254 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 255 } 256 257 static inline void set_context_enabled(struct intel_context *ce) 258 { 259 lockdep_assert_held(&ce->guc_state.lock); 260 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 261 } 262 263 static inline void clr_context_enabled(struct intel_context *ce) 264 { 265 lockdep_assert_held(&ce->guc_state.lock); 266 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 267 
} 268 269 static inline bool context_pending_enable(struct intel_context *ce) 270 { 271 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 272 } 273 274 static inline void set_context_pending_enable(struct intel_context *ce) 275 { 276 lockdep_assert_held(&ce->guc_state.lock); 277 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 278 } 279 280 static inline void clr_context_pending_enable(struct intel_context *ce) 281 { 282 lockdep_assert_held(&ce->guc_state.lock); 283 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 284 } 285 286 static inline bool context_registered(struct intel_context *ce) 287 { 288 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 289 } 290 291 static inline void set_context_registered(struct intel_context *ce) 292 { 293 lockdep_assert_held(&ce->guc_state.lock); 294 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 295 } 296 297 static inline void clr_context_registered(struct intel_context *ce) 298 { 299 lockdep_assert_held(&ce->guc_state.lock); 300 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 301 } 302 303 static inline u32 context_blocked(struct intel_context *ce) 304 { 305 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 306 SCHED_STATE_BLOCKED_SHIFT; 307 } 308 309 static inline void incr_context_blocked(struct intel_context *ce) 310 { 311 lockdep_assert_held(&ce->guc_state.lock); 312 313 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 314 315 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 316 } 317 318 static inline void decr_context_blocked(struct intel_context *ce) 319 { 320 lockdep_assert_held(&ce->guc_state.lock); 321 322 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 323 324 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 325 } 326 327 static inline bool context_has_committed_requests(struct intel_context *ce) 328 { 329 return !!ce->guc_state.number_committed_requests; 330 } 331 332 static inline void incr_context_committed_requests(struct intel_context *ce) 333 { 334 lockdep_assert_held(&ce->guc_state.lock); 335 ++ce->guc_state.number_committed_requests; 336 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 337 } 338 339 static inline void decr_context_committed_requests(struct intel_context *ce) 340 { 341 lockdep_assert_held(&ce->guc_state.lock); 342 --ce->guc_state.number_committed_requests; 343 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 344 } 345 346 static struct intel_context * 347 request_to_scheduling_context(struct i915_request *rq) 348 { 349 return intel_context_to_parent(rq->context); 350 } 351 352 static inline bool context_guc_id_invalid(struct intel_context *ce) 353 { 354 return ce->guc_id.id == GUC_INVALID_LRC_ID; 355 } 356 357 static inline void set_context_guc_id_invalid(struct intel_context *ce) 358 { 359 ce->guc_id.id = GUC_INVALID_LRC_ID; 360 } 361 362 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 363 { 364 return &ce->engine->gt->uc.guc; 365 } 366 367 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 368 { 369 return rb_entry(rb, struct i915_priolist, node); 370 } 371 372 /* 373 * When using multi-lrc submission a scratch memory area is reserved in the 374 * parent's context state for the process descriptor, work queue, and handshake 375 * between the parent + children contexts to insert safe preemption points 376 * between each of the BBs. Currently the scratch area is sized to a page. 
 *
 * The layout of this scratch area is below:
 * 0					guc_process_desc
 * + sizeof(struct guc_process_desc)	child go
 * + CACHELINE_BYTES			child join[0]
 * ...
 * + CACHELINE_BYTES			child join[n - 1]
 * ...					unused
 * PARENT_SCRATCH_SIZE / 2		work queue start
 * ...					work queue
 * PARENT_SCRATCH_SIZE - 1		work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	struct guc_process_desc pdesc;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc *
__get_process_desc(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->pdesc;
}

static u32 *get_wq_pointer(struct guc_process_desc *desc,
			   struct intel_context *ce,
			   u32 wqi_size)
{
	/*
	 * Check for space in the work queue. Caching a value of the head
	 * pointer in the intel_context structure in order to reduce the number
	 * of accesses to shared GPU memory which may be across a PCIe bus.
456 */ 457 #define AVAILABLE_SPACE \ 458 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 459 if (wqi_size > AVAILABLE_SPACE) { 460 ce->parallel.guc.wqi_head = READ_ONCE(desc->head); 461 462 if (wqi_size > AVAILABLE_SPACE) 463 return NULL; 464 } 465 #undef AVAILABLE_SPACE 466 467 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 468 } 469 470 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) 471 { 472 struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; 473 474 GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); 475 476 return &base[index]; 477 } 478 479 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 480 { 481 struct intel_context *ce = xa_load(&guc->context_lookup, id); 482 483 GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); 484 485 return ce; 486 } 487 488 static int guc_lrc_desc_pool_create(struct intel_guc *guc) 489 { 490 u32 size; 491 int ret; 492 493 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * 494 GUC_MAX_LRC_DESCRIPTORS); 495 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, 496 (void **)&guc->lrc_desc_pool_vaddr); 497 if (ret) 498 return ret; 499 500 return 0; 501 } 502 503 static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) 504 { 505 guc->lrc_desc_pool_vaddr = NULL; 506 i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); 507 } 508 509 static inline bool guc_submission_initialized(struct intel_guc *guc) 510 { 511 return !!guc->lrc_desc_pool_vaddr; 512 } 513 514 static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) 515 { 516 if (likely(guc_submission_initialized(guc))) { 517 struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); 518 unsigned long flags; 519 520 memset(desc, 0, sizeof(*desc)); 521 522 /* 523 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 524 * the lower level functions directly. 525 */ 526 xa_lock_irqsave(&guc->context_lookup, flags); 527 __xa_erase(&guc->context_lookup, id); 528 xa_unlock_irqrestore(&guc->context_lookup, flags); 529 } 530 } 531 532 static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) 533 { 534 return __get_context(guc, id); 535 } 536 537 static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, 538 struct intel_context *ce) 539 { 540 unsigned long flags; 541 542 /* 543 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 544 * lower level functions directly. 545 */ 546 xa_lock_irqsave(&guc->context_lookup, flags); 547 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 548 xa_unlock_irqrestore(&guc->context_lookup, flags); 549 } 550 551 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 552 { 553 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 554 wake_up_all(&guc->ct.wq); 555 } 556 557 static int guc_submission_send_busy_loop(struct intel_guc *guc, 558 const u32 *action, 559 u32 len, 560 u32 g2h_len_dw, 561 bool loop) 562 { 563 /* 564 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 565 * so we don't handle the case where we don't get a reply because we 566 * aborted the send due to the channel being busy. 567 */ 568 GEM_BUG_ON(g2h_len_dw && !loop); 569 570 if (g2h_len_dw) 571 atomic_inc(&guc->outstanding_submission_g2h); 572 573 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 574 } 575 576 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 577 atomic_t *wait_var, 578 bool interruptible, 579 long timeout) 580 { 581 const int state = interruptible ? 
582 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 583 DEFINE_WAIT(wait); 584 585 might_sleep(); 586 GEM_BUG_ON(timeout < 0); 587 588 if (!atomic_read(wait_var)) 589 return 0; 590 591 if (!timeout) 592 return -ETIME; 593 594 for (;;) { 595 prepare_to_wait(&guc->ct.wq, &wait, state); 596 597 if (!atomic_read(wait_var)) 598 break; 599 600 if (signal_pending_state(state, current)) { 601 timeout = -EINTR; 602 break; 603 } 604 605 if (!timeout) { 606 timeout = -ETIME; 607 break; 608 } 609 610 timeout = io_schedule_timeout(timeout); 611 } 612 finish_wait(&guc->ct.wq, &wait); 613 614 return (timeout < 0) ? timeout : 0; 615 } 616 617 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 618 { 619 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 620 return 0; 621 622 return intel_guc_wait_for_pending_msg(guc, 623 &guc->outstanding_submission_g2h, 624 true, timeout); 625 } 626 627 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); 628 629 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 630 { 631 int err = 0; 632 struct intel_context *ce = request_to_scheduling_context(rq); 633 u32 action[3]; 634 int len = 0; 635 u32 g2h_len_dw = 0; 636 bool enabled; 637 638 lockdep_assert_held(&rq->engine->sched_engine->lock); 639 640 /* 641 * Corner case where requests were sitting in the priority list or a 642 * request resubmitted after the context was banned. 643 */ 644 if (unlikely(intel_context_is_banned(ce))) { 645 i915_request_put(i915_request_mark_eio(rq)); 646 intel_engine_signal_breadcrumbs(ce->engine); 647 return 0; 648 } 649 650 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 651 GEM_BUG_ON(context_guc_id_invalid(ce)); 652 653 spin_lock(&ce->guc_state.lock); 654 655 /* 656 * The request / context will be run on the hardware when scheduling 657 * gets enabled in the unblock. For multi-lrc we still submit the 658 * context to move the LRC tails. 659 */ 660 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 661 goto out; 662 663 enabled = context_enabled(ce) || context_blocked(ce); 664 665 if (!enabled) { 666 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 667 action[len++] = ce->guc_id.id; 668 action[len++] = GUC_CONTEXT_ENABLE; 669 set_context_pending_enable(ce); 670 intel_context_get(ce); 671 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 672 } else { 673 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 674 action[len++] = ce->guc_id.id; 675 } 676 677 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 678 if (!enabled && !err) { 679 trace_intel_context_sched_enable(ce); 680 atomic_inc(&guc->outstanding_submission_g2h); 681 set_context_enabled(ce); 682 683 /* 684 * Without multi-lrc KMD does the submission step (moving the 685 * lrc tail) so enabling scheduling is sufficient to submit the 686 * context. This isn't the case in multi-lrc submission as the 687 * GuC needs to move the tails, hence the need for another H2G 688 * to submit a multi-lrc context after enabling scheduling. 
689 */ 690 if (intel_context_is_parent(ce)) { 691 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 692 err = intel_guc_send_nb(guc, action, len - 1, 0); 693 } 694 } else if (!enabled) { 695 clr_context_pending_enable(ce); 696 intel_context_put(ce); 697 } 698 if (likely(!err)) 699 trace_i915_request_guc_submit(rq); 700 701 out: 702 spin_unlock(&ce->guc_state.lock); 703 return err; 704 } 705 706 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 707 { 708 int ret = __guc_add_request(guc, rq); 709 710 if (unlikely(ret == -EBUSY)) { 711 guc->stalled_request = rq; 712 guc->submission_stall_reason = STALL_ADD_REQUEST; 713 } 714 715 return ret; 716 } 717 718 static inline void guc_set_lrc_tail(struct i915_request *rq) 719 { 720 rq->context->lrc_reg_state[CTX_RING_TAIL] = 721 intel_ring_set_tail(rq->ring, rq->tail); 722 } 723 724 static inline int rq_prio(const struct i915_request *rq) 725 { 726 return rq->sched.attr.priority; 727 } 728 729 static bool is_multi_lrc_rq(struct i915_request *rq) 730 { 731 return intel_context_is_parallel(rq->context); 732 } 733 734 static bool can_merge_rq(struct i915_request *rq, 735 struct i915_request *last) 736 { 737 return request_to_scheduling_context(rq) == 738 request_to_scheduling_context(last); 739 } 740 741 static u32 wq_space_until_wrap(struct intel_context *ce) 742 { 743 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 744 } 745 746 static void write_wqi(struct guc_process_desc *desc, 747 struct intel_context *ce, 748 u32 wqi_size) 749 { 750 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 751 752 /* 753 * Ensure WQI are visible before updating tail 754 */ 755 intel_guc_write_barrier(ce_to_guc(ce)); 756 757 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 758 (WQ_SIZE - 1); 759 WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); 760 } 761 762 static int guc_wq_noop_append(struct intel_context *ce) 763 { 764 struct guc_process_desc *desc = __get_process_desc(ce); 765 u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); 766 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 767 768 if (!wqi) 769 return -EBUSY; 770 771 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 772 773 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 774 FIELD_PREP(WQ_LEN_MASK, len_dw); 775 ce->parallel.guc.wqi_tail = 0; 776 777 return 0; 778 } 779 780 static int __guc_wq_item_append(struct i915_request *rq) 781 { 782 struct intel_context *ce = request_to_scheduling_context(rq); 783 struct intel_context *child; 784 struct guc_process_desc *desc = __get_process_desc(ce); 785 unsigned int wqi_size = (ce->parallel.number_children + 4) * 786 sizeof(u32); 787 u32 *wqi; 788 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 789 int ret; 790 791 /* Ensure context is in correct state updating work queue */ 792 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 793 GEM_BUG_ON(context_guc_id_invalid(ce)); 794 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 795 GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); 796 797 /* Insert NOOP if this work queue item will wrap the tail pointer. 
*/ 798 if (wqi_size > wq_space_until_wrap(ce)) { 799 ret = guc_wq_noop_append(ce); 800 if (ret) 801 return ret; 802 } 803 804 wqi = get_wq_pointer(desc, ce, wqi_size); 805 if (!wqi) 806 return -EBUSY; 807 808 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 809 810 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 811 FIELD_PREP(WQ_LEN_MASK, len_dw); 812 *wqi++ = ce->lrc.lrca; 813 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 814 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 815 *wqi++ = 0; /* fence_id */ 816 for_each_child(ce, child) 817 *wqi++ = child->ring->tail / sizeof(u64); 818 819 write_wqi(desc, ce, wqi_size); 820 821 return 0; 822 } 823 824 static int guc_wq_item_append(struct intel_guc *guc, 825 struct i915_request *rq) 826 { 827 struct intel_context *ce = request_to_scheduling_context(rq); 828 int ret = 0; 829 830 if (likely(!intel_context_is_banned(ce))) { 831 ret = __guc_wq_item_append(rq); 832 833 if (unlikely(ret == -EBUSY)) { 834 guc->stalled_request = rq; 835 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 836 } 837 } 838 839 return ret; 840 } 841 842 static bool multi_lrc_submit(struct i915_request *rq) 843 { 844 struct intel_context *ce = request_to_scheduling_context(rq); 845 846 intel_ring_set_tail(rq->ring, rq->tail); 847 848 /* 849 * We expect the front end (execbuf IOCTL) to set this flag on the last 850 * request generated from a multi-BB submission. This indicates to the 851 * backend (GuC interface) that we should submit this context thus 852 * submitting all the requests generated in parallel. 853 */ 854 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 855 intel_context_is_banned(ce); 856 } 857 858 static int guc_dequeue_one_context(struct intel_guc *guc) 859 { 860 struct i915_sched_engine * const sched_engine = guc->sched_engine; 861 struct i915_request *last = NULL; 862 bool submit = false; 863 struct rb_node *rb; 864 int ret; 865 866 lockdep_assert_held(&sched_engine->lock); 867 868 if (guc->stalled_request) { 869 submit = true; 870 last = guc->stalled_request; 871 872 switch (guc->submission_stall_reason) { 873 case STALL_REGISTER_CONTEXT: 874 goto register_context; 875 case STALL_MOVE_LRC_TAIL: 876 goto move_lrc_tail; 877 case STALL_ADD_REQUEST: 878 goto add_request; 879 default: 880 MISSING_CASE(guc->submission_stall_reason); 881 } 882 } 883 884 while ((rb = rb_first_cached(&sched_engine->queue))) { 885 struct i915_priolist *p = to_priolist(rb); 886 struct i915_request *rq, *rn; 887 888 priolist_for_each_request_consume(rq, rn, p) { 889 if (last && !can_merge_rq(rq, last)) 890 goto register_context; 891 892 list_del_init(&rq->sched.link); 893 894 __i915_request_submit(rq); 895 896 trace_i915_request_in(rq, 0); 897 last = rq; 898 899 if (is_multi_lrc_rq(rq)) { 900 /* 901 * We need to coalesce all multi-lrc requests in 902 * a relationship into a single H2G. We are 903 * guaranteed that all of these requests will be 904 * submitted sequentially. 
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
			     !intel_context_is_banned(ce))) {
			ret = guc_lrc_desc_pin(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but the
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare).
		 * If submission_disabled() is set, the caller shouldn't set
		 * these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with the above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically with a
 * period of 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since the corrupted values are still non-zero, it is
 * non-trivial to determine their validity. Instead we read the values multiple
 * times until they are consistent. In test runs, 3 attempts result in
 * consistent values. The upper bound is set to 6 attempts and may need to be
 * tuned as per any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from the previous sample to stats->total_gt_clks.
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static u64 gpm_timestamp(struct intel_gt *gt)
{
	u32 lo, hi, old_hi, loop = 0;

	hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
	do {
		lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
		old_hi = hi;
		hi =
intel_uncore_read(gt->uncore, MISC_STATUS1); 1218 } while (old_hi != hi && loop++ < 2); 1219 1220 return ((u64)hi << 32) | lo; 1221 } 1222 1223 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1224 { 1225 struct intel_gt *gt = guc_to_gt(guc); 1226 u32 gt_stamp_lo, gt_stamp_hi; 1227 u64 gpm_ts; 1228 1229 lockdep_assert_held(&guc->timestamp.lock); 1230 1231 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1232 gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; 1233 gt_stamp_lo = lower_32_bits(gpm_ts); 1234 *now = ktime_get(); 1235 1236 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1237 gt_stamp_hi++; 1238 1239 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1240 } 1241 1242 /* 1243 * Unlike the execlist mode of submission total and active times are in terms of 1244 * gt clocks. The *now parameter is retained to return the cpu time at which the 1245 * busyness was sampled. 1246 */ 1247 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1248 { 1249 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1250 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1251 struct intel_gt *gt = engine->gt; 1252 struct intel_guc *guc = >->uc.guc; 1253 u64 total, gt_stamp_saved; 1254 unsigned long flags; 1255 u32 reset_count; 1256 bool in_reset; 1257 1258 spin_lock_irqsave(&guc->timestamp.lock, flags); 1259 1260 /* 1261 * If a reset happened, we risk reading partially updated engine 1262 * busyness from GuC, so we just use the driver stored copy of busyness. 1263 * Synchronize with gt reset using reset_count and the 1264 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count 1265 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1266 * usable by checking the flag afterwards. 1267 */ 1268 reset_count = i915_reset_count(gpu_error); 1269 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1270 1271 *now = ktime_get(); 1272 1273 /* 1274 * The active busyness depends on start_gt_clk and gt_stamp. 1275 * gt_stamp is updated by i915 only when gt is awake and the 1276 * start_gt_clk is derived from GuC state. To get a consistent 1277 * view of activity, we query the GuC state only if gt is awake. 1278 */ 1279 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1280 stats_saved = *stats; 1281 gt_stamp_saved = guc->timestamp.gt_stamp; 1282 /* 1283 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1284 * start_gt_clk' calculation below for active engines. 
1285 */ 1286 guc_update_engine_gt_clks(engine); 1287 guc_update_pm_timestamp(guc, now); 1288 intel_gt_pm_put_async(gt); 1289 if (i915_reset_count(gpu_error) != reset_count) { 1290 *stats = stats_saved; 1291 guc->timestamp.gt_stamp = gt_stamp_saved; 1292 } 1293 } 1294 1295 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1296 if (stats->running) { 1297 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1298 1299 total += intel_gt_clock_interval_to_ns(gt, clk); 1300 } 1301 1302 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1303 1304 return ns_to_ktime(total); 1305 } 1306 1307 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1308 { 1309 struct intel_gt *gt = guc_to_gt(guc); 1310 struct intel_engine_cs *engine; 1311 enum intel_engine_id id; 1312 unsigned long flags; 1313 ktime_t unused; 1314 1315 cancel_delayed_work_sync(&guc->timestamp.work); 1316 1317 spin_lock_irqsave(&guc->timestamp.lock, flags); 1318 1319 guc_update_pm_timestamp(guc, &unused); 1320 for_each_engine(engine, gt, id) { 1321 guc_update_engine_gt_clks(engine); 1322 engine->stats.guc.prev_total = 0; 1323 } 1324 1325 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1326 } 1327 1328 static void __update_guc_busyness_stats(struct intel_guc *guc) 1329 { 1330 struct intel_gt *gt = guc_to_gt(guc); 1331 struct intel_engine_cs *engine; 1332 enum intel_engine_id id; 1333 unsigned long flags; 1334 ktime_t unused; 1335 1336 spin_lock_irqsave(&guc->timestamp.lock, flags); 1337 1338 guc_update_pm_timestamp(guc, &unused); 1339 for_each_engine(engine, gt, id) 1340 guc_update_engine_gt_clks(engine); 1341 1342 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1343 } 1344 1345 static void guc_timestamp_ping(struct work_struct *wrk) 1346 { 1347 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1348 timestamp.work.work); 1349 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1350 struct intel_gt *gt = guc_to_gt(guc); 1351 intel_wakeref_t wakeref; 1352 int srcu, ret; 1353 1354 /* 1355 * Synchronize with gt reset to make sure the worker does not 1356 * corrupt the engine/guc stats. 
1357 */ 1358 ret = intel_gt_reset_trylock(gt, &srcu); 1359 if (ret) 1360 return; 1361 1362 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1363 __update_guc_busyness_stats(guc); 1364 1365 intel_gt_reset_unlock(gt, srcu); 1366 1367 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1368 guc->timestamp.ping_delay); 1369 } 1370 1371 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1372 { 1373 u32 offset = intel_guc_engine_usage_offset(guc); 1374 u32 action[] = { 1375 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1376 offset, 1377 0, 1378 }; 1379 1380 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1381 } 1382 1383 static void guc_init_engine_stats(struct intel_guc *guc) 1384 { 1385 struct intel_gt *gt = guc_to_gt(guc); 1386 intel_wakeref_t wakeref; 1387 1388 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1389 guc->timestamp.ping_delay); 1390 1391 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 1392 int ret = guc_action_enable_usage_stats(guc); 1393 1394 if (ret) 1395 drm_err(>->i915->drm, 1396 "Failed to enable usage stats: %d!\n", ret); 1397 } 1398 } 1399 1400 void intel_guc_busyness_park(struct intel_gt *gt) 1401 { 1402 struct intel_guc *guc = >->uc.guc; 1403 1404 if (!guc_submission_initialized(guc)) 1405 return; 1406 1407 cancel_delayed_work(&guc->timestamp.work); 1408 __update_guc_busyness_stats(guc); 1409 } 1410 1411 void intel_guc_busyness_unpark(struct intel_gt *gt) 1412 { 1413 struct intel_guc *guc = >->uc.guc; 1414 unsigned long flags; 1415 ktime_t unused; 1416 1417 if (!guc_submission_initialized(guc)) 1418 return; 1419 1420 spin_lock_irqsave(&guc->timestamp.lock, flags); 1421 guc_update_pm_timestamp(guc, &unused); 1422 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1423 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1424 guc->timestamp.ping_delay); 1425 } 1426 1427 static inline bool 1428 submission_disabled(struct intel_guc *guc) 1429 { 1430 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1431 1432 return unlikely(!sched_engine || 1433 !__tasklet_is_enabled(&sched_engine->tasklet) || 1434 intel_gt_is_wedged(guc_to_gt(guc))); 1435 } 1436 1437 static void disable_submission(struct intel_guc *guc) 1438 { 1439 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1440 1441 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1442 GEM_BUG_ON(!guc->ct.enabled); 1443 __tasklet_disable_sync_once(&sched_engine->tasklet); 1444 sched_engine->tasklet.callback = NULL; 1445 } 1446 } 1447 1448 static void enable_submission(struct intel_guc *guc) 1449 { 1450 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1451 unsigned long flags; 1452 1453 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1454 sched_engine->tasklet.callback = guc_submission_tasklet; 1455 wmb(); /* Make sure callback visible */ 1456 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1457 __tasklet_enable(&sched_engine->tasklet)) { 1458 GEM_BUG_ON(!guc->ct.enabled); 1459 1460 /* And kick in case we missed a new request submission. 
*/ 1461 tasklet_hi_schedule(&sched_engine->tasklet); 1462 } 1463 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1464 } 1465 1466 static void guc_flush_submissions(struct intel_guc *guc) 1467 { 1468 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1469 unsigned long flags; 1470 1471 spin_lock_irqsave(&sched_engine->lock, flags); 1472 spin_unlock_irqrestore(&sched_engine->lock, flags); 1473 } 1474 1475 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1476 1477 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1478 { 1479 if (unlikely(!guc_submission_initialized(guc))) { 1480 /* Reset called during driver load? GuC not yet initialised! */ 1481 return; 1482 } 1483 1484 intel_gt_park_heartbeats(guc_to_gt(guc)); 1485 disable_submission(guc); 1486 guc->interrupts.disable(guc); 1487 __reset_guc_busyness_stats(guc); 1488 1489 /* Flush IRQ handler */ 1490 spin_lock_irq(&guc_to_gt(guc)->irq_lock); 1491 spin_unlock_irq(&guc_to_gt(guc)->irq_lock); 1492 1493 guc_flush_submissions(guc); 1494 guc_flush_destroyed_contexts(guc); 1495 flush_work(&guc->ct.requests.worker); 1496 1497 scrub_guc_desc_for_outstanding_g2h(guc); 1498 } 1499 1500 static struct intel_engine_cs * 1501 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1502 { 1503 struct intel_engine_cs *engine; 1504 intel_engine_mask_t tmp, mask = ve->mask; 1505 unsigned int num_siblings = 0; 1506 1507 for_each_engine_masked(engine, ve->gt, mask, tmp) 1508 if (num_siblings++ == sibling) 1509 return engine; 1510 1511 return NULL; 1512 } 1513 1514 static inline struct intel_engine_cs * 1515 __context_to_physical_engine(struct intel_context *ce) 1516 { 1517 struct intel_engine_cs *engine = ce->engine; 1518 1519 if (intel_engine_is_virtual(engine)) 1520 engine = guc_virtual_get_sibling(engine, 0); 1521 1522 return engine; 1523 } 1524 1525 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1526 { 1527 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1528 1529 if (intel_context_is_banned(ce)) 1530 return; 1531 1532 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1533 1534 /* 1535 * We want a simple context + ring to execute the breadcrumb update. 1536 * We cannot rely on the context being intact across the GPU hang, 1537 * so clear it and rebuild just what we need for the breadcrumb. 1538 * All pending requests for this context will be zapped, and any 1539 * future request will be after userspace has had the opportunity 1540 * to recreate its own state. 1541 */ 1542 if (scrub) 1543 lrc_init_regs(ce, engine, true); 1544 1545 /* Rerun the request; its payload has been neutered (if guilty). 
*/ 1546 lrc_update_regs(ce, engine, head); 1547 } 1548 1549 static void guc_reset_nop(struct intel_engine_cs *engine) 1550 { 1551 } 1552 1553 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1554 { 1555 } 1556 1557 static void 1558 __unwind_incomplete_requests(struct intel_context *ce) 1559 { 1560 struct i915_request *rq, *rn; 1561 struct list_head *pl; 1562 int prio = I915_PRIORITY_INVALID; 1563 struct i915_sched_engine * const sched_engine = 1564 ce->engine->sched_engine; 1565 unsigned long flags; 1566 1567 spin_lock_irqsave(&sched_engine->lock, flags); 1568 spin_lock(&ce->guc_state.lock); 1569 list_for_each_entry_safe_reverse(rq, rn, 1570 &ce->guc_state.requests, 1571 sched.link) { 1572 if (i915_request_completed(rq)) 1573 continue; 1574 1575 list_del_init(&rq->sched.link); 1576 __i915_request_unsubmit(rq); 1577 1578 /* Push the request back into the queue for later resubmission. */ 1579 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1580 if (rq_prio(rq) != prio) { 1581 prio = rq_prio(rq); 1582 pl = i915_sched_lookup_priolist(sched_engine, prio); 1583 } 1584 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1585 1586 list_add(&rq->sched.link, pl); 1587 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1588 } 1589 spin_unlock(&ce->guc_state.lock); 1590 spin_unlock_irqrestore(&sched_engine->lock, flags); 1591 } 1592 1593 static void __guc_reset_context(struct intel_context *ce, bool stalled) 1594 { 1595 bool local_stalled; 1596 struct i915_request *rq; 1597 unsigned long flags; 1598 u32 head; 1599 int i, number_children = ce->parallel.number_children; 1600 struct intel_context *parent = ce; 1601 1602 GEM_BUG_ON(intel_context_is_child(ce)); 1603 1604 intel_context_get(ce); 1605 1606 /* 1607 * GuC will implicitly mark the context as non-schedulable when it sends 1608 * the reset notification. Make sure our state reflects this change. The 1609 * context will be marked enabled on resubmission. 1610 */ 1611 spin_lock_irqsave(&ce->guc_state.lock, flags); 1612 clr_context_enabled(ce); 1613 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1614 1615 /* 1616 * For each context in the relationship find the hanging request 1617 * resetting each context / request as needed 1618 */ 1619 for (i = 0; i < number_children + 1; ++i) { 1620 if (!intel_context_is_pinned(ce)) 1621 goto next_context; 1622 1623 local_stalled = false; 1624 rq = intel_context_find_active_request(ce); 1625 if (!rq) { 1626 head = ce->ring->tail; 1627 goto out_replay; 1628 } 1629 1630 if (i915_request_started(rq)) 1631 local_stalled = true; 1632 1633 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1634 head = intel_ring_wrap(ce->ring, rq->head); 1635 1636 __i915_request_reset(rq, local_stalled && stalled); 1637 out_replay: 1638 guc_reset_state(ce, head, local_stalled && stalled); 1639 next_context: 1640 if (i != number_children) 1641 ce = list_next_entry(ce, parallel.child_link); 1642 } 1643 1644 __unwind_incomplete_requests(parent); 1645 intel_context_put(parent); 1646 } 1647 1648 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) 1649 { 1650 struct intel_context *ce; 1651 unsigned long index; 1652 unsigned long flags; 1653 1654 if (unlikely(!guc_submission_initialized(guc))) { 1655 /* Reset called during driver load? GuC not yet initialised! 
*/ 1656 return; 1657 } 1658 1659 xa_lock_irqsave(&guc->context_lookup, flags); 1660 xa_for_each(&guc->context_lookup, index, ce) { 1661 if (!kref_get_unless_zero(&ce->ref)) 1662 continue; 1663 1664 xa_unlock(&guc->context_lookup); 1665 1666 if (intel_context_is_pinned(ce) && 1667 !intel_context_is_child(ce)) 1668 __guc_reset_context(ce, stalled); 1669 1670 intel_context_put(ce); 1671 1672 xa_lock(&guc->context_lookup); 1673 } 1674 xa_unlock_irqrestore(&guc->context_lookup, flags); 1675 1676 /* GuC is blown away, drop all references to contexts */ 1677 xa_destroy(&guc->context_lookup); 1678 } 1679 1680 static void guc_cancel_context_requests(struct intel_context *ce) 1681 { 1682 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1683 struct i915_request *rq; 1684 unsigned long flags; 1685 1686 /* Mark all executing requests as skipped. */ 1687 spin_lock_irqsave(&sched_engine->lock, flags); 1688 spin_lock(&ce->guc_state.lock); 1689 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1690 i915_request_put(i915_request_mark_eio(rq)); 1691 spin_unlock(&ce->guc_state.lock); 1692 spin_unlock_irqrestore(&sched_engine->lock, flags); 1693 } 1694 1695 static void 1696 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1697 { 1698 struct i915_request *rq, *rn; 1699 struct rb_node *rb; 1700 unsigned long flags; 1701 1702 /* Can be called during boot if GuC fails to load */ 1703 if (!sched_engine) 1704 return; 1705 1706 /* 1707 * Before we call engine->cancel_requests(), we should have exclusive 1708 * access to the submission state. This is arranged for us by the 1709 * caller disabling the interrupt generation, the tasklet and other 1710 * threads that may then access the same state, giving us a free hand 1711 * to reset state. However, we still need to let lockdep be aware that 1712 * we know this state may be accessed in hardirq context, so we 1713 * disable the irq around this manipulation and we want to keep 1714 * the spinlock focused on its duties and not accidentally conflate 1715 * coverage to the submission's irq state. (Similarly, although we 1716 * shouldn't need to disable irq around the manipulation of the 1717 * submission's irq state, we also wish to remind ourselves that 1718 * it is irq state.) 1719 */ 1720 spin_lock_irqsave(&sched_engine->lock, flags); 1721 1722 /* Flush the queued requests to the timeline list (for retiring). 
*/ 1723 while ((rb = rb_first_cached(&sched_engine->queue))) { 1724 struct i915_priolist *p = to_priolist(rb); 1725 1726 priolist_for_each_request_consume(rq, rn, p) { 1727 list_del_init(&rq->sched.link); 1728 1729 __i915_request_submit(rq); 1730 1731 i915_request_put(i915_request_mark_eio(rq)); 1732 } 1733 1734 rb_erase_cached(&p->node, &sched_engine->queue); 1735 i915_priolist_free(p); 1736 } 1737 1738 /* Remaining _unready_ requests will be nop'ed when submitted */ 1739 1740 sched_engine->queue_priority_hint = INT_MIN; 1741 sched_engine->queue = RB_ROOT_CACHED; 1742 1743 spin_unlock_irqrestore(&sched_engine->lock, flags); 1744 } 1745 1746 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1747 { 1748 struct intel_context *ce; 1749 unsigned long index; 1750 unsigned long flags; 1751 1752 xa_lock_irqsave(&guc->context_lookup, flags); 1753 xa_for_each(&guc->context_lookup, index, ce) { 1754 if (!kref_get_unless_zero(&ce->ref)) 1755 continue; 1756 1757 xa_unlock(&guc->context_lookup); 1758 1759 if (intel_context_is_pinned(ce) && 1760 !intel_context_is_child(ce)) 1761 guc_cancel_context_requests(ce); 1762 1763 intel_context_put(ce); 1764 1765 xa_lock(&guc->context_lookup); 1766 } 1767 xa_unlock_irqrestore(&guc->context_lookup, flags); 1768 1769 guc_cancel_sched_engine_requests(guc->sched_engine); 1770 1771 /* GuC is blown away, drop all references to contexts */ 1772 xa_destroy(&guc->context_lookup); 1773 } 1774 1775 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1776 { 1777 /* Reset called during driver load or during wedge? */ 1778 if (unlikely(!guc_submission_initialized(guc) || 1779 intel_gt_is_wedged(guc_to_gt(guc)))) { 1780 return; 1781 } 1782 1783 /* 1784 * Technically possible for either of these values to be non-zero here, 1785 * but very unlikely + harmless. Regardless let's add a warn so we can 1786 * see in CI if this happens frequently / a precursor to taking down the 1787 * machine. 1788 */ 1789 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1790 atomic_set(&guc->outstanding_submission_g2h, 0); 1791 1792 intel_guc_global_policies_update(guc); 1793 enable_submission(guc); 1794 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1795 } 1796 1797 static void destroyed_worker_func(struct work_struct *w); 1798 static void reset_fail_worker_func(struct work_struct *w); 1799 1800 /* 1801 * Set up the memory resources to be shared with the GuC (via the GGTT) 1802 * at firmware loading time. 1803 */ 1804 int intel_guc_submission_init(struct intel_guc *guc) 1805 { 1806 struct intel_gt *gt = guc_to_gt(guc); 1807 int ret; 1808 1809 if (guc->lrc_desc_pool) 1810 return 0; 1811 1812 ret = guc_lrc_desc_pool_create(guc); 1813 if (ret) 1814 return ret; 1815 /* 1816 * Keep static analysers happy, let them know that we allocated the 1817 * vma after testing that it didn't exist earlier. 
1818 */ 1819 GEM_BUG_ON(!guc->lrc_desc_pool); 1820 1821 guc->submission_state.guc_ids_bitmap = 1822 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1823 if (!guc->submission_state.guc_ids_bitmap) 1824 return -ENOMEM; 1825 1826 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1827 guc->timestamp.shift = gpm_timestamp_shift(gt); 1828 1829 return 0; 1830 } 1831 1832 void intel_guc_submission_fini(struct intel_guc *guc) 1833 { 1834 if (!guc->lrc_desc_pool) 1835 return; 1836 1837 guc_flush_destroyed_contexts(guc); 1838 guc_lrc_desc_pool_destroy(guc); 1839 i915_sched_engine_put(guc->sched_engine); 1840 bitmap_free(guc->submission_state.guc_ids_bitmap); 1841 } 1842 1843 static inline void queue_request(struct i915_sched_engine *sched_engine, 1844 struct i915_request *rq, 1845 int prio) 1846 { 1847 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1848 list_add_tail(&rq->sched.link, 1849 i915_sched_lookup_priolist(sched_engine, prio)); 1850 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1851 tasklet_hi_schedule(&sched_engine->tasklet); 1852 } 1853 1854 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1855 struct i915_request *rq) 1856 { 1857 int ret = 0; 1858 1859 __i915_request_submit(rq); 1860 1861 trace_i915_request_in(rq, 0); 1862 1863 if (is_multi_lrc_rq(rq)) { 1864 if (multi_lrc_submit(rq)) { 1865 ret = guc_wq_item_append(guc, rq); 1866 if (!ret) 1867 ret = guc_add_request(guc, rq); 1868 } 1869 } else { 1870 guc_set_lrc_tail(rq); 1871 ret = guc_add_request(guc, rq); 1872 } 1873 1874 if (unlikely(ret == -EPIPE)) 1875 disable_submission(guc); 1876 1877 return ret; 1878 } 1879 1880 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1881 { 1882 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1883 struct intel_context *ce = request_to_scheduling_context(rq); 1884 1885 return submission_disabled(guc) || guc->stalled_request || 1886 !i915_sched_engine_is_empty(sched_engine) || 1887 !lrc_desc_registered(guc, ce->guc_id.id); 1888 } 1889 1890 static void guc_submit_request(struct i915_request *rq) 1891 { 1892 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1893 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1894 unsigned long flags; 1895 1896 /* Will be called from irq-context when using foreign fences. 
*/ 1897 spin_lock_irqsave(&sched_engine->lock, flags); 1898 1899 if (need_tasklet(guc, rq)) 1900 queue_request(sched_engine, rq, rq_prio(rq)); 1901 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1902 tasklet_hi_schedule(&sched_engine->tasklet); 1903 1904 spin_unlock_irqrestore(&sched_engine->lock, flags); 1905 } 1906 1907 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1908 { 1909 int ret; 1910 1911 GEM_BUG_ON(intel_context_is_child(ce)); 1912 1913 if (intel_context_is_parent(ce)) 1914 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1915 NUMBER_MULTI_LRC_GUC_ID(guc), 1916 order_base_2(ce->parallel.number_children 1917 + 1)); 1918 else 1919 ret = ida_simple_get(&guc->submission_state.guc_ids, 1920 NUMBER_MULTI_LRC_GUC_ID(guc), 1921 guc->submission_state.num_guc_ids, 1922 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1923 __GFP_NOWARN); 1924 if (unlikely(ret < 0)) 1925 return ret; 1926 1927 ce->guc_id.id = ret; 1928 return 0; 1929 } 1930 1931 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1932 { 1933 GEM_BUG_ON(intel_context_is_child(ce)); 1934 1935 if (!context_guc_id_invalid(ce)) { 1936 if (intel_context_is_parent(ce)) 1937 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 1938 ce->guc_id.id, 1939 order_base_2(ce->parallel.number_children 1940 + 1)); 1941 else 1942 ida_simple_remove(&guc->submission_state.guc_ids, 1943 ce->guc_id.id); 1944 reset_lrc_desc(guc, ce->guc_id.id); 1945 set_context_guc_id_invalid(ce); 1946 } 1947 if (!list_empty(&ce->guc_id.link)) 1948 list_del_init(&ce->guc_id.link); 1949 } 1950 1951 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1952 { 1953 unsigned long flags; 1954 1955 spin_lock_irqsave(&guc->submission_state.lock, flags); 1956 __release_guc_id(guc, ce); 1957 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1958 } 1959 1960 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 1961 { 1962 struct intel_context *cn; 1963 1964 lockdep_assert_held(&guc->submission_state.lock); 1965 GEM_BUG_ON(intel_context_is_child(ce)); 1966 GEM_BUG_ON(intel_context_is_parent(ce)); 1967 1968 if (!list_empty(&guc->submission_state.guc_id_list)) { 1969 cn = list_first_entry(&guc->submission_state.guc_id_list, 1970 struct intel_context, 1971 guc_id.link); 1972 1973 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 1974 GEM_BUG_ON(context_guc_id_invalid(cn)); 1975 GEM_BUG_ON(intel_context_is_child(cn)); 1976 GEM_BUG_ON(intel_context_is_parent(cn)); 1977 1978 list_del_init(&cn->guc_id.link); 1979 ce->guc_id.id = cn->guc_id.id; 1980 1981 spin_lock(&cn->guc_state.lock); 1982 clr_context_registered(cn); 1983 spin_unlock(&cn->guc_state.lock); 1984 1985 set_context_guc_id_invalid(cn); 1986 1987 #ifdef CONFIG_DRM_I915_SELFTEST 1988 guc->number_guc_id_stolen++; 1989 #endif 1990 1991 return 0; 1992 } else { 1993 return -EAGAIN; 1994 } 1995 } 1996 1997 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 1998 { 1999 int ret; 2000 2001 lockdep_assert_held(&guc->submission_state.lock); 2002 GEM_BUG_ON(intel_context_is_child(ce)); 2003 2004 ret = new_guc_id(guc, ce); 2005 if (unlikely(ret < 0)) { 2006 if (intel_context_is_parent(ce)) 2007 return -ENOSPC; 2008 2009 ret = steal_guc_id(guc, ce); 2010 if (ret < 0) 2011 return ret; 2012 } 2013 2014 if (intel_context_is_parent(ce)) { 2015 struct intel_context *child; 2016 int i = 1; 2017 2018 for_each_child(ce, child) 2019 child->guc_id.id = ce->guc_id.id + i++; 2020 } 2021 2022 return 0; 2023 } 2024 
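/*
 * Illustrative sketch only (not used by the driver): how the guc_id space is
 * carved up by new_guc_id()/assign_guc_id() above, assuming the 1/16 multi-lrc
 * partition described at the top of this file. A parent (multi-lrc) context
 * with N children takes a contiguous, power-of-two sized block from the bitmap
 * partition, e.g. a parent with three children consumes a block of four ids,
 * and each child simply uses the id immediately following its parent.
 * Single-lrc contexts come from the ida range above the partition. The two
 * helpers below are hypothetical and exist purely as documentation.
 */
static u32 __maybe_unused example_multi_lrc_block_size(u32 number_children)
{
	/* bitmap_find_free_region() hands out 1 << order contiguous ids */
	return 1 << order_base_2(number_children + 1);
}

static u32 __maybe_unused example_child_guc_id(u32 parent_guc_id, u32 child_index)
{
	/* Mirrors the child id assignment loop in assign_guc_id() */
	return parent_guc_id + child_index + 1;
}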
2025 #define PIN_GUC_ID_TRIES 4
2026 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2027 {
2028 	int ret = 0;
2029 	unsigned long flags, tries = PIN_GUC_ID_TRIES;
2030 
2031 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2032 
2033 try_again:
2034 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2035 
2036 	might_lock(&ce->guc_state.lock);
2037 
2038 	if (context_guc_id_invalid(ce)) {
2039 		ret = assign_guc_id(guc, ce);
2040 		if (ret)
2041 			goto out_unlock;
2042 		ret = 1;	/* Indicates newly assigned guc_id */
2043 	}
2044 	if (!list_empty(&ce->guc_id.link))
2045 		list_del_init(&ce->guc_id.link);
2046 	atomic_inc(&ce->guc_id.ref);
2047 
2048 out_unlock:
2049 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2050 
2051 	/*
2052 	 * -EAGAIN indicates no guc_ids are available, let's retire any
2053 	 * outstanding requests to see if that frees up a guc_id. If the first
2054 	 * retire didn't help, insert a sleep with the timeslice duration before
2055 	 * attempting to retire more requests. Double the sleep period each
2056 	 * subsequent pass before finally giving up. The sleep period has a max
2057 	 * of 100ms and a minimum of 1ms.
2058 	 */
2059 	if (ret == -EAGAIN && --tries) {
2060 		if (PIN_GUC_ID_TRIES - tries > 1) {
2061 			unsigned int timeslice_shifted =
2062 				ce->engine->props.timeslice_duration_ms <<
2063 				(PIN_GUC_ID_TRIES - tries - 2);
2064 			unsigned int max = min_t(unsigned int, 100,
2065 						 timeslice_shifted);
2066 
2067 			msleep(max_t(unsigned int, max, 1));
2068 		}
2069 		intel_gt_retire_requests(guc_to_gt(guc));
2070 		goto try_again;
2071 	}
2072 
2073 	return ret;
2074 }
2075 
2076 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2077 {
2078 	unsigned long flags;
2079 
2080 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2081 	GEM_BUG_ON(intel_context_is_child(ce));
2082 
2083 	if (unlikely(context_guc_id_invalid(ce) ||
2084 		     intel_context_is_parent(ce)))
2085 		return;
2086 
2087 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2088 	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2089 	    !atomic_read(&ce->guc_id.ref))
2090 		list_add_tail(&ce->guc_id.link,
2091 			      &guc->submission_state.guc_id_list);
2092 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2093 }
2094 
2095 static int __guc_action_register_multi_lrc(struct intel_guc *guc,
2096 					   struct intel_context *ce,
2097 					   u32 guc_id,
2098 					   u32 offset,
2099 					   bool loop)
2100 {
2101 	struct intel_context *child;
2102 	u32 action[4 + MAX_ENGINE_INSTANCE];
2103 	int len = 0;
2104 
2105 	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2106 
2107 	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2108 	action[len++] = guc_id;
2109 	action[len++] = ce->parallel.number_children + 1;
2110 	action[len++] = offset;
2111 	for_each_child(ce, child) {
2112 		offset += sizeof(struct guc_lrc_desc);
2113 		action[len++] = offset;
2114 	}
2115 
2116 	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2117 }
2118 
2119 static int __guc_action_register_context(struct intel_guc *guc,
2120 					 u32 guc_id,
2121 					 u32 offset,
2122 					 bool loop)
2123 {
2124 	u32 action[] = {
2125 		INTEL_GUC_ACTION_REGISTER_CONTEXT,
2126 		guc_id,
2127 		offset,
2128 	};
2129 
2130 	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2131 					     0, loop);
2132 }
2133 
2134 static int register_context(struct intel_context *ce, bool loop)
2135 {
2136 	struct intel_guc *guc = ce_to_guc(ce);
2137 	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
2138 		ce->guc_id.id * sizeof(struct guc_lrc_desc);
2139 	int ret;
2140 
2141
	GEM_BUG_ON(intel_context_is_child(ce));
2142 	trace_intel_context_register(ce);
2143 
2144 	if (intel_context_is_parent(ce))
2145 		ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
2146 						      offset, loop);
2147 	else
2148 		ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
2149 						    loop);
2150 	if (likely(!ret)) {
2151 		unsigned long flags;
2152 
2153 		spin_lock_irqsave(&ce->guc_state.lock, flags);
2154 		set_context_registered(ce);
2155 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2156 	}
2157 
2158 	return ret;
2159 }
2160 
2161 static int __guc_action_deregister_context(struct intel_guc *guc,
2162 					   u32 guc_id)
2163 {
2164 	u32 action[] = {
2165 		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2166 		guc_id,
2167 	};
2168 
2169 	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2170 					     G2H_LEN_DW_DEREGISTER_CONTEXT,
2171 					     true);
2172 }
2173 
2174 static int deregister_context(struct intel_context *ce, u32 guc_id)
2175 {
2176 	struct intel_guc *guc = ce_to_guc(ce);
2177 
2178 	GEM_BUG_ON(intel_context_is_child(ce));
2179 	trace_intel_context_deregister(ce);
2180 
2181 	return __guc_action_deregister_context(guc, guc_id);
2182 }
2183 
2184 static inline void clear_children_join_go_memory(struct intel_context *ce)
2185 {
2186 	struct parent_scratch *ps = __get_parent_scratch(ce);
2187 	int i;
2188 
2189 	ps->go.semaphore = 0;
2190 	for (i = 0; i < ce->parallel.number_children + 1; ++i)
2191 		ps->join[i].semaphore = 0;
2192 }
2193 
2194 static inline u32 get_children_go_value(struct intel_context *ce)
2195 {
2196 	return __get_parent_scratch(ce)->go.semaphore;
2197 }
2198 
2199 static inline u32 get_children_join_value(struct intel_context *ce,
2200 					  u8 child_index)
2201 {
2202 	return __get_parent_scratch(ce)->join[child_index].semaphore;
2203 }
2204 
2205 static void guc_context_policy_init(struct intel_engine_cs *engine,
2206 				    struct guc_lrc_desc *desc)
2207 {
2208 	desc->policy_flags = 0;
2209 
2210 	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2211 		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
2212 
2213 	/* NB: For both of these, zero means disabled. */
2214 	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2215 	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2216 }
2217 
2218 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
2219 {
2220 	struct intel_engine_cs *engine = ce->engine;
2221 	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2222 	struct intel_guc *guc = &engine->gt->uc.guc;
2223 	u32 desc_idx = ce->guc_id.id;
2224 	struct guc_lrc_desc *desc;
2225 	bool context_registered;
2226 	intel_wakeref_t wakeref;
2227 	struct intel_context *child;
2228 	int ret = 0;
2229 
2230 	GEM_BUG_ON(!engine->mask);
2231 	GEM_BUG_ON(!sched_state_is_init(ce));
2232 
2233 	/*
2234 	 * Ensure the LRC + CT vmas are in the same region, as the write barrier
2235 	 * is done based on the CT vma region.
2236 */ 2237 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2238 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2239 2240 context_registered = lrc_desc_registered(guc, desc_idx); 2241 2242 reset_lrc_desc(guc, desc_idx); 2243 set_lrc_desc_registered(guc, desc_idx, ce); 2244 2245 desc = __get_lrc_desc(guc, desc_idx); 2246 desc->engine_class = engine_class_to_guc_class(engine->class); 2247 desc->engine_submit_mask = engine->logical_mask; 2248 desc->hw_context_desc = ce->lrc.lrca; 2249 desc->priority = ce->guc_state.prio; 2250 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2251 guc_context_policy_init(engine, desc); 2252 2253 /* 2254 * If context is a parent, we need to register a process descriptor 2255 * describing a work queue and register all child contexts. 2256 */ 2257 if (intel_context_is_parent(ce)) { 2258 struct guc_process_desc *pdesc; 2259 2260 ce->parallel.guc.wqi_tail = 0; 2261 ce->parallel.guc.wqi_head = 0; 2262 2263 desc->process_desc = i915_ggtt_offset(ce->state) + 2264 __get_parent_scratch_offset(ce); 2265 desc->wq_addr = i915_ggtt_offset(ce->state) + 2266 __get_wq_offset(ce); 2267 desc->wq_size = WQ_SIZE; 2268 2269 pdesc = __get_process_desc(ce); 2270 memset(pdesc, 0, sizeof(*(pdesc))); 2271 pdesc->stage_id = ce->guc_id.id; 2272 pdesc->wq_base_addr = desc->wq_addr; 2273 pdesc->wq_size_bytes = desc->wq_size; 2274 pdesc->wq_status = WQ_STATUS_ACTIVE; 2275 2276 for_each_child(ce, child) { 2277 desc = __get_lrc_desc(guc, child->guc_id.id); 2278 2279 desc->engine_class = 2280 engine_class_to_guc_class(engine->class); 2281 desc->hw_context_desc = child->lrc.lrca; 2282 desc->priority = ce->guc_state.prio; 2283 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2284 guc_context_policy_init(engine, desc); 2285 } 2286 2287 clear_children_join_go_memory(ce); 2288 } 2289 2290 /* 2291 * The context_lookup xarray is used to determine if the hardware 2292 * context is currently registered. There are two cases in which it 2293 * could be registered either the guc_id has been stolen from another 2294 * context or the lrc descriptor address of this context has changed. In 2295 * either case the context needs to be deregistered with the GuC before 2296 * registering this context. 2297 */ 2298 if (context_registered) { 2299 bool disabled; 2300 unsigned long flags; 2301 2302 trace_intel_context_steal_guc_id(ce); 2303 GEM_BUG_ON(!loop); 2304 2305 /* Seal race with Reset */ 2306 spin_lock_irqsave(&ce->guc_state.lock, flags); 2307 disabled = submission_disabled(guc); 2308 if (likely(!disabled)) { 2309 set_context_wait_for_deregister_to_register(ce); 2310 intel_context_get(ce); 2311 } 2312 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2313 if (unlikely(disabled)) { 2314 reset_lrc_desc(guc, desc_idx); 2315 return 0; /* Will get registered later */ 2316 } 2317 2318 /* 2319 * If stealing the guc_id, this ce has the same guc_id as the 2320 * context whose guc_id was stolen. 
		 */
2322 		with_intel_runtime_pm(runtime_pm, wakeref)
2323 			ret = deregister_context(ce, ce->guc_id.id);
2324 		if (unlikely(ret == -ENODEV))
2325 			ret = 0;	/* Will get registered later */
2326 	} else {
2327 		with_intel_runtime_pm(runtime_pm, wakeref)
2328 			ret = register_context(ce, loop);
2329 		if (unlikely(ret == -EBUSY)) {
2330 			reset_lrc_desc(guc, desc_idx);
2331 		} else if (unlikely(ret == -ENODEV)) {
2332 			reset_lrc_desc(guc, desc_idx);
2333 			ret = 0;	/* Will get registered later */
2334 		}
2335 	}
2336 
2337 	return ret;
2338 }
2339 
2340 static int __guc_context_pre_pin(struct intel_context *ce,
2341 				 struct intel_engine_cs *engine,
2342 				 struct i915_gem_ww_ctx *ww,
2343 				 void **vaddr)
2344 {
2345 	return lrc_pre_pin(ce, engine, ww, vaddr);
2346 }
2347 
2348 static int __guc_context_pin(struct intel_context *ce,
2349 			     struct intel_engine_cs *engine,
2350 			     void *vaddr)
2351 {
2352 	if (i915_ggtt_offset(ce->state) !=
2353 	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2354 		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2355 
2356 	/*
2357 	 * GuC context gets pinned in guc_request_alloc. See that function for
2358 	 * explanation of why.
2359 	 */
2360 
2361 	return lrc_pin(ce, engine, vaddr);
2362 }
2363 
2364 static int guc_context_pre_pin(struct intel_context *ce,
2365 			       struct i915_gem_ww_ctx *ww,
2366 			       void **vaddr)
2367 {
2368 	return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2369 }
2370 
2371 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2372 {
2373 	int ret = __guc_context_pin(ce, ce->engine, vaddr);
2374 
2375 	if (likely(!ret && !intel_context_is_barrier(ce)))
2376 		intel_engine_pm_get(ce->engine);
2377 
2378 	return ret;
2379 }
2380 
2381 static void guc_context_unpin(struct intel_context *ce)
2382 {
2383 	struct intel_guc *guc = ce_to_guc(ce);
2384 
2385 	unpin_guc_id(guc, ce);
2386 	lrc_unpin(ce);
2387 
2388 	if (likely(!intel_context_is_barrier(ce)))
2389 		intel_engine_pm_put_async(ce->engine);
2390 }
2391 
2392 static void guc_context_post_unpin(struct intel_context *ce)
2393 {
2394 	lrc_post_unpin(ce);
2395 }
2396 
2397 static void __guc_context_sched_enable(struct intel_guc *guc,
2398 				       struct intel_context *ce)
2399 {
2400 	u32 action[] = {
2401 		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2402 		ce->guc_id.id,
2403 		GUC_CONTEXT_ENABLE
2404 	};
2405 
2406 	trace_intel_context_sched_enable(ce);
2407 
2408 	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2409 				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2410 }
2411 
2412 static void __guc_context_sched_disable(struct intel_guc *guc,
2413 					struct intel_context *ce,
2414 					u16 guc_id)
2415 {
2416 	u32 action[] = {
2417 		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2418 		guc_id,	/* ce->guc_id.id not stable */
2419 		GUC_CONTEXT_DISABLE
2420 	};
2421 
2422 	GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
2423 
2424 	GEM_BUG_ON(intel_context_is_child(ce));
2425 	trace_intel_context_sched_disable(ce);
2426 
2427 	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2428 				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2429 }
2430 
2431 static void guc_blocked_fence_complete(struct intel_context *ce)
2432 {
2433 	lockdep_assert_held(&ce->guc_state.lock);
2434 
2435 	if (!i915_sw_fence_done(&ce->guc_state.blocked))
2436 		i915_sw_fence_complete(&ce->guc_state.blocked);
2437 }
2438 
2439 static void guc_blocked_fence_reinit(struct intel_context *ce)
2440 {
2441 	lockdep_assert_held(&ce->guc_state.lock);
2442 	GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2443 
2444 	/*
2445 	 * This fence is always complete unless a pending schedule disable is
2446 	 * outstanding.
We arm the fence here and complete it when we receive 2447 * the pending schedule disable complete message. 2448 */ 2449 i915_sw_fence_fini(&ce->guc_state.blocked); 2450 i915_sw_fence_reinit(&ce->guc_state.blocked); 2451 i915_sw_fence_await(&ce->guc_state.blocked); 2452 i915_sw_fence_commit(&ce->guc_state.blocked); 2453 } 2454 2455 static u16 prep_context_pending_disable(struct intel_context *ce) 2456 { 2457 lockdep_assert_held(&ce->guc_state.lock); 2458 2459 set_context_pending_disable(ce); 2460 clr_context_enabled(ce); 2461 guc_blocked_fence_reinit(ce); 2462 intel_context_get(ce); 2463 2464 return ce->guc_id.id; 2465 } 2466 2467 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2468 { 2469 struct intel_guc *guc = ce_to_guc(ce); 2470 unsigned long flags; 2471 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2472 intel_wakeref_t wakeref; 2473 u16 guc_id; 2474 bool enabled; 2475 2476 GEM_BUG_ON(intel_context_is_child(ce)); 2477 2478 spin_lock_irqsave(&ce->guc_state.lock, flags); 2479 2480 incr_context_blocked(ce); 2481 2482 enabled = context_enabled(ce); 2483 if (unlikely(!enabled || submission_disabled(guc))) { 2484 if (enabled) 2485 clr_context_enabled(ce); 2486 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2487 return &ce->guc_state.blocked; 2488 } 2489 2490 /* 2491 * We add +2 here as the schedule disable complete CTB handler calls 2492 * intel_context_sched_disable_unpin (-2 to pin_count). 2493 */ 2494 atomic_add(2, &ce->pin_count); 2495 2496 guc_id = prep_context_pending_disable(ce); 2497 2498 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2499 2500 with_intel_runtime_pm(runtime_pm, wakeref) 2501 __guc_context_sched_disable(guc, ce, guc_id); 2502 2503 return &ce->guc_state.blocked; 2504 } 2505 2506 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2507 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2508 #define SCHED_STATE_NO_UNBLOCK \ 2509 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2510 SCHED_STATE_PENDING_DISABLE | \ 2511 SCHED_STATE_BANNED) 2512 2513 static bool context_cant_unblock(struct intel_context *ce) 2514 { 2515 lockdep_assert_held(&ce->guc_state.lock); 2516 2517 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2518 context_guc_id_invalid(ce) || 2519 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || 2520 !intel_context_is_pinned(ce); 2521 } 2522 2523 static void guc_context_unblock(struct intel_context *ce) 2524 { 2525 struct intel_guc *guc = ce_to_guc(ce); 2526 unsigned long flags; 2527 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2528 intel_wakeref_t wakeref; 2529 bool enable; 2530 2531 GEM_BUG_ON(context_enabled(ce)); 2532 GEM_BUG_ON(intel_context_is_child(ce)); 2533 2534 spin_lock_irqsave(&ce->guc_state.lock, flags); 2535 2536 if (unlikely(submission_disabled(guc) || 2537 context_cant_unblock(ce))) { 2538 enable = false; 2539 } else { 2540 enable = true; 2541 set_context_pending_enable(ce); 2542 set_context_enabled(ce); 2543 intel_context_get(ce); 2544 } 2545 2546 decr_context_blocked(ce); 2547 2548 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2549 2550 if (enable) { 2551 with_intel_runtime_pm(runtime_pm, wakeref) 2552 __guc_context_sched_enable(guc, ce); 2553 } 2554 } 2555 2556 static void guc_context_cancel_request(struct intel_context *ce, 2557 struct i915_request *rq) 2558 { 2559 struct intel_context *block_context = 2560 request_to_scheduling_context(rq); 2561 2562 if (i915_sw_fence_signaled(&rq->submit)) { 2563 struct i915_sw_fence *fence; 2564 2565 intel_context_get(ce); 2566 fence = 
			guc_context_block(block_context);
2567 		i915_sw_fence_wait(fence);
2568 		if (!i915_request_completed(rq)) {
2569 			__i915_request_skip(rq);
2570 			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2571 					true);
2572 		}
2573 
2574 		guc_context_unblock(block_context);
2575 		intel_context_put(ce);
2576 	}
2577 }
2578 
2579 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2580 						  u16 guc_id,
2581 						  u32 preemption_timeout)
2582 {
2583 	u32 action[] = {
2584 		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
2585 		guc_id,
2586 		preemption_timeout
2587 	};
2588 
2589 	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2590 }
2591 
2592 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
2593 {
2594 	struct intel_guc *guc = ce_to_guc(ce);
2595 	struct intel_runtime_pm *runtime_pm =
2596 		&ce->engine->gt->i915->runtime_pm;
2597 	intel_wakeref_t wakeref;
2598 	unsigned long flags;
2599 
2600 	GEM_BUG_ON(intel_context_is_child(ce));
2601 
2602 	guc_flush_submissions(guc);
2603 
2604 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2605 	set_context_banned(ce);
2606 
2607 	if (submission_disabled(guc) ||
2608 	    (!context_enabled(ce) && !context_pending_disable(ce))) {
2609 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2610 
2611 		guc_cancel_context_requests(ce);
2612 		intel_engine_signal_breadcrumbs(ce->engine);
2613 	} else if (!context_pending_disable(ce)) {
2614 		u16 guc_id;
2615 
2616 		/*
2617 		 * We add +2 here as the schedule disable complete CTB handler
2618 		 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2619 		 */
2620 		atomic_add(2, &ce->pin_count);
2621 
2622 		guc_id = prep_context_pending_disable(ce);
2623 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2624 
2625 		/*
2626 		 * In addition to disabling scheduling, set the preemption
2627 		 * timeout to the minimum value (1 us) so the banned context
2628 		 * gets kicked off the HW ASAP.
2629 		 */
2630 		with_intel_runtime_pm(runtime_pm, wakeref) {
2631 			__guc_context_set_preemption_timeout(guc, guc_id, 1);
2632 			__guc_context_sched_disable(guc, ce, guc_id);
2633 		}
2634 	} else {
2635 		if (!context_guc_id_invalid(ce))
2636 			with_intel_runtime_pm(runtime_pm, wakeref)
2637 				__guc_context_set_preemption_timeout(guc,
2638 								     ce->guc_id.id,
2639 								     1);
2640 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2641 	}
2642 }
2643 
2644 static void guc_context_sched_disable(struct intel_context *ce)
2645 {
2646 	struct intel_guc *guc = ce_to_guc(ce);
2647 	unsigned long flags;
2648 	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
2649 	intel_wakeref_t wakeref;
2650 	u16 guc_id;
2651 
2652 	GEM_BUG_ON(intel_context_is_child(ce));
2653 
2654 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2655 
2656 	/*
2657 	 * We have to check if the context has been disabled by another thread,
2658 	 * check if submission has been disabled to seal a race with reset and
2659 	 * finally check if any more requests have been committed to the
2660 	 * context, ensuring that a request doesn't slip through the
2661 	 * 'context_pending_disable' fence.
2662 */ 2663 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 2664 context_has_committed_requests(ce))) { 2665 clr_context_enabled(ce); 2666 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2667 goto unpin; 2668 } 2669 guc_id = prep_context_pending_disable(ce); 2670 2671 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2672 2673 with_intel_runtime_pm(runtime_pm, wakeref) 2674 __guc_context_sched_disable(guc, ce, guc_id); 2675 2676 return; 2677 unpin: 2678 intel_context_sched_disable_unpin(ce); 2679 } 2680 2681 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 2682 { 2683 struct intel_guc *guc = ce_to_guc(ce); 2684 struct intel_gt *gt = guc_to_gt(guc); 2685 unsigned long flags; 2686 bool disabled; 2687 2688 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 2689 GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); 2690 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 2691 GEM_BUG_ON(context_enabled(ce)); 2692 2693 /* Seal race with Reset */ 2694 spin_lock_irqsave(&ce->guc_state.lock, flags); 2695 disabled = submission_disabled(guc); 2696 if (likely(!disabled)) { 2697 __intel_gt_pm_get(gt); 2698 set_context_destroyed(ce); 2699 clr_context_registered(ce); 2700 } 2701 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2702 if (unlikely(disabled)) { 2703 release_guc_id(guc, ce); 2704 __guc_context_destroy(ce); 2705 return; 2706 } 2707 2708 deregister_context(ce, ce->guc_id.id); 2709 } 2710 2711 static void __guc_context_destroy(struct intel_context *ce) 2712 { 2713 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 2714 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 2715 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 2716 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 2717 GEM_BUG_ON(ce->guc_state.number_committed_requests); 2718 2719 lrc_fini(ce); 2720 intel_context_fini(ce); 2721 2722 if (intel_engine_is_virtual(ce->engine)) { 2723 struct guc_virtual_engine *ve = 2724 container_of(ce, typeof(*ve), context); 2725 2726 if (ve->base.breadcrumbs) 2727 intel_breadcrumbs_put(ve->base.breadcrumbs); 2728 2729 kfree(ve); 2730 } else { 2731 intel_context_free(ce); 2732 } 2733 } 2734 2735 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 2736 { 2737 struct intel_context *ce; 2738 unsigned long flags; 2739 2740 GEM_BUG_ON(!submission_disabled(guc) && 2741 guc_submission_initialized(guc)); 2742 2743 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2744 spin_lock_irqsave(&guc->submission_state.lock, flags); 2745 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2746 struct intel_context, 2747 destroyed_link); 2748 if (ce) 2749 list_del_init(&ce->destroyed_link); 2750 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2751 2752 if (!ce) 2753 break; 2754 2755 release_guc_id(guc, ce); 2756 __guc_context_destroy(ce); 2757 } 2758 } 2759 2760 static void deregister_destroyed_contexts(struct intel_guc *guc) 2761 { 2762 struct intel_context *ce; 2763 unsigned long flags; 2764 2765 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2766 spin_lock_irqsave(&guc->submission_state.lock, flags); 2767 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2768 struct intel_context, 2769 destroyed_link); 2770 if (ce) 2771 list_del_init(&ce->destroyed_link); 2772 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2773 2774 if (!ce) 2775 break; 2776 2777 guc_lrc_desc_unpin(ce); 2778 } 2779 } 2780 2781 static void destroyed_worker_func(struct 
work_struct *w) 2782 { 2783 struct intel_guc *guc = container_of(w, struct intel_guc, 2784 submission_state.destroyed_worker); 2785 struct intel_gt *gt = guc_to_gt(guc); 2786 int tmp; 2787 2788 with_intel_gt_pm(gt, tmp) 2789 deregister_destroyed_contexts(guc); 2790 } 2791 2792 static void guc_context_destroy(struct kref *kref) 2793 { 2794 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2795 struct intel_guc *guc = ce_to_guc(ce); 2796 unsigned long flags; 2797 bool destroy; 2798 2799 /* 2800 * If the guc_id is invalid this context has been stolen and we can free 2801 * it immediately. Also can be freed immediately if the context is not 2802 * registered with the GuC or the GuC is in the middle of a reset. 2803 */ 2804 spin_lock_irqsave(&guc->submission_state.lock, flags); 2805 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 2806 !lrc_desc_registered(guc, ce->guc_id.id); 2807 if (likely(!destroy)) { 2808 if (!list_empty(&ce->guc_id.link)) 2809 list_del_init(&ce->guc_id.link); 2810 list_add_tail(&ce->destroyed_link, 2811 &guc->submission_state.destroyed_contexts); 2812 } else { 2813 __release_guc_id(guc, ce); 2814 } 2815 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2816 if (unlikely(destroy)) { 2817 __guc_context_destroy(ce); 2818 return; 2819 } 2820 2821 /* 2822 * We use a worker to issue the H2G to deregister the context as we can 2823 * take the GT PM for the first time which isn't allowed from an atomic 2824 * context. 2825 */ 2826 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 2827 } 2828 2829 static int guc_context_alloc(struct intel_context *ce) 2830 { 2831 return lrc_alloc(ce, ce->engine); 2832 } 2833 2834 static void guc_context_set_prio(struct intel_guc *guc, 2835 struct intel_context *ce, 2836 u8 prio) 2837 { 2838 u32 action[] = { 2839 INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, 2840 ce->guc_id.id, 2841 prio, 2842 }; 2843 2844 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 2845 prio > GUC_CLIENT_PRIORITY_NORMAL); 2846 lockdep_assert_held(&ce->guc_state.lock); 2847 2848 if (ce->guc_state.prio == prio || submission_disabled(guc) || 2849 !context_registered(ce)) { 2850 ce->guc_state.prio = prio; 2851 return; 2852 } 2853 2854 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2855 2856 ce->guc_state.prio = prio; 2857 trace_intel_context_set_prio(ce); 2858 } 2859 2860 static inline u8 map_i915_prio_to_guc_prio(int prio) 2861 { 2862 if (prio == I915_PRIORITY_NORMAL) 2863 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 2864 else if (prio < I915_PRIORITY_NORMAL) 2865 return GUC_CLIENT_PRIORITY_NORMAL; 2866 else if (prio < I915_PRIORITY_DISPLAY) 2867 return GUC_CLIENT_PRIORITY_HIGH; 2868 else 2869 return GUC_CLIENT_PRIORITY_KMD_HIGH; 2870 } 2871 2872 static inline void add_context_inflight_prio(struct intel_context *ce, 2873 u8 guc_prio) 2874 { 2875 lockdep_assert_held(&ce->guc_state.lock); 2876 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2877 2878 ++ce->guc_state.prio_count[guc_prio]; 2879 2880 /* Overflow protection */ 2881 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2882 } 2883 2884 static inline void sub_context_inflight_prio(struct intel_context *ce, 2885 u8 guc_prio) 2886 { 2887 lockdep_assert_held(&ce->guc_state.lock); 2888 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2889 2890 /* Underflow protection */ 2891 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2892 2893 --ce->guc_state.prio_count[guc_prio]; 2894 } 2895 2896 static inline void 
update_context_prio(struct intel_context *ce) 2897 { 2898 struct intel_guc *guc = &ce->engine->gt->uc.guc; 2899 int i; 2900 2901 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 2902 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 2903 2904 lockdep_assert_held(&ce->guc_state.lock); 2905 2906 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 2907 if (ce->guc_state.prio_count[i]) { 2908 guc_context_set_prio(guc, ce, i); 2909 break; 2910 } 2911 } 2912 } 2913 2914 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 2915 { 2916 /* Lower value is higher priority */ 2917 return new_guc_prio < old_guc_prio; 2918 } 2919 2920 static void add_to_context(struct i915_request *rq) 2921 { 2922 struct intel_context *ce = request_to_scheduling_context(rq); 2923 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 2924 2925 GEM_BUG_ON(intel_context_is_child(ce)); 2926 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 2927 2928 spin_lock(&ce->guc_state.lock); 2929 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 2930 2931 if (rq->guc_prio == GUC_PRIO_INIT) { 2932 rq->guc_prio = new_guc_prio; 2933 add_context_inflight_prio(ce, rq->guc_prio); 2934 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 2935 sub_context_inflight_prio(ce, rq->guc_prio); 2936 rq->guc_prio = new_guc_prio; 2937 add_context_inflight_prio(ce, rq->guc_prio); 2938 } 2939 update_context_prio(ce); 2940 2941 spin_unlock(&ce->guc_state.lock); 2942 } 2943 2944 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 2945 { 2946 lockdep_assert_held(&ce->guc_state.lock); 2947 2948 if (rq->guc_prio != GUC_PRIO_INIT && 2949 rq->guc_prio != GUC_PRIO_FINI) { 2950 sub_context_inflight_prio(ce, rq->guc_prio); 2951 update_context_prio(ce); 2952 } 2953 rq->guc_prio = GUC_PRIO_FINI; 2954 } 2955 2956 static void remove_from_context(struct i915_request *rq) 2957 { 2958 struct intel_context *ce = request_to_scheduling_context(rq); 2959 2960 GEM_BUG_ON(intel_context_is_child(ce)); 2961 2962 spin_lock_irq(&ce->guc_state.lock); 2963 2964 list_del_init(&rq->sched.link); 2965 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2966 2967 /* Prevent further __await_execution() registering a cb, then flush */ 2968 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2969 2970 guc_prio_fini(rq, ce); 2971 2972 decr_context_committed_requests(ce); 2973 2974 spin_unlock_irq(&ce->guc_state.lock); 2975 2976 atomic_dec(&ce->guc_id.ref); 2977 i915_request_notify_execute_cb_imm(rq); 2978 } 2979 2980 static const struct intel_context_ops guc_context_ops = { 2981 .alloc = guc_context_alloc, 2982 2983 .pre_pin = guc_context_pre_pin, 2984 .pin = guc_context_pin, 2985 .unpin = guc_context_unpin, 2986 .post_unpin = guc_context_post_unpin, 2987 2988 .ban = guc_context_ban, 2989 2990 .cancel_request = guc_context_cancel_request, 2991 2992 .enter = intel_context_enter_engine, 2993 .exit = intel_context_exit_engine, 2994 2995 .sched_disable = guc_context_sched_disable, 2996 2997 .reset = lrc_reset, 2998 .destroy = guc_context_destroy, 2999 3000 .create_virtual = guc_create_virtual, 3001 .create_parallel = guc_create_parallel, 3002 }; 3003 3004 static void submit_work_cb(struct irq_work *wrk) 3005 { 3006 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3007 3008 might_lock(&rq->engine->sched_engine->lock); 3009 i915_sw_fence_complete(&rq->submit); 3010 } 3011 3012 static void __guc_signal_context_fence(struct intel_context *ce) 3013 { 3014 struct i915_request *rq, *rn; 
3015 3016 lockdep_assert_held(&ce->guc_state.lock); 3017 3018 if (!list_empty(&ce->guc_state.fences)) 3019 trace_intel_context_fence_release(ce); 3020 3021 /* 3022 * Use an IRQ to ensure locking order of sched_engine->lock -> 3023 * ce->guc_state.lock is preserved. 3024 */ 3025 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3026 guc_fence_link) { 3027 list_del(&rq->guc_fence_link); 3028 irq_work_queue(&rq->submit_work); 3029 } 3030 3031 INIT_LIST_HEAD(&ce->guc_state.fences); 3032 } 3033 3034 static void guc_signal_context_fence(struct intel_context *ce) 3035 { 3036 unsigned long flags; 3037 3038 GEM_BUG_ON(intel_context_is_child(ce)); 3039 3040 spin_lock_irqsave(&ce->guc_state.lock, flags); 3041 clr_context_wait_for_deregister_to_register(ce); 3042 __guc_signal_context_fence(ce); 3043 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3044 } 3045 3046 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3047 { 3048 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3049 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && 3050 !submission_disabled(ce_to_guc(ce)); 3051 } 3052 3053 static void guc_context_init(struct intel_context *ce) 3054 { 3055 const struct i915_gem_context *ctx; 3056 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3057 3058 rcu_read_lock(); 3059 ctx = rcu_dereference(ce->gem_context); 3060 if (ctx) 3061 prio = ctx->sched.priority; 3062 rcu_read_unlock(); 3063 3064 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3065 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3066 } 3067 3068 static int guc_request_alloc(struct i915_request *rq) 3069 { 3070 struct intel_context *ce = request_to_scheduling_context(rq); 3071 struct intel_guc *guc = ce_to_guc(ce); 3072 unsigned long flags; 3073 int ret; 3074 3075 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3076 3077 /* 3078 * Flush enough space to reduce the likelihood of waiting after 3079 * we start building the request - in which case we will just 3080 * have to repeat work. 3081 */ 3082 rq->reserved_space += GUC_REQUEST_SIZE; 3083 3084 /* 3085 * Note that after this point, we have committed to using 3086 * this request as it is being used to both track the 3087 * state of engine initialisation and liveness of the 3088 * golden renderstate above. Think twice before you try 3089 * to cancel/unwind this request now. 3090 */ 3091 3092 /* Unconditionally invalidate GPU caches and TLBs. */ 3093 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3094 if (ret) 3095 return ret; 3096 3097 rq->reserved_space -= GUC_REQUEST_SIZE; 3098 3099 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3100 guc_context_init(ce); 3101 3102 /* 3103 * Call pin_guc_id here rather than in the pinning step as with 3104 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3105 * guc_id and creating horrible race conditions. This is especially bad 3106 * when guc_id are being stolen due to over subscription. By the time 3107 * this function is reached, it is guaranteed that the guc_id will be 3108 * persistent until the generated request is retired. Thus, sealing these 3109 * race conditions. It is still safe to fail here if guc_id are 3110 * exhausted and return -EAGAIN to the user indicating that they can try 3111 * again in the future. 3112 * 3113 * There is no need for a lock here as the timeline mutex ensures at 3114 * most one context can be executing this code path at once. The 3115 * guc_id_ref is incremented once for every request in flight and 3116 * decremented on each retire. 
When it is zero, a lock around the
3117 	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3118 	 */
3119 	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3120 		goto out;
3121 
3122 	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
3123 	if (unlikely(ret < 0))
3124 		return ret;
3125 	if (context_needs_register(ce, !!ret)) {
3126 		ret = guc_lrc_desc_pin(ce, true);
3127 		if (unlikely(ret)) {	/* unwind */
3128 			if (ret == -EPIPE) {
3129 				disable_submission(guc);
3130 				goto out;	/* GPU will be reset */
3131 			}
3132 			atomic_dec(&ce->guc_id.ref);
3133 			unpin_guc_id(guc, ce);
3134 			return ret;
3135 		}
3136 	}
3137 
3138 	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3139 
3140 out:
3141 	/*
3142 	 * We block all requests on this context if a G2H is pending for a
3143 	 * schedule disable or context deregistration as the GuC will fail a
3144 	 * schedule enable or context registration if either G2H is pending
3145 	 * respectively. Once a G2H returns, the fence that is blocking these
3146 	 * requests is released (see guc_signal_context_fence).
3147 	 */
3148 	spin_lock_irqsave(&ce->guc_state.lock, flags);
3149 	if (context_wait_for_deregister_to_register(ce) ||
3150 	    context_pending_disable(ce)) {
3151 		init_irq_work(&rq->submit_work, submit_work_cb);
3152 		i915_sw_fence_await(&rq->submit);
3153 
3154 		list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3155 	}
3156 	incr_context_committed_requests(ce);
3157 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3158 
3159 	return 0;
3160 }
3161 
3162 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3163 				       struct i915_gem_ww_ctx *ww,
3164 				       void **vaddr)
3165 {
3166 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3167 
3168 	return __guc_context_pre_pin(ce, engine, ww, vaddr);
3169 }
3170 
3171 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3172 {
3173 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3174 	int ret = __guc_context_pin(ce, engine, vaddr);
3175 	intel_engine_mask_t tmp, mask = ce->engine->mask;
3176 
3177 	if (likely(!ret))
3178 		for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3179 			intel_engine_pm_get(engine);
3180 
3181 	return ret;
3182 }
3183 
3184 static void guc_virtual_context_unpin(struct intel_context *ce)
3185 {
3186 	intel_engine_mask_t tmp, mask = ce->engine->mask;
3187 	struct intel_engine_cs *engine;
3188 	struct intel_guc *guc = ce_to_guc(ce);
3189 
3190 	GEM_BUG_ON(context_enabled(ce));
3191 	GEM_BUG_ON(intel_context_is_barrier(ce));
3192 
3193 	unpin_guc_id(guc, ce);
3194 	lrc_unpin(ce);
3195 
3196 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3197 		intel_engine_pm_put_async(engine);
3198 }
3199 
3200 static void guc_virtual_context_enter(struct intel_context *ce)
3201 {
3202 	intel_engine_mask_t tmp, mask = ce->engine->mask;
3203 	struct intel_engine_cs *engine;
3204 
3205 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3206 		intel_engine_pm_get(engine);
3207 
3208 	intel_timeline_enter(ce->timeline);
3209 }
3210 
3211 static void guc_virtual_context_exit(struct intel_context *ce)
3212 {
3213 	intel_engine_mask_t tmp, mask = ce->engine->mask;
3214 	struct intel_engine_cs *engine;
3215 
3216 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3217 		intel_engine_pm_put(engine);
3218 
3219 	intel_timeline_exit(ce->timeline);
3220 }
3221 
3222 static int guc_virtual_context_alloc(struct intel_context *ce)
3223 {
3224 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3225 
3226 	return lrc_alloc(ce, engine);
3227 }
3228
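/*
 * Illustrative sketch only (not used by the driver): the priority rule applied
 * by map_i915_prio_to_guc_prio() and update_context_prio() earlier in this
 * file. Lower numeric GuC priority values are higher priority
 * (GUC_CLIENT_PRIORITY_KMD_HIGH is 0), and a context runs at the highest
 * priority of any request still in flight on it, e.g. a single KMD_HIGH
 * request keeps the whole context at KMD_HIGH even if later requests are
 * NORMAL. The helper below is hypothetical and only mirrors that selection
 * over a per-priority in-flight count array such as ce->guc_state.prio_count.
 */
static u8 __maybe_unused example_effective_guc_prio(const u8 *prio_count,
						    unsigned int num_prios)
{
	unsigned int i;

	/* Scan from the numerically lowest (highest) priority upwards */
	for (i = 0; i < num_prios; i++)
		if (prio_count[i])
			return i;

	/* Nothing in flight: sketch fallback to the lowest priority */
	return num_prios - 1;
}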
3229 static const struct intel_context_ops virtual_guc_context_ops = { 3230 .alloc = guc_virtual_context_alloc, 3231 3232 .pre_pin = guc_virtual_context_pre_pin, 3233 .pin = guc_virtual_context_pin, 3234 .unpin = guc_virtual_context_unpin, 3235 .post_unpin = guc_context_post_unpin, 3236 3237 .ban = guc_context_ban, 3238 3239 .cancel_request = guc_context_cancel_request, 3240 3241 .enter = guc_virtual_context_enter, 3242 .exit = guc_virtual_context_exit, 3243 3244 .sched_disable = guc_context_sched_disable, 3245 3246 .destroy = guc_context_destroy, 3247 3248 .get_sibling = guc_virtual_get_sibling, 3249 }; 3250 3251 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3252 { 3253 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3254 struct intel_guc *guc = ce_to_guc(ce); 3255 int ret; 3256 3257 GEM_BUG_ON(!intel_context_is_parent(ce)); 3258 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3259 3260 ret = pin_guc_id(guc, ce); 3261 if (unlikely(ret < 0)) 3262 return ret; 3263 3264 return __guc_context_pin(ce, engine, vaddr); 3265 } 3266 3267 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3268 { 3269 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3270 3271 GEM_BUG_ON(!intel_context_is_child(ce)); 3272 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3273 3274 __intel_context_pin(ce->parallel.parent); 3275 return __guc_context_pin(ce, engine, vaddr); 3276 } 3277 3278 static void guc_parent_context_unpin(struct intel_context *ce) 3279 { 3280 struct intel_guc *guc = ce_to_guc(ce); 3281 3282 GEM_BUG_ON(context_enabled(ce)); 3283 GEM_BUG_ON(intel_context_is_barrier(ce)); 3284 GEM_BUG_ON(!intel_context_is_parent(ce)); 3285 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3286 3287 unpin_guc_id(guc, ce); 3288 lrc_unpin(ce); 3289 } 3290 3291 static void guc_child_context_unpin(struct intel_context *ce) 3292 { 3293 GEM_BUG_ON(context_enabled(ce)); 3294 GEM_BUG_ON(intel_context_is_barrier(ce)); 3295 GEM_BUG_ON(!intel_context_is_child(ce)); 3296 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3297 3298 lrc_unpin(ce); 3299 } 3300 3301 static void guc_child_context_post_unpin(struct intel_context *ce) 3302 { 3303 GEM_BUG_ON(!intel_context_is_child(ce)); 3304 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3305 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3306 3307 lrc_post_unpin(ce); 3308 intel_context_unpin(ce->parallel.parent); 3309 } 3310 3311 static void guc_child_context_destroy(struct kref *kref) 3312 { 3313 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3314 3315 __guc_context_destroy(ce); 3316 } 3317 3318 static const struct intel_context_ops virtual_parent_context_ops = { 3319 .alloc = guc_virtual_context_alloc, 3320 3321 .pre_pin = guc_context_pre_pin, 3322 .pin = guc_parent_context_pin, 3323 .unpin = guc_parent_context_unpin, 3324 .post_unpin = guc_context_post_unpin, 3325 3326 .ban = guc_context_ban, 3327 3328 .cancel_request = guc_context_cancel_request, 3329 3330 .enter = guc_virtual_context_enter, 3331 .exit = guc_virtual_context_exit, 3332 3333 .sched_disable = guc_context_sched_disable, 3334 3335 .destroy = guc_context_destroy, 3336 3337 .get_sibling = guc_virtual_get_sibling, 3338 }; 3339 3340 static const struct intel_context_ops virtual_child_context_ops = { 3341 .alloc = guc_virtual_context_alloc, 3342 3343 .pre_pin = guc_context_pre_pin, 3344 .pin = guc_child_context_pin, 3345 .unpin = guc_child_context_unpin, 3346 .post_unpin = 
guc_child_context_post_unpin, 3347 3348 .cancel_request = guc_context_cancel_request, 3349 3350 .enter = guc_virtual_context_enter, 3351 .exit = guc_virtual_context_exit, 3352 3353 .destroy = guc_child_context_destroy, 3354 3355 .get_sibling = guc_virtual_get_sibling, 3356 }; 3357 3358 /* 3359 * The below override of the breadcrumbs is enabled when the user configures a 3360 * context for parallel submission (multi-lrc, parent-child). 3361 * 3362 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3363 * safely preempt all the hw contexts configured for parallel submission 3364 * between each BB. The contract between the i915 and GuC is if the parent 3365 * context can be preempted, all the children can be preempted, and the GuC will 3366 * always try to preempt the parent before the children. A handshake between the 3367 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3368 * creating a window to preempt between each set of BBs. 3369 */ 3370 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3371 u64 offset, u32 len, 3372 const unsigned int flags); 3373 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3374 u64 offset, u32 len, 3375 const unsigned int flags); 3376 static u32 * 3377 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3378 u32 *cs); 3379 static u32 * 3380 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3381 u32 *cs); 3382 3383 static struct intel_context * 3384 guc_create_parallel(struct intel_engine_cs **engines, 3385 unsigned int num_siblings, 3386 unsigned int width) 3387 { 3388 struct intel_engine_cs **siblings = NULL; 3389 struct intel_context *parent = NULL, *ce, *err; 3390 int i, j; 3391 3392 siblings = kmalloc_array(num_siblings, 3393 sizeof(*siblings), 3394 GFP_KERNEL); 3395 if (!siblings) 3396 return ERR_PTR(-ENOMEM); 3397 3398 for (i = 0; i < width; ++i) { 3399 for (j = 0; j < num_siblings; ++j) 3400 siblings[j] = engines[i * num_siblings + j]; 3401 3402 ce = intel_engine_create_virtual(siblings, num_siblings, 3403 FORCE_VIRTUAL); 3404 if (IS_ERR(ce)) { 3405 err = ERR_CAST(ce); 3406 goto unwind; 3407 } 3408 3409 if (i == 0) { 3410 parent = ce; 3411 parent->ops = &virtual_parent_context_ops; 3412 } else { 3413 ce->ops = &virtual_child_context_ops; 3414 intel_context_bind_parent_child(parent, ce); 3415 } 3416 } 3417 3418 parent->parallel.fence_context = dma_fence_context_alloc(1); 3419 3420 parent->engine->emit_bb_start = 3421 emit_bb_start_parent_no_preempt_mid_batch; 3422 parent->engine->emit_fini_breadcrumb = 3423 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3424 parent->engine->emit_fini_breadcrumb_dw = 3425 12 + 4 * parent->parallel.number_children; 3426 for_each_child(parent, ce) { 3427 ce->engine->emit_bb_start = 3428 emit_bb_start_child_no_preempt_mid_batch; 3429 ce->engine->emit_fini_breadcrumb = 3430 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3431 ce->engine->emit_fini_breadcrumb_dw = 16; 3432 } 3433 3434 kfree(siblings); 3435 return parent; 3436 3437 unwind: 3438 if (parent) 3439 intel_context_put(parent); 3440 kfree(siblings); 3441 return err; 3442 } 3443 3444 static bool 3445 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3446 { 3447 struct intel_engine_cs *sibling; 3448 intel_engine_mask_t tmp, mask = b->engine_mask; 3449 bool result = false; 3450 3451 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3452 result |= intel_engine_irq_enable(sibling); 3453 3454 return 
result; 3455 } 3456 3457 static void 3458 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3459 { 3460 struct intel_engine_cs *sibling; 3461 intel_engine_mask_t tmp, mask = b->engine_mask; 3462 3463 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3464 intel_engine_irq_disable(sibling); 3465 } 3466 3467 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3468 { 3469 int i; 3470 3471 /* 3472 * In GuC submission mode we do not know which physical engine a request 3473 * will be scheduled on, this creates a problem because the breadcrumb 3474 * interrupt is per physical engine. To work around this we attach 3475 * requests and direct all breadcrumb interrupts to the first instance 3476 * of an engine per class. In addition all breadcrumb interrupts are 3477 * enabled / disabled across an engine class in unison. 3478 */ 3479 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3480 struct intel_engine_cs *sibling = 3481 engine->gt->engine_class[engine->class][i]; 3482 3483 if (sibling) { 3484 if (engine->breadcrumbs != sibling->breadcrumbs) { 3485 intel_breadcrumbs_put(engine->breadcrumbs); 3486 engine->breadcrumbs = 3487 intel_breadcrumbs_get(sibling->breadcrumbs); 3488 } 3489 break; 3490 } 3491 } 3492 3493 if (engine->breadcrumbs) { 3494 engine->breadcrumbs->engine_mask |= engine->mask; 3495 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3496 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3497 } 3498 } 3499 3500 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3501 int prio) 3502 { 3503 struct intel_context *ce = request_to_scheduling_context(rq); 3504 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3505 3506 /* Short circuit function */ 3507 if (prio < I915_PRIORITY_NORMAL || 3508 rq->guc_prio == GUC_PRIO_FINI || 3509 (rq->guc_prio != GUC_PRIO_INIT && 3510 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3511 return; 3512 3513 spin_lock(&ce->guc_state.lock); 3514 if (rq->guc_prio != GUC_PRIO_FINI) { 3515 if (rq->guc_prio != GUC_PRIO_INIT) 3516 sub_context_inflight_prio(ce, rq->guc_prio); 3517 rq->guc_prio = new_guc_prio; 3518 add_context_inflight_prio(ce, rq->guc_prio); 3519 update_context_prio(ce); 3520 } 3521 spin_unlock(&ce->guc_state.lock); 3522 } 3523 3524 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3525 { 3526 struct intel_context *ce = request_to_scheduling_context(rq); 3527 3528 spin_lock(&ce->guc_state.lock); 3529 guc_prio_fini(rq, ce); 3530 spin_unlock(&ce->guc_state.lock); 3531 } 3532 3533 static void sanitize_hwsp(struct intel_engine_cs *engine) 3534 { 3535 struct intel_timeline *tl; 3536 3537 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3538 intel_timeline_reset_seqno(tl); 3539 } 3540 3541 static void guc_sanitize(struct intel_engine_cs *engine) 3542 { 3543 /* 3544 * Poison residual state on resume, in case the suspend didn't! 3545 * 3546 * We have to assume that across suspend/resume (or other loss 3547 * of control) that the contents of our pinned buffers has been 3548 * lost, replaced by garbage. Since this doesn't always happen, 3549 * let's poison such state so that we more quickly spot when 3550 * we falsely assume it has been preserved. 3551 */ 3552 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3553 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3554 3555 /* 3556 * The kernel_context HWSP is stored in the status_page. As above, 3557 * that may be lost on resume/initialisation, and so we need to 3558 * reset the value in the HWSP. 
3559 	 */
3560 	sanitize_hwsp(engine);
3561 
3562 	/* And scrub the dirty cachelines for the HWSP */
3563 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
3564 
3565 	intel_engine_reset_pinned_contexts(engine);
3566 }
3567 
3568 static void setup_hwsp(struct intel_engine_cs *engine)
3569 {
3570 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3571 
3572 	ENGINE_WRITE_FW(engine,
3573 			RING_HWS_PGA,
3574 			i915_ggtt_offset(engine->status_page.vma));
3575 }
3576 
3577 static void start_engine(struct intel_engine_cs *engine)
3578 {
3579 	ENGINE_WRITE_FW(engine,
3580 			RING_MODE_GEN7,
3581 			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
3582 
3583 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3584 	ENGINE_POSTING_READ(engine, RING_MI_MODE);
3585 }
3586 
3587 static int guc_resume(struct intel_engine_cs *engine)
3588 {
3589 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3590 
3591 	intel_mocs_init_engine(engine);
3592 
3593 	intel_breadcrumbs_reset(engine->breadcrumbs);
3594 
3595 	setup_hwsp(engine);
3596 	start_engine(engine);
3597 
3598 	if (engine->class == RENDER_CLASS)
3599 		xehp_enable_ccs_engines(engine);
3600 
3601 	return 0;
3602 }
3603 
3604 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3605 {
3606 	return !sched_engine->tasklet.callback;
3607 }
3608 
3609 static void guc_set_default_submission(struct intel_engine_cs *engine)
3610 {
3611 	engine->submit_request = guc_submit_request;
3612 }
3613 
3614 static inline void guc_kernel_context_pin(struct intel_guc *guc,
3615 					  struct intel_context *ce)
3616 {
3617 	if (context_guc_id_invalid(ce))
3618 		pin_guc_id(guc, ce);
3619 	guc_lrc_desc_pin(ce, true);
3620 }
3621 
3622 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
3623 {
3624 	struct intel_gt *gt = guc_to_gt(guc);
3625 	struct intel_engine_cs *engine;
3626 	enum intel_engine_id id;
3627 
3628 	/* make sure all descriptors are clean... */
3629 	xa_destroy(&guc->context_lookup);
3630 
3631 	/*
3632 	 * Some contexts might have been pinned before we enabled GuC
3633 	 * submission, so we need to add them to the GuC bookkeeping.
3634 	 * Also, after a reset of the GuC we want to make sure that the
3635 	 * information shared with GuC is properly reset. The kernel LRCs are
3636 	 * not attached to the gem_context, so they need to be added separately.
3637 	 *
3638 	 * Note: we purposefully do not check the return of guc_lrc_desc_pin,
3639 	 * because that function can only fail if a reset is just starting. This
3640 	 * is at the end of reset so presumably another reset isn't happening
3641 	 * and even if it did this code would be run again.
3642 	 */
3643 
3644 	for_each_engine(engine, gt, id) {
3645 		struct intel_context *ce;
3646 
3647 		list_for_each_entry(ce, &engine->pinned_contexts_list,
3648 				    pinned_contexts_link)
3649 			guc_kernel_context_pin(guc, ce);
3650 	}
3651 }
3652 
3653 static void guc_release(struct intel_engine_cs *engine)
3654 {
3655 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
3656 
3657 	intel_engine_cleanup_common(engine);
3658 	lrc_fini_wa_ctx(engine);
3659 }
3660 
3661 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
3662 {
3663 	struct intel_engine_cs *e;
3664 	intel_engine_mask_t tmp, mask = engine->mask;
3665 
3666 	for_each_engine_masked(e, engine->gt, mask, tmp)
3667 		e->serial++;
3668 }
3669 
3670 static void guc_default_vfuncs(struct intel_engine_cs *engine)
3671 {
3672 	/* Default vfuncs which can be overridden by each engine.
*/ 3673 3674 engine->resume = guc_resume; 3675 3676 engine->cops = &guc_context_ops; 3677 engine->request_alloc = guc_request_alloc; 3678 engine->add_active_request = add_to_context; 3679 engine->remove_active_request = remove_from_context; 3680 3681 engine->sched_engine->schedule = i915_schedule; 3682 3683 engine->reset.prepare = guc_reset_nop; 3684 engine->reset.rewind = guc_rewind_nop; 3685 engine->reset.cancel = guc_reset_nop; 3686 engine->reset.finish = guc_reset_nop; 3687 3688 engine->emit_flush = gen8_emit_flush_xcs; 3689 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3690 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3691 if (GRAPHICS_VER(engine->i915) >= 12) { 3692 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3693 engine->emit_flush = gen12_emit_flush_xcs; 3694 } 3695 engine->set_default_submission = guc_set_default_submission; 3696 engine->busyness = guc_engine_busyness; 3697 3698 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3699 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3700 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3701 3702 /* 3703 * TODO: GuC supports timeslicing and semaphores as well, but they're 3704 * handled by the firmware so some minor tweaks are required before 3705 * enabling. 3706 * 3707 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3708 */ 3709 3710 engine->emit_bb_start = gen8_emit_bb_start; 3711 } 3712 3713 static void rcs_submission_override(struct intel_engine_cs *engine) 3714 { 3715 switch (GRAPHICS_VER(engine->i915)) { 3716 case 12: 3717 engine->emit_flush = gen12_emit_flush_rcs; 3718 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3719 break; 3720 case 11: 3721 engine->emit_flush = gen11_emit_flush_rcs; 3722 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3723 break; 3724 default: 3725 engine->emit_flush = gen8_emit_flush_rcs; 3726 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3727 break; 3728 } 3729 } 3730 3731 static inline void guc_default_irqs(struct intel_engine_cs *engine) 3732 { 3733 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 3734 intel_engine_set_irq_handler(engine, cs_irq_handler); 3735 } 3736 3737 static void guc_sched_engine_destroy(struct kref *kref) 3738 { 3739 struct i915_sched_engine *sched_engine = 3740 container_of(kref, typeof(*sched_engine), ref); 3741 struct intel_guc *guc = sched_engine->private_data; 3742 3743 guc->sched_engine = NULL; 3744 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 3745 kfree(sched_engine); 3746 } 3747 3748 int intel_guc_submission_setup(struct intel_engine_cs *engine) 3749 { 3750 struct drm_i915_private *i915 = engine->i915; 3751 struct intel_guc *guc = &engine->gt->uc.guc; 3752 3753 /* 3754 * The setup relies on several assumptions (e.g. 
irqs always enabled) 3755 * that are only valid on gen11+ 3756 */ 3757 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 3758 3759 if (!guc->sched_engine) { 3760 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 3761 if (!guc->sched_engine) 3762 return -ENOMEM; 3763 3764 guc->sched_engine->schedule = i915_schedule; 3765 guc->sched_engine->disabled = guc_sched_engine_disabled; 3766 guc->sched_engine->private_data = guc; 3767 guc->sched_engine->destroy = guc_sched_engine_destroy; 3768 guc->sched_engine->bump_inflight_request_prio = 3769 guc_bump_inflight_request_prio; 3770 guc->sched_engine->retire_inflight_request_prio = 3771 guc_retire_inflight_request_prio; 3772 tasklet_setup(&guc->sched_engine->tasklet, 3773 guc_submission_tasklet); 3774 } 3775 i915_sched_engine_put(engine->sched_engine); 3776 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 3777 3778 guc_default_vfuncs(engine); 3779 guc_default_irqs(engine); 3780 guc_init_breadcrumbs(engine); 3781 3782 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 3783 rcs_submission_override(engine); 3784 3785 lrc_init_wa_ctx(engine); 3786 3787 /* Finally, take ownership and responsibility for cleanup! */ 3788 engine->sanitize = guc_sanitize; 3789 engine->release = guc_release; 3790 3791 return 0; 3792 } 3793 3794 void intel_guc_submission_enable(struct intel_guc *guc) 3795 { 3796 guc_init_lrc_mapping(guc); 3797 guc_init_engine_stats(guc); 3798 } 3799 3800 void intel_guc_submission_disable(struct intel_guc *guc) 3801 { 3802 /* Note: By the time we're here, GuC may have already been reset */ 3803 } 3804 3805 static bool __guc_submission_supported(struct intel_guc *guc) 3806 { 3807 /* GuC submission is unavailable for pre-Gen11 */ 3808 return intel_guc_is_supported(guc) && 3809 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 3810 } 3811 3812 static bool __guc_submission_selected(struct intel_guc *guc) 3813 { 3814 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 3815 3816 if (!intel_guc_submission_is_supported(guc)) 3817 return false; 3818 3819 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 3820 } 3821 3822 void intel_guc_submission_init_early(struct intel_guc *guc) 3823 { 3824 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 3825 3826 spin_lock_init(&guc->submission_state.lock); 3827 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 3828 ida_init(&guc->submission_state.guc_ids); 3829 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 3830 INIT_WORK(&guc->submission_state.destroyed_worker, 3831 destroyed_worker_func); 3832 INIT_WORK(&guc->submission_state.reset_fail_worker, 3833 reset_fail_worker_func); 3834 3835 spin_lock_init(&guc->timestamp.lock); 3836 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 3837 3838 guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; 3839 guc->submission_supported = __guc_submission_supported(guc); 3840 guc->submission_selected = __guc_submission_selected(guc); 3841 } 3842 3843 static inline struct intel_context * 3844 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) 3845 { 3846 struct intel_context *ce; 3847 3848 if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { 3849 drm_err(&guc_to_gt(guc)->i915->drm, 3850 "Invalid desc_idx %u", desc_idx); 3851 return NULL; 3852 } 3853 3854 ce = __get_context(guc, desc_idx); 3855 if (unlikely(!ce)) { 3856 drm_err(&guc_to_gt(guc)->i915->drm, 3857 "Context is NULL, desc_idx %u", desc_idx); 3858 return NULL; 3859 } 3860 3861 if (unlikely(intel_context_is_child(ce))) { 3862 drm_err(&guc_to_gt(guc)->i915->drm, 3863 
"Context is child, desc_idx %u", desc_idx); 3864 return NULL; 3865 } 3866 3867 return ce; 3868 } 3869 3870 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 3871 const u32 *msg, 3872 u32 len) 3873 { 3874 struct intel_context *ce; 3875 u32 desc_idx = msg[0]; 3876 3877 if (unlikely(len < 1)) { 3878 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3879 return -EPROTO; 3880 } 3881 3882 ce = g2h_context_lookup(guc, desc_idx); 3883 if (unlikely(!ce)) 3884 return -EPROTO; 3885 3886 trace_intel_context_deregister_done(ce); 3887 3888 #ifdef CONFIG_DRM_I915_SELFTEST 3889 if (unlikely(ce->drop_deregister)) { 3890 ce->drop_deregister = false; 3891 return 0; 3892 } 3893 #endif 3894 3895 if (context_wait_for_deregister_to_register(ce)) { 3896 struct intel_runtime_pm *runtime_pm = 3897 &ce->engine->gt->i915->runtime_pm; 3898 intel_wakeref_t wakeref; 3899 3900 /* 3901 * Previous owner of this guc_id has been deregistered, now safe 3902 * register this context. 3903 */ 3904 with_intel_runtime_pm(runtime_pm, wakeref) 3905 register_context(ce, true); 3906 guc_signal_context_fence(ce); 3907 intel_context_put(ce); 3908 } else if (context_destroyed(ce)) { 3909 /* Context has been destroyed */ 3910 intel_gt_pm_put_async(guc_to_gt(guc)); 3911 release_guc_id(guc, ce); 3912 __guc_context_destroy(ce); 3913 } 3914 3915 decr_outstanding_submission_g2h(guc); 3916 3917 return 0; 3918 } 3919 3920 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 3921 const u32 *msg, 3922 u32 len) 3923 { 3924 struct intel_context *ce; 3925 unsigned long flags; 3926 u32 desc_idx = msg[0]; 3927 3928 if (unlikely(len < 2)) { 3929 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3930 return -EPROTO; 3931 } 3932 3933 ce = g2h_context_lookup(guc, desc_idx); 3934 if (unlikely(!ce)) 3935 return -EPROTO; 3936 3937 if (unlikely(context_destroyed(ce) || 3938 (!context_pending_enable(ce) && 3939 !context_pending_disable(ce)))) { 3940 drm_err(&guc_to_gt(guc)->i915->drm, 3941 "Bad context sched_state 0x%x, desc_idx %u", 3942 ce->guc_state.sched_state, desc_idx); 3943 return -EPROTO; 3944 } 3945 3946 trace_intel_context_sched_done(ce); 3947 3948 if (context_pending_enable(ce)) { 3949 #ifdef CONFIG_DRM_I915_SELFTEST 3950 if (unlikely(ce->drop_schedule_enable)) { 3951 ce->drop_schedule_enable = false; 3952 return 0; 3953 } 3954 #endif 3955 3956 spin_lock_irqsave(&ce->guc_state.lock, flags); 3957 clr_context_pending_enable(ce); 3958 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3959 } else if (context_pending_disable(ce)) { 3960 bool banned; 3961 3962 #ifdef CONFIG_DRM_I915_SELFTEST 3963 if (unlikely(ce->drop_schedule_disable)) { 3964 ce->drop_schedule_disable = false; 3965 return 0; 3966 } 3967 #endif 3968 3969 /* 3970 * Unpin must be done before __guc_signal_context_fence, 3971 * otherwise a race exists between the requests getting 3972 * submitted + retired before this unpin completes resulting in 3973 * the pin_count going to zero and the context still being 3974 * enabled. 
3975 */ 3976 intel_context_sched_disable_unpin(ce); 3977 3978 spin_lock_irqsave(&ce->guc_state.lock, flags); 3979 banned = context_banned(ce); 3980 clr_context_banned(ce); 3981 clr_context_pending_disable(ce); 3982 __guc_signal_context_fence(ce); 3983 guc_blocked_fence_complete(ce); 3984 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3985 3986 if (banned) { 3987 guc_cancel_context_requests(ce); 3988 intel_engine_signal_breadcrumbs(ce->engine); 3989 } 3990 } 3991 3992 decr_outstanding_submission_g2h(guc); 3993 intel_context_put(ce); 3994 3995 return 0; 3996 } 3997 3998 static void capture_error_state(struct intel_guc *guc, 3999 struct intel_context *ce) 4000 { 4001 struct intel_gt *gt = guc_to_gt(guc); 4002 struct drm_i915_private *i915 = gt->i915; 4003 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4004 intel_wakeref_t wakeref; 4005 4006 intel_engine_set_hung_context(engine, ce); 4007 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4008 i915_capture_error_state(gt, engine->mask); 4009 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4010 } 4011 4012 static void guc_context_replay(struct intel_context *ce) 4013 { 4014 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4015 4016 __guc_reset_context(ce, true); 4017 tasklet_hi_schedule(&sched_engine->tasklet); 4018 } 4019 4020 static void guc_handle_context_reset(struct intel_guc *guc, 4021 struct intel_context *ce) 4022 { 4023 trace_intel_context_reset(ce); 4024 4025 if (likely(!intel_context_is_banned(ce))) { 4026 capture_error_state(guc, ce); 4027 guc_context_replay(ce); 4028 } else { 4029 drm_info(&guc_to_gt(guc)->i915->drm, 4030 "Ignoring context reset notification of banned context 0x%04X on %s", 4031 ce->guc_id.id, ce->engine->name); 4032 } 4033 } 4034 4035 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4036 const u32 *msg, u32 len) 4037 { 4038 struct intel_context *ce; 4039 unsigned long flags; 4040 int desc_idx; 4041 4042 if (unlikely(len != 1)) { 4043 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4044 return -EPROTO; 4045 } 4046 4047 desc_idx = msg[0]; 4048 4049 /* 4050 * The context lookup uses the xarray but lookups only require an RCU lock 4051 * not the full spinlock. So take the lock explicitly and keep it until the 4052 * context has been reference count locked to ensure it can't be destroyed 4053 * asynchronously until the reset is done. 
4054 */ 4055 xa_lock_irqsave(&guc->context_lookup, flags); 4056 ce = g2h_context_lookup(guc, desc_idx); 4057 if (ce) 4058 intel_context_get(ce); 4059 xa_unlock_irqrestore(&guc->context_lookup, flags); 4060 4061 if (unlikely(!ce)) 4062 return -EPROTO; 4063 4064 guc_handle_context_reset(guc, ce); 4065 intel_context_put(ce); 4066 4067 return 0; 4068 } 4069 4070 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4071 const u32 *msg, u32 len) 4072 { 4073 int status; 4074 4075 if (unlikely(len != 1)) { 4076 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4077 return -EPROTO; 4078 } 4079 4080 status = msg[0]; 4081 drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); 4082 4083 /* FIXME: Do something with the capture */ 4084 4085 return 0; 4086 } 4087 4088 static struct intel_engine_cs * 4089 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4090 { 4091 struct intel_gt *gt = guc_to_gt(guc); 4092 u8 engine_class = guc_class_to_engine_class(guc_class); 4093 4094 /* Class index is checked in class converter */ 4095 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4096 4097 return gt->engine_class[engine_class][instance]; 4098 } 4099 4100 static void reset_fail_worker_func(struct work_struct *w) 4101 { 4102 struct intel_guc *guc = container_of(w, struct intel_guc, 4103 submission_state.reset_fail_worker); 4104 struct intel_gt *gt = guc_to_gt(guc); 4105 intel_engine_mask_t reset_fail_mask; 4106 unsigned long flags; 4107 4108 spin_lock_irqsave(&guc->submission_state.lock, flags); 4109 reset_fail_mask = guc->submission_state.reset_fail_mask; 4110 guc->submission_state.reset_fail_mask = 0; 4111 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4112 4113 if (likely(reset_fail_mask)) 4114 intel_gt_handle_error(gt, reset_fail_mask, 4115 I915_ERROR_CAPTURE, 4116 "GuC failed to reset engine mask=0x%x\n", 4117 reset_fail_mask); 4118 } 4119 4120 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4121 const u32 *msg, u32 len) 4122 { 4123 struct intel_engine_cs *engine; 4124 struct intel_gt *gt = guc_to_gt(guc); 4125 u8 guc_class, instance; 4126 u32 reason; 4127 unsigned long flags; 4128 4129 if (unlikely(len != 3)) { 4130 drm_err(>->i915->drm, "Invalid length %u", len); 4131 return -EPROTO; 4132 } 4133 4134 guc_class = msg[0]; 4135 instance = msg[1]; 4136 reason = msg[2]; 4137 4138 engine = guc_lookup_engine(guc, guc_class, instance); 4139 if (unlikely(!engine)) { 4140 drm_err(>->i915->drm, 4141 "Invalid engine %d:%d", guc_class, instance); 4142 return -EPROTO; 4143 } 4144 4145 /* 4146 * This is an unexpected failure of a hardware feature. So, log a real 4147 * error message not just the informational that comes with the reset. 4148 */ 4149 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4150 guc_class, instance, engine->name, reason); 4151 4152 spin_lock_irqsave(&guc->submission_state.lock, flags); 4153 guc->submission_state.reset_fail_mask |= engine->mask; 4154 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4155 4156 /* 4157 * A GT reset flushes this worker queue (G2H handler) so we must use 4158 * another worker to trigger a GT reset. 
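	 * reset_fail_worker_func() above picks up the accumulated
	 * reset_fail_mask and calls intel_gt_handle_error() from that
	 * separate work item.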
4159 */ 4160 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4161 4162 return 0; 4163 } 4164 4165 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4166 { 4167 struct intel_guc *guc = &engine->gt->uc.guc; 4168 struct intel_context *ce; 4169 struct i915_request *rq; 4170 unsigned long index; 4171 unsigned long flags; 4172 4173 /* Reset called during driver load? GuC not yet initialised! */ 4174 if (unlikely(!guc_submission_initialized(guc))) 4175 return; 4176 4177 xa_lock_irqsave(&guc->context_lookup, flags); 4178 xa_for_each(&guc->context_lookup, index, ce) { 4179 if (!kref_get_unless_zero(&ce->ref)) 4180 continue; 4181 4182 xa_unlock(&guc->context_lookup); 4183 4184 if (!intel_context_is_pinned(ce)) 4185 goto next; 4186 4187 if (intel_engine_is_virtual(ce->engine)) { 4188 if (!(ce->engine->mask & engine->mask)) 4189 goto next; 4190 } else { 4191 if (ce->engine != engine) 4192 goto next; 4193 } 4194 4195 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4196 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4197 continue; 4198 4199 intel_engine_set_hung_context(engine, ce); 4200 4201 /* Can only cope with one hang at a time... */ 4202 intel_context_put(ce); 4203 xa_lock(&guc->context_lookup); 4204 goto done; 4205 } 4206 next: 4207 intel_context_put(ce); 4208 xa_lock(&guc->context_lookup); 4209 } 4210 done: 4211 xa_unlock_irqrestore(&guc->context_lookup, flags); 4212 } 4213 4214 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4215 struct i915_request *hung_rq, 4216 struct drm_printer *m) 4217 { 4218 struct intel_guc *guc = &engine->gt->uc.guc; 4219 struct intel_context *ce; 4220 unsigned long index; 4221 unsigned long flags; 4222 4223 /* Reset called during driver load? GuC not yet initialised! 
*/ 4224 if (unlikely(!guc_submission_initialized(guc))) 4225 return; 4226 4227 xa_lock_irqsave(&guc->context_lookup, flags); 4228 xa_for_each(&guc->context_lookup, index, ce) { 4229 if (!kref_get_unless_zero(&ce->ref)) 4230 continue; 4231 4232 xa_unlock(&guc->context_lookup); 4233 4234 if (!intel_context_is_pinned(ce)) 4235 goto next; 4236 4237 if (intel_engine_is_virtual(ce->engine)) { 4238 if (!(ce->engine->mask & engine->mask)) 4239 goto next; 4240 } else { 4241 if (ce->engine != engine) 4242 goto next; 4243 } 4244 4245 spin_lock(&ce->guc_state.lock); 4246 intel_engine_dump_active_requests(&ce->guc_state.requests, 4247 hung_rq, m); 4248 spin_unlock(&ce->guc_state.lock); 4249 4250 next: 4251 intel_context_put(ce); 4252 xa_lock(&guc->context_lookup); 4253 } 4254 xa_unlock_irqrestore(&guc->context_lookup, flags); 4255 } 4256 4257 void intel_guc_submission_print_info(struct intel_guc *guc, 4258 struct drm_printer *p) 4259 { 4260 struct i915_sched_engine *sched_engine = guc->sched_engine; 4261 struct rb_node *rb; 4262 unsigned long flags; 4263 4264 if (!sched_engine) 4265 return; 4266 4267 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4268 atomic_read(&guc->outstanding_submission_g2h)); 4269 drm_printf(p, "GuC tasklet count: %u\n\n", 4270 atomic_read(&sched_engine->tasklet.count)); 4271 4272 spin_lock_irqsave(&sched_engine->lock, flags); 4273 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4274 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4275 struct i915_priolist *pl = to_priolist(rb); 4276 struct i915_request *rq; 4277 4278 priolist_for_each_request(rq, pl) 4279 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4280 rq->context->guc_id.id, 4281 rq->fence.seqno); 4282 } 4283 spin_unlock_irqrestore(&sched_engine->lock, flags); 4284 drm_printf(p, "\n"); 4285 } 4286 4287 static inline void guc_log_context_priority(struct drm_printer *p, 4288 struct intel_context *ce) 4289 { 4290 int i; 4291 4292 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4293 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4294 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4295 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4296 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4297 i, ce->guc_state.prio_count[i]); 4298 } 4299 drm_printf(p, "\n"); 4300 } 4301 4302 static inline void guc_log_context(struct drm_printer *p, 4303 struct intel_context *ce) 4304 { 4305 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4306 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4307 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4308 ce->ring->head, 4309 ce->lrc_reg_state[CTX_RING_HEAD]); 4310 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4311 ce->ring->tail, 4312 ce->lrc_reg_state[CTX_RING_TAIL]); 4313 drm_printf(p, "\t\tContext Pin Count: %u\n", 4314 atomic_read(&ce->pin_count)); 4315 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4316 atomic_read(&ce->guc_id.ref)); 4317 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4318 ce->guc_state.sched_state); 4319 } 4320 4321 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4322 struct drm_printer *p) 4323 { 4324 struct intel_context *ce; 4325 unsigned long index; 4326 unsigned long flags; 4327 4328 xa_lock_irqsave(&guc->context_lookup, flags); 4329 xa_for_each(&guc->context_lookup, index, ce) { 4330 GEM_BUG_ON(intel_context_is_child(ce)); 4331 4332 guc_log_context(p, ce); 4333 guc_log_context_priority(p, ce); 4334 4335 if (intel_context_is_parent(ce)) { 4336 struct 
guc_process_desc *desc = __get_process_desc(ce); 4337 struct intel_context *child; 4338 4339 drm_printf(p, "\t\tNumber children: %u\n", 4340 ce->parallel.number_children); 4341 drm_printf(p, "\t\tWQI Head: %u\n", 4342 READ_ONCE(desc->head)); 4343 drm_printf(p, "\t\tWQI Tail: %u\n", 4344 READ_ONCE(desc->tail)); 4345 drm_printf(p, "\t\tWQI Status: %u\n\n", 4346 READ_ONCE(desc->wq_status)); 4347 4348 if (ce->engine->emit_bb_start == 4349 emit_bb_start_parent_no_preempt_mid_batch) { 4350 u8 i; 4351 4352 drm_printf(p, "\t\tChildren Go: %u\n\n", 4353 get_children_go_value(ce)); 4354 for (i = 0; i < ce->parallel.number_children; ++i) 4355 drm_printf(p, "\t\tChildren Join: %u\n", 4356 get_children_join_value(ce, i)); 4357 } 4358 4359 for_each_child(ce, child) 4360 guc_log_context(p, child); 4361 } 4362 } 4363 xa_unlock_irqrestore(&guc->context_lookup, flags); 4364 } 4365 4366 static inline u32 get_children_go_addr(struct intel_context *ce) 4367 { 4368 GEM_BUG_ON(!intel_context_is_parent(ce)); 4369 4370 return i915_ggtt_offset(ce->state) + 4371 __get_parent_scratch_offset(ce) + 4372 offsetof(struct parent_scratch, go.semaphore); 4373 } 4374 4375 static inline u32 get_children_join_addr(struct intel_context *ce, 4376 u8 child_index) 4377 { 4378 GEM_BUG_ON(!intel_context_is_parent(ce)); 4379 4380 return i915_ggtt_offset(ce->state) + 4381 __get_parent_scratch_offset(ce) + 4382 offsetof(struct parent_scratch, join[child_index].semaphore); 4383 } 4384 4385 #define PARENT_GO_BB 1 4386 #define PARENT_GO_FINI_BREADCRUMB 0 4387 #define CHILD_GO_BB 1 4388 #define CHILD_GO_FINI_BREADCRUMB 0 4389 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4390 u64 offset, u32 len, 4391 const unsigned int flags) 4392 { 4393 struct intel_context *ce = rq->context; 4394 u32 *cs; 4395 u8 i; 4396 4397 GEM_BUG_ON(!intel_context_is_parent(ce)); 4398 4399 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4400 if (IS_ERR(cs)) 4401 return PTR_ERR(cs); 4402 4403 /* Wait on children */ 4404 for (i = 0; i < ce->parallel.number_children; ++i) { 4405 *cs++ = (MI_SEMAPHORE_WAIT | 4406 MI_SEMAPHORE_GLOBAL_GTT | 4407 MI_SEMAPHORE_POLL | 4408 MI_SEMAPHORE_SAD_EQ_SDD); 4409 *cs++ = PARENT_GO_BB; 4410 *cs++ = get_children_join_addr(ce, i); 4411 *cs++ = 0; 4412 } 4413 4414 /* Turn off preemption */ 4415 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4416 *cs++ = MI_NOOP; 4417 4418 /* Tell children go */ 4419 cs = gen8_emit_ggtt_write(cs, 4420 CHILD_GO_BB, 4421 get_children_go_addr(ce), 4422 0); 4423 4424 /* Jump to batch */ 4425 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4426 (flags & I915_DISPATCH_SECURE ? 
			(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
						    u64 offset, u32 len,
						    const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);
	u32 *cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_BB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_BB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Jump to batch */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						   u32 *cs)
{
	struct intel_context *ce = rq->context;
	u8 i;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	/* Wait on children */
	for (i = 0; i < ce->parallel.number_children; ++i) {
		*cs++ = (MI_SEMAPHORE_WAIT |
			 MI_SEMAPHORE_GLOBAL_GTT |
			 MI_SEMAPHORE_POLL |
			 MI_SEMAPHORE_SAD_EQ_SDD);
		*cs++ = PARENT_GO_FINI_BREADCRUMB;
		*cs++ = get_children_join_addr(ce, i);
		*cs++ = 0;
	}

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Tell children go */
	cs = gen8_emit_ggtt_write(cs,
				  CHILD_GO_FINI_BREADCRUMB,
				  get_children_go_addr(ce),
				  0);

	return cs;
}

/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumb seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated, so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but the 2nd hit an error, only 1 would be seen by the GuC
 * backend). Simply skip the handshake, but still emit the breadcrumb seqno,
 * if an error has occurred on any of the requests in the submission /
 * relationship.
 */
static inline bool skip_handshake(struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}

#define NON_SKIP_LEN	6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						 u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in
		 * __emit_fini_breadcrumb_parent_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						  u32 *cs)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);

	GEM_BUG_ON(!intel_context_is_child(ce));

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_FINI_BREADCRUMB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_FINI_BREADCRUMB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	return cs;
}

static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in
		 * __emit_fini_breadcrumb_child_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
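		 * (i.e. the 4-dword seqno write emitted by
		 * gen8_emit_ggtt_write() plus MI_USER_INTERRUPT and MI_NOOP).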
4620 */ 4621 memset(cs, 0, sizeof(u32) * 4622 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4623 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4624 } else { 4625 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 4626 } 4627 4628 /* Emit fini breadcrumb */ 4629 before_fini_breadcrumb_user_interrupt_cs = cs; 4630 cs = gen8_emit_ggtt_write(cs, 4631 rq->fence.seqno, 4632 i915_request_active_timeline(rq)->hwsp_offset, 4633 0); 4634 4635 /* User interrupt */ 4636 *cs++ = MI_USER_INTERRUPT; 4637 *cs++ = MI_NOOP; 4638 4639 /* Ensure our math for skip + emit is correct */ 4640 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4641 cs); 4642 GEM_BUG_ON(start_fini_breadcrumb_cs + 4643 ce->engine->emit_fini_breadcrumb_dw != cs); 4644 4645 rq->tail = intel_ring_offset(rq, cs); 4646 4647 return cs; 4648 } 4649 4650 #undef NON_SKIP_LEN 4651 4652 static struct intel_context * 4653 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 4654 unsigned long flags) 4655 { 4656 struct guc_virtual_engine *ve; 4657 struct intel_guc *guc; 4658 unsigned int n; 4659 int err; 4660 4661 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 4662 if (!ve) 4663 return ERR_PTR(-ENOMEM); 4664 4665 guc = &siblings[0]->gt->uc.guc; 4666 4667 ve->base.i915 = siblings[0]->i915; 4668 ve->base.gt = siblings[0]->gt; 4669 ve->base.uncore = siblings[0]->uncore; 4670 ve->base.id = -1; 4671 4672 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 4673 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4674 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4675 ve->base.saturated = ALL_ENGINES; 4676 4677 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 4678 4679 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 4680 4681 ve->base.cops = &virtual_guc_context_ops; 4682 ve->base.request_alloc = guc_request_alloc; 4683 ve->base.bump_serial = virtual_guc_bump_serial; 4684 4685 ve->base.submit_request = guc_submit_request; 4686 4687 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 4688 4689 intel_context_init(&ve->context, &ve->base); 4690 4691 for (n = 0; n < count; n++) { 4692 struct intel_engine_cs *sibling = siblings[n]; 4693 4694 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 4695 if (sibling->mask & ve->base.mask) { 4696 DRM_DEBUG("duplicate %s entry in load balancer\n", 4697 sibling->name); 4698 err = -EINVAL; 4699 goto err_put; 4700 } 4701 4702 ve->base.mask |= sibling->mask; 4703 ve->base.logical_mask |= sibling->logical_mask; 4704 4705 if (n != 0 && ve->base.class != sibling->class) { 4706 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 4707 sibling->class, ve->base.class); 4708 err = -EINVAL; 4709 goto err_put; 4710 } else if (n == 0) { 4711 ve->base.class = sibling->class; 4712 ve->base.uabi_class = sibling->uabi_class; 4713 snprintf(ve->base.name, sizeof(ve->base.name), 4714 "v%dx%d", ve->base.class, count); 4715 ve->base.context_size = sibling->context_size; 4716 4717 ve->base.add_active_request = 4718 sibling->add_active_request; 4719 ve->base.remove_active_request = 4720 sibling->remove_active_request; 4721 ve->base.emit_bb_start = sibling->emit_bb_start; 4722 ve->base.emit_flush = sibling->emit_flush; 4723 ve->base.emit_init_breadcrumb = 4724 sibling->emit_init_breadcrumb; 4725 ve->base.emit_fini_breadcrumb = 4726 sibling->emit_fini_breadcrumb; 4727 ve->base.emit_fini_breadcrumb_dw = 4728 sibling->emit_fini_breadcrumb_dw; 4729 ve->base.breadcrumbs = 4730 intel_breadcrumbs_get(sibling->breadcrumbs); 4731 4732 
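			/*
			 * The first sibling also supplies the engine flags and
			 * scheduling properties that the virtual engine
			 * inherits below.
			 */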
ve->base.flags |= sibling->flags; 4733 4734 ve->base.props.timeslice_duration_ms = 4735 sibling->props.timeslice_duration_ms; 4736 ve->base.props.preempt_timeout_ms = 4737 sibling->props.preempt_timeout_ms; 4738 } 4739 } 4740 4741 return &ve->context; 4742 4743 err_put: 4744 intel_context_put(&ve->context); 4745 return ERR_PTR(err); 4746 } 4747 4748 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 4749 { 4750 struct intel_engine_cs *engine; 4751 intel_engine_mask_t tmp, mask = ve->mask; 4752 4753 for_each_engine_masked(engine, ve->gt, mask, tmp) 4754 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 4755 return true; 4756 4757 return false; 4758 } 4759 4760 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 4761 #include "selftest_guc.c" 4762 #include "selftest_guc_multi_lrc.c" 4763 #endif 4764