// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned.
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */
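
/*
 * Illustrative sketch of the lock ordering rules above (added note, not part
 * of the original source): a path that needs both the submission lock and a
 * context's GuC state lock nests them as follows, matching
 * __unwind_incomplete_requests() and guc_cancel_context_requests() below:
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);
 */
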
/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_BLOCKED_SHIFT			8
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
	return !(ce->guc_state.sched_state &
		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
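
/*
 * Illustrative note (added, not from the original source): unlike the other
 * SCHED_STATE_* bits, SCHED_STATE_BLOCKED is a counter occupying bits [8..19]
 * of sched_state, so the += / -= of SCHED_STATE_BLOCKED (BIT(8)) above
 * implements increment / decrement of a nested block count, e.g.:
 *
 *	sched_state == 0x000	->	context_blocked(ce) == 0
 *	incr_context_blocked()	->	sched_state == 0x100, blocked == 1
 *	incr_context_blocked()	->	sched_state == 0x200, blocked == 2
 *	decr_context_blocked()	->	sched_state == 0x100, blocked == 1
 */
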
static inline bool context_has_committed_requests(struct intel_context *ce)
{
	return !!ce->guc_state.number_committed_requests;
}

static inline void incr_context_committed_requests(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	++ce->guc_state.number_committed_requests;
	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}

static inline void decr_context_committed_requests(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	--ce->guc_state.number_committed_requests;
	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	struct guc_sched_wq_desc wq_desc;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};
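
/*
 * Illustrative worked example (added; assumes PARENT_SCRATCH_SIZE is a 4K
 * page, as stated in the layout comment above): WQ_SIZE and WQ_OFFSET are
 * then both 2K, so wq_desc plus the go/join semaphores sit in the first half
 * of the page, unused[] pads out the remainder of that half, and the 2K work
 * queue fills the second half. The BUILD_BUG_ON()s in the helpers below keep
 * this struct and the WQ_OFFSET/PARENT_SCRATCH_SIZE macros in sync.
 */
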
static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_sched_wq_desc *
__get_wq_desc(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->wq_desc;
}

static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
			   struct intel_context *ce,
			   u32 wqi_size)
{
	/*
	 * Check for space in the work queue. Caching a value of the head
	 * pointer in the intel_context structure in order to reduce the
	 * number of accesses to shared GPU memory which may be across a
	 * PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}
static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	/*
	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request resubmitted after the context was banned.
	 */
	if (unlikely(intel_context_is_banned(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct guc_sched_wq_desc *wq_desc,
		      struct intel_context *ce,
		      u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
	u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
	       FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state before updating the work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(wq_desc, ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		 FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(wq_desc, ce, wqi_size);

	return 0;
}
static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret = 0;

	if (likely(!intel_context_is_banned(ce))) {
		ret = __guc_wq_item_append(rq);

		if (unlikely(ret == -EBUSY)) {
			guc->stalled_request = rq;
			guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
		}
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
	       intel_context_is_banned(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     !intel_context_is_banned(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but a
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
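
/*
 * Illustrative example (added, not from the original source): if the
 * driver's gt_stamp is 0x00000002_fffffff0 and GuC reports
 * new_start == 0x00000010, then new_start has wrapped relative to
 * gt_stamp_last (0xfffffff0), the first condition above fires
 * ((new_start - gt_stamp_last) == 0x20 as a u32) and *prev_start becomes
 * 0x00000003_00000010. The second condition handles the mirror case where
 * gt_stamp has wrapped but the reported start time has not.
 */
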
#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts results in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}
(Similarly, although we 1724 * shouldn't need to disable irq around the manipulation of the 1725 * submission's irq state, we also wish to remind ourselves that 1726 * it is irq state.) 1727 */ 1728 spin_lock_irqsave(&sched_engine->lock, flags); 1729 1730 /* Flush the queued requests to the timeline list (for retiring). */ 1731 while ((rb = rb_first_cached(&sched_engine->queue))) { 1732 struct i915_priolist *p = to_priolist(rb); 1733 1734 priolist_for_each_request_consume(rq, rn, p) { 1735 list_del_init(&rq->sched.link); 1736 1737 __i915_request_submit(rq); 1738 1739 i915_request_put(i915_request_mark_eio(rq)); 1740 } 1741 1742 rb_erase_cached(&p->node, &sched_engine->queue); 1743 i915_priolist_free(p); 1744 } 1745 1746 /* Remaining _unready_ requests will be nop'ed when submitted */ 1747 1748 sched_engine->queue_priority_hint = INT_MIN; 1749 sched_engine->queue = RB_ROOT_CACHED; 1750 1751 spin_unlock_irqrestore(&sched_engine->lock, flags); 1752 } 1753 1754 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1755 { 1756 struct intel_context *ce; 1757 unsigned long index; 1758 unsigned long flags; 1759 1760 xa_lock_irqsave(&guc->context_lookup, flags); 1761 xa_for_each(&guc->context_lookup, index, ce) { 1762 if (!kref_get_unless_zero(&ce->ref)) 1763 continue; 1764 1765 xa_unlock(&guc->context_lookup); 1766 1767 if (intel_context_is_pinned(ce) && 1768 !intel_context_is_child(ce)) 1769 guc_cancel_context_requests(ce); 1770 1771 intel_context_put(ce); 1772 1773 xa_lock(&guc->context_lookup); 1774 } 1775 xa_unlock_irqrestore(&guc->context_lookup, flags); 1776 1777 guc_cancel_sched_engine_requests(guc->sched_engine); 1778 1779 /* GuC is blown away, drop all references to contexts */ 1780 xa_destroy(&guc->context_lookup); 1781 } 1782 1783 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1784 { 1785 /* Reset called during driver load or during wedge? */ 1786 if (unlikely(!guc_submission_initialized(guc) || 1787 intel_gt_is_wedged(guc_to_gt(guc)))) { 1788 return; 1789 } 1790 1791 /* 1792 * Technically possible for either of these values to be non-zero here, 1793 * but very unlikely + harmless. Regardless let's add a warn so we can 1794 * see in CI if this happens frequently / a precursor to taking down the 1795 * machine. 1796 */ 1797 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1798 atomic_set(&guc->outstanding_submission_g2h, 0); 1799 1800 intel_guc_global_policies_update(guc); 1801 enable_submission(guc); 1802 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1803 } 1804 1805 static void destroyed_worker_func(struct work_struct *w); 1806 static void reset_fail_worker_func(struct work_struct *w); 1807 1808 /* 1809 * Set up the memory resources to be shared with the GuC (via the GGTT) 1810 * at firmware loading time. 
1811 */ 1812 int intel_guc_submission_init(struct intel_guc *guc) 1813 { 1814 struct intel_gt *gt = guc_to_gt(guc); 1815 1816 if (guc->submission_initialized) 1817 return 0; 1818 1819 guc->submission_state.guc_ids_bitmap = 1820 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1821 if (!guc->submission_state.guc_ids_bitmap) 1822 return -ENOMEM; 1823 1824 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1825 guc->timestamp.shift = gpm_timestamp_shift(gt); 1826 guc->submission_initialized = true; 1827 1828 return 0; 1829 } 1830 1831 void intel_guc_submission_fini(struct intel_guc *guc) 1832 { 1833 if (!guc->submission_initialized) 1834 return; 1835 1836 guc_flush_destroyed_contexts(guc); 1837 i915_sched_engine_put(guc->sched_engine); 1838 bitmap_free(guc->submission_state.guc_ids_bitmap); 1839 guc->submission_initialized = false; 1840 } 1841 1842 static inline void queue_request(struct i915_sched_engine *sched_engine, 1843 struct i915_request *rq, 1844 int prio) 1845 { 1846 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1847 list_add_tail(&rq->sched.link, 1848 i915_sched_lookup_priolist(sched_engine, prio)); 1849 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1850 tasklet_hi_schedule(&sched_engine->tasklet); 1851 } 1852 1853 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1854 struct i915_request *rq) 1855 { 1856 int ret = 0; 1857 1858 __i915_request_submit(rq); 1859 1860 trace_i915_request_in(rq, 0); 1861 1862 if (is_multi_lrc_rq(rq)) { 1863 if (multi_lrc_submit(rq)) { 1864 ret = guc_wq_item_append(guc, rq); 1865 if (!ret) 1866 ret = guc_add_request(guc, rq); 1867 } 1868 } else { 1869 guc_set_lrc_tail(rq); 1870 ret = guc_add_request(guc, rq); 1871 } 1872 1873 if (unlikely(ret == -EPIPE)) 1874 disable_submission(guc); 1875 1876 return ret; 1877 } 1878 1879 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1880 { 1881 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1882 struct intel_context *ce = request_to_scheduling_context(rq); 1883 1884 return submission_disabled(guc) || guc->stalled_request || 1885 !i915_sched_engine_is_empty(sched_engine) || 1886 !ctx_id_mapped(guc, ce->guc_id.id); 1887 } 1888 1889 static void guc_submit_request(struct i915_request *rq) 1890 { 1891 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1892 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1893 unsigned long flags; 1894 1895 /* Will be called from irq-context when using foreign fences. 
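 * This is why the irqsave variant of the sched_engine lock is taken on
 * this submission path.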
*/ 1896 spin_lock_irqsave(&sched_engine->lock, flags); 1897 1898 if (need_tasklet(guc, rq)) 1899 queue_request(sched_engine, rq, rq_prio(rq)); 1900 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1901 tasklet_hi_schedule(&sched_engine->tasklet); 1902 1903 spin_unlock_irqrestore(&sched_engine->lock, flags); 1904 } 1905 1906 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1907 { 1908 int ret; 1909 1910 GEM_BUG_ON(intel_context_is_child(ce)); 1911 1912 if (intel_context_is_parent(ce)) 1913 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1914 NUMBER_MULTI_LRC_GUC_ID(guc), 1915 order_base_2(ce->parallel.number_children 1916 + 1)); 1917 else 1918 ret = ida_simple_get(&guc->submission_state.guc_ids, 1919 NUMBER_MULTI_LRC_GUC_ID(guc), 1920 guc->submission_state.num_guc_ids, 1921 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1922 __GFP_NOWARN); 1923 if (unlikely(ret < 0)) 1924 return ret; 1925 1926 ce->guc_id.id = ret; 1927 return 0; 1928 } 1929 1930 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1931 { 1932 GEM_BUG_ON(intel_context_is_child(ce)); 1933 1934 if (!context_guc_id_invalid(ce)) { 1935 if (intel_context_is_parent(ce)) 1936 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 1937 ce->guc_id.id, 1938 order_base_2(ce->parallel.number_children 1939 + 1)); 1940 else 1941 ida_simple_remove(&guc->submission_state.guc_ids, 1942 ce->guc_id.id); 1943 clr_ctx_id_mapping(guc, ce->guc_id.id); 1944 set_context_guc_id_invalid(ce); 1945 } 1946 if (!list_empty(&ce->guc_id.link)) 1947 list_del_init(&ce->guc_id.link); 1948 } 1949 1950 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1951 { 1952 unsigned long flags; 1953 1954 spin_lock_irqsave(&guc->submission_state.lock, flags); 1955 __release_guc_id(guc, ce); 1956 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1957 } 1958 1959 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 1960 { 1961 struct intel_context *cn; 1962 1963 lockdep_assert_held(&guc->submission_state.lock); 1964 GEM_BUG_ON(intel_context_is_child(ce)); 1965 GEM_BUG_ON(intel_context_is_parent(ce)); 1966 1967 if (!list_empty(&guc->submission_state.guc_id_list)) { 1968 cn = list_first_entry(&guc->submission_state.guc_id_list, 1969 struct intel_context, 1970 guc_id.link); 1971 1972 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 1973 GEM_BUG_ON(context_guc_id_invalid(cn)); 1974 GEM_BUG_ON(intel_context_is_child(cn)); 1975 GEM_BUG_ON(intel_context_is_parent(cn)); 1976 1977 list_del_init(&cn->guc_id.link); 1978 ce->guc_id.id = cn->guc_id.id; 1979 1980 spin_lock(&cn->guc_state.lock); 1981 clr_context_registered(cn); 1982 spin_unlock(&cn->guc_state.lock); 1983 1984 set_context_guc_id_invalid(cn); 1985 1986 #ifdef CONFIG_DRM_I915_SELFTEST 1987 guc->number_guc_id_stolen++; 1988 #endif 1989 1990 return 0; 1991 } else { 1992 return -EAGAIN; 1993 } 1994 } 1995 1996 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 1997 { 1998 int ret; 1999 2000 lockdep_assert_held(&guc->submission_state.lock); 2001 GEM_BUG_ON(intel_context_is_child(ce)); 2002 2003 ret = new_guc_id(guc, ce); 2004 if (unlikely(ret < 0)) { 2005 if (intel_context_is_parent(ce)) 2006 return -ENOSPC; 2007 2008 ret = steal_guc_id(guc, ce); 2009 if (ret < 0) 2010 return ret; 2011 } 2012 2013 if (intel_context_is_parent(ce)) { 2014 struct intel_context *child; 2015 int i = 1; 2016 2017 for_each_child(ce, child) 2018 child->guc_id.id = ce->guc_id.id + i++; 2019 } 2020 2021 return 0; 2022 } 
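/*
 * Worked example of the retry/backoff performed by pin_guc_id() below,
 * assuming a 5 ms timeslice: up to PIN_GUC_ID_TRIES attempts are made;
 * after the first -EAGAIN only requests are retired, after the second a
 * 5 ms sleep is inserted (timeslice << 0), after the third 10 ms
 * (timeslice << 1), with each sleep clamped to the [1, 100] ms range.
 */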
2023
2024 #define PIN_GUC_ID_TRIES 4
2025 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2026 {
2027 int ret = 0;
2028 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2029
2030 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2031
2032 try_again:
2033 spin_lock_irqsave(&guc->submission_state.lock, flags);
2034
2035 might_lock(&ce->guc_state.lock);
2036
2037 if (context_guc_id_invalid(ce)) {
2038 ret = assign_guc_id(guc, ce);
2039 if (ret)
2040 goto out_unlock;
2041 ret = 1; /* Indicates newly assigned guc_id */
2042 }
2043 if (!list_empty(&ce->guc_id.link))
2044 list_del_init(&ce->guc_id.link);
2045 atomic_inc(&ce->guc_id.ref);
2046
2047 out_unlock:
2048 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2049
2050 /*
2051 * -EAGAIN indicates no guc_ids are available, let's retire any
2052 * outstanding requests to see if that frees up a guc_id. If the first
2053 * retire didn't help, insert a sleep with the timeslice duration before
2054 * attempting to retire more requests. Double the sleep period each
2055 * subsequent pass before finally giving up. The sleep period has a max
2056 * of 100ms and a minimum of 1ms.
2057 */
2058 if (ret == -EAGAIN && --tries) {
2059 if (PIN_GUC_ID_TRIES - tries > 1) {
2060 unsigned int timeslice_shifted =
2061 ce->engine->props.timeslice_duration_ms <<
2062 (PIN_GUC_ID_TRIES - tries - 2);
2063 unsigned int max = min_t(unsigned int, 100,
2064 timeslice_shifted);
2065
2066 msleep(max_t(unsigned int, max, 1));
2067 }
2068 intel_gt_retire_requests(guc_to_gt(guc));
2069 goto try_again;
2070 }
2071
2072 return ret;
2073 }
2074
2075 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2076 {
2077 unsigned long flags;
2078
2079 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2080 GEM_BUG_ON(intel_context_is_child(ce));
2081
2082 if (unlikely(context_guc_id_invalid(ce) ||
2083 intel_context_is_parent(ce)))
2084 return;
2085
2086 spin_lock_irqsave(&guc->submission_state.lock, flags);
2087 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2088 !atomic_read(&ce->guc_id.ref))
2089 list_add_tail(&ce->guc_id.link,
2090 &guc->submission_state.guc_id_list);
2091 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2092 }
2093
2094 static int __guc_action_register_multi_lrc(struct intel_guc *guc,
2095 struct intel_context *ce,
2096 struct guc_ctxt_registration_info *info,
2097 bool loop)
2098 {
2099 struct intel_context *child;
2100 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2101 int len = 0;
2102 u32 next_id;
2103
2104 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2105
2106 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2107 action[len++] = info->flags;
2108 action[len++] = info->context_idx;
2109 action[len++] = info->engine_class;
2110 action[len++] = info->engine_submit_mask;
2111 action[len++] = info->wq_desc_lo;
2112 action[len++] = info->wq_desc_hi;
2113 action[len++] = info->wq_base_lo;
2114 action[len++] = info->wq_base_hi;
2115 action[len++] = info->wq_size;
2116 action[len++] = ce->parallel.number_children + 1;
2117 action[len++] = info->hwlrca_lo;
2118 action[len++] = info->hwlrca_hi;
2119
2120 next_id = info->context_idx + 1;
2121 for_each_child(ce, child) {
2122 GEM_BUG_ON(next_id++ != child->guc_id.id);
2123
2124 /*
2125 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2126 * only supports 32 bit currently.
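 * The upper_32_bits() dword emitted below is therefore always zero at
 * present, but is still sent to match the interface layout.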
2127 */ 2128 action[len++] = lower_32_bits(child->lrc.lrca); 2129 action[len++] = upper_32_bits(child->lrc.lrca); 2130 } 2131 2132 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2133 2134 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2135 } 2136 2137 static int __guc_action_register_context(struct intel_guc *guc, 2138 struct guc_ctxt_registration_info *info, 2139 bool loop) 2140 { 2141 u32 action[] = { 2142 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2143 info->flags, 2144 info->context_idx, 2145 info->engine_class, 2146 info->engine_submit_mask, 2147 info->wq_desc_lo, 2148 info->wq_desc_hi, 2149 info->wq_base_lo, 2150 info->wq_base_hi, 2151 info->wq_size, 2152 info->hwlrca_lo, 2153 info->hwlrca_hi, 2154 }; 2155 2156 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2157 0, loop); 2158 } 2159 2160 static void prepare_context_registration_info(struct intel_context *ce, 2161 struct guc_ctxt_registration_info *info); 2162 2163 static int register_context(struct intel_context *ce, bool loop) 2164 { 2165 struct guc_ctxt_registration_info info; 2166 struct intel_guc *guc = ce_to_guc(ce); 2167 int ret; 2168 2169 GEM_BUG_ON(intel_context_is_child(ce)); 2170 trace_intel_context_register(ce); 2171 2172 prepare_context_registration_info(ce, &info); 2173 2174 if (intel_context_is_parent(ce)) 2175 ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); 2176 else 2177 ret = __guc_action_register_context(guc, &info, loop); 2178 if (likely(!ret)) { 2179 unsigned long flags; 2180 2181 spin_lock_irqsave(&ce->guc_state.lock, flags); 2182 set_context_registered(ce); 2183 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2184 2185 guc_context_policy_init(ce, loop); 2186 } 2187 2188 return ret; 2189 } 2190 2191 static int __guc_action_deregister_context(struct intel_guc *guc, 2192 u32 guc_id) 2193 { 2194 u32 action[] = { 2195 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2196 guc_id, 2197 }; 2198 2199 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2200 G2H_LEN_DW_DEREGISTER_CONTEXT, 2201 true); 2202 } 2203 2204 static int deregister_context(struct intel_context *ce, u32 guc_id) 2205 { 2206 struct intel_guc *guc = ce_to_guc(ce); 2207 2208 GEM_BUG_ON(intel_context_is_child(ce)); 2209 trace_intel_context_deregister(ce); 2210 2211 return __guc_action_deregister_context(guc, guc_id); 2212 } 2213 2214 static inline void clear_children_join_go_memory(struct intel_context *ce) 2215 { 2216 struct parent_scratch *ps = __get_parent_scratch(ce); 2217 int i; 2218 2219 ps->go.semaphore = 0; 2220 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2221 ps->join[i].semaphore = 0; 2222 } 2223 2224 static inline u32 get_children_go_value(struct intel_context *ce) 2225 { 2226 return __get_parent_scratch(ce)->go.semaphore; 2227 } 2228 2229 static inline u32 get_children_join_value(struct intel_context *ce, 2230 u8 child_index) 2231 { 2232 return __get_parent_scratch(ce)->join[child_index].semaphore; 2233 } 2234 2235 struct context_policy { 2236 u32 count; 2237 struct guc_update_context_policy h2g; 2238 }; 2239 2240 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2241 { 2242 size_t bytes = sizeof(policy->h2g.header) + 2243 (sizeof(policy->h2g.klv[0]) * policy->count); 2244 2245 return bytes / sizeof(u32); 2246 } 2247 2248 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2249 { 2250 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2251 policy->h2g.header.ctx_id = guc_id; 2252 policy->count = 0; 
2253 } 2254 2255 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2256 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2257 { \ 2258 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2259 policy->h2g.klv[policy->count].kl = \ 2260 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2261 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2262 policy->h2g.klv[policy->count].value = data; \ 2263 policy->count++; \ 2264 } 2265 2266 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2267 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2268 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2269 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2270 2271 #undef MAKE_CONTEXT_POLICY_ADD 2272 2273 static int __guc_context_set_context_policies(struct intel_guc *guc, 2274 struct context_policy *policy, 2275 bool loop) 2276 { 2277 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2278 __guc_context_policy_action_size(policy), 2279 0, loop); 2280 } 2281 2282 static int guc_context_policy_init(struct intel_context *ce, bool loop) 2283 { 2284 struct intel_engine_cs *engine = ce->engine; 2285 struct intel_guc *guc = &engine->gt->uc.guc; 2286 struct context_policy policy; 2287 u32 execution_quantum; 2288 u32 preemption_timeout; 2289 bool missing = false; 2290 unsigned long flags; 2291 int ret; 2292 2293 /* NB: For both of these, zero means disabled. */ 2294 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2295 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2296 2297 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2298 2299 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2300 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2301 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2302 2303 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2304 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2305 2306 ret = __guc_context_set_context_policies(guc, &policy, loop); 2307 missing = ret != 0; 2308 2309 if (!missing && intel_context_is_parent(ce)) { 2310 struct intel_context *child; 2311 2312 for_each_child(ce, child) { 2313 __guc_context_policy_start_klv(&policy, child->guc_id.id); 2314 2315 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2316 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2317 2318 child->guc_state.prio = ce->guc_state.prio; 2319 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2320 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2321 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2322 2323 ret = __guc_context_set_context_policies(guc, &policy, loop); 2324 if (ret) { 2325 missing = true; 2326 break; 2327 } 2328 } 2329 } 2330 2331 spin_lock_irqsave(&ce->guc_state.lock, flags); 2332 if (missing) 2333 set_context_policy_required(ce); 2334 else 2335 clr_context_policy_required(ce); 2336 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2337 2338 return ret; 2339 } 2340 2341 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2342 { 2343 /* 2344 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2345 * (e.g. 
prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2346 */
2347 switch (prio) {
2348 default:
2349 MISSING_CASE(prio);
2350 fallthrough;
2351 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2352 return GEN12_CTX_PRIORITY_NORMAL;
2353 case GUC_CLIENT_PRIORITY_NORMAL:
2354 return GEN12_CTX_PRIORITY_LOW;
2355 case GUC_CLIENT_PRIORITY_HIGH:
2356 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2357 return GEN12_CTX_PRIORITY_HIGH;
2358 }
2359 }
2360
2361 static void prepare_context_registration_info(struct intel_context *ce,
2362 struct guc_ctxt_registration_info *info)
2363 {
2364 struct intel_engine_cs *engine = ce->engine;
2365 struct intel_guc *guc = &engine->gt->uc.guc;
2366 u32 ctx_id = ce->guc_id.id;
2367
2368 GEM_BUG_ON(!engine->mask);
2369
2370 /*
2371 * Ensure LRC + CT vmas are in the same region as the write barrier is
2372 * done based on the CT vma region.
2373 */
2374 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2375 i915_gem_object_is_lmem(ce->ring->vma->obj));
2376
2377 memset(info, 0, sizeof(*info));
2378 info->context_idx = ctx_id;
2379 info->engine_class = engine_class_to_guc_class(engine->class);
2380 info->engine_submit_mask = engine->logical_mask;
2381 /*
2382 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2383 * only supports 32 bit currently.
2384 */
2385 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2386 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2387 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2388 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2389 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2390
2391 /*
2392 * If context is a parent, we need to register a process descriptor
2393 * describing a work queue and register all child contexts.
2394 */
2395 if (intel_context_is_parent(ce)) {
2396 struct guc_sched_wq_desc *wq_desc;
2397 u64 wq_desc_offset, wq_base_offset;
2398
2399 ce->parallel.guc.wqi_tail = 0;
2400 ce->parallel.guc.wqi_head = 0;
2401
2402 wq_desc_offset = i915_ggtt_offset(ce->state) +
2403 __get_parent_scratch_offset(ce);
2404 wq_base_offset = i915_ggtt_offset(ce->state) +
2405 __get_wq_offset(ce);
2406 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2407 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2408 info->wq_base_lo = lower_32_bits(wq_base_offset);
2409 info->wq_base_hi = upper_32_bits(wq_base_offset);
2410 info->wq_size = WQ_SIZE;
2411
2412 wq_desc = __get_wq_desc(ce);
2413 memset(wq_desc, 0, sizeof(*wq_desc));
2414 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2415
2416 clear_children_join_go_memory(ce);
2417 }
2418 }
2419
2420 static int try_context_registration(struct intel_context *ce, bool loop)
2421 {
2422 struct intel_engine_cs *engine = ce->engine;
2423 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2424 struct intel_guc *guc = &engine->gt->uc.guc;
2425 intel_wakeref_t wakeref;
2426 u32 ctx_id = ce->guc_id.id;
2427 bool context_registered;
2428 int ret = 0;
2429
2430 GEM_BUG_ON(!sched_state_is_init(ce));
2431
2432 context_registered = ctx_id_mapped(guc, ctx_id);
2433
2434 clr_ctx_id_mapping(guc, ctx_id);
2435 set_ctx_id_mapping(guc, ctx_id, ce);
2436
2437 /*
2438 * The context_lookup xarray is used to determine if the hardware
2439 * context is currently registered. There are two cases in which it
2440 * could be registered: either the guc_id has been stolen from another
2441 * context or the lrc descriptor address of this context has changed. In
2442 * either case the context needs to be deregistered with the GuC before
2443 * registering this context.
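 * (When deregistering because the guc_id was stolen, registration is
 * retried once the corresponding deregister-done G2H arrives; the
 * wait_for_deregister_to_register state set below tracks this.)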
2444 */
2445 if (context_registered) {
2446 bool disabled;
2447 unsigned long flags;
2448
2449 trace_intel_context_steal_guc_id(ce);
2450 GEM_BUG_ON(!loop);
2451
2452 /* Seal race with Reset */
2453 spin_lock_irqsave(&ce->guc_state.lock, flags);
2454 disabled = submission_disabled(guc);
2455 if (likely(!disabled)) {
2456 set_context_wait_for_deregister_to_register(ce);
2457 intel_context_get(ce);
2458 }
2459 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2460 if (unlikely(disabled)) {
2461 clr_ctx_id_mapping(guc, ctx_id);
2462 return 0; /* Will get registered later */
2463 }
2464
2465 /*
2466 * If stealing the guc_id, this ce has the same guc_id as the
2467 * context whose guc_id was stolen.
2468 */
2469 with_intel_runtime_pm(runtime_pm, wakeref)
2470 ret = deregister_context(ce, ce->guc_id.id);
2471 if (unlikely(ret == -ENODEV))
2472 ret = 0; /* Will get registered later */
2473 } else {
2474 with_intel_runtime_pm(runtime_pm, wakeref)
2475 ret = register_context(ce, loop);
2476 if (unlikely(ret == -EBUSY)) {
2477 clr_ctx_id_mapping(guc, ctx_id);
2478 } else if (unlikely(ret == -ENODEV)) {
2479 clr_ctx_id_mapping(guc, ctx_id);
2480 ret = 0; /* Will get registered later */
2481 }
2482 }
2483
2484 return ret;
2485 }
2486
2487 static int __guc_context_pre_pin(struct intel_context *ce,
2488 struct intel_engine_cs *engine,
2489 struct i915_gem_ww_ctx *ww,
2490 void **vaddr)
2491 {
2492 return lrc_pre_pin(ce, engine, ww, vaddr);
2493 }
2494
2495 static int __guc_context_pin(struct intel_context *ce,
2496 struct intel_engine_cs *engine,
2497 void *vaddr)
2498 {
2499 if (i915_ggtt_offset(ce->state) !=
2500 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2501 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2502
2503 /*
2504 * GuC context gets pinned in guc_request_alloc. See that function for
2505 * explanation of why.
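 * (In short: with dma_resv a context can be pinned and unpinned many
 * times in quick succession, so the guc_id is instead taken in
 * guc_request_alloc() where it is guaranteed to stay stable until the
 * request is retired.)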
2506 */ 2507 2508 return lrc_pin(ce, engine, vaddr); 2509 } 2510 2511 static int guc_context_pre_pin(struct intel_context *ce, 2512 struct i915_gem_ww_ctx *ww, 2513 void **vaddr) 2514 { 2515 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2516 } 2517 2518 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2519 { 2520 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2521 2522 if (likely(!ret && !intel_context_is_barrier(ce))) 2523 intel_engine_pm_get(ce->engine); 2524 2525 return ret; 2526 } 2527 2528 static void guc_context_unpin(struct intel_context *ce) 2529 { 2530 struct intel_guc *guc = ce_to_guc(ce); 2531 2532 unpin_guc_id(guc, ce); 2533 lrc_unpin(ce); 2534 2535 if (likely(!intel_context_is_barrier(ce))) 2536 intel_engine_pm_put_async(ce->engine); 2537 } 2538 2539 static void guc_context_post_unpin(struct intel_context *ce) 2540 { 2541 lrc_post_unpin(ce); 2542 } 2543 2544 static void __guc_context_sched_enable(struct intel_guc *guc, 2545 struct intel_context *ce) 2546 { 2547 u32 action[] = { 2548 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2549 ce->guc_id.id, 2550 GUC_CONTEXT_ENABLE 2551 }; 2552 2553 trace_intel_context_sched_enable(ce); 2554 2555 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2556 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2557 } 2558 2559 static void __guc_context_sched_disable(struct intel_guc *guc, 2560 struct intel_context *ce, 2561 u16 guc_id) 2562 { 2563 u32 action[] = { 2564 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2565 guc_id, /* ce->guc_id.id not stable */ 2566 GUC_CONTEXT_DISABLE 2567 }; 2568 2569 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2570 2571 GEM_BUG_ON(intel_context_is_child(ce)); 2572 trace_intel_context_sched_disable(ce); 2573 2574 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2575 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2576 } 2577 2578 static void guc_blocked_fence_complete(struct intel_context *ce) 2579 { 2580 lockdep_assert_held(&ce->guc_state.lock); 2581 2582 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2583 i915_sw_fence_complete(&ce->guc_state.blocked); 2584 } 2585 2586 static void guc_blocked_fence_reinit(struct intel_context *ce) 2587 { 2588 lockdep_assert_held(&ce->guc_state.lock); 2589 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2590 2591 /* 2592 * This fence is always complete unless a pending schedule disable is 2593 * outstanding. We arm the fence here and complete it when we receive 2594 * the pending schedule disable complete message. 
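 * The fini/reinit/await/commit sequence below re-arms the fence so it
 * does not report done again until guc_blocked_fence_complete() is
 * called.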
2595 */ 2596 i915_sw_fence_fini(&ce->guc_state.blocked); 2597 i915_sw_fence_reinit(&ce->guc_state.blocked); 2598 i915_sw_fence_await(&ce->guc_state.blocked); 2599 i915_sw_fence_commit(&ce->guc_state.blocked); 2600 } 2601 2602 static u16 prep_context_pending_disable(struct intel_context *ce) 2603 { 2604 lockdep_assert_held(&ce->guc_state.lock); 2605 2606 set_context_pending_disable(ce); 2607 clr_context_enabled(ce); 2608 guc_blocked_fence_reinit(ce); 2609 intel_context_get(ce); 2610 2611 return ce->guc_id.id; 2612 } 2613 2614 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2615 { 2616 struct intel_guc *guc = ce_to_guc(ce); 2617 unsigned long flags; 2618 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2619 intel_wakeref_t wakeref; 2620 u16 guc_id; 2621 bool enabled; 2622 2623 GEM_BUG_ON(intel_context_is_child(ce)); 2624 2625 spin_lock_irqsave(&ce->guc_state.lock, flags); 2626 2627 incr_context_blocked(ce); 2628 2629 enabled = context_enabled(ce); 2630 if (unlikely(!enabled || submission_disabled(guc))) { 2631 if (enabled) 2632 clr_context_enabled(ce); 2633 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2634 return &ce->guc_state.blocked; 2635 } 2636 2637 /* 2638 * We add +2 here as the schedule disable complete CTB handler calls 2639 * intel_context_sched_disable_unpin (-2 to pin_count). 2640 */ 2641 atomic_add(2, &ce->pin_count); 2642 2643 guc_id = prep_context_pending_disable(ce); 2644 2645 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2646 2647 with_intel_runtime_pm(runtime_pm, wakeref) 2648 __guc_context_sched_disable(guc, ce, guc_id); 2649 2650 return &ce->guc_state.blocked; 2651 } 2652 2653 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2654 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2655 #define SCHED_STATE_NO_UNBLOCK \ 2656 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2657 SCHED_STATE_PENDING_DISABLE | \ 2658 SCHED_STATE_BANNED) 2659 2660 static bool context_cant_unblock(struct intel_context *ce) 2661 { 2662 lockdep_assert_held(&ce->guc_state.lock); 2663 2664 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2665 context_guc_id_invalid(ce) || 2666 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2667 !intel_context_is_pinned(ce); 2668 } 2669 2670 static void guc_context_unblock(struct intel_context *ce) 2671 { 2672 struct intel_guc *guc = ce_to_guc(ce); 2673 unsigned long flags; 2674 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2675 intel_wakeref_t wakeref; 2676 bool enable; 2677 2678 GEM_BUG_ON(context_enabled(ce)); 2679 GEM_BUG_ON(intel_context_is_child(ce)); 2680 2681 spin_lock_irqsave(&ce->guc_state.lock, flags); 2682 2683 if (unlikely(submission_disabled(guc) || 2684 context_cant_unblock(ce))) { 2685 enable = false; 2686 } else { 2687 enable = true; 2688 set_context_pending_enable(ce); 2689 set_context_enabled(ce); 2690 intel_context_get(ce); 2691 } 2692 2693 decr_context_blocked(ce); 2694 2695 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2696 2697 if (enable) { 2698 with_intel_runtime_pm(runtime_pm, wakeref) 2699 __guc_context_sched_enable(guc, ce); 2700 } 2701 } 2702 2703 static void guc_context_cancel_request(struct intel_context *ce, 2704 struct i915_request *rq) 2705 { 2706 struct intel_context *block_context = 2707 request_to_scheduling_context(rq); 2708 2709 if (i915_sw_fence_signaled(&rq->submit)) { 2710 struct i915_sw_fence *fence; 2711 2712 intel_context_get(ce); 2713 fence = guc_context_block(block_context); 2714 i915_sw_fence_wait(fence); 2715 if (!i915_request_completed(rq)) { 2716 
__i915_request_skip(rq);
2717 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2718 true);
2719 }
2720
2721 guc_context_unblock(block_context);
2722 intel_context_put(ce);
2723 }
2724 }
2725
2726 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2727 u16 guc_id,
2728 u32 preemption_timeout)
2729 {
2730 struct context_policy policy;
2731
2732 __guc_context_policy_start_klv(&policy, guc_id);
2733 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2734 __guc_context_set_context_policies(guc, &policy, true);
2735 }
2736
2737 static void
2738 guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
2739 unsigned int preempt_timeout_ms)
2740 {
2741 struct intel_guc *guc = ce_to_guc(ce);
2742 struct intel_runtime_pm *runtime_pm =
2743 &ce->engine->gt->i915->runtime_pm;
2744 intel_wakeref_t wakeref;
2745 unsigned long flags;
2746
2747 GEM_BUG_ON(intel_context_is_child(ce));
2748
2749 guc_flush_submissions(guc);
2750
2751 spin_lock_irqsave(&ce->guc_state.lock, flags);
2752 set_context_banned(ce);
2753
2754 if (submission_disabled(guc) ||
2755 (!context_enabled(ce) && !context_pending_disable(ce))) {
2756 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2757
2758 guc_cancel_context_requests(ce);
2759 intel_engine_signal_breadcrumbs(ce->engine);
2760 } else if (!context_pending_disable(ce)) {
2761 u16 guc_id;
2762
2763 /*
2764 * We add +2 here as the schedule disable complete CTB handler
2765 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2766 */
2767 atomic_add(2, &ce->pin_count);
2768
2769 guc_id = prep_context_pending_disable(ce);
2770 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2771
2772 /*
2773 * In addition to disabling scheduling, set the preemption
2774 * timeout to the minimum value (1 us) so the banned context
2775 * gets kicked off the HW ASAP.
2776 */
2777 with_intel_runtime_pm(runtime_pm, wakeref) {
2778 __guc_context_set_preemption_timeout(guc, guc_id,
2779 preempt_timeout_ms);
2780 __guc_context_sched_disable(guc, ce, guc_id);
2781 }
2782 } else {
2783 if (!context_guc_id_invalid(ce))
2784 with_intel_runtime_pm(runtime_pm, wakeref)
2785 __guc_context_set_preemption_timeout(guc,
2786 ce->guc_id.id,
2787 preempt_timeout_ms);
2788 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2789 }
2790 }
2791
2792 static void guc_context_sched_disable(struct intel_context *ce)
2793 {
2794 struct intel_guc *guc = ce_to_guc(ce);
2795 unsigned long flags;
2796 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
2797 intel_wakeref_t wakeref;
2798 u16 guc_id;
2799
2800 GEM_BUG_ON(intel_context_is_child(ce));
2801
2802 spin_lock_irqsave(&ce->guc_state.lock, flags);
2803
2804 /*
2805 * We have to check if the context has been disabled by another thread,
2806 * check if submission has been disabled to seal a race with reset and
2807 * finally check if any more requests have been committed to the
2808 * context, ensuring that a request doesn't slip through the
2809 * 'context_pending_disable' fence.
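 * If any of these checks fail, the schedule disable H2G is skipped and
 * we fall through to intel_context_sched_disable_unpin() via the unpin
 * label below.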
2810 */ 2811 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 2812 context_has_committed_requests(ce))) { 2813 clr_context_enabled(ce); 2814 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2815 goto unpin; 2816 } 2817 guc_id = prep_context_pending_disable(ce); 2818 2819 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2820 2821 with_intel_runtime_pm(runtime_pm, wakeref) 2822 __guc_context_sched_disable(guc, ce, guc_id); 2823 2824 return; 2825 unpin: 2826 intel_context_sched_disable_unpin(ce); 2827 } 2828 2829 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 2830 { 2831 struct intel_guc *guc = ce_to_guc(ce); 2832 struct intel_gt *gt = guc_to_gt(guc); 2833 unsigned long flags; 2834 bool disabled; 2835 2836 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 2837 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 2838 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 2839 GEM_BUG_ON(context_enabled(ce)); 2840 2841 /* Seal race with Reset */ 2842 spin_lock_irqsave(&ce->guc_state.lock, flags); 2843 disabled = submission_disabled(guc); 2844 if (likely(!disabled)) { 2845 __intel_gt_pm_get(gt); 2846 set_context_destroyed(ce); 2847 clr_context_registered(ce); 2848 } 2849 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2850 if (unlikely(disabled)) { 2851 release_guc_id(guc, ce); 2852 __guc_context_destroy(ce); 2853 return; 2854 } 2855 2856 deregister_context(ce, ce->guc_id.id); 2857 } 2858 2859 static void __guc_context_destroy(struct intel_context *ce) 2860 { 2861 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 2862 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 2863 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 2864 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 2865 GEM_BUG_ON(ce->guc_state.number_committed_requests); 2866 2867 lrc_fini(ce); 2868 intel_context_fini(ce); 2869 2870 if (intel_engine_is_virtual(ce->engine)) { 2871 struct guc_virtual_engine *ve = 2872 container_of(ce, typeof(*ve), context); 2873 2874 if (ve->base.breadcrumbs) 2875 intel_breadcrumbs_put(ve->base.breadcrumbs); 2876 2877 kfree(ve); 2878 } else { 2879 intel_context_free(ce); 2880 } 2881 } 2882 2883 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 2884 { 2885 struct intel_context *ce; 2886 unsigned long flags; 2887 2888 GEM_BUG_ON(!submission_disabled(guc) && 2889 guc_submission_initialized(guc)); 2890 2891 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2892 spin_lock_irqsave(&guc->submission_state.lock, flags); 2893 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2894 struct intel_context, 2895 destroyed_link); 2896 if (ce) 2897 list_del_init(&ce->destroyed_link); 2898 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2899 2900 if (!ce) 2901 break; 2902 2903 release_guc_id(guc, ce); 2904 __guc_context_destroy(ce); 2905 } 2906 } 2907 2908 static void deregister_destroyed_contexts(struct intel_guc *guc) 2909 { 2910 struct intel_context *ce; 2911 unsigned long flags; 2912 2913 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2914 spin_lock_irqsave(&guc->submission_state.lock, flags); 2915 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2916 struct intel_context, 2917 destroyed_link); 2918 if (ce) 2919 list_del_init(&ce->destroyed_link); 2920 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2921 2922 if (!ce) 2923 break; 2924 2925 guc_lrc_desc_unpin(ce); 2926 } 2927 } 2928 2929 static void destroyed_worker_func(struct work_struct 
*w) 2930 { 2931 struct intel_guc *guc = container_of(w, struct intel_guc, 2932 submission_state.destroyed_worker); 2933 struct intel_gt *gt = guc_to_gt(guc); 2934 int tmp; 2935 2936 with_intel_gt_pm(gt, tmp) 2937 deregister_destroyed_contexts(guc); 2938 } 2939 2940 static void guc_context_destroy(struct kref *kref) 2941 { 2942 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2943 struct intel_guc *guc = ce_to_guc(ce); 2944 unsigned long flags; 2945 bool destroy; 2946 2947 /* 2948 * If the guc_id is invalid this context has been stolen and we can free 2949 * it immediately. Also can be freed immediately if the context is not 2950 * registered with the GuC or the GuC is in the middle of a reset. 2951 */ 2952 spin_lock_irqsave(&guc->submission_state.lock, flags); 2953 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 2954 !ctx_id_mapped(guc, ce->guc_id.id); 2955 if (likely(!destroy)) { 2956 if (!list_empty(&ce->guc_id.link)) 2957 list_del_init(&ce->guc_id.link); 2958 list_add_tail(&ce->destroyed_link, 2959 &guc->submission_state.destroyed_contexts); 2960 } else { 2961 __release_guc_id(guc, ce); 2962 } 2963 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2964 if (unlikely(destroy)) { 2965 __guc_context_destroy(ce); 2966 return; 2967 } 2968 2969 /* 2970 * We use a worker to issue the H2G to deregister the context as we can 2971 * take the GT PM for the first time which isn't allowed from an atomic 2972 * context. 2973 */ 2974 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 2975 } 2976 2977 static int guc_context_alloc(struct intel_context *ce) 2978 { 2979 return lrc_alloc(ce, ce->engine); 2980 } 2981 2982 static void __guc_context_set_prio(struct intel_guc *guc, 2983 struct intel_context *ce) 2984 { 2985 struct context_policy policy; 2986 2987 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2988 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2989 __guc_context_set_context_policies(guc, &policy, true); 2990 } 2991 2992 static void guc_context_set_prio(struct intel_guc *guc, 2993 struct intel_context *ce, 2994 u8 prio) 2995 { 2996 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 2997 prio > GUC_CLIENT_PRIORITY_NORMAL); 2998 lockdep_assert_held(&ce->guc_state.lock); 2999 3000 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3001 !context_registered(ce)) { 3002 ce->guc_state.prio = prio; 3003 return; 3004 } 3005 3006 ce->guc_state.prio = prio; 3007 __guc_context_set_prio(guc, ce); 3008 3009 trace_intel_context_set_prio(ce); 3010 } 3011 3012 static inline u8 map_i915_prio_to_guc_prio(int prio) 3013 { 3014 if (prio == I915_PRIORITY_NORMAL) 3015 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3016 else if (prio < I915_PRIORITY_NORMAL) 3017 return GUC_CLIENT_PRIORITY_NORMAL; 3018 else if (prio < I915_PRIORITY_DISPLAY) 3019 return GUC_CLIENT_PRIORITY_HIGH; 3020 else 3021 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3022 } 3023 3024 static inline void add_context_inflight_prio(struct intel_context *ce, 3025 u8 guc_prio) 3026 { 3027 lockdep_assert_held(&ce->guc_state.lock); 3028 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3029 3030 ++ce->guc_state.prio_count[guc_prio]; 3031 3032 /* Overflow protection */ 3033 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3034 } 3035 3036 static inline void sub_context_inflight_prio(struct intel_context *ce, 3037 u8 guc_prio) 3038 { 3039 lockdep_assert_held(&ce->guc_state.lock); 3040 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3041 3042 /* 
Underflow protection */ 3043 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3044 3045 --ce->guc_state.prio_count[guc_prio]; 3046 } 3047 3048 static inline void update_context_prio(struct intel_context *ce) 3049 { 3050 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3051 int i; 3052 3053 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3054 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3055 3056 lockdep_assert_held(&ce->guc_state.lock); 3057 3058 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3059 if (ce->guc_state.prio_count[i]) { 3060 guc_context_set_prio(guc, ce, i); 3061 break; 3062 } 3063 } 3064 } 3065 3066 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3067 { 3068 /* Lower value is higher priority */ 3069 return new_guc_prio < old_guc_prio; 3070 } 3071 3072 static void add_to_context(struct i915_request *rq) 3073 { 3074 struct intel_context *ce = request_to_scheduling_context(rq); 3075 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3076 3077 GEM_BUG_ON(intel_context_is_child(ce)); 3078 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3079 3080 spin_lock(&ce->guc_state.lock); 3081 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3082 3083 if (rq->guc_prio == GUC_PRIO_INIT) { 3084 rq->guc_prio = new_guc_prio; 3085 add_context_inflight_prio(ce, rq->guc_prio); 3086 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3087 sub_context_inflight_prio(ce, rq->guc_prio); 3088 rq->guc_prio = new_guc_prio; 3089 add_context_inflight_prio(ce, rq->guc_prio); 3090 } 3091 update_context_prio(ce); 3092 3093 spin_unlock(&ce->guc_state.lock); 3094 } 3095 3096 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3097 { 3098 lockdep_assert_held(&ce->guc_state.lock); 3099 3100 if (rq->guc_prio != GUC_PRIO_INIT && 3101 rq->guc_prio != GUC_PRIO_FINI) { 3102 sub_context_inflight_prio(ce, rq->guc_prio); 3103 update_context_prio(ce); 3104 } 3105 rq->guc_prio = GUC_PRIO_FINI; 3106 } 3107 3108 static void remove_from_context(struct i915_request *rq) 3109 { 3110 struct intel_context *ce = request_to_scheduling_context(rq); 3111 3112 GEM_BUG_ON(intel_context_is_child(ce)); 3113 3114 spin_lock_irq(&ce->guc_state.lock); 3115 3116 list_del_init(&rq->sched.link); 3117 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3118 3119 /* Prevent further __await_execution() registering a cb, then flush */ 3120 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3121 3122 guc_prio_fini(rq, ce); 3123 3124 decr_context_committed_requests(ce); 3125 3126 spin_unlock_irq(&ce->guc_state.lock); 3127 3128 atomic_dec(&ce->guc_id.ref); 3129 i915_request_notify_execute_cb_imm(rq); 3130 } 3131 3132 static const struct intel_context_ops guc_context_ops = { 3133 .alloc = guc_context_alloc, 3134 3135 .pre_pin = guc_context_pre_pin, 3136 .pin = guc_context_pin, 3137 .unpin = guc_context_unpin, 3138 .post_unpin = guc_context_post_unpin, 3139 3140 .revoke = guc_context_revoke, 3141 3142 .cancel_request = guc_context_cancel_request, 3143 3144 .enter = intel_context_enter_engine, 3145 .exit = intel_context_exit_engine, 3146 3147 .sched_disable = guc_context_sched_disable, 3148 3149 .reset = lrc_reset, 3150 .destroy = guc_context_destroy, 3151 3152 .create_virtual = guc_create_virtual, 3153 .create_parallel = guc_create_parallel, 3154 }; 3155 3156 static void submit_work_cb(struct irq_work *wrk) 3157 { 3158 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3159 3160 might_lock(&rq->engine->sched_engine->lock); 
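/*
 * Completing the submit fence below may end up submitting the request,
 * which takes sched_engine->lock; the might_lock() annotation above
 * makes that dependency visible to lockdep even when submission does
 * not happen from here.
 */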
3161 i915_sw_fence_complete(&rq->submit); 3162 } 3163 3164 static void __guc_signal_context_fence(struct intel_context *ce) 3165 { 3166 struct i915_request *rq, *rn; 3167 3168 lockdep_assert_held(&ce->guc_state.lock); 3169 3170 if (!list_empty(&ce->guc_state.fences)) 3171 trace_intel_context_fence_release(ce); 3172 3173 /* 3174 * Use an IRQ to ensure locking order of sched_engine->lock -> 3175 * ce->guc_state.lock is preserved. 3176 */ 3177 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3178 guc_fence_link) { 3179 list_del(&rq->guc_fence_link); 3180 irq_work_queue(&rq->submit_work); 3181 } 3182 3183 INIT_LIST_HEAD(&ce->guc_state.fences); 3184 } 3185 3186 static void guc_signal_context_fence(struct intel_context *ce) 3187 { 3188 unsigned long flags; 3189 3190 GEM_BUG_ON(intel_context_is_child(ce)); 3191 3192 spin_lock_irqsave(&ce->guc_state.lock, flags); 3193 clr_context_wait_for_deregister_to_register(ce); 3194 __guc_signal_context_fence(ce); 3195 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3196 } 3197 3198 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3199 { 3200 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3201 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3202 !submission_disabled(ce_to_guc(ce)); 3203 } 3204 3205 static void guc_context_init(struct intel_context *ce) 3206 { 3207 const struct i915_gem_context *ctx; 3208 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3209 3210 rcu_read_lock(); 3211 ctx = rcu_dereference(ce->gem_context); 3212 if (ctx) 3213 prio = ctx->sched.priority; 3214 rcu_read_unlock(); 3215 3216 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3217 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3218 } 3219 3220 static int guc_request_alloc(struct i915_request *rq) 3221 { 3222 struct intel_context *ce = request_to_scheduling_context(rq); 3223 struct intel_guc *guc = ce_to_guc(ce); 3224 unsigned long flags; 3225 int ret; 3226 3227 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3228 3229 /* 3230 * Flush enough space to reduce the likelihood of waiting after 3231 * we start building the request - in which case we will just 3232 * have to repeat work. 3233 */ 3234 rq->reserved_space += GUC_REQUEST_SIZE; 3235 3236 /* 3237 * Note that after this point, we have committed to using 3238 * this request as it is being used to both track the 3239 * state of engine initialisation and liveness of the 3240 * golden renderstate above. Think twice before you try 3241 * to cancel/unwind this request now. 3242 */ 3243 3244 /* Unconditionally invalidate GPU caches and TLBs. */ 3245 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3246 if (ret) 3247 return ret; 3248 3249 rq->reserved_space -= GUC_REQUEST_SIZE; 3250 3251 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3252 guc_context_init(ce); 3253 3254 /* 3255 * Call pin_guc_id here rather than in the pinning step as with 3256 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3257 * guc_id and creating horrible race conditions. This is especially bad 3258 * when guc_id are being stolen due to over subscription. By the time 3259 * this function is reached, it is guaranteed that the guc_id will be 3260 * persistent until the generated request is retired. Thus, sealing these 3261 * race conditions. It is still safe to fail here if guc_id are 3262 * exhausted and return -EAGAIN to the user indicating that they can try 3263 * again in the future. 
3264 *
3265 * There is no need for a lock here as the timeline mutex ensures at
3266 * most one context can be executing this code path at once. The
3267 * guc_id_ref is incremented once for every request in flight and
3268 * decremented on each retire. When it is zero, a lock around the
3269 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3270 */
3271 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3272 goto out;
3273
3274 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3275 if (unlikely(ret < 0))
3276 return ret;
3277 if (context_needs_register(ce, !!ret)) {
3278 ret = try_context_registration(ce, true);
3279 if (unlikely(ret)) { /* unwind */
3280 if (ret == -EPIPE) {
3281 disable_submission(guc);
3282 goto out; /* GPU will be reset */
3283 }
3284 atomic_dec(&ce->guc_id.ref);
3285 unpin_guc_id(guc, ce);
3286 return ret;
3287 }
3288 }
3289
3290 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3291
3292 out:
3293 /*
3294 * We block all requests on this context if a G2H is pending for a
3295 * schedule disable or context deregistration as the GuC will fail a
3296 * schedule enable or context registration if either G2H is pending
3297 * respectively. Once a G2H returns, the fence is released that is
3298 * blocking these requests (see guc_signal_context_fence).
3299 */
3300 spin_lock_irqsave(&ce->guc_state.lock, flags);
3301 if (context_wait_for_deregister_to_register(ce) ||
3302 context_pending_disable(ce)) {
3303 init_irq_work(&rq->submit_work, submit_work_cb);
3304 i915_sw_fence_await(&rq->submit);
3305
3306 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3307 }
3308 incr_context_committed_requests(ce);
3309 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3310
3311 return 0;
3312 }
3313
3314 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3315 struct i915_gem_ww_ctx *ww,
3316 void **vaddr)
3317 {
3318 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3319
3320 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3321 }
3322
3323 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3324 {
3325 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3326 int ret = __guc_context_pin(ce, engine, vaddr);
3327 intel_engine_mask_t tmp, mask = ce->engine->mask;
3328
3329 if (likely(!ret))
3330 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3331 intel_engine_pm_get(engine);
3332
3333 return ret;
3334 }
3335
3336 static void guc_virtual_context_unpin(struct intel_context *ce)
3337 {
3338 intel_engine_mask_t tmp, mask = ce->engine->mask;
3339 struct intel_engine_cs *engine;
3340 struct intel_guc *guc = ce_to_guc(ce);
3341
3342 GEM_BUG_ON(context_enabled(ce));
3343 GEM_BUG_ON(intel_context_is_barrier(ce));
3344
3345 unpin_guc_id(guc, ce);
3346 lrc_unpin(ce);
3347
3348 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3349 intel_engine_pm_put_async(engine);
3350 }
3351
3352 static void guc_virtual_context_enter(struct intel_context *ce)
3353 {
3354 intel_engine_mask_t tmp, mask = ce->engine->mask;
3355 struct intel_engine_cs *engine;
3356
3357 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3358 intel_engine_pm_get(engine);
3359
3360 intel_timeline_enter(ce->timeline);
3361 }
3362
3363 static void guc_virtual_context_exit(struct intel_context *ce)
3364 {
3365 intel_engine_mask_t tmp, mask = ce->engine->mask;
3366 struct intel_engine_cs *engine;
3367
3368 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3369 intel_engine_pm_put(engine);
3370 3371 intel_timeline_exit(ce->timeline); 3372 } 3373 3374 static int guc_virtual_context_alloc(struct intel_context *ce) 3375 { 3376 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3377 3378 return lrc_alloc(ce, engine); 3379 } 3380 3381 static const struct intel_context_ops virtual_guc_context_ops = { 3382 .alloc = guc_virtual_context_alloc, 3383 3384 .pre_pin = guc_virtual_context_pre_pin, 3385 .pin = guc_virtual_context_pin, 3386 .unpin = guc_virtual_context_unpin, 3387 .post_unpin = guc_context_post_unpin, 3388 3389 .revoke = guc_context_revoke, 3390 3391 .cancel_request = guc_context_cancel_request, 3392 3393 .enter = guc_virtual_context_enter, 3394 .exit = guc_virtual_context_exit, 3395 3396 .sched_disable = guc_context_sched_disable, 3397 3398 .destroy = guc_context_destroy, 3399 3400 .get_sibling = guc_virtual_get_sibling, 3401 }; 3402 3403 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3404 { 3405 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3406 struct intel_guc *guc = ce_to_guc(ce); 3407 int ret; 3408 3409 GEM_BUG_ON(!intel_context_is_parent(ce)); 3410 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3411 3412 ret = pin_guc_id(guc, ce); 3413 if (unlikely(ret < 0)) 3414 return ret; 3415 3416 return __guc_context_pin(ce, engine, vaddr); 3417 } 3418 3419 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3420 { 3421 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3422 3423 GEM_BUG_ON(!intel_context_is_child(ce)); 3424 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3425 3426 __intel_context_pin(ce->parallel.parent); 3427 return __guc_context_pin(ce, engine, vaddr); 3428 } 3429 3430 static void guc_parent_context_unpin(struct intel_context *ce) 3431 { 3432 struct intel_guc *guc = ce_to_guc(ce); 3433 3434 GEM_BUG_ON(context_enabled(ce)); 3435 GEM_BUG_ON(intel_context_is_barrier(ce)); 3436 GEM_BUG_ON(!intel_context_is_parent(ce)); 3437 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3438 3439 unpin_guc_id(guc, ce); 3440 lrc_unpin(ce); 3441 } 3442 3443 static void guc_child_context_unpin(struct intel_context *ce) 3444 { 3445 GEM_BUG_ON(context_enabled(ce)); 3446 GEM_BUG_ON(intel_context_is_barrier(ce)); 3447 GEM_BUG_ON(!intel_context_is_child(ce)); 3448 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3449 3450 lrc_unpin(ce); 3451 } 3452 3453 static void guc_child_context_post_unpin(struct intel_context *ce) 3454 { 3455 GEM_BUG_ON(!intel_context_is_child(ce)); 3456 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3457 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3458 3459 lrc_post_unpin(ce); 3460 intel_context_unpin(ce->parallel.parent); 3461 } 3462 3463 static void guc_child_context_destroy(struct kref *kref) 3464 { 3465 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3466 3467 __guc_context_destroy(ce); 3468 } 3469 3470 static const struct intel_context_ops virtual_parent_context_ops = { 3471 .alloc = guc_virtual_context_alloc, 3472 3473 .pre_pin = guc_context_pre_pin, 3474 .pin = guc_parent_context_pin, 3475 .unpin = guc_parent_context_unpin, 3476 .post_unpin = guc_context_post_unpin, 3477 3478 .revoke = guc_context_revoke, 3479 3480 .cancel_request = guc_context_cancel_request, 3481 3482 .enter = guc_virtual_context_enter, 3483 .exit = guc_virtual_context_exit, 3484 3485 .sched_disable = guc_context_sched_disable, 3486 3487 .destroy = guc_context_destroy, 3488 3489 .get_sibling = guc_virtual_get_sibling, 3490 }; 3491 
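/*
 * Child contexts of a parallel (multi-lrc) submission share the parent's
 * block of guc_ids and are registered with the GuC as part of the
 * parent's multi-lrc registration (see __guc_action_register_multi_lrc()),
 * hence the ops below carry no revoke or sched_disable hooks of their own.
 */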
3492 static const struct intel_context_ops virtual_child_context_ops = { 3493 .alloc = guc_virtual_context_alloc, 3494 3495 .pre_pin = guc_context_pre_pin, 3496 .pin = guc_child_context_pin, 3497 .unpin = guc_child_context_unpin, 3498 .post_unpin = guc_child_context_post_unpin, 3499 3500 .cancel_request = guc_context_cancel_request, 3501 3502 .enter = guc_virtual_context_enter, 3503 .exit = guc_virtual_context_exit, 3504 3505 .destroy = guc_child_context_destroy, 3506 3507 .get_sibling = guc_virtual_get_sibling, 3508 }; 3509 3510 /* 3511 * The below override of the breadcrumbs is enabled when the user configures a 3512 * context for parallel submission (multi-lrc, parent-child). 3513 * 3514 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3515 * safely preempt all the hw contexts configured for parallel submission 3516 * between each BB. The contract between the i915 and GuC is if the parent 3517 * context can be preempted, all the children can be preempted, and the GuC will 3518 * always try to preempt the parent before the children. A handshake between the 3519 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3520 * creating a window to preempt between each set of BBs. 3521 */ 3522 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3523 u64 offset, u32 len, 3524 const unsigned int flags); 3525 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3526 u64 offset, u32 len, 3527 const unsigned int flags); 3528 static u32 * 3529 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3530 u32 *cs); 3531 static u32 * 3532 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3533 u32 *cs); 3534 3535 static struct intel_context * 3536 guc_create_parallel(struct intel_engine_cs **engines, 3537 unsigned int num_siblings, 3538 unsigned int width) 3539 { 3540 struct intel_engine_cs **siblings = NULL; 3541 struct intel_context *parent = NULL, *ce, *err; 3542 int i, j; 3543 3544 siblings = kmalloc_array(num_siblings, 3545 sizeof(*siblings), 3546 GFP_KERNEL); 3547 if (!siblings) 3548 return ERR_PTR(-ENOMEM); 3549 3550 for (i = 0; i < width; ++i) { 3551 for (j = 0; j < num_siblings; ++j) 3552 siblings[j] = engines[i * num_siblings + j]; 3553 3554 ce = intel_engine_create_virtual(siblings, num_siblings, 3555 FORCE_VIRTUAL); 3556 if (IS_ERR(ce)) { 3557 err = ERR_CAST(ce); 3558 goto unwind; 3559 } 3560 3561 if (i == 0) { 3562 parent = ce; 3563 parent->ops = &virtual_parent_context_ops; 3564 } else { 3565 ce->ops = &virtual_child_context_ops; 3566 intel_context_bind_parent_child(parent, ce); 3567 } 3568 } 3569 3570 parent->parallel.fence_context = dma_fence_context_alloc(1); 3571 3572 parent->engine->emit_bb_start = 3573 emit_bb_start_parent_no_preempt_mid_batch; 3574 parent->engine->emit_fini_breadcrumb = 3575 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3576 parent->engine->emit_fini_breadcrumb_dw = 3577 12 + 4 * parent->parallel.number_children; 3578 for_each_child(parent, ce) { 3579 ce->engine->emit_bb_start = 3580 emit_bb_start_child_no_preempt_mid_batch; 3581 ce->engine->emit_fini_breadcrumb = 3582 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3583 ce->engine->emit_fini_breadcrumb_dw = 16; 3584 } 3585 3586 kfree(siblings); 3587 return parent; 3588 3589 unwind: 3590 if (parent) 3591 intel_context_put(parent); 3592 kfree(siblings); 3593 return err; 3594 } 3595 3596 static bool 3597 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3598 { 3599 
struct intel_engine_cs *sibling; 3600 intel_engine_mask_t tmp, mask = b->engine_mask; 3601 bool result = false; 3602 3603 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3604 result |= intel_engine_irq_enable(sibling); 3605 3606 return result; 3607 } 3608 3609 static void 3610 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3611 { 3612 struct intel_engine_cs *sibling; 3613 intel_engine_mask_t tmp, mask = b->engine_mask; 3614 3615 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3616 intel_engine_irq_disable(sibling); 3617 } 3618 3619 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3620 { 3621 int i; 3622 3623 /* 3624 * In GuC submission mode we do not know which physical engine a request 3625 * will be scheduled on, this creates a problem because the breadcrumb 3626 * interrupt is per physical engine. To work around this we attach 3627 * requests and direct all breadcrumb interrupts to the first instance 3628 * of an engine per class. In addition all breadcrumb interrupts are 3629 * enabled / disabled across an engine class in unison. 3630 */ 3631 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3632 struct intel_engine_cs *sibling = 3633 engine->gt->engine_class[engine->class][i]; 3634 3635 if (sibling) { 3636 if (engine->breadcrumbs != sibling->breadcrumbs) { 3637 intel_breadcrumbs_put(engine->breadcrumbs); 3638 engine->breadcrumbs = 3639 intel_breadcrumbs_get(sibling->breadcrumbs); 3640 } 3641 break; 3642 } 3643 } 3644 3645 if (engine->breadcrumbs) { 3646 engine->breadcrumbs->engine_mask |= engine->mask; 3647 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3648 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3649 } 3650 } 3651 3652 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3653 int prio) 3654 { 3655 struct intel_context *ce = request_to_scheduling_context(rq); 3656 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3657 3658 /* Short circuit function */ 3659 if (prio < I915_PRIORITY_NORMAL || 3660 rq->guc_prio == GUC_PRIO_FINI || 3661 (rq->guc_prio != GUC_PRIO_INIT && 3662 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3663 return; 3664 3665 spin_lock(&ce->guc_state.lock); 3666 if (rq->guc_prio != GUC_PRIO_FINI) { 3667 if (rq->guc_prio != GUC_PRIO_INIT) 3668 sub_context_inflight_prio(ce, rq->guc_prio); 3669 rq->guc_prio = new_guc_prio; 3670 add_context_inflight_prio(ce, rq->guc_prio); 3671 update_context_prio(ce); 3672 } 3673 spin_unlock(&ce->guc_state.lock); 3674 } 3675 3676 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3677 { 3678 struct intel_context *ce = request_to_scheduling_context(rq); 3679 3680 spin_lock(&ce->guc_state.lock); 3681 guc_prio_fini(rq, ce); 3682 spin_unlock(&ce->guc_state.lock); 3683 } 3684 3685 static void sanitize_hwsp(struct intel_engine_cs *engine) 3686 { 3687 struct intel_timeline *tl; 3688 3689 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3690 intel_timeline_reset_seqno(tl); 3691 } 3692 3693 static void guc_sanitize(struct intel_engine_cs *engine) 3694 { 3695 /* 3696 * Poison residual state on resume, in case the suspend didn't! 3697 * 3698 * We have to assume that across suspend/resume (or other loss 3699 * of control) that the contents of our pinned buffers has been 3700 * lost, replaced by garbage. Since this doesn't always happen, 3701 * let's poison such state so that we more quickly spot when 3702 * we falsely assume it has been preserved. 
3703 */ 3704 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3705 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3706 3707 /* 3708 * The kernel_context HWSP is stored in the status_page. As above, 3709 * that may be lost on resume/initialisation, and so we need to 3710 * reset the value in the HWSP. 3711 */ 3712 sanitize_hwsp(engine); 3713 3714 /* And scrub the dirty cachelines for the HWSP */ 3715 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 3716 3717 intel_engine_reset_pinned_contexts(engine); 3718 } 3719 3720 static void setup_hwsp(struct intel_engine_cs *engine) 3721 { 3722 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 3723 3724 ENGINE_WRITE_FW(engine, 3725 RING_HWS_PGA, 3726 i915_ggtt_offset(engine->status_page.vma)); 3727 } 3728 3729 static void start_engine(struct intel_engine_cs *engine) 3730 { 3731 ENGINE_WRITE_FW(engine, 3732 RING_MODE_GEN7, 3733 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 3734 3735 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 3736 ENGINE_POSTING_READ(engine, RING_MI_MODE); 3737 } 3738 3739 static int guc_resume(struct intel_engine_cs *engine) 3740 { 3741 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 3742 3743 intel_mocs_init_engine(engine); 3744 3745 intel_breadcrumbs_reset(engine->breadcrumbs); 3746 3747 setup_hwsp(engine); 3748 start_engine(engine); 3749 3750 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 3751 xehp_enable_ccs_engines(engine); 3752 3753 return 0; 3754 } 3755 3756 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 3757 { 3758 return !sched_engine->tasklet.callback; 3759 } 3760 3761 static void guc_set_default_submission(struct intel_engine_cs *engine) 3762 { 3763 engine->submit_request = guc_submit_request; 3764 } 3765 3766 static inline void guc_kernel_context_pin(struct intel_guc *guc, 3767 struct intel_context *ce) 3768 { 3769 /* 3770 * Note: we purposefully do not check the returns below because 3771 * the registration can only fail if a reset is just starting. 3772 * This is called at the end of reset so presumably another reset 3773 * isn't happening and even if it did this code would be run again. 3774 */ 3775 3776 if (context_guc_id_invalid(ce)) 3777 pin_guc_id(guc, ce); 3778 3779 try_context_registration(ce, true); 3780 } 3781 3782 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 3783 { 3784 struct intel_gt *gt = guc_to_gt(guc); 3785 struct intel_engine_cs *engine; 3786 enum intel_engine_id id; 3787 3788 /* make sure all descriptors are clean... */ 3789 xa_destroy(&guc->context_lookup); 3790 3791 /* 3792 * Some contexts might have been pinned before we enabled GuC 3793 * submission, so we need to add them to the GuC bookkeeping. 3794 * Also, after a reset of the GuC we want to make sure that the 3795 * information shared with GuC is properly reset. The kernel LRCs are 3796 * not attached to the gem_context, so they need to be added separately.
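	 * For example, each engine's kernel_context is pinned on
	 * engine->pinned_contexts_list at driver load, so the loop below
	 * re-runs guc_kernel_context_pin() for it here.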
3797 */ 3798 for_each_engine(engine, gt, id) { 3799 struct intel_context *ce; 3800 3801 list_for_each_entry(ce, &engine->pinned_contexts_list, 3802 pinned_contexts_link) 3803 guc_kernel_context_pin(guc, ce); 3804 } 3805 } 3806 3807 static void guc_release(struct intel_engine_cs *engine) 3808 { 3809 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3810 3811 intel_engine_cleanup_common(engine); 3812 lrc_fini_wa_ctx(engine); 3813 } 3814 3815 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 3816 { 3817 struct intel_engine_cs *e; 3818 intel_engine_mask_t tmp, mask = engine->mask; 3819 3820 for_each_engine_masked(e, engine->gt, mask, tmp) 3821 e->serial++; 3822 } 3823 3824 static void guc_default_vfuncs(struct intel_engine_cs *engine) 3825 { 3826 /* Default vfuncs which can be overridden by each engine. */ 3827 3828 engine->resume = guc_resume; 3829 3830 engine->cops = &guc_context_ops; 3831 engine->request_alloc = guc_request_alloc; 3832 engine->add_active_request = add_to_context; 3833 engine->remove_active_request = remove_from_context; 3834 3835 engine->sched_engine->schedule = i915_schedule; 3836 3837 engine->reset.prepare = guc_engine_reset_prepare; 3838 engine->reset.rewind = guc_rewind_nop; 3839 engine->reset.cancel = guc_reset_nop; 3840 engine->reset.finish = guc_reset_nop; 3841 3842 engine->emit_flush = gen8_emit_flush_xcs; 3843 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3844 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3845 if (GRAPHICS_VER(engine->i915) >= 12) { 3846 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3847 engine->emit_flush = gen12_emit_flush_xcs; 3848 } 3849 engine->set_default_submission = guc_set_default_submission; 3850 engine->busyness = guc_engine_busyness; 3851 3852 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3853 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3854 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3855 3856 /* Wa_14014475959:dg2 */ 3857 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 3858 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 3859 3860 /* 3861 * TODO: GuC supports timeslicing and semaphores as well, but they're 3862 * handled by the firmware so some minor tweaks are required before 3863 * enabling. 
3864 * 3865 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3866 */ 3867 3868 engine->emit_bb_start = gen8_emit_bb_start; 3869 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 3870 engine->emit_bb_start = gen125_emit_bb_start; 3871 } 3872 3873 static void rcs_submission_override(struct intel_engine_cs *engine) 3874 { 3875 switch (GRAPHICS_VER(engine->i915)) { 3876 case 12: 3877 engine->emit_flush = gen12_emit_flush_rcs; 3878 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3879 break; 3880 case 11: 3881 engine->emit_flush = gen11_emit_flush_rcs; 3882 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3883 break; 3884 default: 3885 engine->emit_flush = gen8_emit_flush_rcs; 3886 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3887 break; 3888 } 3889 } 3890 3891 static inline void guc_default_irqs(struct intel_engine_cs *engine) 3892 { 3893 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 3894 intel_engine_set_irq_handler(engine, cs_irq_handler); 3895 } 3896 3897 static void guc_sched_engine_destroy(struct kref *kref) 3898 { 3899 struct i915_sched_engine *sched_engine = 3900 container_of(kref, typeof(*sched_engine), ref); 3901 struct intel_guc *guc = sched_engine->private_data; 3902 3903 guc->sched_engine = NULL; 3904 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 3905 kfree(sched_engine); 3906 } 3907 3908 int intel_guc_submission_setup(struct intel_engine_cs *engine) 3909 { 3910 struct drm_i915_private *i915 = engine->i915; 3911 struct intel_guc *guc = &engine->gt->uc.guc; 3912 3913 /* 3914 * The setup relies on several assumptions (e.g. irqs always enabled) 3915 * that are only valid on gen11+ 3916 */ 3917 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 3918 3919 if (!guc->sched_engine) { 3920 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 3921 if (!guc->sched_engine) 3922 return -ENOMEM; 3923 3924 guc->sched_engine->schedule = i915_schedule; 3925 guc->sched_engine->disabled = guc_sched_engine_disabled; 3926 guc->sched_engine->private_data = guc; 3927 guc->sched_engine->destroy = guc_sched_engine_destroy; 3928 guc->sched_engine->bump_inflight_request_prio = 3929 guc_bump_inflight_request_prio; 3930 guc->sched_engine->retire_inflight_request_prio = 3931 guc_retire_inflight_request_prio; 3932 tasklet_setup(&guc->sched_engine->tasklet, 3933 guc_submission_tasklet); 3934 } 3935 i915_sched_engine_put(engine->sched_engine); 3936 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 3937 3938 guc_default_vfuncs(engine); 3939 guc_default_irqs(engine); 3940 guc_init_breadcrumbs(engine); 3941 3942 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 3943 rcs_submission_override(engine); 3944 3945 lrc_init_wa_ctx(engine); 3946 3947 /* Finally, take ownership and responsibility for cleanup! 
*/ 3948 engine->sanitize = guc_sanitize; 3949 engine->release = guc_release; 3950 3951 return 0; 3952 } 3953 3954 void intel_guc_submission_enable(struct intel_guc *guc) 3955 { 3956 guc_init_lrc_mapping(guc); 3957 guc_init_engine_stats(guc); 3958 } 3959 3960 void intel_guc_submission_disable(struct intel_guc *guc) 3961 { 3962 /* Note: By the time we're here, GuC may have already been reset */ 3963 } 3964 3965 static bool __guc_submission_supported(struct intel_guc *guc) 3966 { 3967 /* GuC submission is unavailable for pre-Gen11 */ 3968 return intel_guc_is_supported(guc) && 3969 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 3970 } 3971 3972 static bool __guc_submission_selected(struct intel_guc *guc) 3973 { 3974 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 3975 3976 if (!intel_guc_submission_is_supported(guc)) 3977 return false; 3978 3979 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 3980 } 3981 3982 void intel_guc_submission_init_early(struct intel_guc *guc) 3983 { 3984 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 3985 3986 spin_lock_init(&guc->submission_state.lock); 3987 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 3988 ida_init(&guc->submission_state.guc_ids); 3989 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 3990 INIT_WORK(&guc->submission_state.destroyed_worker, 3991 destroyed_worker_func); 3992 INIT_WORK(&guc->submission_state.reset_fail_worker, 3993 reset_fail_worker_func); 3994 3995 spin_lock_init(&guc->timestamp.lock); 3996 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 3997 3998 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 3999 guc->submission_supported = __guc_submission_supported(guc); 4000 guc->submission_selected = __guc_submission_selected(guc); 4001 } 4002 4003 static inline struct intel_context * 4004 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4005 { 4006 struct intel_context *ce; 4007 4008 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4009 drm_err(&guc_to_gt(guc)->i915->drm, 4010 "Invalid ctx_id %u\n", ctx_id); 4011 return NULL; 4012 } 4013 4014 ce = __get_context(guc, ctx_id); 4015 if (unlikely(!ce)) { 4016 drm_err(&guc_to_gt(guc)->i915->drm, 4017 "Context is NULL, ctx_id %u\n", ctx_id); 4018 return NULL; 4019 } 4020 4021 if (unlikely(intel_context_is_child(ce))) { 4022 drm_err(&guc_to_gt(guc)->i915->drm, 4023 "Context is child, ctx_id %u\n", ctx_id); 4024 return NULL; 4025 } 4026 4027 return ce; 4028 } 4029 4030 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4031 const u32 *msg, 4032 u32 len) 4033 { 4034 struct intel_context *ce; 4035 u32 ctx_id; 4036 4037 if (unlikely(len < 1)) { 4038 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4039 return -EPROTO; 4040 } 4041 ctx_id = msg[0]; 4042 4043 ce = g2h_context_lookup(guc, ctx_id); 4044 if (unlikely(!ce)) 4045 return -EPROTO; 4046 4047 trace_intel_context_deregister_done(ce); 4048 4049 #ifdef CONFIG_DRM_I915_SELFTEST 4050 if (unlikely(ce->drop_deregister)) { 4051 ce->drop_deregister = false; 4052 return 0; 4053 } 4054 #endif 4055 4056 if (context_wait_for_deregister_to_register(ce)) { 4057 struct intel_runtime_pm *runtime_pm = 4058 &ce->engine->gt->i915->runtime_pm; 4059 intel_wakeref_t wakeref; 4060 4061 /* 4062 * Previous owner of this guc_id has been deregistered, now safe 4063 * register this context. 
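		 *
		 * An illustrative walk through the guc_id steal case that ends
		 * up here (a sketch of the existing flow, not extra protocol):
		 *   1. a new context steals a guc_id still registered to an
		 *      unpinned context;
		 *   2. an H2G deregister is sent for that guc_id and requests on
		 *      the new owner are stalled on a fence;
		 *   3. this G2H confirms the deregistration;
		 *   4. register_context() below registers the new owner and
		 *      guc_signal_context_fence() releases the stalled requests.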
4064 */ 4065 with_intel_runtime_pm(runtime_pm, wakeref) 4066 register_context(ce, true); 4067 guc_signal_context_fence(ce); 4068 intel_context_put(ce); 4069 } else if (context_destroyed(ce)) { 4070 /* Context has been destroyed */ 4071 intel_gt_pm_put_async(guc_to_gt(guc)); 4072 release_guc_id(guc, ce); 4073 __guc_context_destroy(ce); 4074 } 4075 4076 decr_outstanding_submission_g2h(guc); 4077 4078 return 0; 4079 } 4080 4081 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4082 const u32 *msg, 4083 u32 len) 4084 { 4085 struct intel_context *ce; 4086 unsigned long flags; 4087 u32 ctx_id; 4088 4089 if (unlikely(len < 2)) { 4090 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4091 return -EPROTO; 4092 } 4093 ctx_id = msg[0]; 4094 4095 ce = g2h_context_lookup(guc, ctx_id); 4096 if (unlikely(!ce)) 4097 return -EPROTO; 4098 4099 if (unlikely(context_destroyed(ce) || 4100 (!context_pending_enable(ce) && 4101 !context_pending_disable(ce)))) { 4102 drm_err(&guc_to_gt(guc)->i915->drm, 4103 "Bad context sched_state 0x%x, ctx_id %u\n", 4104 ce->guc_state.sched_state, ctx_id); 4105 return -EPROTO; 4106 } 4107 4108 trace_intel_context_sched_done(ce); 4109 4110 if (context_pending_enable(ce)) { 4111 #ifdef CONFIG_DRM_I915_SELFTEST 4112 if (unlikely(ce->drop_schedule_enable)) { 4113 ce->drop_schedule_enable = false; 4114 return 0; 4115 } 4116 #endif 4117 4118 spin_lock_irqsave(&ce->guc_state.lock, flags); 4119 clr_context_pending_enable(ce); 4120 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4121 } else if (context_pending_disable(ce)) { 4122 bool banned; 4123 4124 #ifdef CONFIG_DRM_I915_SELFTEST 4125 if (unlikely(ce->drop_schedule_disable)) { 4126 ce->drop_schedule_disable = false; 4127 return 0; 4128 } 4129 #endif 4130 4131 /* 4132 * Unpin must be done before __guc_signal_context_fence, 4133 * otherwise a race exists between the requests getting 4134 * submitted + retired before this unpin completes resulting in 4135 * the pin_count going to zero and the context still being 4136 * enabled. 
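		 *
		 * Sketch of the bad interleaving being avoided (illustrative
		 * only):
		 *   1. fence signalled first, so the stalled requests resume;
		 *   2. those requests are submitted and retired;
		 *   3. pin_count reaches zero before this unpin has run, with
		 *      the context still marked as enabled.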
4137 */ 4138 intel_context_sched_disable_unpin(ce); 4139 4140 spin_lock_irqsave(&ce->guc_state.lock, flags); 4141 banned = context_banned(ce); 4142 clr_context_banned(ce); 4143 clr_context_pending_disable(ce); 4144 __guc_signal_context_fence(ce); 4145 guc_blocked_fence_complete(ce); 4146 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4147 4148 if (banned) { 4149 guc_cancel_context_requests(ce); 4150 intel_engine_signal_breadcrumbs(ce->engine); 4151 } 4152 } 4153 4154 decr_outstanding_submission_g2h(guc); 4155 intel_context_put(ce); 4156 4157 return 0; 4158 } 4159 4160 static void capture_error_state(struct intel_guc *guc, 4161 struct intel_context *ce) 4162 { 4163 struct intel_gt *gt = guc_to_gt(guc); 4164 struct drm_i915_private *i915 = gt->i915; 4165 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4166 intel_wakeref_t wakeref; 4167 4168 intel_engine_set_hung_context(engine, ce); 4169 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4170 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4171 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4172 } 4173 4174 static void guc_context_replay(struct intel_context *ce) 4175 { 4176 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4177 4178 __guc_reset_context(ce, ce->engine->mask); 4179 tasklet_hi_schedule(&sched_engine->tasklet); 4180 } 4181 4182 static void guc_handle_context_reset(struct intel_guc *guc, 4183 struct intel_context *ce) 4184 { 4185 trace_intel_context_reset(ce); 4186 4187 if (likely(!intel_context_is_banned(ce))) { 4188 capture_error_state(guc, ce); 4189 guc_context_replay(ce); 4190 } else { 4191 drm_info(&guc_to_gt(guc)->i915->drm, 4192 "Ignoring context reset notification of banned context 0x%04X on %s", 4193 ce->guc_id.id, ce->engine->name); 4194 } 4195 } 4196 4197 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4198 const u32 *msg, u32 len) 4199 { 4200 struct intel_context *ce; 4201 unsigned long flags; 4202 int ctx_id; 4203 4204 if (unlikely(len != 1)) { 4205 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4206 return -EPROTO; 4207 } 4208 4209 ctx_id = msg[0]; 4210 4211 /* 4212 * The context lookup uses the xarray but lookups only require an RCU lock 4213 * not the full spinlock. So take the lock explicitly and keep it until the 4214 * context has been reference count locked to ensure it can't be destroyed 4215 * asynchronously until the reset is done. 
4216 */ 4217 xa_lock_irqsave(&guc->context_lookup, flags); 4218 ce = g2h_context_lookup(guc, ctx_id); 4219 if (ce) 4220 intel_context_get(ce); 4221 xa_unlock_irqrestore(&guc->context_lookup, flags); 4222 4223 if (unlikely(!ce)) 4224 return -EPROTO; 4225 4226 guc_handle_context_reset(guc, ce); 4227 intel_context_put(ce); 4228 4229 return 0; 4230 } 4231 4232 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4233 const u32 *msg, u32 len) 4234 { 4235 u32 status; 4236 4237 if (unlikely(len != 1)) { 4238 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4239 return -EPROTO; 4240 } 4241 4242 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4243 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4244 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4245 4246 intel_guc_capture_process(guc); 4247 4248 return 0; 4249 } 4250 4251 struct intel_engine_cs * 4252 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4253 { 4254 struct intel_gt *gt = guc_to_gt(guc); 4255 u8 engine_class = guc_class_to_engine_class(guc_class); 4256 4257 /* Class index is checked in class converter */ 4258 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4259 4260 return gt->engine_class[engine_class][instance]; 4261 } 4262 4263 static void reset_fail_worker_func(struct work_struct *w) 4264 { 4265 struct intel_guc *guc = container_of(w, struct intel_guc, 4266 submission_state.reset_fail_worker); 4267 struct intel_gt *gt = guc_to_gt(guc); 4268 intel_engine_mask_t reset_fail_mask; 4269 unsigned long flags; 4270 4271 spin_lock_irqsave(&guc->submission_state.lock, flags); 4272 reset_fail_mask = guc->submission_state.reset_fail_mask; 4273 guc->submission_state.reset_fail_mask = 0; 4274 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4275 4276 if (likely(reset_fail_mask)) 4277 intel_gt_handle_error(gt, reset_fail_mask, 4278 I915_ERROR_CAPTURE, 4279 "GuC failed to reset engine mask=0x%x\n", 4280 reset_fail_mask); 4281 } 4282 4283 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4284 const u32 *msg, u32 len) 4285 { 4286 struct intel_engine_cs *engine; 4287 struct intel_gt *gt = guc_to_gt(guc); 4288 u8 guc_class, instance; 4289 u32 reason; 4290 unsigned long flags; 4291 4292 if (unlikely(len != 3)) { 4293 drm_err(>->i915->drm, "Invalid length %u", len); 4294 return -EPROTO; 4295 } 4296 4297 guc_class = msg[0]; 4298 instance = msg[1]; 4299 reason = msg[2]; 4300 4301 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4302 if (unlikely(!engine)) { 4303 drm_err(>->i915->drm, 4304 "Invalid engine %d:%d", guc_class, instance); 4305 return -EPROTO; 4306 } 4307 4308 /* 4309 * This is an unexpected failure of a hardware feature. So, log a real 4310 * error message not just the informational that comes with the reset. 4311 */ 4312 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4313 guc_class, instance, engine->name, reason); 4314 4315 spin_lock_irqsave(&guc->submission_state.lock, flags); 4316 guc->submission_state.reset_fail_mask |= engine->mask; 4317 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4318 4319 /* 4320 * A GT reset flushes this worker queue (G2H handler) so we must use 4321 * another worker to trigger a GT reset. 
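	 *
	 * Triggering the reset synchronously from this handler would deadlock:
	 * the reset path flushes the workqueue this handler runs on and would
	 * end up waiting on itself. Deferring to reset_fail_worker_func() on
	 * system_unbound_wq breaks that cycle.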
4322 */ 4323 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4324 4325 return 0; 4326 } 4327 4328 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4329 { 4330 struct intel_guc *guc = &engine->gt->uc.guc; 4331 struct intel_context *ce; 4332 struct i915_request *rq; 4333 unsigned long index; 4334 unsigned long flags; 4335 4336 /* Reset called during driver load? GuC not yet initialised! */ 4337 if (unlikely(!guc_submission_initialized(guc))) 4338 return; 4339 4340 xa_lock_irqsave(&guc->context_lookup, flags); 4341 xa_for_each(&guc->context_lookup, index, ce) { 4342 if (!kref_get_unless_zero(&ce->ref)) 4343 continue; 4344 4345 xa_unlock(&guc->context_lookup); 4346 4347 if (!intel_context_is_pinned(ce)) 4348 goto next; 4349 4350 if (intel_engine_is_virtual(ce->engine)) { 4351 if (!(ce->engine->mask & engine->mask)) 4352 goto next; 4353 } else { 4354 if (ce->engine != engine) 4355 goto next; 4356 } 4357 4358 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4359 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4360 continue; 4361 4362 intel_engine_set_hung_context(engine, ce); 4363 4364 /* Can only cope with one hang at a time... */ 4365 intel_context_put(ce); 4366 xa_lock(&guc->context_lookup); 4367 goto done; 4368 } 4369 next: 4370 intel_context_put(ce); 4371 xa_lock(&guc->context_lookup); 4372 } 4373 done: 4374 xa_unlock_irqrestore(&guc->context_lookup, flags); 4375 } 4376 4377 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4378 struct i915_request *hung_rq, 4379 struct drm_printer *m) 4380 { 4381 struct intel_guc *guc = &engine->gt->uc.guc; 4382 struct intel_context *ce; 4383 unsigned long index; 4384 unsigned long flags; 4385 4386 /* Reset called during driver load? GuC not yet initialised! 
*/ 4387 if (unlikely(!guc_submission_initialized(guc))) 4388 return; 4389 4390 xa_lock_irqsave(&guc->context_lookup, flags); 4391 xa_for_each(&guc->context_lookup, index, ce) { 4392 if (!kref_get_unless_zero(&ce->ref)) 4393 continue; 4394 4395 xa_unlock(&guc->context_lookup); 4396 4397 if (!intel_context_is_pinned(ce)) 4398 goto next; 4399 4400 if (intel_engine_is_virtual(ce->engine)) { 4401 if (!(ce->engine->mask & engine->mask)) 4402 goto next; 4403 } else { 4404 if (ce->engine != engine) 4405 goto next; 4406 } 4407 4408 spin_lock(&ce->guc_state.lock); 4409 intel_engine_dump_active_requests(&ce->guc_state.requests, 4410 hung_rq, m); 4411 spin_unlock(&ce->guc_state.lock); 4412 4413 next: 4414 intel_context_put(ce); 4415 xa_lock(&guc->context_lookup); 4416 } 4417 xa_unlock_irqrestore(&guc->context_lookup, flags); 4418 } 4419 4420 void intel_guc_submission_print_info(struct intel_guc *guc, 4421 struct drm_printer *p) 4422 { 4423 struct i915_sched_engine *sched_engine = guc->sched_engine; 4424 struct rb_node *rb; 4425 unsigned long flags; 4426 4427 if (!sched_engine) 4428 return; 4429 4430 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4431 atomic_read(&guc->outstanding_submission_g2h)); 4432 drm_printf(p, "GuC tasklet count: %u\n\n", 4433 atomic_read(&sched_engine->tasklet.count)); 4434 4435 spin_lock_irqsave(&sched_engine->lock, flags); 4436 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4437 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4438 struct i915_priolist *pl = to_priolist(rb); 4439 struct i915_request *rq; 4440 4441 priolist_for_each_request(rq, pl) 4442 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4443 rq->context->guc_id.id, 4444 rq->fence.seqno); 4445 } 4446 spin_unlock_irqrestore(&sched_engine->lock, flags); 4447 drm_printf(p, "\n"); 4448 } 4449 4450 static inline void guc_log_context_priority(struct drm_printer *p, 4451 struct intel_context *ce) 4452 { 4453 int i; 4454 4455 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4456 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4457 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4458 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4459 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4460 i, ce->guc_state.prio_count[i]); 4461 } 4462 drm_printf(p, "\n"); 4463 } 4464 4465 static inline void guc_log_context(struct drm_printer *p, 4466 struct intel_context *ce) 4467 { 4468 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4469 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4470 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4471 ce->ring->head, 4472 ce->lrc_reg_state[CTX_RING_HEAD]); 4473 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4474 ce->ring->tail, 4475 ce->lrc_reg_state[CTX_RING_TAIL]); 4476 drm_printf(p, "\t\tContext Pin Count: %u\n", 4477 atomic_read(&ce->pin_count)); 4478 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4479 atomic_read(&ce->guc_id.ref)); 4480 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4481 ce->guc_state.sched_state); 4482 } 4483 4484 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4485 struct drm_printer *p) 4486 { 4487 struct intel_context *ce; 4488 unsigned long index; 4489 unsigned long flags; 4490 4491 xa_lock_irqsave(&guc->context_lookup, flags); 4492 xa_for_each(&guc->context_lookup, index, ce) { 4493 GEM_BUG_ON(intel_context_is_child(ce)); 4494 4495 guc_log_context(p, ce); 4496 guc_log_context_priority(p, ce); 4497 4498 if (intel_context_is_parent(ce)) { 4499 struct 
guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); 4500 struct intel_context *child; 4501 4502 drm_printf(p, "\t\tNumber children: %u\n", 4503 ce->parallel.number_children); 4504 drm_printf(p, "\t\tWQI Head: %u\n", 4505 READ_ONCE(wq_desc->head)); 4506 drm_printf(p, "\t\tWQI Tail: %u\n", 4507 READ_ONCE(wq_desc->tail)); 4508 drm_printf(p, "\t\tWQI Status: %u\n\n", 4509 READ_ONCE(wq_desc->wq_status)); 4510 4511 if (ce->engine->emit_bb_start == 4512 emit_bb_start_parent_no_preempt_mid_batch) { 4513 u8 i; 4514 4515 drm_printf(p, "\t\tChildren Go: %u\n\n", 4516 get_children_go_value(ce)); 4517 for (i = 0; i < ce->parallel.number_children; ++i) 4518 drm_printf(p, "\t\tChildren Join: %u\n", 4519 get_children_join_value(ce, i)); 4520 } 4521 4522 for_each_child(ce, child) 4523 guc_log_context(p, child); 4524 } 4525 } 4526 xa_unlock_irqrestore(&guc->context_lookup, flags); 4527 } 4528 4529 static inline u32 get_children_go_addr(struct intel_context *ce) 4530 { 4531 GEM_BUG_ON(!intel_context_is_parent(ce)); 4532 4533 return i915_ggtt_offset(ce->state) + 4534 __get_parent_scratch_offset(ce) + 4535 offsetof(struct parent_scratch, go.semaphore); 4536 } 4537 4538 static inline u32 get_children_join_addr(struct intel_context *ce, 4539 u8 child_index) 4540 { 4541 GEM_BUG_ON(!intel_context_is_parent(ce)); 4542 4543 return i915_ggtt_offset(ce->state) + 4544 __get_parent_scratch_offset(ce) + 4545 offsetof(struct parent_scratch, join[child_index].semaphore); 4546 } 4547 4548 #define PARENT_GO_BB 1 4549 #define PARENT_GO_FINI_BREADCRUMB 0 4550 #define CHILD_GO_BB 1 4551 #define CHILD_GO_FINI_BREADCRUMB 0 4552 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4553 u64 offset, u32 len, 4554 const unsigned int flags) 4555 { 4556 struct intel_context *ce = rq->context; 4557 u32 *cs; 4558 u8 i; 4559 4560 GEM_BUG_ON(!intel_context_is_parent(ce)); 4561 4562 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4563 if (IS_ERR(cs)) 4564 return PTR_ERR(cs); 4565 4566 /* Wait on children */ 4567 for (i = 0; i < ce->parallel.number_children; ++i) { 4568 *cs++ = (MI_SEMAPHORE_WAIT | 4569 MI_SEMAPHORE_GLOBAL_GTT | 4570 MI_SEMAPHORE_POLL | 4571 MI_SEMAPHORE_SAD_EQ_SDD); 4572 *cs++ = PARENT_GO_BB; 4573 *cs++ = get_children_join_addr(ce, i); 4574 *cs++ = 0; 4575 } 4576 4577 /* Turn off preemption */ 4578 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4579 *cs++ = MI_NOOP; 4580 4581 /* Tell children go */ 4582 cs = gen8_emit_ggtt_write(cs, 4583 CHILD_GO_BB, 4584 get_children_go_addr(ce), 4585 0); 4586 4587 /* Jump to batch */ 4588 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4589 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 4590 *cs++ = lower_32_bits(offset); 4591 *cs++ = upper_32_bits(offset); 4592 *cs++ = MI_NOOP; 4593 4594 intel_ring_advance(rq, cs); 4595 4596 return 0; 4597 } 4598 4599 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4600 u64 offset, u32 len, 4601 const unsigned int flags) 4602 { 4603 struct intel_context *ce = rq->context; 4604 struct intel_context *parent = intel_context_to_parent(ce); 4605 u32 *cs; 4606 4607 GEM_BUG_ON(!intel_context_is_child(ce)); 4608 4609 cs = intel_ring_begin(rq, 12); 4610 if (IS_ERR(cs)) 4611 return PTR_ERR(cs); 4612 4613 /* Signal parent */ 4614 cs = gen8_emit_ggtt_write(cs, 4615 PARENT_GO_BB, 4616 get_children_join_addr(parent, 4617 ce->parallel.child_index), 4618 0); 4619 4620 /* Wait on parent for go */ 4621 *cs++ = (MI_SEMAPHORE_WAIT | 4622 MI_SEMAPHORE_GLOBAL_GTT | 4623 MI_SEMAPHORE_POLL | 4624 MI_SEMAPHORE_SAD_EQ_SDD); 4625 *cs++ = CHILD_GO_BB; 4626 *cs++ = get_children_go_addr(parent); 4627 *cs++ = 0; 4628 4629 /* Turn off preemption */ 4630 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4631 4632 /* Jump to batch */ 4633 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4634 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4635 *cs++ = lower_32_bits(offset); 4636 *cs++ = upper_32_bits(offset); 4637 4638 intel_ring_advance(rq, cs); 4639 4640 return 0; 4641 } 4642 4643 static u32 * 4644 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4645 u32 *cs) 4646 { 4647 struct intel_context *ce = rq->context; 4648 u8 i; 4649 4650 GEM_BUG_ON(!intel_context_is_parent(ce)); 4651 4652 /* Wait on children */ 4653 for (i = 0; i < ce->parallel.number_children; ++i) { 4654 *cs++ = (MI_SEMAPHORE_WAIT | 4655 MI_SEMAPHORE_GLOBAL_GTT | 4656 MI_SEMAPHORE_POLL | 4657 MI_SEMAPHORE_SAD_EQ_SDD); 4658 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4659 *cs++ = get_children_join_addr(ce, i); 4660 *cs++ = 0; 4661 } 4662 4663 /* Turn on preemption */ 4664 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4665 *cs++ = MI_NOOP; 4666 4667 /* Tell children go */ 4668 cs = gen8_emit_ggtt_write(cs, 4669 CHILD_GO_FINI_BREADCRUMB, 4670 get_children_go_addr(ce), 4671 0); 4672 4673 return cs; 4674 } 4675 4676 /* 4677 * If this is true, a submission of multi-lrc requests had an error and the 4678 * requests need to be skipped. The front end (execbuf IOCTL) should've called 4679 * i915_request_skip which squashes the BB but we still need to emit the fini 4680 * breadcrumb seqno write. At this point we don't know how many of the 4681 * requests in the multi-lrc submission were generated so we can't do the 4682 * handshake between the parent and children (e.g. if 4 requests should be 4683 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 4684 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 4685 * has occurred on any of the requests in submission / relationship.
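 *
 * A worked example of the dword accounting used below, assuming a parent with
 * two children (ce->parallel.number_children == 2, as set up in
 * guc_create_parallel() above):
 *
 *   parent: emit_fini_breadcrumb_dw = 12 + 4 * 2 = 20 dwords,
 *           handshake portion NOPed on skip = 20 - NON_SKIP_LEN = 14 dwords
 *   child:  emit_fini_breadcrumb_dw = 16 dwords,
 *           handshake portion NOPed on skip = 16 - NON_SKIP_LEN = 10 dwords
 *
 * The NON_SKIP_LEN (6) dwords that are always emitted are the seqno ggtt
 * write (4) plus MI_USER_INTERRUPT and MI_NOOP (2).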
4686 */ 4687 static inline bool skip_handshake(struct i915_request *rq) 4688 { 4689 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4690 } 4691 4692 #define NON_SKIP_LEN 6 4693 static u32 * 4694 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4695 u32 *cs) 4696 { 4697 struct intel_context *ce = rq->context; 4698 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4699 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4700 4701 GEM_BUG_ON(!intel_context_is_parent(ce)); 4702 4703 if (unlikely(skip_handshake(rq))) { 4704 /* 4705 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 4706 * the NON_SKIP_LEN comes from the length of the emits below. 4707 */ 4708 memset(cs, 0, sizeof(u32) * 4709 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4710 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4711 } else { 4712 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 4713 } 4714 4715 /* Emit fini breadcrumb */ 4716 before_fini_breadcrumb_user_interrupt_cs = cs; 4717 cs = gen8_emit_ggtt_write(cs, 4718 rq->fence.seqno, 4719 i915_request_active_timeline(rq)->hwsp_offset, 4720 0); 4721 4722 /* User interrupt */ 4723 *cs++ = MI_USER_INTERRUPT; 4724 *cs++ = MI_NOOP; 4725 4726 /* Ensure our math for skip + emit is correct */ 4727 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4728 cs); 4729 GEM_BUG_ON(start_fini_breadcrumb_cs + 4730 ce->engine->emit_fini_breadcrumb_dw != cs); 4731 4732 rq->tail = intel_ring_offset(rq, cs); 4733 4734 return cs; 4735 } 4736 4737 static u32 * 4738 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4739 u32 *cs) 4740 { 4741 struct intel_context *ce = rq->context; 4742 struct intel_context *parent = intel_context_to_parent(ce); 4743 4744 GEM_BUG_ON(!intel_context_is_child(ce)); 4745 4746 /* Turn on preemption */ 4747 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4748 *cs++ = MI_NOOP; 4749 4750 /* Signal parent */ 4751 cs = gen8_emit_ggtt_write(cs, 4752 PARENT_GO_FINI_BREADCRUMB, 4753 get_children_join_addr(parent, 4754 ce->parallel.child_index), 4755 0); 4756 4757 /* Wait on parent for go */ 4758 *cs++ = (MI_SEMAPHORE_WAIT | 4759 MI_SEMAPHORE_GLOBAL_GTT | 4760 MI_SEMAPHORE_POLL | 4761 MI_SEMAPHORE_SAD_EQ_SDD); 4762 *cs++ = CHILD_GO_FINI_BREADCRUMB; 4763 *cs++ = get_children_go_addr(parent); 4764 *cs++ = 0; 4765 4766 return cs; 4767 } 4768 4769 static u32 * 4770 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4771 u32 *cs) 4772 { 4773 struct intel_context *ce = rq->context; 4774 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4775 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4776 4777 GEM_BUG_ON(!intel_context_is_child(ce)); 4778 4779 if (unlikely(skip_handshake(rq))) { 4780 /* 4781 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 4782 * the NON_SKIP_LEN comes from the length of the emits below.
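		 * For a child that is 16 - NON_SKIP_LEN = 10 dwords: the
		 * arbitration re-enable (2), the parent signal (4) and the wait
		 * on the parent (4) emitted by the helper above.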
4783 */ 4784 memset(cs, 0, sizeof(u32) * 4785 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4786 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4787 } else { 4788 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 4789 } 4790 4791 /* Emit fini breadcrumb */ 4792 before_fini_breadcrumb_user_interrupt_cs = cs; 4793 cs = gen8_emit_ggtt_write(cs, 4794 rq->fence.seqno, 4795 i915_request_active_timeline(rq)->hwsp_offset, 4796 0); 4797 4798 /* User interrupt */ 4799 *cs++ = MI_USER_INTERRUPT; 4800 *cs++ = MI_NOOP; 4801 4802 /* Ensure our math for skip + emit is correct */ 4803 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4804 cs); 4805 GEM_BUG_ON(start_fini_breadcrumb_cs + 4806 ce->engine->emit_fini_breadcrumb_dw != cs); 4807 4808 rq->tail = intel_ring_offset(rq, cs); 4809 4810 return cs; 4811 } 4812 4813 #undef NON_SKIP_LEN 4814 4815 static struct intel_context * 4816 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 4817 unsigned long flags) 4818 { 4819 struct guc_virtual_engine *ve; 4820 struct intel_guc *guc; 4821 unsigned int n; 4822 int err; 4823 4824 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 4825 if (!ve) 4826 return ERR_PTR(-ENOMEM); 4827 4828 guc = &siblings[0]->gt->uc.guc; 4829 4830 ve->base.i915 = siblings[0]->i915; 4831 ve->base.gt = siblings[0]->gt; 4832 ve->base.uncore = siblings[0]->uncore; 4833 ve->base.id = -1; 4834 4835 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 4836 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4837 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4838 ve->base.saturated = ALL_ENGINES; 4839 4840 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 4841 4842 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 4843 4844 ve->base.cops = &virtual_guc_context_ops; 4845 ve->base.request_alloc = guc_request_alloc; 4846 ve->base.bump_serial = virtual_guc_bump_serial; 4847 4848 ve->base.submit_request = guc_submit_request; 4849 4850 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 4851 4852 intel_context_init(&ve->context, &ve->base); 4853 4854 for (n = 0; n < count; n++) { 4855 struct intel_engine_cs *sibling = siblings[n]; 4856 4857 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 4858 if (sibling->mask & ve->base.mask) { 4859 DRM_DEBUG("duplicate %s entry in load balancer\n", 4860 sibling->name); 4861 err = -EINVAL; 4862 goto err_put; 4863 } 4864 4865 ve->base.mask |= sibling->mask; 4866 ve->base.logical_mask |= sibling->logical_mask; 4867 4868 if (n != 0 && ve->base.class != sibling->class) { 4869 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 4870 sibling->class, ve->base.class); 4871 err = -EINVAL; 4872 goto err_put; 4873 } else if (n == 0) { 4874 ve->base.class = sibling->class; 4875 ve->base.uabi_class = sibling->uabi_class; 4876 snprintf(ve->base.name, sizeof(ve->base.name), 4877 "v%dx%d", ve->base.class, count); 4878 ve->base.context_size = sibling->context_size; 4879 4880 ve->base.add_active_request = 4881 sibling->add_active_request; 4882 ve->base.remove_active_request = 4883 sibling->remove_active_request; 4884 ve->base.emit_bb_start = sibling->emit_bb_start; 4885 ve->base.emit_flush = sibling->emit_flush; 4886 ve->base.emit_init_breadcrumb = 4887 sibling->emit_init_breadcrumb; 4888 ve->base.emit_fini_breadcrumb = 4889 sibling->emit_fini_breadcrumb; 4890 ve->base.emit_fini_breadcrumb_dw = 4891 sibling->emit_fini_breadcrumb_dw; 4892 ve->base.breadcrumbs = 4893 intel_breadcrumbs_get(sibling->breadcrumbs); 4894 4895 
ve->base.flags |= sibling->flags; 4896 4897 ve->base.props.timeslice_duration_ms = 4898 sibling->props.timeslice_duration_ms; 4899 ve->base.props.preempt_timeout_ms = 4900 sibling->props.preempt_timeout_ms; 4901 } 4902 } 4903 4904 return &ve->context; 4905 4906 err_put: 4907 intel_context_put(&ve->context); 4908 return ERR_PTR(err); 4909 } 4910 4911 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 4912 { 4913 struct intel_engine_cs *engine; 4914 intel_engine_mask_t tmp, mask = ve->mask; 4915 4916 for_each_engine_masked(engine, ve->gt, mask, tmp) 4917 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 4918 return true; 4919 4920 return false; 4921 } 4922 4923 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 4924 #include "selftest_guc.c" 4925 #include "selftest_guc_multi_lrc.c" 4926 #endif 4927
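/*
 * Illustrative summary (a sketch, not functional code) of the
 * no-preempt-mid-batch handshake emitted by
 * emit_bb_start_parent_no_preempt_mid_batch() and
 * emit_bb_start_child_no_preempt_mid_batch() above, for a parent P with
 * children C0..Cn:
 *
 *	C0..Cn: write PARENT_GO_BB to their join semaphore (signal the parent)
 *	P:      wait for PARENT_GO_BB from every child
 *	P:      MI_ARB_ON_OFF | MI_ARB_DISABLE (arbitration off)
 *	P:      write CHILD_GO_BB to the go semaphore (release the children)
 *	C0..Cn: wait for CHILD_GO_BB, then disable arbitration
 *	all:    MI_BATCH_BUFFER_START into their batch buffers
 *
 * The fini breadcrumbs repeat the handshake with the *_FINI_BREADCRUMB
 * values but re-enable arbitration, giving the GuC a window to preempt
 * between each set of BBs.
 */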