1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2014 Intel Corporation 4 */ 5 6 #include <linux/circ_buf.h> 7 8 #include "gem/i915_gem_context.h" 9 #include "gt/gen8_engine_cs.h" 10 #include "gt/intel_breadcrumbs.h" 11 #include "gt/intel_context.h" 12 #include "gt/intel_engine_heartbeat.h" 13 #include "gt/intel_engine_pm.h" 14 #include "gt/intel_engine_regs.h" 15 #include "gt/intel_gpu_commands.h" 16 #include "gt/intel_gt.h" 17 #include "gt/intel_gt_clock_utils.h" 18 #include "gt/intel_gt_irq.h" 19 #include "gt/intel_gt_pm.h" 20 #include "gt/intel_gt_regs.h" 21 #include "gt/intel_gt_requests.h" 22 #include "gt/intel_lrc.h" 23 #include "gt/intel_lrc_reg.h" 24 #include "gt/intel_mocs.h" 25 #include "gt/intel_ring.h" 26 27 #include "intel_guc_ads.h" 28 #include "intel_guc_capture.h" 29 #include "intel_guc_submission.h" 30 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 34 /** 35 * DOC: GuC-based command submission 36 * 37 * The Scratch registers: 38 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes 39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then 40 * triggers an interrupt on the GuC via another register write (0xC4C8). 41 * Firmware writes a success/fail code back to the action register after 42 * processes the request. The kernel driver polls waiting for this update and 43 * then proceeds. 44 * 45 * Command Transport buffers (CTBs): 46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host 47 * - G2H) are a message interface between the i915 and GuC. 48 * 49 * Context registration: 50 * Before a context can be submitted it must be registered with the GuC via a 51 * H2G. A unique guc_id is associated with each context. The context is either 52 * registered at request creation time (normal operation) or at submission time 53 * (abnormal operation, e.g. after a reset). 54 * 55 * Context submission: 56 * The i915 updates the LRC tail value in memory. The i915 must enable the 57 * scheduling of the context within the GuC for the GuC to actually consider it. 58 * Therefore, the first time a disabled context is submitted we use a schedule 59 * enable H2G, while follow up submissions are done via the context submit H2G, 60 * which informs the GuC that a previously enabled context has new work 61 * available. 62 * 63 * Context unpin: 64 * To unpin a context a H2G is used to disable scheduling. When the 65 * corresponding G2H returns indicating the scheduling disable operation has 66 * completed it is safe to unpin the context. While a disable is in flight it 67 * isn't safe to resubmit the context so a fence is used to stall all future 68 * requests of that context until the G2H is returned. 69 * 70 * Context deregistration: 71 * Before a context can be destroyed or if we steal its guc_id we must 72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't 73 * safe to submit anything to this guc_id until the deregister completes so a 74 * fence is used to stall all requests associated with this guc_id until the 75 * corresponding G2H returns indicating the guc_id has been deregistered. 76 * 77 * submission_state.guc_ids: 78 * Unique number associated with private GuC context data passed in during 79 * context registration / submission / deregistration. 64k available. Simple ida 80 * is used for allocation. 81 * 82 * Stealing guc_ids: 83 * If no guc_ids are available they can be stolen from another context at 84 * request creation time if that context is unpinned. If a guc_id can't be found 85 * we punt this problem to the user as we believe this is near impossible to hit 86 * during normal use cases. 87 * 88 * Locking: 89 * In the GuC submission code we have 3 basic spin locks which protect 90 * everything. Details about each below. 91 * 92 * sched_engine->lock 93 * This is the submission lock for all contexts that share an i915 schedule 94 * engine (sched_engine), thus only one of the contexts which share a 95 * sched_engine can be submitting at a time. Currently only one sched_engine is 96 * used for all of GuC submission but that could change in the future. 97 * 98 * guc->submission_state.lock 99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts 100 * list. 101 * 102 * ce->guc_state.lock 103 * Protects everything under ce->guc_state. Ensures that a context is in the 104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable 105 * on a disabled context (bad idea), we don't issue a schedule enable when a 106 * schedule disable is in flight, etc... Also protects list of inflight requests 107 * on the context and the priority management state. Lock is individual to each 108 * context. 109 * 110 * Lock ordering rules: 111 * sched_engine->lock -> ce->guc_state.lock 112 * guc->submission_state.lock -> ce->guc_state.lock 113 * 114 * Reset races: 115 * When a full GT reset is triggered it is assumed that some G2H responses to 116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be 117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy 118 * contexts, release guc_ids, etc...). When this occurs we can scrub the 119 * context state and cleanup appropriately, however this is quite racey. 120 * To avoid races, the reset code must disable submission before scrubbing for 121 * the missing G2H, while the submission code must check for submission being 122 * disabled and skip sending H2Gs and updating context states when it is. Both 123 * sides must also make sure to hold the relevant locks. 124 */ 125 126 /* GuC Virtual Engine */ 127 struct guc_virtual_engine { 128 struct intel_engine_cs base; 129 struct intel_context context; 130 }; 131 132 static struct intel_context * 133 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 134 unsigned long flags); 135 136 static struct intel_context * 137 guc_create_parallel(struct intel_engine_cs **engines, 138 unsigned int num_siblings, 139 unsigned int width); 140 141 #define GUC_REQUEST_SIZE 64 /* bytes */ 142 143 /* 144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous 145 * per the GuC submission interface. A different allocation algorithm is used 146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to 147 * partition the guc_id space. We believe the number of multi-lrc contexts in 148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 149 * multi-lrc. 150 */ 151 #define NUMBER_MULTI_LRC_GUC_ID(guc) \ 152 ((guc)->submission_state.num_guc_ids / 16) 153 154 /* 155 * Below is a set of functions which control the GuC scheduling state which 156 * require a lock. 157 */ 158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 159 #define SCHED_STATE_DESTROYED BIT(1) 160 #define SCHED_STATE_PENDING_DISABLE BIT(2) 161 #define SCHED_STATE_BANNED BIT(3) 162 #define SCHED_STATE_ENABLED BIT(4) 163 #define SCHED_STATE_PENDING_ENABLE BIT(5) 164 #define SCHED_STATE_REGISTERED BIT(6) 165 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 166 #define SCHED_STATE_BLOCKED_SHIFT 8 167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 169 170 static inline void init_sched_state(struct intel_context *ce) 171 { 172 lockdep_assert_held(&ce->guc_state.lock); 173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 174 } 175 176 __maybe_unused 177 static bool sched_state_is_init(struct intel_context *ce) 178 { 179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */ 180 return !(ce->guc_state.sched_state & 181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 182 } 183 184 static inline bool 185 context_wait_for_deregister_to_register(struct intel_context *ce) 186 { 187 return ce->guc_state.sched_state & 188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 189 } 190 191 static inline void 192 set_context_wait_for_deregister_to_register(struct intel_context *ce) 193 { 194 lockdep_assert_held(&ce->guc_state.lock); 195 ce->guc_state.sched_state |= 196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 197 } 198 199 static inline void 200 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 201 { 202 lockdep_assert_held(&ce->guc_state.lock); 203 ce->guc_state.sched_state &= 204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 205 } 206 207 static inline bool 208 context_destroyed(struct intel_context *ce) 209 { 210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 211 } 212 213 static inline void 214 set_context_destroyed(struct intel_context *ce) 215 { 216 lockdep_assert_held(&ce->guc_state.lock); 217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 218 } 219 220 static inline bool context_pending_disable(struct intel_context *ce) 221 { 222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 223 } 224 225 static inline void set_context_pending_disable(struct intel_context *ce) 226 { 227 lockdep_assert_held(&ce->guc_state.lock); 228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 229 } 230 231 static inline void clr_context_pending_disable(struct intel_context *ce) 232 { 233 lockdep_assert_held(&ce->guc_state.lock); 234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 235 } 236 237 static inline bool context_banned(struct intel_context *ce) 238 { 239 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 240 } 241 242 static inline void set_context_banned(struct intel_context *ce) 243 { 244 lockdep_assert_held(&ce->guc_state.lock); 245 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 246 } 247 248 static inline void clr_context_banned(struct intel_context *ce) 249 { 250 lockdep_assert_held(&ce->guc_state.lock); 251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 252 } 253 254 static inline bool context_enabled(struct intel_context *ce) 255 { 256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 257 } 258 259 static inline void set_context_enabled(struct intel_context *ce) 260 { 261 lockdep_assert_held(&ce->guc_state.lock); 262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 263 } 264 265 static inline void clr_context_enabled(struct intel_context *ce) 266 { 267 lockdep_assert_held(&ce->guc_state.lock); 268 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 269 } 270 271 static inline bool context_pending_enable(struct intel_context *ce) 272 { 273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 274 } 275 276 static inline void set_context_pending_enable(struct intel_context *ce) 277 { 278 lockdep_assert_held(&ce->guc_state.lock); 279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 280 } 281 282 static inline void clr_context_pending_enable(struct intel_context *ce) 283 { 284 lockdep_assert_held(&ce->guc_state.lock); 285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 286 } 287 288 static inline bool context_registered(struct intel_context *ce) 289 { 290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 291 } 292 293 static inline void set_context_registered(struct intel_context *ce) 294 { 295 lockdep_assert_held(&ce->guc_state.lock); 296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 297 } 298 299 static inline void clr_context_registered(struct intel_context *ce) 300 { 301 lockdep_assert_held(&ce->guc_state.lock); 302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 303 } 304 305 static inline bool context_policy_required(struct intel_context *ce) 306 { 307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 308 } 309 310 static inline void set_context_policy_required(struct intel_context *ce) 311 { 312 lockdep_assert_held(&ce->guc_state.lock); 313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 314 } 315 316 static inline void clr_context_policy_required(struct intel_context *ce) 317 { 318 lockdep_assert_held(&ce->guc_state.lock); 319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 320 } 321 322 static inline u32 context_blocked(struct intel_context *ce) 323 { 324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 325 SCHED_STATE_BLOCKED_SHIFT; 326 } 327 328 static inline void incr_context_blocked(struct intel_context *ce) 329 { 330 lockdep_assert_held(&ce->guc_state.lock); 331 332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 333 334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 335 } 336 337 static inline void decr_context_blocked(struct intel_context *ce) 338 { 339 lockdep_assert_held(&ce->guc_state.lock); 340 341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 342 343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 344 } 345 346 static inline bool context_has_committed_requests(struct intel_context *ce) 347 { 348 return !!ce->guc_state.number_committed_requests; 349 } 350 351 static inline void incr_context_committed_requests(struct intel_context *ce) 352 { 353 lockdep_assert_held(&ce->guc_state.lock); 354 ++ce->guc_state.number_committed_requests; 355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 356 } 357 358 static inline void decr_context_committed_requests(struct intel_context *ce) 359 { 360 lockdep_assert_held(&ce->guc_state.lock); 361 --ce->guc_state.number_committed_requests; 362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 363 } 364 365 static struct intel_context * 366 request_to_scheduling_context(struct i915_request *rq) 367 { 368 return intel_context_to_parent(rq->context); 369 } 370 371 static inline bool context_guc_id_invalid(struct intel_context *ce) 372 { 373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 374 } 375 376 static inline void set_context_guc_id_invalid(struct intel_context *ce) 377 { 378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 379 } 380 381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 382 { 383 return &ce->engine->gt->uc.guc; 384 } 385 386 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 387 { 388 return rb_entry(rb, struct i915_priolist, node); 389 } 390 391 /* 392 * When using multi-lrc submission a scratch memory area is reserved in the 393 * parent's context state for the process descriptor, work queue, and handshake 394 * between the parent + children contexts to insert safe preemption points 395 * between each of the BBs. Currently the scratch area is sized to a page. 396 * 397 * The layout of this scratch area is below: 398 * 0 guc_process_desc 399 * + sizeof(struct guc_process_desc) child go 400 * + CACHELINE_BYTES child join[0] 401 * ... 402 * + CACHELINE_BYTES child join[n - 1] 403 * ... unused 404 * PARENT_SCRATCH_SIZE / 2 work queue start 405 * ... work queue 406 * PARENT_SCRATCH_SIZE - 1 work queue end 407 */ 408 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 409 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 410 411 struct sync_semaphore { 412 u32 semaphore; 413 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 414 }; 415 416 struct parent_scratch { 417 struct guc_sched_wq_desc wq_desc; 418 419 struct sync_semaphore go; 420 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 421 422 u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - 423 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 424 425 u32 wq[WQ_SIZE / sizeof(u32)]; 426 }; 427 428 static u32 __get_parent_scratch_offset(struct intel_context *ce) 429 { 430 GEM_BUG_ON(!ce->parallel.guc.parent_page); 431 432 return ce->parallel.guc.parent_page * PAGE_SIZE; 433 } 434 435 static u32 __get_wq_offset(struct intel_context *ce) 436 { 437 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 438 439 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 440 } 441 442 static struct parent_scratch * 443 __get_parent_scratch(struct intel_context *ce) 444 { 445 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 446 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 447 448 /* 449 * Need to subtract LRC_STATE_OFFSET here as the 450 * parallel.guc.parent_page is the offset into ce->state while 451 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 452 */ 453 return (struct parent_scratch *) 454 (ce->lrc_reg_state + 455 ((__get_parent_scratch_offset(ce) - 456 LRC_STATE_OFFSET) / sizeof(u32))); 457 } 458 459 static struct guc_sched_wq_desc * 460 __get_wq_desc(struct intel_context *ce) 461 { 462 struct parent_scratch *ps = __get_parent_scratch(ce); 463 464 return &ps->wq_desc; 465 } 466 467 static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, 468 struct intel_context *ce, 469 u32 wqi_size) 470 { 471 /* 472 * Check for space in work queue. Caching a value of head pointer in 473 * intel_context structure in order reduce the number accesses to shared 474 * GPU memory which may be across a PCIe bus. 475 */ 476 #define AVAILABLE_SPACE \ 477 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 478 if (wqi_size > AVAILABLE_SPACE) { 479 ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); 480 481 if (wqi_size > AVAILABLE_SPACE) 482 return NULL; 483 } 484 #undef AVAILABLE_SPACE 485 486 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 487 } 488 489 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 490 { 491 struct intel_context *ce = xa_load(&guc->context_lookup, id); 492 493 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 494 495 return ce; 496 } 497 498 static inline bool guc_submission_initialized(struct intel_guc *guc) 499 { 500 return guc->submission_initialized; 501 } 502 503 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 504 { 505 return __get_context(guc, id); 506 } 507 508 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 509 struct intel_context *ce) 510 { 511 unsigned long flags; 512 513 /* 514 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 515 * lower level functions directly. 516 */ 517 xa_lock_irqsave(&guc->context_lookup, flags); 518 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 519 xa_unlock_irqrestore(&guc->context_lookup, flags); 520 } 521 522 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 523 { 524 unsigned long flags; 525 526 if (unlikely(!guc_submission_initialized(guc))) 527 return; 528 529 /* 530 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 531 * the lower level functions directly. 532 */ 533 xa_lock_irqsave(&guc->context_lookup, flags); 534 __xa_erase(&guc->context_lookup, id); 535 xa_unlock_irqrestore(&guc->context_lookup, flags); 536 } 537 538 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 539 { 540 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 541 wake_up_all(&guc->ct.wq); 542 } 543 544 static int guc_submission_send_busy_loop(struct intel_guc *guc, 545 const u32 *action, 546 u32 len, 547 u32 g2h_len_dw, 548 bool loop) 549 { 550 /* 551 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 552 * so we don't handle the case where we don't get a reply because we 553 * aborted the send due to the channel being busy. 554 */ 555 GEM_BUG_ON(g2h_len_dw && !loop); 556 557 if (g2h_len_dw) 558 atomic_inc(&guc->outstanding_submission_g2h); 559 560 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 561 } 562 563 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 564 atomic_t *wait_var, 565 bool interruptible, 566 long timeout) 567 { 568 const int state = interruptible ? 569 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 570 DEFINE_WAIT(wait); 571 572 might_sleep(); 573 GEM_BUG_ON(timeout < 0); 574 575 if (!atomic_read(wait_var)) 576 return 0; 577 578 if (!timeout) 579 return -ETIME; 580 581 for (;;) { 582 prepare_to_wait(&guc->ct.wq, &wait, state); 583 584 if (!atomic_read(wait_var)) 585 break; 586 587 if (signal_pending_state(state, current)) { 588 timeout = -EINTR; 589 break; 590 } 591 592 if (!timeout) { 593 timeout = -ETIME; 594 break; 595 } 596 597 timeout = io_schedule_timeout(timeout); 598 } 599 finish_wait(&guc->ct.wq, &wait); 600 601 return (timeout < 0) ? timeout : 0; 602 } 603 604 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 605 { 606 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 607 return 0; 608 609 return intel_guc_wait_for_pending_msg(guc, 610 &guc->outstanding_submission_g2h, 611 true, timeout); 612 } 613 614 static int guc_context_policy_init(struct intel_context *ce, bool loop); 615 static int try_context_registration(struct intel_context *ce, bool loop); 616 617 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 618 { 619 int err = 0; 620 struct intel_context *ce = request_to_scheduling_context(rq); 621 u32 action[3]; 622 int len = 0; 623 u32 g2h_len_dw = 0; 624 bool enabled; 625 626 lockdep_assert_held(&rq->engine->sched_engine->lock); 627 628 /* 629 * Corner case where requests were sitting in the priority list or a 630 * request resubmitted after the context was banned. 631 */ 632 if (unlikely(intel_context_is_banned(ce))) { 633 i915_request_put(i915_request_mark_eio(rq)); 634 intel_engine_signal_breadcrumbs(ce->engine); 635 return 0; 636 } 637 638 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 639 GEM_BUG_ON(context_guc_id_invalid(ce)); 640 641 if (context_policy_required(ce)) { 642 err = guc_context_policy_init(ce, false); 643 if (err) 644 return err; 645 } 646 647 spin_lock(&ce->guc_state.lock); 648 649 /* 650 * The request / context will be run on the hardware when scheduling 651 * gets enabled in the unblock. For multi-lrc we still submit the 652 * context to move the LRC tails. 653 */ 654 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 655 goto out; 656 657 enabled = context_enabled(ce) || context_blocked(ce); 658 659 if (!enabled) { 660 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 661 action[len++] = ce->guc_id.id; 662 action[len++] = GUC_CONTEXT_ENABLE; 663 set_context_pending_enable(ce); 664 intel_context_get(ce); 665 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 666 } else { 667 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 668 action[len++] = ce->guc_id.id; 669 } 670 671 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 672 if (!enabled && !err) { 673 trace_intel_context_sched_enable(ce); 674 atomic_inc(&guc->outstanding_submission_g2h); 675 set_context_enabled(ce); 676 677 /* 678 * Without multi-lrc KMD does the submission step (moving the 679 * lrc tail) so enabling scheduling is sufficient to submit the 680 * context. This isn't the case in multi-lrc submission as the 681 * GuC needs to move the tails, hence the need for another H2G 682 * to submit a multi-lrc context after enabling scheduling. 683 */ 684 if (intel_context_is_parent(ce)) { 685 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 686 err = intel_guc_send_nb(guc, action, len - 1, 0); 687 } 688 } else if (!enabled) { 689 clr_context_pending_enable(ce); 690 intel_context_put(ce); 691 } 692 if (likely(!err)) 693 trace_i915_request_guc_submit(rq); 694 695 out: 696 spin_unlock(&ce->guc_state.lock); 697 return err; 698 } 699 700 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 701 { 702 int ret = __guc_add_request(guc, rq); 703 704 if (unlikely(ret == -EBUSY)) { 705 guc->stalled_request = rq; 706 guc->submission_stall_reason = STALL_ADD_REQUEST; 707 } 708 709 return ret; 710 } 711 712 static inline void guc_set_lrc_tail(struct i915_request *rq) 713 { 714 rq->context->lrc_reg_state[CTX_RING_TAIL] = 715 intel_ring_set_tail(rq->ring, rq->tail); 716 } 717 718 static inline int rq_prio(const struct i915_request *rq) 719 { 720 return rq->sched.attr.priority; 721 } 722 723 static bool is_multi_lrc_rq(struct i915_request *rq) 724 { 725 return intel_context_is_parallel(rq->context); 726 } 727 728 static bool can_merge_rq(struct i915_request *rq, 729 struct i915_request *last) 730 { 731 return request_to_scheduling_context(rq) == 732 request_to_scheduling_context(last); 733 } 734 735 static u32 wq_space_until_wrap(struct intel_context *ce) 736 { 737 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 738 } 739 740 static void write_wqi(struct guc_sched_wq_desc *wq_desc, 741 struct intel_context *ce, 742 u32 wqi_size) 743 { 744 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 745 746 /* 747 * Ensure WQI are visible before updating tail 748 */ 749 intel_guc_write_barrier(ce_to_guc(ce)); 750 751 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 752 (WQ_SIZE - 1); 753 WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); 754 } 755 756 static int guc_wq_noop_append(struct intel_context *ce) 757 { 758 struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); 759 u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); 760 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 761 762 if (!wqi) 763 return -EBUSY; 764 765 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 766 767 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 768 FIELD_PREP(WQ_LEN_MASK, len_dw); 769 ce->parallel.guc.wqi_tail = 0; 770 771 return 0; 772 } 773 774 static int __guc_wq_item_append(struct i915_request *rq) 775 { 776 struct intel_context *ce = request_to_scheduling_context(rq); 777 struct intel_context *child; 778 struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); 779 unsigned int wqi_size = (ce->parallel.number_children + 4) * 780 sizeof(u32); 781 u32 *wqi; 782 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 783 int ret; 784 785 /* Ensure context is in correct state updating work queue */ 786 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 787 GEM_BUG_ON(context_guc_id_invalid(ce)); 788 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 789 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 790 791 /* Insert NOOP if this work queue item will wrap the tail pointer. */ 792 if (wqi_size > wq_space_until_wrap(ce)) { 793 ret = guc_wq_noop_append(ce); 794 if (ret) 795 return ret; 796 } 797 798 wqi = get_wq_pointer(wq_desc, ce, wqi_size); 799 if (!wqi) 800 return -EBUSY; 801 802 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 803 804 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 805 FIELD_PREP(WQ_LEN_MASK, len_dw); 806 *wqi++ = ce->lrc.lrca; 807 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 808 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 809 *wqi++ = 0; /* fence_id */ 810 for_each_child(ce, child) 811 *wqi++ = child->ring->tail / sizeof(u64); 812 813 write_wqi(wq_desc, ce, wqi_size); 814 815 return 0; 816 } 817 818 static int guc_wq_item_append(struct intel_guc *guc, 819 struct i915_request *rq) 820 { 821 struct intel_context *ce = request_to_scheduling_context(rq); 822 int ret = 0; 823 824 if (likely(!intel_context_is_banned(ce))) { 825 ret = __guc_wq_item_append(rq); 826 827 if (unlikely(ret == -EBUSY)) { 828 guc->stalled_request = rq; 829 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 830 } 831 } 832 833 return ret; 834 } 835 836 static bool multi_lrc_submit(struct i915_request *rq) 837 { 838 struct intel_context *ce = request_to_scheduling_context(rq); 839 840 intel_ring_set_tail(rq->ring, rq->tail); 841 842 /* 843 * We expect the front end (execbuf IOCTL) to set this flag on the last 844 * request generated from a multi-BB submission. This indicates to the 845 * backend (GuC interface) that we should submit this context thus 846 * submitting all the requests generated in parallel. 847 */ 848 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 849 intel_context_is_banned(ce); 850 } 851 852 static int guc_dequeue_one_context(struct intel_guc *guc) 853 { 854 struct i915_sched_engine * const sched_engine = guc->sched_engine; 855 struct i915_request *last = NULL; 856 bool submit = false; 857 struct rb_node *rb; 858 int ret; 859 860 lockdep_assert_held(&sched_engine->lock); 861 862 if (guc->stalled_request) { 863 submit = true; 864 last = guc->stalled_request; 865 866 switch (guc->submission_stall_reason) { 867 case STALL_REGISTER_CONTEXT: 868 goto register_context; 869 case STALL_MOVE_LRC_TAIL: 870 goto move_lrc_tail; 871 case STALL_ADD_REQUEST: 872 goto add_request; 873 default: 874 MISSING_CASE(guc->submission_stall_reason); 875 } 876 } 877 878 while ((rb = rb_first_cached(&sched_engine->queue))) { 879 struct i915_priolist *p = to_priolist(rb); 880 struct i915_request *rq, *rn; 881 882 priolist_for_each_request_consume(rq, rn, p) { 883 if (last && !can_merge_rq(rq, last)) 884 goto register_context; 885 886 list_del_init(&rq->sched.link); 887 888 __i915_request_submit(rq); 889 890 trace_i915_request_in(rq, 0); 891 last = rq; 892 893 if (is_multi_lrc_rq(rq)) { 894 /* 895 * We need to coalesce all multi-lrc requests in 896 * a relationship into a single H2G. We are 897 * guaranteed that all of these requests will be 898 * submitted sequentially. 899 */ 900 if (multi_lrc_submit(rq)) { 901 submit = true; 902 goto register_context; 903 } 904 } else { 905 submit = true; 906 } 907 } 908 909 rb_erase_cached(&p->node, &sched_engine->queue); 910 i915_priolist_free(p); 911 } 912 913 register_context: 914 if (submit) { 915 struct intel_context *ce = request_to_scheduling_context(last); 916 917 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 918 !intel_context_is_banned(ce))) { 919 ret = try_context_registration(ce, false); 920 if (unlikely(ret == -EPIPE)) { 921 goto deadlk; 922 } else if (ret == -EBUSY) { 923 guc->stalled_request = last; 924 guc->submission_stall_reason = 925 STALL_REGISTER_CONTEXT; 926 goto schedule_tasklet; 927 } else if (ret != 0) { 928 GEM_WARN_ON(ret); /* Unexpected */ 929 goto deadlk; 930 } 931 } 932 933 move_lrc_tail: 934 if (is_multi_lrc_rq(last)) { 935 ret = guc_wq_item_append(guc, last); 936 if (ret == -EBUSY) { 937 goto schedule_tasklet; 938 } else if (ret != 0) { 939 GEM_WARN_ON(ret); /* Unexpected */ 940 goto deadlk; 941 } 942 } else { 943 guc_set_lrc_tail(last); 944 } 945 946 add_request: 947 ret = guc_add_request(guc, last); 948 if (unlikely(ret == -EPIPE)) { 949 goto deadlk; 950 } else if (ret == -EBUSY) { 951 goto schedule_tasklet; 952 } else if (ret != 0) { 953 GEM_WARN_ON(ret); /* Unexpected */ 954 goto deadlk; 955 } 956 } 957 958 guc->stalled_request = NULL; 959 guc->submission_stall_reason = STALL_NONE; 960 return submit; 961 962 deadlk: 963 sched_engine->tasklet.callback = NULL; 964 tasklet_disable_nosync(&sched_engine->tasklet); 965 return false; 966 967 schedule_tasklet: 968 tasklet_schedule(&sched_engine->tasklet); 969 return false; 970 } 971 972 static void guc_submission_tasklet(struct tasklet_struct *t) 973 { 974 struct i915_sched_engine *sched_engine = 975 from_tasklet(sched_engine, t, tasklet); 976 unsigned long flags; 977 bool loop; 978 979 spin_lock_irqsave(&sched_engine->lock, flags); 980 981 do { 982 loop = guc_dequeue_one_context(sched_engine->private_data); 983 } while (loop); 984 985 i915_sched_engine_reset_on_empty(sched_engine); 986 987 spin_unlock_irqrestore(&sched_engine->lock, flags); 988 } 989 990 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 991 { 992 if (iir & GT_RENDER_USER_INTERRUPT) 993 intel_engine_signal_breadcrumbs(engine); 994 } 995 996 static void __guc_context_destroy(struct intel_context *ce); 997 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 998 static void guc_signal_context_fence(struct intel_context *ce); 999 static void guc_cancel_context_requests(struct intel_context *ce); 1000 static void guc_blocked_fence_complete(struct intel_context *ce); 1001 1002 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1003 { 1004 struct intel_context *ce; 1005 unsigned long index, flags; 1006 bool pending_disable, pending_enable, deregister, destroyed, banned; 1007 1008 xa_lock_irqsave(&guc->context_lookup, flags); 1009 xa_for_each(&guc->context_lookup, index, ce) { 1010 /* 1011 * Corner case where the ref count on the object is zero but and 1012 * deregister G2H was lost. In this case we don't touch the ref 1013 * count and finish the destroy of the context. 1014 */ 1015 bool do_put = kref_get_unless_zero(&ce->ref); 1016 1017 xa_unlock(&guc->context_lookup); 1018 1019 spin_lock(&ce->guc_state.lock); 1020 1021 /* 1022 * Once we are at this point submission_disabled() is guaranteed 1023 * to be visible to all callers who set the below flags (see above 1024 * flush and flushes in reset_prepare). If submission_disabled() 1025 * is set, the caller shouldn't set these flags. 1026 */ 1027 1028 destroyed = context_destroyed(ce); 1029 pending_enable = context_pending_enable(ce); 1030 pending_disable = context_pending_disable(ce); 1031 deregister = context_wait_for_deregister_to_register(ce); 1032 banned = context_banned(ce); 1033 init_sched_state(ce); 1034 1035 spin_unlock(&ce->guc_state.lock); 1036 1037 if (pending_enable || destroyed || deregister) { 1038 decr_outstanding_submission_g2h(guc); 1039 if (deregister) 1040 guc_signal_context_fence(ce); 1041 if (destroyed) { 1042 intel_gt_pm_put_async(guc_to_gt(guc)); 1043 release_guc_id(guc, ce); 1044 __guc_context_destroy(ce); 1045 } 1046 if (pending_enable || deregister) 1047 intel_context_put(ce); 1048 } 1049 1050 /* Not mutualy exclusive with above if statement. */ 1051 if (pending_disable) { 1052 guc_signal_context_fence(ce); 1053 if (banned) { 1054 guc_cancel_context_requests(ce); 1055 intel_engine_signal_breadcrumbs(ce->engine); 1056 } 1057 intel_context_sched_disable_unpin(ce); 1058 decr_outstanding_submission_g2h(guc); 1059 1060 spin_lock(&ce->guc_state.lock); 1061 guc_blocked_fence_complete(ce); 1062 spin_unlock(&ce->guc_state.lock); 1063 1064 intel_context_put(ce); 1065 } 1066 1067 if (do_put) 1068 intel_context_put(ce); 1069 xa_lock(&guc->context_lookup); 1070 } 1071 xa_unlock_irqrestore(&guc->context_lookup, flags); 1072 } 1073 1074 /* 1075 * GuC stores busyness stats for each engine at context in/out boundaries. A 1076 * context 'in' logs execution start time, 'out' adds in -> out delta to total. 1077 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with 1078 * GuC. 1079 * 1080 * __i915_pmu_event_read samples engine busyness. When sampling, if context id 1081 * is valid (!= ~0) and start is non-zero, the engine is considered to be 1082 * active. For an active engine total busyness = total + (now - start), where 1083 * 'now' is the time at which the busyness is sampled. For inactive engine, 1084 * total busyness = total. 1085 * 1086 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. 1087 * 1088 * The start and total values provided by GuC are 32 bits and wrap around in a 1089 * few minutes. Since perf pmu provides busyness as 64 bit monotonically 1090 * increasing ns values, there is a need for this implementation to account for 1091 * overflows and extend the GuC provided values to 64 bits before returning 1092 * busyness to the user. In order to do that, a worker runs periodically at 1093 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in 1094 * 27 seconds for a gt clock frequency of 19.2 MHz). 1095 */ 1096 1097 #define WRAP_TIME_CLKS U32_MAX 1098 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) 1099 1100 static void 1101 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) 1102 { 1103 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1104 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); 1105 1106 if (new_start == lower_32_bits(*prev_start)) 1107 return; 1108 1109 /* 1110 * When gt is unparked, we update the gt timestamp and start the ping 1111 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt 1112 * is unparked, all switched in contexts will have a start time that is 1113 * within +/- POLL_TIME_CLKS of the most recent gt_stamp. 1114 * 1115 * If neither gt_stamp nor new_start has rolled over, then the 1116 * gt_stamp_hi does not need to be adjusted, however if one of them has 1117 * rolled over, we need to adjust gt_stamp_hi accordingly. 1118 * 1119 * The below conditions address the cases of new_start rollover and 1120 * gt_stamp_last rollover respectively. 1121 */ 1122 if (new_start < gt_stamp_last && 1123 (new_start - gt_stamp_last) <= POLL_TIME_CLKS) 1124 gt_stamp_hi++; 1125 1126 if (new_start > gt_stamp_last && 1127 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) 1128 gt_stamp_hi--; 1129 1130 *prev_start = ((u64)gt_stamp_hi << 32) | new_start; 1131 } 1132 1133 #define record_read(map_, field_) \ 1134 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_) 1135 1136 /* 1137 * GuC updates shared memory and KMD reads it. Since this is not synchronized, 1138 * we run into a race where the value read is inconsistent. Sometimes the 1139 * inconsistency is in reading the upper MSB bytes of the last_in value when 1140 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper 1141 * 24 bits are zero. Since these are non-zero values, it is non-trivial to 1142 * determine validity of these values. Instead we read the values multiple times 1143 * until they are consistent. In test runs, 3 attempts results in consistent 1144 * values. The upper bound is set to 6 attempts and may need to be tuned as per 1145 * any new occurences. 1146 */ 1147 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1148 u32 *last_in, u32 *id, u32 *total) 1149 { 1150 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1151 int i = 0; 1152 1153 do { 1154 *last_in = record_read(&rec_map, last_switch_in_stamp); 1155 *id = record_read(&rec_map, current_context_index); 1156 *total = record_read(&rec_map, total_runtime); 1157 1158 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1159 record_read(&rec_map, current_context_index) == *id && 1160 record_read(&rec_map, total_runtime) == *total) 1161 break; 1162 } while (++i < 6); 1163 } 1164 1165 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1166 { 1167 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1168 struct intel_guc *guc = &engine->gt->uc.guc; 1169 u32 last_switch, ctx_id, total; 1170 1171 lockdep_assert_held(&guc->timestamp.lock); 1172 1173 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1174 1175 stats->running = ctx_id != ~0U && last_switch; 1176 if (stats->running) 1177 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1178 1179 /* 1180 * Instead of adjusting the total for overflow, just add the 1181 * difference from previous sample stats->total_gt_clks 1182 */ 1183 if (total && total != ~0U) { 1184 stats->total_gt_clks += (u32)(total - stats->prev_total); 1185 stats->prev_total = total; 1186 } 1187 } 1188 1189 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1190 { 1191 intel_wakeref_t wakeref; 1192 u32 reg, shift; 1193 1194 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1195 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1196 1197 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1198 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1199 1200 return 3 - shift; 1201 } 1202 1203 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1204 { 1205 struct intel_gt *gt = guc_to_gt(guc); 1206 u32 gt_stamp_lo, gt_stamp_hi; 1207 u64 gpm_ts; 1208 1209 lockdep_assert_held(&guc->timestamp.lock); 1210 1211 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1212 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1213 MISC_STATUS1) >> guc->timestamp.shift; 1214 gt_stamp_lo = lower_32_bits(gpm_ts); 1215 *now = ktime_get(); 1216 1217 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1218 gt_stamp_hi++; 1219 1220 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1221 } 1222 1223 /* 1224 * Unlike the execlist mode of submission total and active times are in terms of 1225 * gt clocks. The *now parameter is retained to return the cpu time at which the 1226 * busyness was sampled. 1227 */ 1228 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1229 { 1230 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1231 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1232 struct intel_gt *gt = engine->gt; 1233 struct intel_guc *guc = >->uc.guc; 1234 u64 total, gt_stamp_saved; 1235 unsigned long flags; 1236 u32 reset_count; 1237 bool in_reset; 1238 1239 spin_lock_irqsave(&guc->timestamp.lock, flags); 1240 1241 /* 1242 * If a reset happened, we risk reading partially updated engine 1243 * busyness from GuC, so we just use the driver stored copy of busyness. 1244 * Synchronize with gt reset using reset_count and the 1245 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count 1246 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1247 * usable by checking the flag afterwards. 1248 */ 1249 reset_count = i915_reset_count(gpu_error); 1250 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1251 1252 *now = ktime_get(); 1253 1254 /* 1255 * The active busyness depends on start_gt_clk and gt_stamp. 1256 * gt_stamp is updated by i915 only when gt is awake and the 1257 * start_gt_clk is derived from GuC state. To get a consistent 1258 * view of activity, we query the GuC state only if gt is awake. 1259 */ 1260 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1261 stats_saved = *stats; 1262 gt_stamp_saved = guc->timestamp.gt_stamp; 1263 /* 1264 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1265 * start_gt_clk' calculation below for active engines. 1266 */ 1267 guc_update_engine_gt_clks(engine); 1268 guc_update_pm_timestamp(guc, now); 1269 intel_gt_pm_put_async(gt); 1270 if (i915_reset_count(gpu_error) != reset_count) { 1271 *stats = stats_saved; 1272 guc->timestamp.gt_stamp = gt_stamp_saved; 1273 } 1274 } 1275 1276 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1277 if (stats->running) { 1278 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1279 1280 total += intel_gt_clock_interval_to_ns(gt, clk); 1281 } 1282 1283 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1284 1285 return ns_to_ktime(total); 1286 } 1287 1288 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1289 { 1290 struct intel_gt *gt = guc_to_gt(guc); 1291 struct intel_engine_cs *engine; 1292 enum intel_engine_id id; 1293 unsigned long flags; 1294 ktime_t unused; 1295 1296 cancel_delayed_work_sync(&guc->timestamp.work); 1297 1298 spin_lock_irqsave(&guc->timestamp.lock, flags); 1299 1300 guc_update_pm_timestamp(guc, &unused); 1301 for_each_engine(engine, gt, id) { 1302 guc_update_engine_gt_clks(engine); 1303 engine->stats.guc.prev_total = 0; 1304 } 1305 1306 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1307 } 1308 1309 static void __update_guc_busyness_stats(struct intel_guc *guc) 1310 { 1311 struct intel_gt *gt = guc_to_gt(guc); 1312 struct intel_engine_cs *engine; 1313 enum intel_engine_id id; 1314 unsigned long flags; 1315 ktime_t unused; 1316 1317 spin_lock_irqsave(&guc->timestamp.lock, flags); 1318 1319 guc_update_pm_timestamp(guc, &unused); 1320 for_each_engine(engine, gt, id) 1321 guc_update_engine_gt_clks(engine); 1322 1323 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1324 } 1325 1326 static void guc_timestamp_ping(struct work_struct *wrk) 1327 { 1328 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1329 timestamp.work.work); 1330 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1331 struct intel_gt *gt = guc_to_gt(guc); 1332 intel_wakeref_t wakeref; 1333 int srcu, ret; 1334 1335 /* 1336 * Synchronize with gt reset to make sure the worker does not 1337 * corrupt the engine/guc stats. 1338 */ 1339 ret = intel_gt_reset_trylock(gt, &srcu); 1340 if (ret) 1341 return; 1342 1343 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1344 __update_guc_busyness_stats(guc); 1345 1346 intel_gt_reset_unlock(gt, srcu); 1347 1348 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1349 guc->timestamp.ping_delay); 1350 } 1351 1352 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1353 { 1354 u32 offset = intel_guc_engine_usage_offset(guc); 1355 u32 action[] = { 1356 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1357 offset, 1358 0, 1359 }; 1360 1361 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1362 } 1363 1364 static void guc_init_engine_stats(struct intel_guc *guc) 1365 { 1366 struct intel_gt *gt = guc_to_gt(guc); 1367 intel_wakeref_t wakeref; 1368 1369 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1370 guc->timestamp.ping_delay); 1371 1372 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 1373 int ret = guc_action_enable_usage_stats(guc); 1374 1375 if (ret) 1376 drm_err(>->i915->drm, 1377 "Failed to enable usage stats: %d!\n", ret); 1378 } 1379 } 1380 1381 void intel_guc_busyness_park(struct intel_gt *gt) 1382 { 1383 struct intel_guc *guc = >->uc.guc; 1384 1385 if (!guc_submission_initialized(guc)) 1386 return; 1387 1388 cancel_delayed_work(&guc->timestamp.work); 1389 __update_guc_busyness_stats(guc); 1390 } 1391 1392 void intel_guc_busyness_unpark(struct intel_gt *gt) 1393 { 1394 struct intel_guc *guc = >->uc.guc; 1395 unsigned long flags; 1396 ktime_t unused; 1397 1398 if (!guc_submission_initialized(guc)) 1399 return; 1400 1401 spin_lock_irqsave(&guc->timestamp.lock, flags); 1402 guc_update_pm_timestamp(guc, &unused); 1403 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1404 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, 1405 guc->timestamp.ping_delay); 1406 } 1407 1408 static inline bool 1409 submission_disabled(struct intel_guc *guc) 1410 { 1411 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1412 1413 return unlikely(!sched_engine || 1414 !__tasklet_is_enabled(&sched_engine->tasklet) || 1415 intel_gt_is_wedged(guc_to_gt(guc))); 1416 } 1417 1418 static void disable_submission(struct intel_guc *guc) 1419 { 1420 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1421 1422 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1423 GEM_BUG_ON(!guc->ct.enabled); 1424 __tasklet_disable_sync_once(&sched_engine->tasklet); 1425 sched_engine->tasklet.callback = NULL; 1426 } 1427 } 1428 1429 static void enable_submission(struct intel_guc *guc) 1430 { 1431 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1432 unsigned long flags; 1433 1434 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1435 sched_engine->tasklet.callback = guc_submission_tasklet; 1436 wmb(); /* Make sure callback visible */ 1437 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1438 __tasklet_enable(&sched_engine->tasklet)) { 1439 GEM_BUG_ON(!guc->ct.enabled); 1440 1441 /* And kick in case we missed a new request submission. */ 1442 tasklet_hi_schedule(&sched_engine->tasklet); 1443 } 1444 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1445 } 1446 1447 static void guc_flush_submissions(struct intel_guc *guc) 1448 { 1449 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1450 unsigned long flags; 1451 1452 spin_lock_irqsave(&sched_engine->lock, flags); 1453 spin_unlock_irqrestore(&sched_engine->lock, flags); 1454 } 1455 1456 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1457 1458 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1459 { 1460 if (unlikely(!guc_submission_initialized(guc))) { 1461 /* Reset called during driver load? GuC not yet initialised! */ 1462 return; 1463 } 1464 1465 intel_gt_park_heartbeats(guc_to_gt(guc)); 1466 disable_submission(guc); 1467 guc->interrupts.disable(guc); 1468 __reset_guc_busyness_stats(guc); 1469 1470 /* Flush IRQ handler */ 1471 spin_lock_irq(&guc_to_gt(guc)->irq_lock); 1472 spin_unlock_irq(&guc_to_gt(guc)->irq_lock); 1473 1474 guc_flush_submissions(guc); 1475 guc_flush_destroyed_contexts(guc); 1476 flush_work(&guc->ct.requests.worker); 1477 1478 scrub_guc_desc_for_outstanding_g2h(guc); 1479 } 1480 1481 static struct intel_engine_cs * 1482 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1483 { 1484 struct intel_engine_cs *engine; 1485 intel_engine_mask_t tmp, mask = ve->mask; 1486 unsigned int num_siblings = 0; 1487 1488 for_each_engine_masked(engine, ve->gt, mask, tmp) 1489 if (num_siblings++ == sibling) 1490 return engine; 1491 1492 return NULL; 1493 } 1494 1495 static inline struct intel_engine_cs * 1496 __context_to_physical_engine(struct intel_context *ce) 1497 { 1498 struct intel_engine_cs *engine = ce->engine; 1499 1500 if (intel_engine_is_virtual(engine)) 1501 engine = guc_virtual_get_sibling(engine, 0); 1502 1503 return engine; 1504 } 1505 1506 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1507 { 1508 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1509 1510 if (intel_context_is_banned(ce)) 1511 return; 1512 1513 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1514 1515 /* 1516 * We want a simple context + ring to execute the breadcrumb update. 1517 * We cannot rely on the context being intact across the GPU hang, 1518 * so clear it and rebuild just what we need for the breadcrumb. 1519 * All pending requests for this context will be zapped, and any 1520 * future request will be after userspace has had the opportunity 1521 * to recreate its own state. 1522 */ 1523 if (scrub) 1524 lrc_init_regs(ce, engine, true); 1525 1526 /* Rerun the request; its payload has been neutered (if guilty). */ 1527 lrc_update_regs(ce, engine, head); 1528 } 1529 1530 static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) 1531 { 1532 static const i915_reg_t _reg[I915_NUM_ENGINES] = { 1533 [RCS0] = MSG_IDLE_CS, 1534 [BCS0] = MSG_IDLE_BCS, 1535 [VCS0] = MSG_IDLE_VCS0, 1536 [VCS1] = MSG_IDLE_VCS1, 1537 [VCS2] = MSG_IDLE_VCS2, 1538 [VCS3] = MSG_IDLE_VCS3, 1539 [VCS4] = MSG_IDLE_VCS4, 1540 [VCS5] = MSG_IDLE_VCS5, 1541 [VCS6] = MSG_IDLE_VCS6, 1542 [VCS7] = MSG_IDLE_VCS7, 1543 [VECS0] = MSG_IDLE_VECS0, 1544 [VECS1] = MSG_IDLE_VECS1, 1545 [VECS2] = MSG_IDLE_VECS2, 1546 [VECS3] = MSG_IDLE_VECS3, 1547 [CCS0] = MSG_IDLE_CS, 1548 [CCS1] = MSG_IDLE_CS, 1549 [CCS2] = MSG_IDLE_CS, 1550 [CCS3] = MSG_IDLE_CS, 1551 }; 1552 u32 val; 1553 1554 if (!_reg[engine->id].reg) 1555 return 0; 1556 1557 val = intel_uncore_read(engine->uncore, _reg[engine->id]); 1558 1559 /* bits[29:25] & bits[13:9] >> shift */ 1560 return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; 1561 } 1562 1563 static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) 1564 { 1565 int ret; 1566 1567 /* Ensure GPM receives fw up/down after CS is stopped */ 1568 udelay(1); 1569 1570 /* Wait for forcewake request to complete in GPM */ 1571 ret = __intel_wait_for_register_fw(gt->uncore, 1572 GEN9_PWRGT_DOMAIN_STATUS, 1573 fw_mask, fw_mask, 5000, 0, NULL); 1574 1575 /* Ensure CS receives fw ack from GPM */ 1576 udelay(1); 1577 1578 if (ret) 1579 GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); 1580 } 1581 1582 /* 1583 * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any 1584 * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The 1585 * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the 1586 * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we 1587 * are concerned only with the gt reset here, we use a logical OR of pending 1588 * forcewakeups from all reset domains and then wait for them to complete by 1589 * querying PWRGT_DOMAIN_STATUS. 1590 */ 1591 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1592 { 1593 u32 fw_pending; 1594 1595 if (GRAPHICS_VER(engine->i915) != 12) 1596 return; 1597 1598 /* 1599 * Wa_22011802037 1600 * TODO: Occasionally trying to stop the cs times out, but does not 1601 * adversely affect functionality. The timeout is set as a config 1602 * parameter that defaults to 100ms. Assuming that this timeout is 1603 * sufficient for any pending MI_FORCEWAKEs to complete, ignore the 1604 * timeout returned here until it is root caused. 1605 */ 1606 intel_engine_stop_cs(engine); 1607 1608 fw_pending = __cs_pending_mi_force_wakes(engine); 1609 if (fw_pending) 1610 __gpm_wait_for_fw_complete(engine->gt, fw_pending); 1611 } 1612 1613 static void guc_reset_nop(struct intel_engine_cs *engine) 1614 { 1615 } 1616 1617 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1618 { 1619 } 1620 1621 static void 1622 __unwind_incomplete_requests(struct intel_context *ce) 1623 { 1624 struct i915_request *rq, *rn; 1625 struct list_head *pl; 1626 int prio = I915_PRIORITY_INVALID; 1627 struct i915_sched_engine * const sched_engine = 1628 ce->engine->sched_engine; 1629 unsigned long flags; 1630 1631 spin_lock_irqsave(&sched_engine->lock, flags); 1632 spin_lock(&ce->guc_state.lock); 1633 list_for_each_entry_safe_reverse(rq, rn, 1634 &ce->guc_state.requests, 1635 sched.link) { 1636 if (i915_request_completed(rq)) 1637 continue; 1638 1639 list_del_init(&rq->sched.link); 1640 __i915_request_unsubmit(rq); 1641 1642 /* Push the request back into the queue for later resubmission. */ 1643 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1644 if (rq_prio(rq) != prio) { 1645 prio = rq_prio(rq); 1646 pl = i915_sched_lookup_priolist(sched_engine, prio); 1647 } 1648 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1649 1650 list_add(&rq->sched.link, pl); 1651 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1652 } 1653 spin_unlock(&ce->guc_state.lock); 1654 spin_unlock_irqrestore(&sched_engine->lock, flags); 1655 } 1656 1657 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1658 { 1659 bool guilty; 1660 struct i915_request *rq; 1661 unsigned long flags; 1662 u32 head; 1663 int i, number_children = ce->parallel.number_children; 1664 struct intel_context *parent = ce; 1665 1666 GEM_BUG_ON(intel_context_is_child(ce)); 1667 1668 intel_context_get(ce); 1669 1670 /* 1671 * GuC will implicitly mark the context as non-schedulable when it sends 1672 * the reset notification. Make sure our state reflects this change. The 1673 * context will be marked enabled on resubmission. 1674 */ 1675 spin_lock_irqsave(&ce->guc_state.lock, flags); 1676 clr_context_enabled(ce); 1677 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1678 1679 /* 1680 * For each context in the relationship find the hanging request 1681 * resetting each context / request as needed 1682 */ 1683 for (i = 0; i < number_children + 1; ++i) { 1684 if (!intel_context_is_pinned(ce)) 1685 goto next_context; 1686 1687 guilty = false; 1688 rq = intel_context_find_active_request(ce); 1689 if (!rq) { 1690 head = ce->ring->tail; 1691 goto out_replay; 1692 } 1693 1694 if (i915_request_started(rq)) 1695 guilty = stalled & ce->engine->mask; 1696 1697 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1698 head = intel_ring_wrap(ce->ring, rq->head); 1699 1700 __i915_request_reset(rq, guilty); 1701 out_replay: 1702 guc_reset_state(ce, head, guilty); 1703 next_context: 1704 if (i != number_children) 1705 ce = list_next_entry(ce, parallel.child_link); 1706 } 1707 1708 __unwind_incomplete_requests(parent); 1709 intel_context_put(parent); 1710 } 1711 1712 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1713 { 1714 struct intel_context *ce; 1715 unsigned long index; 1716 unsigned long flags; 1717 1718 if (unlikely(!guc_submission_initialized(guc))) { 1719 /* Reset called during driver load? GuC not yet initialised! */ 1720 return; 1721 } 1722 1723 xa_lock_irqsave(&guc->context_lookup, flags); 1724 xa_for_each(&guc->context_lookup, index, ce) { 1725 if (!kref_get_unless_zero(&ce->ref)) 1726 continue; 1727 1728 xa_unlock(&guc->context_lookup); 1729 1730 if (intel_context_is_pinned(ce) && 1731 !intel_context_is_child(ce)) 1732 __guc_reset_context(ce, stalled); 1733 1734 intel_context_put(ce); 1735 1736 xa_lock(&guc->context_lookup); 1737 } 1738 xa_unlock_irqrestore(&guc->context_lookup, flags); 1739 1740 /* GuC is blown away, drop all references to contexts */ 1741 xa_destroy(&guc->context_lookup); 1742 } 1743 1744 static void guc_cancel_context_requests(struct intel_context *ce) 1745 { 1746 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1747 struct i915_request *rq; 1748 unsigned long flags; 1749 1750 /* Mark all executing requests as skipped. */ 1751 spin_lock_irqsave(&sched_engine->lock, flags); 1752 spin_lock(&ce->guc_state.lock); 1753 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1754 i915_request_put(i915_request_mark_eio(rq)); 1755 spin_unlock(&ce->guc_state.lock); 1756 spin_unlock_irqrestore(&sched_engine->lock, flags); 1757 } 1758 1759 static void 1760 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1761 { 1762 struct i915_request *rq, *rn; 1763 struct rb_node *rb; 1764 unsigned long flags; 1765 1766 /* Can be called during boot if GuC fails to load */ 1767 if (!sched_engine) 1768 return; 1769 1770 /* 1771 * Before we call engine->cancel_requests(), we should have exclusive 1772 * access to the submission state. This is arranged for us by the 1773 * caller disabling the interrupt generation, the tasklet and other 1774 * threads that may then access the same state, giving us a free hand 1775 * to reset state. However, we still need to let lockdep be aware that 1776 * we know this state may be accessed in hardirq context, so we 1777 * disable the irq around this manipulation and we want to keep 1778 * the spinlock focused on its duties and not accidentally conflate 1779 * coverage to the submission's irq state. (Similarly, although we 1780 * shouldn't need to disable irq around the manipulation of the 1781 * submission's irq state, we also wish to remind ourselves that 1782 * it is irq state.) 1783 */ 1784 spin_lock_irqsave(&sched_engine->lock, flags); 1785 1786 /* Flush the queued requests to the timeline list (for retiring). */ 1787 while ((rb = rb_first_cached(&sched_engine->queue))) { 1788 struct i915_priolist *p = to_priolist(rb); 1789 1790 priolist_for_each_request_consume(rq, rn, p) { 1791 list_del_init(&rq->sched.link); 1792 1793 __i915_request_submit(rq); 1794 1795 i915_request_put(i915_request_mark_eio(rq)); 1796 } 1797 1798 rb_erase_cached(&p->node, &sched_engine->queue); 1799 i915_priolist_free(p); 1800 } 1801 1802 /* Remaining _unready_ requests will be nop'ed when submitted */ 1803 1804 sched_engine->queue_priority_hint = INT_MIN; 1805 sched_engine->queue = RB_ROOT_CACHED; 1806 1807 spin_unlock_irqrestore(&sched_engine->lock, flags); 1808 } 1809 1810 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1811 { 1812 struct intel_context *ce; 1813 unsigned long index; 1814 unsigned long flags; 1815 1816 xa_lock_irqsave(&guc->context_lookup, flags); 1817 xa_for_each(&guc->context_lookup, index, ce) { 1818 if (!kref_get_unless_zero(&ce->ref)) 1819 continue; 1820 1821 xa_unlock(&guc->context_lookup); 1822 1823 if (intel_context_is_pinned(ce) && 1824 !intel_context_is_child(ce)) 1825 guc_cancel_context_requests(ce); 1826 1827 intel_context_put(ce); 1828 1829 xa_lock(&guc->context_lookup); 1830 } 1831 xa_unlock_irqrestore(&guc->context_lookup, flags); 1832 1833 guc_cancel_sched_engine_requests(guc->sched_engine); 1834 1835 /* GuC is blown away, drop all references to contexts */ 1836 xa_destroy(&guc->context_lookup); 1837 } 1838 1839 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1840 { 1841 /* Reset called during driver load or during wedge? */ 1842 if (unlikely(!guc_submission_initialized(guc) || 1843 intel_gt_is_wedged(guc_to_gt(guc)))) { 1844 return; 1845 } 1846 1847 /* 1848 * Technically possible for either of these values to be non-zero here, 1849 * but very unlikely + harmless. Regardless let's add a warn so we can 1850 * see in CI if this happens frequently / a precursor to taking down the 1851 * machine. 1852 */ 1853 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1854 atomic_set(&guc->outstanding_submission_g2h, 0); 1855 1856 intel_guc_global_policies_update(guc); 1857 enable_submission(guc); 1858 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1859 } 1860 1861 static void destroyed_worker_func(struct work_struct *w); 1862 static void reset_fail_worker_func(struct work_struct *w); 1863 1864 /* 1865 * Set up the memory resources to be shared with the GuC (via the GGTT) 1866 * at firmware loading time. 1867 */ 1868 int intel_guc_submission_init(struct intel_guc *guc) 1869 { 1870 struct intel_gt *gt = guc_to_gt(guc); 1871 1872 if (guc->submission_initialized) 1873 return 0; 1874 1875 guc->submission_state.guc_ids_bitmap = 1876 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1877 if (!guc->submission_state.guc_ids_bitmap) 1878 return -ENOMEM; 1879 1880 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1881 guc->timestamp.shift = gpm_timestamp_shift(gt); 1882 guc->submission_initialized = true; 1883 1884 return 0; 1885 } 1886 1887 void intel_guc_submission_fini(struct intel_guc *guc) 1888 { 1889 if (!guc->submission_initialized) 1890 return; 1891 1892 guc_flush_destroyed_contexts(guc); 1893 i915_sched_engine_put(guc->sched_engine); 1894 bitmap_free(guc->submission_state.guc_ids_bitmap); 1895 guc->submission_initialized = false; 1896 } 1897 1898 static inline void queue_request(struct i915_sched_engine *sched_engine, 1899 struct i915_request *rq, 1900 int prio) 1901 { 1902 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1903 list_add_tail(&rq->sched.link, 1904 i915_sched_lookup_priolist(sched_engine, prio)); 1905 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1906 tasklet_hi_schedule(&sched_engine->tasklet); 1907 } 1908 1909 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1910 struct i915_request *rq) 1911 { 1912 int ret = 0; 1913 1914 __i915_request_submit(rq); 1915 1916 trace_i915_request_in(rq, 0); 1917 1918 if (is_multi_lrc_rq(rq)) { 1919 if (multi_lrc_submit(rq)) { 1920 ret = guc_wq_item_append(guc, rq); 1921 if (!ret) 1922 ret = guc_add_request(guc, rq); 1923 } 1924 } else { 1925 guc_set_lrc_tail(rq); 1926 ret = guc_add_request(guc, rq); 1927 } 1928 1929 if (unlikely(ret == -EPIPE)) 1930 disable_submission(guc); 1931 1932 return ret; 1933 } 1934 1935 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1936 { 1937 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1938 struct intel_context *ce = request_to_scheduling_context(rq); 1939 1940 return submission_disabled(guc) || guc->stalled_request || 1941 !i915_sched_engine_is_empty(sched_engine) || 1942 !ctx_id_mapped(guc, ce->guc_id.id); 1943 } 1944 1945 static void guc_submit_request(struct i915_request *rq) 1946 { 1947 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1948 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1949 unsigned long flags; 1950 1951 /* Will be called from irq-context when using foreign fences. */ 1952 spin_lock_irqsave(&sched_engine->lock, flags); 1953 1954 if (need_tasklet(guc, rq)) 1955 queue_request(sched_engine, rq, rq_prio(rq)); 1956 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1957 tasklet_hi_schedule(&sched_engine->tasklet); 1958 1959 spin_unlock_irqrestore(&sched_engine->lock, flags); 1960 } 1961 1962 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1963 { 1964 int ret; 1965 1966 GEM_BUG_ON(intel_context_is_child(ce)); 1967 1968 if (intel_context_is_parent(ce)) 1969 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1970 NUMBER_MULTI_LRC_GUC_ID(guc), 1971 order_base_2(ce->parallel.number_children 1972 + 1)); 1973 else 1974 ret = ida_simple_get(&guc->submission_state.guc_ids, 1975 NUMBER_MULTI_LRC_GUC_ID(guc), 1976 guc->submission_state.num_guc_ids, 1977 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1978 __GFP_NOWARN); 1979 if (unlikely(ret < 0)) 1980 return ret; 1981 1982 ce->guc_id.id = ret; 1983 return 0; 1984 } 1985 1986 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1987 { 1988 GEM_BUG_ON(intel_context_is_child(ce)); 1989 1990 if (!context_guc_id_invalid(ce)) { 1991 if (intel_context_is_parent(ce)) 1992 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 1993 ce->guc_id.id, 1994 order_base_2(ce->parallel.number_children 1995 + 1)); 1996 else 1997 ida_simple_remove(&guc->submission_state.guc_ids, 1998 ce->guc_id.id); 1999 clr_ctx_id_mapping(guc, ce->guc_id.id); 2000 set_context_guc_id_invalid(ce); 2001 } 2002 if (!list_empty(&ce->guc_id.link)) 2003 list_del_init(&ce->guc_id.link); 2004 } 2005 2006 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2007 { 2008 unsigned long flags; 2009 2010 spin_lock_irqsave(&guc->submission_state.lock, flags); 2011 __release_guc_id(guc, ce); 2012 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2013 } 2014 2015 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2016 { 2017 struct intel_context *cn; 2018 2019 lockdep_assert_held(&guc->submission_state.lock); 2020 GEM_BUG_ON(intel_context_is_child(ce)); 2021 GEM_BUG_ON(intel_context_is_parent(ce)); 2022 2023 if (!list_empty(&guc->submission_state.guc_id_list)) { 2024 cn = list_first_entry(&guc->submission_state.guc_id_list, 2025 struct intel_context, 2026 guc_id.link); 2027 2028 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2029 GEM_BUG_ON(context_guc_id_invalid(cn)); 2030 GEM_BUG_ON(intel_context_is_child(cn)); 2031 GEM_BUG_ON(intel_context_is_parent(cn)); 2032 2033 list_del_init(&cn->guc_id.link); 2034 ce->guc_id.id = cn->guc_id.id; 2035 2036 spin_lock(&cn->guc_state.lock); 2037 clr_context_registered(cn); 2038 spin_unlock(&cn->guc_state.lock); 2039 2040 set_context_guc_id_invalid(cn); 2041 2042 #ifdef CONFIG_DRM_I915_SELFTEST 2043 guc->number_guc_id_stolen++; 2044 #endif 2045 2046 return 0; 2047 } else { 2048 return -EAGAIN; 2049 } 2050 } 2051 2052 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2053 { 2054 int ret; 2055 2056 lockdep_assert_held(&guc->submission_state.lock); 2057 GEM_BUG_ON(intel_context_is_child(ce)); 2058 2059 ret = new_guc_id(guc, ce); 2060 if (unlikely(ret < 0)) { 2061 if (intel_context_is_parent(ce)) 2062 return -ENOSPC; 2063 2064 ret = steal_guc_id(guc, ce); 2065 if (ret < 0) 2066 return ret; 2067 } 2068 2069 if (intel_context_is_parent(ce)) { 2070 struct intel_context *child; 2071 int i = 1; 2072 2073 for_each_child(ce, child) 2074 child->guc_id.id = ce->guc_id.id + i++; 2075 } 2076 2077 return 0; 2078 } 2079 2080 #define PIN_GUC_ID_TRIES 4 2081 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2082 { 2083 int ret = 0; 2084 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2085 2086 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2087 2088 try_again: 2089 spin_lock_irqsave(&guc->submission_state.lock, flags); 2090 2091 might_lock(&ce->guc_state.lock); 2092 2093 if (context_guc_id_invalid(ce)) { 2094 ret = assign_guc_id(guc, ce); 2095 if (ret) 2096 goto out_unlock; 2097 ret = 1; /* Indidcates newly assigned guc_id */ 2098 } 2099 if (!list_empty(&ce->guc_id.link)) 2100 list_del_init(&ce->guc_id.link); 2101 atomic_inc(&ce->guc_id.ref); 2102 2103 out_unlock: 2104 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2105 2106 /* 2107 * -EAGAIN indicates no guc_id are available, let's retire any 2108 * outstanding requests to see if that frees up a guc_id. If the first 2109 * retire didn't help, insert a sleep with the timeslice duration before 2110 * attempting to retire more requests. Double the sleep period each 2111 * subsequent pass before finally giving up. The sleep period has max of 2112 * 100ms and minimum of 1ms. 2113 */ 2114 if (ret == -EAGAIN && --tries) { 2115 if (PIN_GUC_ID_TRIES - tries > 1) { 2116 unsigned int timeslice_shifted = 2117 ce->engine->props.timeslice_duration_ms << 2118 (PIN_GUC_ID_TRIES - tries - 2); 2119 unsigned int max = min_t(unsigned int, 100, 2120 timeslice_shifted); 2121 2122 msleep(max_t(unsigned int, max, 1)); 2123 } 2124 intel_gt_retire_requests(guc_to_gt(guc)); 2125 goto try_again; 2126 } 2127 2128 return ret; 2129 } 2130 2131 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2132 { 2133 unsigned long flags; 2134 2135 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2136 GEM_BUG_ON(intel_context_is_child(ce)); 2137 2138 if (unlikely(context_guc_id_invalid(ce) || 2139 intel_context_is_parent(ce))) 2140 return; 2141 2142 spin_lock_irqsave(&guc->submission_state.lock, flags); 2143 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2144 !atomic_read(&ce->guc_id.ref)) 2145 list_add_tail(&ce->guc_id.link, 2146 &guc->submission_state.guc_id_list); 2147 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2148 } 2149 2150 static int __guc_action_register_multi_lrc(struct intel_guc *guc, 2151 struct intel_context *ce, 2152 struct guc_ctxt_registration_info *info, 2153 bool loop) 2154 { 2155 struct intel_context *child; 2156 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2157 int len = 0; 2158 u32 next_id; 2159 2160 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2161 2162 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2163 action[len++] = info->flags; 2164 action[len++] = info->context_idx; 2165 action[len++] = info->engine_class; 2166 action[len++] = info->engine_submit_mask; 2167 action[len++] = info->wq_desc_lo; 2168 action[len++] = info->wq_desc_hi; 2169 action[len++] = info->wq_base_lo; 2170 action[len++] = info->wq_base_hi; 2171 action[len++] = info->wq_size; 2172 action[len++] = ce->parallel.number_children + 1; 2173 action[len++] = info->hwlrca_lo; 2174 action[len++] = info->hwlrca_hi; 2175 2176 next_id = info->context_idx + 1; 2177 for_each_child(ce, child) { 2178 GEM_BUG_ON(next_id++ != child->guc_id.id); 2179 2180 /* 2181 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2182 * only supports 32 bit currently. 2183 */ 2184 action[len++] = lower_32_bits(child->lrc.lrca); 2185 action[len++] = upper_32_bits(child->lrc.lrca); 2186 } 2187 2188 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2189 2190 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2191 } 2192 2193 static int __guc_action_register_context(struct intel_guc *guc, 2194 struct guc_ctxt_registration_info *info, 2195 bool loop) 2196 { 2197 u32 action[] = { 2198 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2199 info->flags, 2200 info->context_idx, 2201 info->engine_class, 2202 info->engine_submit_mask, 2203 info->wq_desc_lo, 2204 info->wq_desc_hi, 2205 info->wq_base_lo, 2206 info->wq_base_hi, 2207 info->wq_size, 2208 info->hwlrca_lo, 2209 info->hwlrca_hi, 2210 }; 2211 2212 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2213 0, loop); 2214 } 2215 2216 static void prepare_context_registration_info(struct intel_context *ce, 2217 struct guc_ctxt_registration_info *info); 2218 2219 static int register_context(struct intel_context *ce, bool loop) 2220 { 2221 struct guc_ctxt_registration_info info; 2222 struct intel_guc *guc = ce_to_guc(ce); 2223 int ret; 2224 2225 GEM_BUG_ON(intel_context_is_child(ce)); 2226 trace_intel_context_register(ce); 2227 2228 prepare_context_registration_info(ce, &info); 2229 2230 if (intel_context_is_parent(ce)) 2231 ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); 2232 else 2233 ret = __guc_action_register_context(guc, &info, loop); 2234 if (likely(!ret)) { 2235 unsigned long flags; 2236 2237 spin_lock_irqsave(&ce->guc_state.lock, flags); 2238 set_context_registered(ce); 2239 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2240 2241 guc_context_policy_init(ce, loop); 2242 } 2243 2244 return ret; 2245 } 2246 2247 static int __guc_action_deregister_context(struct intel_guc *guc, 2248 u32 guc_id) 2249 { 2250 u32 action[] = { 2251 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2252 guc_id, 2253 }; 2254 2255 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2256 G2H_LEN_DW_DEREGISTER_CONTEXT, 2257 true); 2258 } 2259 2260 static int deregister_context(struct intel_context *ce, u32 guc_id) 2261 { 2262 struct intel_guc *guc = ce_to_guc(ce); 2263 2264 GEM_BUG_ON(intel_context_is_child(ce)); 2265 trace_intel_context_deregister(ce); 2266 2267 return __guc_action_deregister_context(guc, guc_id); 2268 } 2269 2270 static inline void clear_children_join_go_memory(struct intel_context *ce) 2271 { 2272 struct parent_scratch *ps = __get_parent_scratch(ce); 2273 int i; 2274 2275 ps->go.semaphore = 0; 2276 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2277 ps->join[i].semaphore = 0; 2278 } 2279 2280 static inline u32 get_children_go_value(struct intel_context *ce) 2281 { 2282 return __get_parent_scratch(ce)->go.semaphore; 2283 } 2284 2285 static inline u32 get_children_join_value(struct intel_context *ce, 2286 u8 child_index) 2287 { 2288 return __get_parent_scratch(ce)->join[child_index].semaphore; 2289 } 2290 2291 struct context_policy { 2292 u32 count; 2293 struct guc_update_context_policy h2g; 2294 }; 2295 2296 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2297 { 2298 size_t bytes = sizeof(policy->h2g.header) + 2299 (sizeof(policy->h2g.klv[0]) * policy->count); 2300 2301 return bytes / sizeof(u32); 2302 } 2303 2304 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2305 { 2306 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2307 policy->h2g.header.ctx_id = guc_id; 2308 policy->count = 0; 2309 } 2310 2311 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2312 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2313 { \ 2314 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2315 policy->h2g.klv[policy->count].kl = \ 2316 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2317 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2318 policy->h2g.klv[policy->count].value = data; \ 2319 policy->count++; \ 2320 } 2321 2322 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2323 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2324 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2325 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2326 2327 #undef MAKE_CONTEXT_POLICY_ADD 2328 2329 static int __guc_context_set_context_policies(struct intel_guc *guc, 2330 struct context_policy *policy, 2331 bool loop) 2332 { 2333 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2334 __guc_context_policy_action_size(policy), 2335 0, loop); 2336 } 2337 2338 static int guc_context_policy_init(struct intel_context *ce, bool loop) 2339 { 2340 struct intel_engine_cs *engine = ce->engine; 2341 struct intel_guc *guc = &engine->gt->uc.guc; 2342 struct context_policy policy; 2343 u32 execution_quantum; 2344 u32 preemption_timeout; 2345 bool missing = false; 2346 unsigned long flags; 2347 int ret; 2348 2349 /* NB: For both of these, zero means disabled. */ 2350 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2351 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2352 2353 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2354 2355 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2356 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2357 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2358 2359 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2360 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2361 2362 ret = __guc_context_set_context_policies(guc, &policy, loop); 2363 missing = ret != 0; 2364 2365 if (!missing && intel_context_is_parent(ce)) { 2366 struct intel_context *child; 2367 2368 for_each_child(ce, child) { 2369 __guc_context_policy_start_klv(&policy, child->guc_id.id); 2370 2371 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2372 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2373 2374 child->guc_state.prio = ce->guc_state.prio; 2375 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2376 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2377 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2378 2379 ret = __guc_context_set_context_policies(guc, &policy, loop); 2380 if (ret) { 2381 missing = true; 2382 break; 2383 } 2384 } 2385 } 2386 2387 spin_lock_irqsave(&ce->guc_state.lock, flags); 2388 if (missing) 2389 set_context_policy_required(ce); 2390 else 2391 clr_context_policy_required(ce); 2392 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2393 2394 return ret; 2395 } 2396 2397 static void prepare_context_registration_info(struct intel_context *ce, 2398 struct guc_ctxt_registration_info *info) 2399 { 2400 struct intel_engine_cs *engine = ce->engine; 2401 struct intel_guc *guc = &engine->gt->uc.guc; 2402 u32 ctx_id = ce->guc_id.id; 2403 2404 GEM_BUG_ON(!engine->mask); 2405 2406 /* 2407 * Ensure LRC + CT vmas are is same region as write barrier is done 2408 * based on CT vma region. 2409 */ 2410 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2411 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2412 2413 memset(info, 0, sizeof(*info)); 2414 info->context_idx = ctx_id; 2415 info->engine_class = engine_class_to_guc_class(engine->class); 2416 info->engine_submit_mask = engine->logical_mask; 2417 /* 2418 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2419 * only supports 32 bit currently. 2420 */ 2421 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2422 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2423 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2424 2425 /* 2426 * If context is a parent, we need to register a process descriptor 2427 * describing a work queue and register all child contexts. 2428 */ 2429 if (intel_context_is_parent(ce)) { 2430 struct guc_sched_wq_desc *wq_desc; 2431 u64 wq_desc_offset, wq_base_offset; 2432 2433 ce->parallel.guc.wqi_tail = 0; 2434 ce->parallel.guc.wqi_head = 0; 2435 2436 wq_desc_offset = i915_ggtt_offset(ce->state) + 2437 __get_parent_scratch_offset(ce); 2438 wq_base_offset = i915_ggtt_offset(ce->state) + 2439 __get_wq_offset(ce); 2440 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2441 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2442 info->wq_base_lo = lower_32_bits(wq_base_offset); 2443 info->wq_base_hi = upper_32_bits(wq_base_offset); 2444 info->wq_size = WQ_SIZE; 2445 2446 wq_desc = __get_wq_desc(ce); 2447 memset(wq_desc, 0, sizeof(*wq_desc)); 2448 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2449 2450 clear_children_join_go_memory(ce); 2451 } 2452 } 2453 2454 static int try_context_registration(struct intel_context *ce, bool loop) 2455 { 2456 struct intel_engine_cs *engine = ce->engine; 2457 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2458 struct intel_guc *guc = &engine->gt->uc.guc; 2459 intel_wakeref_t wakeref; 2460 u32 ctx_id = ce->guc_id.id; 2461 bool context_registered; 2462 int ret = 0; 2463 2464 GEM_BUG_ON(!sched_state_is_init(ce)); 2465 2466 context_registered = ctx_id_mapped(guc, ctx_id); 2467 2468 clr_ctx_id_mapping(guc, ctx_id); 2469 set_ctx_id_mapping(guc, ctx_id, ce); 2470 2471 /* 2472 * The context_lookup xarray is used to determine if the hardware 2473 * context is currently registered. There are two cases in which it 2474 * could be registered either the guc_id has been stolen from another 2475 * context or the lrc descriptor address of this context has changed. In 2476 * either case the context needs to be deregistered with the GuC before 2477 * registering this context. 2478 */ 2479 if (context_registered) { 2480 bool disabled; 2481 unsigned long flags; 2482 2483 trace_intel_context_steal_guc_id(ce); 2484 GEM_BUG_ON(!loop); 2485 2486 /* Seal race with Reset */ 2487 spin_lock_irqsave(&ce->guc_state.lock, flags); 2488 disabled = submission_disabled(guc); 2489 if (likely(!disabled)) { 2490 set_context_wait_for_deregister_to_register(ce); 2491 intel_context_get(ce); 2492 } 2493 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2494 if (unlikely(disabled)) { 2495 clr_ctx_id_mapping(guc, ctx_id); 2496 return 0; /* Will get registered later */ 2497 } 2498 2499 /* 2500 * If stealing the guc_id, this ce has the same guc_id as the 2501 * context whose guc_id was stolen. 2502 */ 2503 with_intel_runtime_pm(runtime_pm, wakeref) 2504 ret = deregister_context(ce, ce->guc_id.id); 2505 if (unlikely(ret == -ENODEV)) 2506 ret = 0; /* Will get registered later */ 2507 } else { 2508 with_intel_runtime_pm(runtime_pm, wakeref) 2509 ret = register_context(ce, loop); 2510 if (unlikely(ret == -EBUSY)) { 2511 clr_ctx_id_mapping(guc, ctx_id); 2512 } else if (unlikely(ret == -ENODEV)) { 2513 clr_ctx_id_mapping(guc, ctx_id); 2514 ret = 0; /* Will get registered later */ 2515 } 2516 } 2517 2518 return ret; 2519 } 2520 2521 static int __guc_context_pre_pin(struct intel_context *ce, 2522 struct intel_engine_cs *engine, 2523 struct i915_gem_ww_ctx *ww, 2524 void **vaddr) 2525 { 2526 return lrc_pre_pin(ce, engine, ww, vaddr); 2527 } 2528 2529 static int __guc_context_pin(struct intel_context *ce, 2530 struct intel_engine_cs *engine, 2531 void *vaddr) 2532 { 2533 if (i915_ggtt_offset(ce->state) != 2534 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2535 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2536 2537 /* 2538 * GuC context gets pinned in guc_request_alloc. See that function for 2539 * explaination of why. 2540 */ 2541 2542 return lrc_pin(ce, engine, vaddr); 2543 } 2544 2545 static int guc_context_pre_pin(struct intel_context *ce, 2546 struct i915_gem_ww_ctx *ww, 2547 void **vaddr) 2548 { 2549 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2550 } 2551 2552 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2553 { 2554 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2555 2556 if (likely(!ret && !intel_context_is_barrier(ce))) 2557 intel_engine_pm_get(ce->engine); 2558 2559 return ret; 2560 } 2561 2562 static void guc_context_unpin(struct intel_context *ce) 2563 { 2564 struct intel_guc *guc = ce_to_guc(ce); 2565 2566 unpin_guc_id(guc, ce); 2567 lrc_unpin(ce); 2568 2569 if (likely(!intel_context_is_barrier(ce))) 2570 intel_engine_pm_put_async(ce->engine); 2571 } 2572 2573 static void guc_context_post_unpin(struct intel_context *ce) 2574 { 2575 lrc_post_unpin(ce); 2576 } 2577 2578 static void __guc_context_sched_enable(struct intel_guc *guc, 2579 struct intel_context *ce) 2580 { 2581 u32 action[] = { 2582 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2583 ce->guc_id.id, 2584 GUC_CONTEXT_ENABLE 2585 }; 2586 2587 trace_intel_context_sched_enable(ce); 2588 2589 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2590 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2591 } 2592 2593 static void __guc_context_sched_disable(struct intel_guc *guc, 2594 struct intel_context *ce, 2595 u16 guc_id) 2596 { 2597 u32 action[] = { 2598 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2599 guc_id, /* ce->guc_id.id not stable */ 2600 GUC_CONTEXT_DISABLE 2601 }; 2602 2603 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2604 2605 GEM_BUG_ON(intel_context_is_child(ce)); 2606 trace_intel_context_sched_disable(ce); 2607 2608 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2609 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2610 } 2611 2612 static void guc_blocked_fence_complete(struct intel_context *ce) 2613 { 2614 lockdep_assert_held(&ce->guc_state.lock); 2615 2616 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2617 i915_sw_fence_complete(&ce->guc_state.blocked); 2618 } 2619 2620 static void guc_blocked_fence_reinit(struct intel_context *ce) 2621 { 2622 lockdep_assert_held(&ce->guc_state.lock); 2623 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2624 2625 /* 2626 * This fence is always complete unless a pending schedule disable is 2627 * outstanding. We arm the fence here and complete it when we receive 2628 * the pending schedule disable complete message. 2629 */ 2630 i915_sw_fence_fini(&ce->guc_state.blocked); 2631 i915_sw_fence_reinit(&ce->guc_state.blocked); 2632 i915_sw_fence_await(&ce->guc_state.blocked); 2633 i915_sw_fence_commit(&ce->guc_state.blocked); 2634 } 2635 2636 static u16 prep_context_pending_disable(struct intel_context *ce) 2637 { 2638 lockdep_assert_held(&ce->guc_state.lock); 2639 2640 set_context_pending_disable(ce); 2641 clr_context_enabled(ce); 2642 guc_blocked_fence_reinit(ce); 2643 intel_context_get(ce); 2644 2645 return ce->guc_id.id; 2646 } 2647 2648 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2649 { 2650 struct intel_guc *guc = ce_to_guc(ce); 2651 unsigned long flags; 2652 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2653 intel_wakeref_t wakeref; 2654 u16 guc_id; 2655 bool enabled; 2656 2657 GEM_BUG_ON(intel_context_is_child(ce)); 2658 2659 spin_lock_irqsave(&ce->guc_state.lock, flags); 2660 2661 incr_context_blocked(ce); 2662 2663 enabled = context_enabled(ce); 2664 if (unlikely(!enabled || submission_disabled(guc))) { 2665 if (enabled) 2666 clr_context_enabled(ce); 2667 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2668 return &ce->guc_state.blocked; 2669 } 2670 2671 /* 2672 * We add +2 here as the schedule disable complete CTB handler calls 2673 * intel_context_sched_disable_unpin (-2 to pin_count). 2674 */ 2675 atomic_add(2, &ce->pin_count); 2676 2677 guc_id = prep_context_pending_disable(ce); 2678 2679 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2680 2681 with_intel_runtime_pm(runtime_pm, wakeref) 2682 __guc_context_sched_disable(guc, ce, guc_id); 2683 2684 return &ce->guc_state.blocked; 2685 } 2686 2687 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2688 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2689 #define SCHED_STATE_NO_UNBLOCK \ 2690 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2691 SCHED_STATE_PENDING_DISABLE | \ 2692 SCHED_STATE_BANNED) 2693 2694 static bool context_cant_unblock(struct intel_context *ce) 2695 { 2696 lockdep_assert_held(&ce->guc_state.lock); 2697 2698 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2699 context_guc_id_invalid(ce) || 2700 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2701 !intel_context_is_pinned(ce); 2702 } 2703 2704 static void guc_context_unblock(struct intel_context *ce) 2705 { 2706 struct intel_guc *guc = ce_to_guc(ce); 2707 unsigned long flags; 2708 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2709 intel_wakeref_t wakeref; 2710 bool enable; 2711 2712 GEM_BUG_ON(context_enabled(ce)); 2713 GEM_BUG_ON(intel_context_is_child(ce)); 2714 2715 spin_lock_irqsave(&ce->guc_state.lock, flags); 2716 2717 if (unlikely(submission_disabled(guc) || 2718 context_cant_unblock(ce))) { 2719 enable = false; 2720 } else { 2721 enable = true; 2722 set_context_pending_enable(ce); 2723 set_context_enabled(ce); 2724 intel_context_get(ce); 2725 } 2726 2727 decr_context_blocked(ce); 2728 2729 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2730 2731 if (enable) { 2732 with_intel_runtime_pm(runtime_pm, wakeref) 2733 __guc_context_sched_enable(guc, ce); 2734 } 2735 } 2736 2737 static void guc_context_cancel_request(struct intel_context *ce, 2738 struct i915_request *rq) 2739 { 2740 struct intel_context *block_context = 2741 request_to_scheduling_context(rq); 2742 2743 if (i915_sw_fence_signaled(&rq->submit)) { 2744 struct i915_sw_fence *fence; 2745 2746 intel_context_get(ce); 2747 fence = guc_context_block(block_context); 2748 i915_sw_fence_wait(fence); 2749 if (!i915_request_completed(rq)) { 2750 __i915_request_skip(rq); 2751 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2752 true); 2753 } 2754 2755 guc_context_unblock(block_context); 2756 intel_context_put(ce); 2757 } 2758 } 2759 2760 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2761 u16 guc_id, 2762 u32 preemption_timeout) 2763 { 2764 struct context_policy policy; 2765 2766 __guc_context_policy_start_klv(&policy, guc_id); 2767 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2768 __guc_context_set_context_policies(guc, &policy, true); 2769 } 2770 2771 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) 2772 { 2773 struct intel_guc *guc = ce_to_guc(ce); 2774 struct intel_runtime_pm *runtime_pm = 2775 &ce->engine->gt->i915->runtime_pm; 2776 intel_wakeref_t wakeref; 2777 unsigned long flags; 2778 2779 GEM_BUG_ON(intel_context_is_child(ce)); 2780 2781 guc_flush_submissions(guc); 2782 2783 spin_lock_irqsave(&ce->guc_state.lock, flags); 2784 set_context_banned(ce); 2785 2786 if (submission_disabled(guc) || 2787 (!context_enabled(ce) && !context_pending_disable(ce))) { 2788 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2789 2790 guc_cancel_context_requests(ce); 2791 intel_engine_signal_breadcrumbs(ce->engine); 2792 } else if (!context_pending_disable(ce)) { 2793 u16 guc_id; 2794 2795 /* 2796 * We add +2 here as the schedule disable complete CTB handler 2797 * calls intel_context_sched_disable_unpin (-2 to pin_count). 2798 */ 2799 atomic_add(2, &ce->pin_count); 2800 2801 guc_id = prep_context_pending_disable(ce); 2802 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2803 2804 /* 2805 * In addition to disabling scheduling, set the preemption 2806 * timeout to the minimum value (1 us) so the banned context 2807 * gets kicked off the HW ASAP. 2808 */ 2809 with_intel_runtime_pm(runtime_pm, wakeref) { 2810 __guc_context_set_preemption_timeout(guc, guc_id, 1); 2811 __guc_context_sched_disable(guc, ce, guc_id); 2812 } 2813 } else { 2814 if (!context_guc_id_invalid(ce)) 2815 with_intel_runtime_pm(runtime_pm, wakeref) 2816 __guc_context_set_preemption_timeout(guc, 2817 ce->guc_id.id, 2818 1); 2819 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2820 } 2821 } 2822 2823 static void guc_context_sched_disable(struct intel_context *ce) 2824 { 2825 struct intel_guc *guc = ce_to_guc(ce); 2826 unsigned long flags; 2827 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 2828 intel_wakeref_t wakeref; 2829 u16 guc_id; 2830 2831 GEM_BUG_ON(intel_context_is_child(ce)); 2832 2833 spin_lock_irqsave(&ce->guc_state.lock, flags); 2834 2835 /* 2836 * We have to check if the context has been disabled by another thread, 2837 * check if submssion has been disabled to seal a race with reset and 2838 * finally check if any more requests have been committed to the 2839 * context ensursing that a request doesn't slip through the 2840 * 'context_pending_disable' fence. 2841 */ 2842 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 2843 context_has_committed_requests(ce))) { 2844 clr_context_enabled(ce); 2845 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2846 goto unpin; 2847 } 2848 guc_id = prep_context_pending_disable(ce); 2849 2850 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2851 2852 with_intel_runtime_pm(runtime_pm, wakeref) 2853 __guc_context_sched_disable(guc, ce, guc_id); 2854 2855 return; 2856 unpin: 2857 intel_context_sched_disable_unpin(ce); 2858 } 2859 2860 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 2861 { 2862 struct intel_guc *guc = ce_to_guc(ce); 2863 struct intel_gt *gt = guc_to_gt(guc); 2864 unsigned long flags; 2865 bool disabled; 2866 2867 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 2868 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 2869 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 2870 GEM_BUG_ON(context_enabled(ce)); 2871 2872 /* Seal race with Reset */ 2873 spin_lock_irqsave(&ce->guc_state.lock, flags); 2874 disabled = submission_disabled(guc); 2875 if (likely(!disabled)) { 2876 __intel_gt_pm_get(gt); 2877 set_context_destroyed(ce); 2878 clr_context_registered(ce); 2879 } 2880 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2881 if (unlikely(disabled)) { 2882 release_guc_id(guc, ce); 2883 __guc_context_destroy(ce); 2884 return; 2885 } 2886 2887 deregister_context(ce, ce->guc_id.id); 2888 } 2889 2890 static void __guc_context_destroy(struct intel_context *ce) 2891 { 2892 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 2893 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 2894 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 2895 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 2896 GEM_BUG_ON(ce->guc_state.number_committed_requests); 2897 2898 lrc_fini(ce); 2899 intel_context_fini(ce); 2900 2901 if (intel_engine_is_virtual(ce->engine)) { 2902 struct guc_virtual_engine *ve = 2903 container_of(ce, typeof(*ve), context); 2904 2905 if (ve->base.breadcrumbs) 2906 intel_breadcrumbs_put(ve->base.breadcrumbs); 2907 2908 kfree(ve); 2909 } else { 2910 intel_context_free(ce); 2911 } 2912 } 2913 2914 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 2915 { 2916 struct intel_context *ce; 2917 unsigned long flags; 2918 2919 GEM_BUG_ON(!submission_disabled(guc) && 2920 guc_submission_initialized(guc)); 2921 2922 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2923 spin_lock_irqsave(&guc->submission_state.lock, flags); 2924 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2925 struct intel_context, 2926 destroyed_link); 2927 if (ce) 2928 list_del_init(&ce->destroyed_link); 2929 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2930 2931 if (!ce) 2932 break; 2933 2934 release_guc_id(guc, ce); 2935 __guc_context_destroy(ce); 2936 } 2937 } 2938 2939 static void deregister_destroyed_contexts(struct intel_guc *guc) 2940 { 2941 struct intel_context *ce; 2942 unsigned long flags; 2943 2944 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 2945 spin_lock_irqsave(&guc->submission_state.lock, flags); 2946 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 2947 struct intel_context, 2948 destroyed_link); 2949 if (ce) 2950 list_del_init(&ce->destroyed_link); 2951 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2952 2953 if (!ce) 2954 break; 2955 2956 guc_lrc_desc_unpin(ce); 2957 } 2958 } 2959 2960 static void destroyed_worker_func(struct work_struct *w) 2961 { 2962 struct intel_guc *guc = container_of(w, struct intel_guc, 2963 submission_state.destroyed_worker); 2964 struct intel_gt *gt = guc_to_gt(guc); 2965 int tmp; 2966 2967 with_intel_gt_pm(gt, tmp) 2968 deregister_destroyed_contexts(guc); 2969 } 2970 2971 static void guc_context_destroy(struct kref *kref) 2972 { 2973 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2974 struct intel_guc *guc = ce_to_guc(ce); 2975 unsigned long flags; 2976 bool destroy; 2977 2978 /* 2979 * If the guc_id is invalid this context has been stolen and we can free 2980 * it immediately. Also can be freed immediately if the context is not 2981 * registered with the GuC or the GuC is in the middle of a reset. 2982 */ 2983 spin_lock_irqsave(&guc->submission_state.lock, flags); 2984 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 2985 !ctx_id_mapped(guc, ce->guc_id.id); 2986 if (likely(!destroy)) { 2987 if (!list_empty(&ce->guc_id.link)) 2988 list_del_init(&ce->guc_id.link); 2989 list_add_tail(&ce->destroyed_link, 2990 &guc->submission_state.destroyed_contexts); 2991 } else { 2992 __release_guc_id(guc, ce); 2993 } 2994 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2995 if (unlikely(destroy)) { 2996 __guc_context_destroy(ce); 2997 return; 2998 } 2999 3000 /* 3001 * We use a worker to issue the H2G to deregister the context as we can 3002 * take the GT PM for the first time which isn't allowed from an atomic 3003 * context. 3004 */ 3005 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3006 } 3007 3008 static int guc_context_alloc(struct intel_context *ce) 3009 { 3010 return lrc_alloc(ce, ce->engine); 3011 } 3012 3013 static void __guc_context_set_prio(struct intel_guc *guc, 3014 struct intel_context *ce) 3015 { 3016 struct context_policy policy; 3017 3018 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3019 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3020 __guc_context_set_context_policies(guc, &policy, true); 3021 } 3022 3023 static void guc_context_set_prio(struct intel_guc *guc, 3024 struct intel_context *ce, 3025 u8 prio) 3026 { 3027 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3028 prio > GUC_CLIENT_PRIORITY_NORMAL); 3029 lockdep_assert_held(&ce->guc_state.lock); 3030 3031 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3032 !context_registered(ce)) { 3033 ce->guc_state.prio = prio; 3034 return; 3035 } 3036 3037 ce->guc_state.prio = prio; 3038 __guc_context_set_prio(guc, ce); 3039 3040 trace_intel_context_set_prio(ce); 3041 } 3042 3043 static inline u8 map_i915_prio_to_guc_prio(int prio) 3044 { 3045 if (prio == I915_PRIORITY_NORMAL) 3046 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3047 else if (prio < I915_PRIORITY_NORMAL) 3048 return GUC_CLIENT_PRIORITY_NORMAL; 3049 else if (prio < I915_PRIORITY_DISPLAY) 3050 return GUC_CLIENT_PRIORITY_HIGH; 3051 else 3052 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3053 } 3054 3055 static inline void add_context_inflight_prio(struct intel_context *ce, 3056 u8 guc_prio) 3057 { 3058 lockdep_assert_held(&ce->guc_state.lock); 3059 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3060 3061 ++ce->guc_state.prio_count[guc_prio]; 3062 3063 /* Overflow protection */ 3064 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3065 } 3066 3067 static inline void sub_context_inflight_prio(struct intel_context *ce, 3068 u8 guc_prio) 3069 { 3070 lockdep_assert_held(&ce->guc_state.lock); 3071 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3072 3073 /* Underflow protection */ 3074 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3075 3076 --ce->guc_state.prio_count[guc_prio]; 3077 } 3078 3079 static inline void update_context_prio(struct intel_context *ce) 3080 { 3081 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3082 int i; 3083 3084 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3085 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3086 3087 lockdep_assert_held(&ce->guc_state.lock); 3088 3089 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3090 if (ce->guc_state.prio_count[i]) { 3091 guc_context_set_prio(guc, ce, i); 3092 break; 3093 } 3094 } 3095 } 3096 3097 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3098 { 3099 /* Lower value is higher priority */ 3100 return new_guc_prio < old_guc_prio; 3101 } 3102 3103 static void add_to_context(struct i915_request *rq) 3104 { 3105 struct intel_context *ce = request_to_scheduling_context(rq); 3106 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3107 3108 GEM_BUG_ON(intel_context_is_child(ce)); 3109 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3110 3111 spin_lock(&ce->guc_state.lock); 3112 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3113 3114 if (rq->guc_prio == GUC_PRIO_INIT) { 3115 rq->guc_prio = new_guc_prio; 3116 add_context_inflight_prio(ce, rq->guc_prio); 3117 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3118 sub_context_inflight_prio(ce, rq->guc_prio); 3119 rq->guc_prio = new_guc_prio; 3120 add_context_inflight_prio(ce, rq->guc_prio); 3121 } 3122 update_context_prio(ce); 3123 3124 spin_unlock(&ce->guc_state.lock); 3125 } 3126 3127 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3128 { 3129 lockdep_assert_held(&ce->guc_state.lock); 3130 3131 if (rq->guc_prio != GUC_PRIO_INIT && 3132 rq->guc_prio != GUC_PRIO_FINI) { 3133 sub_context_inflight_prio(ce, rq->guc_prio); 3134 update_context_prio(ce); 3135 } 3136 rq->guc_prio = GUC_PRIO_FINI; 3137 } 3138 3139 static void remove_from_context(struct i915_request *rq) 3140 { 3141 struct intel_context *ce = request_to_scheduling_context(rq); 3142 3143 GEM_BUG_ON(intel_context_is_child(ce)); 3144 3145 spin_lock_irq(&ce->guc_state.lock); 3146 3147 list_del_init(&rq->sched.link); 3148 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3149 3150 /* Prevent further __await_execution() registering a cb, then flush */ 3151 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3152 3153 guc_prio_fini(rq, ce); 3154 3155 decr_context_committed_requests(ce); 3156 3157 spin_unlock_irq(&ce->guc_state.lock); 3158 3159 atomic_dec(&ce->guc_id.ref); 3160 i915_request_notify_execute_cb_imm(rq); 3161 } 3162 3163 static const struct intel_context_ops guc_context_ops = { 3164 .alloc = guc_context_alloc, 3165 3166 .pre_pin = guc_context_pre_pin, 3167 .pin = guc_context_pin, 3168 .unpin = guc_context_unpin, 3169 .post_unpin = guc_context_post_unpin, 3170 3171 .ban = guc_context_ban, 3172 3173 .cancel_request = guc_context_cancel_request, 3174 3175 .enter = intel_context_enter_engine, 3176 .exit = intel_context_exit_engine, 3177 3178 .sched_disable = guc_context_sched_disable, 3179 3180 .reset = lrc_reset, 3181 .destroy = guc_context_destroy, 3182 3183 .create_virtual = guc_create_virtual, 3184 .create_parallel = guc_create_parallel, 3185 }; 3186 3187 static void submit_work_cb(struct irq_work *wrk) 3188 { 3189 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3190 3191 might_lock(&rq->engine->sched_engine->lock); 3192 i915_sw_fence_complete(&rq->submit); 3193 } 3194 3195 static void __guc_signal_context_fence(struct intel_context *ce) 3196 { 3197 struct i915_request *rq, *rn; 3198 3199 lockdep_assert_held(&ce->guc_state.lock); 3200 3201 if (!list_empty(&ce->guc_state.fences)) 3202 trace_intel_context_fence_release(ce); 3203 3204 /* 3205 * Use an IRQ to ensure locking order of sched_engine->lock -> 3206 * ce->guc_state.lock is preserved. 3207 */ 3208 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3209 guc_fence_link) { 3210 list_del(&rq->guc_fence_link); 3211 irq_work_queue(&rq->submit_work); 3212 } 3213 3214 INIT_LIST_HEAD(&ce->guc_state.fences); 3215 } 3216 3217 static void guc_signal_context_fence(struct intel_context *ce) 3218 { 3219 unsigned long flags; 3220 3221 GEM_BUG_ON(intel_context_is_child(ce)); 3222 3223 spin_lock_irqsave(&ce->guc_state.lock, flags); 3224 clr_context_wait_for_deregister_to_register(ce); 3225 __guc_signal_context_fence(ce); 3226 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3227 } 3228 3229 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3230 { 3231 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3232 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3233 !submission_disabled(ce_to_guc(ce)); 3234 } 3235 3236 static void guc_context_init(struct intel_context *ce) 3237 { 3238 const struct i915_gem_context *ctx; 3239 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3240 3241 rcu_read_lock(); 3242 ctx = rcu_dereference(ce->gem_context); 3243 if (ctx) 3244 prio = ctx->sched.priority; 3245 rcu_read_unlock(); 3246 3247 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3248 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3249 } 3250 3251 static int guc_request_alloc(struct i915_request *rq) 3252 { 3253 struct intel_context *ce = request_to_scheduling_context(rq); 3254 struct intel_guc *guc = ce_to_guc(ce); 3255 unsigned long flags; 3256 int ret; 3257 3258 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3259 3260 /* 3261 * Flush enough space to reduce the likelihood of waiting after 3262 * we start building the request - in which case we will just 3263 * have to repeat work. 3264 */ 3265 rq->reserved_space += GUC_REQUEST_SIZE; 3266 3267 /* 3268 * Note that after this point, we have committed to using 3269 * this request as it is being used to both track the 3270 * state of engine initialisation and liveness of the 3271 * golden renderstate above. Think twice before you try 3272 * to cancel/unwind this request now. 3273 */ 3274 3275 /* Unconditionally invalidate GPU caches and TLBs. */ 3276 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3277 if (ret) 3278 return ret; 3279 3280 rq->reserved_space -= GUC_REQUEST_SIZE; 3281 3282 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3283 guc_context_init(ce); 3284 3285 /* 3286 * Call pin_guc_id here rather than in the pinning step as with 3287 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3288 * guc_id and creating horrible race conditions. This is especially bad 3289 * when guc_id are being stolen due to over subscription. By the time 3290 * this function is reached, it is guaranteed that the guc_id will be 3291 * persistent until the generated request is retired. Thus, sealing these 3292 * race conditions. It is still safe to fail here if guc_id are 3293 * exhausted and return -EAGAIN to the user indicating that they can try 3294 * again in the future. 3295 * 3296 * There is no need for a lock here as the timeline mutex ensures at 3297 * most one context can be executing this code path at once. The 3298 * guc_id_ref is incremented once for every request in flight and 3299 * decremented on each retire. When it is zero, a lock around the 3300 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3301 */ 3302 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3303 goto out; 3304 3305 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3306 if (unlikely(ret < 0)) 3307 return ret; 3308 if (context_needs_register(ce, !!ret)) { 3309 ret = try_context_registration(ce, true); 3310 if (unlikely(ret)) { /* unwind */ 3311 if (ret == -EPIPE) { 3312 disable_submission(guc); 3313 goto out; /* GPU will be reset */ 3314 } 3315 atomic_dec(&ce->guc_id.ref); 3316 unpin_guc_id(guc, ce); 3317 return ret; 3318 } 3319 } 3320 3321 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3322 3323 out: 3324 /* 3325 * We block all requests on this context if a G2H is pending for a 3326 * schedule disable or context deregistration as the GuC will fail a 3327 * schedule enable or context registration if either G2H is pending 3328 * respectfully. Once a G2H returns, the fence is released that is 3329 * blocking these requests (see guc_signal_context_fence). 3330 */ 3331 spin_lock_irqsave(&ce->guc_state.lock, flags); 3332 if (context_wait_for_deregister_to_register(ce) || 3333 context_pending_disable(ce)) { 3334 init_irq_work(&rq->submit_work, submit_work_cb); 3335 i915_sw_fence_await(&rq->submit); 3336 3337 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3338 } 3339 incr_context_committed_requests(ce); 3340 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3341 3342 return 0; 3343 } 3344 3345 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3346 struct i915_gem_ww_ctx *ww, 3347 void **vaddr) 3348 { 3349 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3350 3351 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3352 } 3353 3354 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3355 { 3356 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3357 int ret = __guc_context_pin(ce, engine, vaddr); 3358 intel_engine_mask_t tmp, mask = ce->engine->mask; 3359 3360 if (likely(!ret)) 3361 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3362 intel_engine_pm_get(engine); 3363 3364 return ret; 3365 } 3366 3367 static void guc_virtual_context_unpin(struct intel_context *ce) 3368 { 3369 intel_engine_mask_t tmp, mask = ce->engine->mask; 3370 struct intel_engine_cs *engine; 3371 struct intel_guc *guc = ce_to_guc(ce); 3372 3373 GEM_BUG_ON(context_enabled(ce)); 3374 GEM_BUG_ON(intel_context_is_barrier(ce)); 3375 3376 unpin_guc_id(guc, ce); 3377 lrc_unpin(ce); 3378 3379 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3380 intel_engine_pm_put_async(engine); 3381 } 3382 3383 static void guc_virtual_context_enter(struct intel_context *ce) 3384 { 3385 intel_engine_mask_t tmp, mask = ce->engine->mask; 3386 struct intel_engine_cs *engine; 3387 3388 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3389 intel_engine_pm_get(engine); 3390 3391 intel_timeline_enter(ce->timeline); 3392 } 3393 3394 static void guc_virtual_context_exit(struct intel_context *ce) 3395 { 3396 intel_engine_mask_t tmp, mask = ce->engine->mask; 3397 struct intel_engine_cs *engine; 3398 3399 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3400 intel_engine_pm_put(engine); 3401 3402 intel_timeline_exit(ce->timeline); 3403 } 3404 3405 static int guc_virtual_context_alloc(struct intel_context *ce) 3406 { 3407 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3408 3409 return lrc_alloc(ce, engine); 3410 } 3411 3412 static const struct intel_context_ops virtual_guc_context_ops = { 3413 .alloc = guc_virtual_context_alloc, 3414 3415 .pre_pin = guc_virtual_context_pre_pin, 3416 .pin = guc_virtual_context_pin, 3417 .unpin = guc_virtual_context_unpin, 3418 .post_unpin = guc_context_post_unpin, 3419 3420 .ban = guc_context_ban, 3421 3422 .cancel_request = guc_context_cancel_request, 3423 3424 .enter = guc_virtual_context_enter, 3425 .exit = guc_virtual_context_exit, 3426 3427 .sched_disable = guc_context_sched_disable, 3428 3429 .destroy = guc_context_destroy, 3430 3431 .get_sibling = guc_virtual_get_sibling, 3432 }; 3433 3434 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3435 { 3436 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3437 struct intel_guc *guc = ce_to_guc(ce); 3438 int ret; 3439 3440 GEM_BUG_ON(!intel_context_is_parent(ce)); 3441 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3442 3443 ret = pin_guc_id(guc, ce); 3444 if (unlikely(ret < 0)) 3445 return ret; 3446 3447 return __guc_context_pin(ce, engine, vaddr); 3448 } 3449 3450 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3451 { 3452 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3453 3454 GEM_BUG_ON(!intel_context_is_child(ce)); 3455 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3456 3457 __intel_context_pin(ce->parallel.parent); 3458 return __guc_context_pin(ce, engine, vaddr); 3459 } 3460 3461 static void guc_parent_context_unpin(struct intel_context *ce) 3462 { 3463 struct intel_guc *guc = ce_to_guc(ce); 3464 3465 GEM_BUG_ON(context_enabled(ce)); 3466 GEM_BUG_ON(intel_context_is_barrier(ce)); 3467 GEM_BUG_ON(!intel_context_is_parent(ce)); 3468 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3469 3470 unpin_guc_id(guc, ce); 3471 lrc_unpin(ce); 3472 } 3473 3474 static void guc_child_context_unpin(struct intel_context *ce) 3475 { 3476 GEM_BUG_ON(context_enabled(ce)); 3477 GEM_BUG_ON(intel_context_is_barrier(ce)); 3478 GEM_BUG_ON(!intel_context_is_child(ce)); 3479 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3480 3481 lrc_unpin(ce); 3482 } 3483 3484 static void guc_child_context_post_unpin(struct intel_context *ce) 3485 { 3486 GEM_BUG_ON(!intel_context_is_child(ce)); 3487 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3488 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3489 3490 lrc_post_unpin(ce); 3491 intel_context_unpin(ce->parallel.parent); 3492 } 3493 3494 static void guc_child_context_destroy(struct kref *kref) 3495 { 3496 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3497 3498 __guc_context_destroy(ce); 3499 } 3500 3501 static const struct intel_context_ops virtual_parent_context_ops = { 3502 .alloc = guc_virtual_context_alloc, 3503 3504 .pre_pin = guc_context_pre_pin, 3505 .pin = guc_parent_context_pin, 3506 .unpin = guc_parent_context_unpin, 3507 .post_unpin = guc_context_post_unpin, 3508 3509 .ban = guc_context_ban, 3510 3511 .cancel_request = guc_context_cancel_request, 3512 3513 .enter = guc_virtual_context_enter, 3514 .exit = guc_virtual_context_exit, 3515 3516 .sched_disable = guc_context_sched_disable, 3517 3518 .destroy = guc_context_destroy, 3519 3520 .get_sibling = guc_virtual_get_sibling, 3521 }; 3522 3523 static const struct intel_context_ops virtual_child_context_ops = { 3524 .alloc = guc_virtual_context_alloc, 3525 3526 .pre_pin = guc_context_pre_pin, 3527 .pin = guc_child_context_pin, 3528 .unpin = guc_child_context_unpin, 3529 .post_unpin = guc_child_context_post_unpin, 3530 3531 .cancel_request = guc_context_cancel_request, 3532 3533 .enter = guc_virtual_context_enter, 3534 .exit = guc_virtual_context_exit, 3535 3536 .destroy = guc_child_context_destroy, 3537 3538 .get_sibling = guc_virtual_get_sibling, 3539 }; 3540 3541 /* 3542 * The below override of the breadcrumbs is enabled when the user configures a 3543 * context for parallel submission (multi-lrc, parent-child). 3544 * 3545 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3546 * safely preempt all the hw contexts configured for parallel submission 3547 * between each BB. The contract between the i915 and GuC is if the parent 3548 * context can be preempted, all the children can be preempted, and the GuC will 3549 * always try to preempt the parent before the children. A handshake between the 3550 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3551 * creating a window to preempt between each set of BBs. 3552 */ 3553 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3554 u64 offset, u32 len, 3555 const unsigned int flags); 3556 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3557 u64 offset, u32 len, 3558 const unsigned int flags); 3559 static u32 * 3560 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3561 u32 *cs); 3562 static u32 * 3563 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3564 u32 *cs); 3565 3566 static struct intel_context * 3567 guc_create_parallel(struct intel_engine_cs **engines, 3568 unsigned int num_siblings, 3569 unsigned int width) 3570 { 3571 struct intel_engine_cs **siblings = NULL; 3572 struct intel_context *parent = NULL, *ce, *err; 3573 int i, j; 3574 3575 siblings = kmalloc_array(num_siblings, 3576 sizeof(*siblings), 3577 GFP_KERNEL); 3578 if (!siblings) 3579 return ERR_PTR(-ENOMEM); 3580 3581 for (i = 0; i < width; ++i) { 3582 for (j = 0; j < num_siblings; ++j) 3583 siblings[j] = engines[i * num_siblings + j]; 3584 3585 ce = intel_engine_create_virtual(siblings, num_siblings, 3586 FORCE_VIRTUAL); 3587 if (IS_ERR(ce)) { 3588 err = ERR_CAST(ce); 3589 goto unwind; 3590 } 3591 3592 if (i == 0) { 3593 parent = ce; 3594 parent->ops = &virtual_parent_context_ops; 3595 } else { 3596 ce->ops = &virtual_child_context_ops; 3597 intel_context_bind_parent_child(parent, ce); 3598 } 3599 } 3600 3601 parent->parallel.fence_context = dma_fence_context_alloc(1); 3602 3603 parent->engine->emit_bb_start = 3604 emit_bb_start_parent_no_preempt_mid_batch; 3605 parent->engine->emit_fini_breadcrumb = 3606 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3607 parent->engine->emit_fini_breadcrumb_dw = 3608 12 + 4 * parent->parallel.number_children; 3609 for_each_child(parent, ce) { 3610 ce->engine->emit_bb_start = 3611 emit_bb_start_child_no_preempt_mid_batch; 3612 ce->engine->emit_fini_breadcrumb = 3613 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3614 ce->engine->emit_fini_breadcrumb_dw = 16; 3615 } 3616 3617 kfree(siblings); 3618 return parent; 3619 3620 unwind: 3621 if (parent) 3622 intel_context_put(parent); 3623 kfree(siblings); 3624 return err; 3625 } 3626 3627 static bool 3628 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3629 { 3630 struct intel_engine_cs *sibling; 3631 intel_engine_mask_t tmp, mask = b->engine_mask; 3632 bool result = false; 3633 3634 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3635 result |= intel_engine_irq_enable(sibling); 3636 3637 return result; 3638 } 3639 3640 static void 3641 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3642 { 3643 struct intel_engine_cs *sibling; 3644 intel_engine_mask_t tmp, mask = b->engine_mask; 3645 3646 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3647 intel_engine_irq_disable(sibling); 3648 } 3649 3650 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3651 { 3652 int i; 3653 3654 /* 3655 * In GuC submission mode we do not know which physical engine a request 3656 * will be scheduled on, this creates a problem because the breadcrumb 3657 * interrupt is per physical engine. To work around this we attach 3658 * requests and direct all breadcrumb interrupts to the first instance 3659 * of an engine per class. In addition all breadcrumb interrupts are 3660 * enabled / disabled across an engine class in unison. 3661 */ 3662 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3663 struct intel_engine_cs *sibling = 3664 engine->gt->engine_class[engine->class][i]; 3665 3666 if (sibling) { 3667 if (engine->breadcrumbs != sibling->breadcrumbs) { 3668 intel_breadcrumbs_put(engine->breadcrumbs); 3669 engine->breadcrumbs = 3670 intel_breadcrumbs_get(sibling->breadcrumbs); 3671 } 3672 break; 3673 } 3674 } 3675 3676 if (engine->breadcrumbs) { 3677 engine->breadcrumbs->engine_mask |= engine->mask; 3678 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3679 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3680 } 3681 } 3682 3683 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3684 int prio) 3685 { 3686 struct intel_context *ce = request_to_scheduling_context(rq); 3687 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3688 3689 /* Short circuit function */ 3690 if (prio < I915_PRIORITY_NORMAL || 3691 rq->guc_prio == GUC_PRIO_FINI || 3692 (rq->guc_prio != GUC_PRIO_INIT && 3693 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3694 return; 3695 3696 spin_lock(&ce->guc_state.lock); 3697 if (rq->guc_prio != GUC_PRIO_FINI) { 3698 if (rq->guc_prio != GUC_PRIO_INIT) 3699 sub_context_inflight_prio(ce, rq->guc_prio); 3700 rq->guc_prio = new_guc_prio; 3701 add_context_inflight_prio(ce, rq->guc_prio); 3702 update_context_prio(ce); 3703 } 3704 spin_unlock(&ce->guc_state.lock); 3705 } 3706 3707 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3708 { 3709 struct intel_context *ce = request_to_scheduling_context(rq); 3710 3711 spin_lock(&ce->guc_state.lock); 3712 guc_prio_fini(rq, ce); 3713 spin_unlock(&ce->guc_state.lock); 3714 } 3715 3716 static void sanitize_hwsp(struct intel_engine_cs *engine) 3717 { 3718 struct intel_timeline *tl; 3719 3720 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3721 intel_timeline_reset_seqno(tl); 3722 } 3723 3724 static void guc_sanitize(struct intel_engine_cs *engine) 3725 { 3726 /* 3727 * Poison residual state on resume, in case the suspend didn't! 3728 * 3729 * We have to assume that across suspend/resume (or other loss 3730 * of control) that the contents of our pinned buffers has been 3731 * lost, replaced by garbage. Since this doesn't always happen, 3732 * let's poison such state so that we more quickly spot when 3733 * we falsely assume it has been preserved. 3734 */ 3735 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3736 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3737 3738 /* 3739 * The kernel_context HWSP is stored in the status_page. As above, 3740 * that may be lost on resume/initialisation, and so we need to 3741 * reset the value in the HWSP. 3742 */ 3743 sanitize_hwsp(engine); 3744 3745 /* And scrub the dirty cachelines for the HWSP */ 3746 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 3747 3748 intel_engine_reset_pinned_contexts(engine); 3749 } 3750 3751 static void setup_hwsp(struct intel_engine_cs *engine) 3752 { 3753 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 3754 3755 ENGINE_WRITE_FW(engine, 3756 RING_HWS_PGA, 3757 i915_ggtt_offset(engine->status_page.vma)); 3758 } 3759 3760 static void start_engine(struct intel_engine_cs *engine) 3761 { 3762 ENGINE_WRITE_FW(engine, 3763 RING_MODE_GEN7, 3764 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 3765 3766 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 3767 ENGINE_POSTING_READ(engine, RING_MI_MODE); 3768 } 3769 3770 static int guc_resume(struct intel_engine_cs *engine) 3771 { 3772 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 3773 3774 intel_mocs_init_engine(engine); 3775 3776 intel_breadcrumbs_reset(engine->breadcrumbs); 3777 3778 setup_hwsp(engine); 3779 start_engine(engine); 3780 3781 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 3782 xehp_enable_ccs_engines(engine); 3783 3784 return 0; 3785 } 3786 3787 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 3788 { 3789 return !sched_engine->tasklet.callback; 3790 } 3791 3792 static void guc_set_default_submission(struct intel_engine_cs *engine) 3793 { 3794 engine->submit_request = guc_submit_request; 3795 } 3796 3797 static inline void guc_kernel_context_pin(struct intel_guc *guc, 3798 struct intel_context *ce) 3799 { 3800 /* 3801 * Note: we purposefully do not check the returns below because 3802 * the registration can only fail if a reset is just starting. 3803 * This is called at the end of reset so presumably another reset 3804 * isn't happening and even it did this code would be run again. 3805 */ 3806 3807 if (context_guc_id_invalid(ce)) 3808 pin_guc_id(guc, ce); 3809 3810 try_context_registration(ce, true); 3811 } 3812 3813 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 3814 { 3815 struct intel_gt *gt = guc_to_gt(guc); 3816 struct intel_engine_cs *engine; 3817 enum intel_engine_id id; 3818 3819 /* make sure all descriptors are clean... */ 3820 xa_destroy(&guc->context_lookup); 3821 3822 /* 3823 * Some contexts might have been pinned before we enabled GuC 3824 * submission, so we need to add them to the GuC bookeeping. 3825 * Also, after a reset the of the GuC we want to make sure that the 3826 * information shared with GuC is properly reset. The kernel LRCs are 3827 * not attached to the gem_context, so they need to be added separately. 3828 */ 3829 for_each_engine(engine, gt, id) { 3830 struct intel_context *ce; 3831 3832 list_for_each_entry(ce, &engine->pinned_contexts_list, 3833 pinned_contexts_link) 3834 guc_kernel_context_pin(guc, ce); 3835 } 3836 } 3837 3838 static void guc_release(struct intel_engine_cs *engine) 3839 { 3840 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3841 3842 intel_engine_cleanup_common(engine); 3843 lrc_fini_wa_ctx(engine); 3844 } 3845 3846 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 3847 { 3848 struct intel_engine_cs *e; 3849 intel_engine_mask_t tmp, mask = engine->mask; 3850 3851 for_each_engine_masked(e, engine->gt, mask, tmp) 3852 e->serial++; 3853 } 3854 3855 static void guc_default_vfuncs(struct intel_engine_cs *engine) 3856 { 3857 /* Default vfuncs which can be overridden by each engine. */ 3858 3859 engine->resume = guc_resume; 3860 3861 engine->cops = &guc_context_ops; 3862 engine->request_alloc = guc_request_alloc; 3863 engine->add_active_request = add_to_context; 3864 engine->remove_active_request = remove_from_context; 3865 3866 engine->sched_engine->schedule = i915_schedule; 3867 3868 engine->reset.prepare = guc_engine_reset_prepare; 3869 engine->reset.rewind = guc_rewind_nop; 3870 engine->reset.cancel = guc_reset_nop; 3871 engine->reset.finish = guc_reset_nop; 3872 3873 engine->emit_flush = gen8_emit_flush_xcs; 3874 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3875 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3876 if (GRAPHICS_VER(engine->i915) >= 12) { 3877 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3878 engine->emit_flush = gen12_emit_flush_xcs; 3879 } 3880 engine->set_default_submission = guc_set_default_submission; 3881 engine->busyness = guc_engine_busyness; 3882 3883 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3884 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3885 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3886 3887 /* Wa_14014475959:dg2 */ 3888 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) 3889 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 3890 3891 /* 3892 * TODO: GuC supports timeslicing and semaphores as well, but they're 3893 * handled by the firmware so some minor tweaks are required before 3894 * enabling. 3895 * 3896 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3897 */ 3898 3899 engine->emit_bb_start = gen8_emit_bb_start; 3900 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 3901 engine->emit_bb_start = gen125_emit_bb_start; 3902 } 3903 3904 static void rcs_submission_override(struct intel_engine_cs *engine) 3905 { 3906 switch (GRAPHICS_VER(engine->i915)) { 3907 case 12: 3908 engine->emit_flush = gen12_emit_flush_rcs; 3909 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3910 break; 3911 case 11: 3912 engine->emit_flush = gen11_emit_flush_rcs; 3913 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3914 break; 3915 default: 3916 engine->emit_flush = gen8_emit_flush_rcs; 3917 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3918 break; 3919 } 3920 } 3921 3922 static inline void guc_default_irqs(struct intel_engine_cs *engine) 3923 { 3924 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 3925 intel_engine_set_irq_handler(engine, cs_irq_handler); 3926 } 3927 3928 static void guc_sched_engine_destroy(struct kref *kref) 3929 { 3930 struct i915_sched_engine *sched_engine = 3931 container_of(kref, typeof(*sched_engine), ref); 3932 struct intel_guc *guc = sched_engine->private_data; 3933 3934 guc->sched_engine = NULL; 3935 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 3936 kfree(sched_engine); 3937 } 3938 3939 int intel_guc_submission_setup(struct intel_engine_cs *engine) 3940 { 3941 struct drm_i915_private *i915 = engine->i915; 3942 struct intel_guc *guc = &engine->gt->uc.guc; 3943 3944 /* 3945 * The setup relies on several assumptions (e.g. irqs always enabled) 3946 * that are only valid on gen11+ 3947 */ 3948 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 3949 3950 if (!guc->sched_engine) { 3951 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 3952 if (!guc->sched_engine) 3953 return -ENOMEM; 3954 3955 guc->sched_engine->schedule = i915_schedule; 3956 guc->sched_engine->disabled = guc_sched_engine_disabled; 3957 guc->sched_engine->private_data = guc; 3958 guc->sched_engine->destroy = guc_sched_engine_destroy; 3959 guc->sched_engine->bump_inflight_request_prio = 3960 guc_bump_inflight_request_prio; 3961 guc->sched_engine->retire_inflight_request_prio = 3962 guc_retire_inflight_request_prio; 3963 tasklet_setup(&guc->sched_engine->tasklet, 3964 guc_submission_tasklet); 3965 } 3966 i915_sched_engine_put(engine->sched_engine); 3967 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 3968 3969 guc_default_vfuncs(engine); 3970 guc_default_irqs(engine); 3971 guc_init_breadcrumbs(engine); 3972 3973 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 3974 rcs_submission_override(engine); 3975 3976 lrc_init_wa_ctx(engine); 3977 3978 /* Finally, take ownership and responsibility for cleanup! */ 3979 engine->sanitize = guc_sanitize; 3980 engine->release = guc_release; 3981 3982 return 0; 3983 } 3984 3985 void intel_guc_submission_enable(struct intel_guc *guc) 3986 { 3987 guc_init_lrc_mapping(guc); 3988 guc_init_engine_stats(guc); 3989 } 3990 3991 void intel_guc_submission_disable(struct intel_guc *guc) 3992 { 3993 /* Note: By the time we're here, GuC may have already been reset */ 3994 } 3995 3996 static bool __guc_submission_supported(struct intel_guc *guc) 3997 { 3998 /* GuC submission is unavailable for pre-Gen11 */ 3999 return intel_guc_is_supported(guc) && 4000 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4001 } 4002 4003 static bool __guc_submission_selected(struct intel_guc *guc) 4004 { 4005 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4006 4007 if (!intel_guc_submission_is_supported(guc)) 4008 return false; 4009 4010 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4011 } 4012 4013 void intel_guc_submission_init_early(struct intel_guc *guc) 4014 { 4015 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4016 4017 spin_lock_init(&guc->submission_state.lock); 4018 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4019 ida_init(&guc->submission_state.guc_ids); 4020 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4021 INIT_WORK(&guc->submission_state.destroyed_worker, 4022 destroyed_worker_func); 4023 INIT_WORK(&guc->submission_state.reset_fail_worker, 4024 reset_fail_worker_func); 4025 4026 spin_lock_init(&guc->timestamp.lock); 4027 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4028 4029 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4030 guc->submission_supported = __guc_submission_supported(guc); 4031 guc->submission_selected = __guc_submission_selected(guc); 4032 } 4033 4034 static inline struct intel_context * 4035 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4036 { 4037 struct intel_context *ce; 4038 4039 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4040 drm_err(&guc_to_gt(guc)->i915->drm, 4041 "Invalid ctx_id %u\n", ctx_id); 4042 return NULL; 4043 } 4044 4045 ce = __get_context(guc, ctx_id); 4046 if (unlikely(!ce)) { 4047 drm_err(&guc_to_gt(guc)->i915->drm, 4048 "Context is NULL, ctx_id %u\n", ctx_id); 4049 return NULL; 4050 } 4051 4052 if (unlikely(intel_context_is_child(ce))) { 4053 drm_err(&guc_to_gt(guc)->i915->drm, 4054 "Context is child, ctx_id %u\n", ctx_id); 4055 return NULL; 4056 } 4057 4058 return ce; 4059 } 4060 4061 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4062 const u32 *msg, 4063 u32 len) 4064 { 4065 struct intel_context *ce; 4066 u32 ctx_id; 4067 4068 if (unlikely(len < 1)) { 4069 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4070 return -EPROTO; 4071 } 4072 ctx_id = msg[0]; 4073 4074 ce = g2h_context_lookup(guc, ctx_id); 4075 if (unlikely(!ce)) 4076 return -EPROTO; 4077 4078 trace_intel_context_deregister_done(ce); 4079 4080 #ifdef CONFIG_DRM_I915_SELFTEST 4081 if (unlikely(ce->drop_deregister)) { 4082 ce->drop_deregister = false; 4083 return 0; 4084 } 4085 #endif 4086 4087 if (context_wait_for_deregister_to_register(ce)) { 4088 struct intel_runtime_pm *runtime_pm = 4089 &ce->engine->gt->i915->runtime_pm; 4090 intel_wakeref_t wakeref; 4091 4092 /* 4093 * Previous owner of this guc_id has been deregistered, now safe 4094 * register this context. 4095 */ 4096 with_intel_runtime_pm(runtime_pm, wakeref) 4097 register_context(ce, true); 4098 guc_signal_context_fence(ce); 4099 intel_context_put(ce); 4100 } else if (context_destroyed(ce)) { 4101 /* Context has been destroyed */ 4102 intel_gt_pm_put_async(guc_to_gt(guc)); 4103 release_guc_id(guc, ce); 4104 __guc_context_destroy(ce); 4105 } 4106 4107 decr_outstanding_submission_g2h(guc); 4108 4109 return 0; 4110 } 4111 4112 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4113 const u32 *msg, 4114 u32 len) 4115 { 4116 struct intel_context *ce; 4117 unsigned long flags; 4118 u32 ctx_id; 4119 4120 if (unlikely(len < 2)) { 4121 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); 4122 return -EPROTO; 4123 } 4124 ctx_id = msg[0]; 4125 4126 ce = g2h_context_lookup(guc, ctx_id); 4127 if (unlikely(!ce)) 4128 return -EPROTO; 4129 4130 if (unlikely(context_destroyed(ce) || 4131 (!context_pending_enable(ce) && 4132 !context_pending_disable(ce)))) { 4133 drm_err(&guc_to_gt(guc)->i915->drm, 4134 "Bad context sched_state 0x%x, ctx_id %u\n", 4135 ce->guc_state.sched_state, ctx_id); 4136 return -EPROTO; 4137 } 4138 4139 trace_intel_context_sched_done(ce); 4140 4141 if (context_pending_enable(ce)) { 4142 #ifdef CONFIG_DRM_I915_SELFTEST 4143 if (unlikely(ce->drop_schedule_enable)) { 4144 ce->drop_schedule_enable = false; 4145 return 0; 4146 } 4147 #endif 4148 4149 spin_lock_irqsave(&ce->guc_state.lock, flags); 4150 clr_context_pending_enable(ce); 4151 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4152 } else if (context_pending_disable(ce)) { 4153 bool banned; 4154 4155 #ifdef CONFIG_DRM_I915_SELFTEST 4156 if (unlikely(ce->drop_schedule_disable)) { 4157 ce->drop_schedule_disable = false; 4158 return 0; 4159 } 4160 #endif 4161 4162 /* 4163 * Unpin must be done before __guc_signal_context_fence, 4164 * otherwise a race exists between the requests getting 4165 * submitted + retired before this unpin completes resulting in 4166 * the pin_count going to zero and the context still being 4167 * enabled. 4168 */ 4169 intel_context_sched_disable_unpin(ce); 4170 4171 spin_lock_irqsave(&ce->guc_state.lock, flags); 4172 banned = context_banned(ce); 4173 clr_context_banned(ce); 4174 clr_context_pending_disable(ce); 4175 __guc_signal_context_fence(ce); 4176 guc_blocked_fence_complete(ce); 4177 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4178 4179 if (banned) { 4180 guc_cancel_context_requests(ce); 4181 intel_engine_signal_breadcrumbs(ce->engine); 4182 } 4183 } 4184 4185 decr_outstanding_submission_g2h(guc); 4186 intel_context_put(ce); 4187 4188 return 0; 4189 } 4190 4191 static void capture_error_state(struct intel_guc *guc, 4192 struct intel_context *ce) 4193 { 4194 struct intel_gt *gt = guc_to_gt(guc); 4195 struct drm_i915_private *i915 = gt->i915; 4196 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4197 intel_wakeref_t wakeref; 4198 4199 intel_engine_set_hung_context(engine, ce); 4200 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4201 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4202 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4203 } 4204 4205 static void guc_context_replay(struct intel_context *ce) 4206 { 4207 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4208 4209 __guc_reset_context(ce, ce->engine->mask); 4210 tasklet_hi_schedule(&sched_engine->tasklet); 4211 } 4212 4213 static void guc_handle_context_reset(struct intel_guc *guc, 4214 struct intel_context *ce) 4215 { 4216 trace_intel_context_reset(ce); 4217 4218 if (likely(!intel_context_is_banned(ce))) { 4219 capture_error_state(guc, ce); 4220 guc_context_replay(ce); 4221 } else { 4222 drm_info(&guc_to_gt(guc)->i915->drm, 4223 "Ignoring context reset notification of banned context 0x%04X on %s", 4224 ce->guc_id.id, ce->engine->name); 4225 } 4226 } 4227 4228 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4229 const u32 *msg, u32 len) 4230 { 4231 struct intel_context *ce; 4232 unsigned long flags; 4233 int ctx_id; 4234 4235 if (unlikely(len != 1)) { 4236 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4237 return -EPROTO; 4238 } 4239 4240 ctx_id = msg[0]; 4241 4242 /* 4243 * The context lookup uses the xarray but lookups only require an RCU lock 4244 * not the full spinlock. So take the lock explicitly and keep it until the 4245 * context has been reference count locked to ensure it can't be destroyed 4246 * asynchronously until the reset is done. 4247 */ 4248 xa_lock_irqsave(&guc->context_lookup, flags); 4249 ce = g2h_context_lookup(guc, ctx_id); 4250 if (ce) 4251 intel_context_get(ce); 4252 xa_unlock_irqrestore(&guc->context_lookup, flags); 4253 4254 if (unlikely(!ce)) 4255 return -EPROTO; 4256 4257 guc_handle_context_reset(guc, ce); 4258 intel_context_put(ce); 4259 4260 return 0; 4261 } 4262 4263 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4264 const u32 *msg, u32 len) 4265 { 4266 u32 status; 4267 4268 if (unlikely(len != 1)) { 4269 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 4270 return -EPROTO; 4271 } 4272 4273 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4274 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4275 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); 4276 4277 intel_guc_capture_process(guc); 4278 4279 return 0; 4280 } 4281 4282 struct intel_engine_cs * 4283 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4284 { 4285 struct intel_gt *gt = guc_to_gt(guc); 4286 u8 engine_class = guc_class_to_engine_class(guc_class); 4287 4288 /* Class index is checked in class converter */ 4289 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4290 4291 return gt->engine_class[engine_class][instance]; 4292 } 4293 4294 static void reset_fail_worker_func(struct work_struct *w) 4295 { 4296 struct intel_guc *guc = container_of(w, struct intel_guc, 4297 submission_state.reset_fail_worker); 4298 struct intel_gt *gt = guc_to_gt(guc); 4299 intel_engine_mask_t reset_fail_mask; 4300 unsigned long flags; 4301 4302 spin_lock_irqsave(&guc->submission_state.lock, flags); 4303 reset_fail_mask = guc->submission_state.reset_fail_mask; 4304 guc->submission_state.reset_fail_mask = 0; 4305 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4306 4307 if (likely(reset_fail_mask)) 4308 intel_gt_handle_error(gt, reset_fail_mask, 4309 I915_ERROR_CAPTURE, 4310 "GuC failed to reset engine mask=0x%x\n", 4311 reset_fail_mask); 4312 } 4313 4314 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4315 const u32 *msg, u32 len) 4316 { 4317 struct intel_engine_cs *engine; 4318 struct intel_gt *gt = guc_to_gt(guc); 4319 u8 guc_class, instance; 4320 u32 reason; 4321 unsigned long flags; 4322 4323 if (unlikely(len != 3)) { 4324 drm_err(>->i915->drm, "Invalid length %u", len); 4325 return -EPROTO; 4326 } 4327 4328 guc_class = msg[0]; 4329 instance = msg[1]; 4330 reason = msg[2]; 4331 4332 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4333 if (unlikely(!engine)) { 4334 drm_err(>->i915->drm, 4335 "Invalid engine %d:%d", guc_class, instance); 4336 return -EPROTO; 4337 } 4338 4339 /* 4340 * This is an unexpected failure of a hardware feature. So, log a real 4341 * error message not just the informational that comes with the reset. 4342 */ 4343 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X", 4344 guc_class, instance, engine->name, reason); 4345 4346 spin_lock_irqsave(&guc->submission_state.lock, flags); 4347 guc->submission_state.reset_fail_mask |= engine->mask; 4348 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4349 4350 /* 4351 * A GT reset flushes this worker queue (G2H handler) so we must use 4352 * another worker to trigger a GT reset. 4353 */ 4354 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4355 4356 return 0; 4357 } 4358 4359 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4360 { 4361 struct intel_guc *guc = &engine->gt->uc.guc; 4362 struct intel_context *ce; 4363 struct i915_request *rq; 4364 unsigned long index; 4365 unsigned long flags; 4366 4367 /* Reset called during driver load? GuC not yet initialised! */ 4368 if (unlikely(!guc_submission_initialized(guc))) 4369 return; 4370 4371 xa_lock_irqsave(&guc->context_lookup, flags); 4372 xa_for_each(&guc->context_lookup, index, ce) { 4373 if (!kref_get_unless_zero(&ce->ref)) 4374 continue; 4375 4376 xa_unlock(&guc->context_lookup); 4377 4378 if (!intel_context_is_pinned(ce)) 4379 goto next; 4380 4381 if (intel_engine_is_virtual(ce->engine)) { 4382 if (!(ce->engine->mask & engine->mask)) 4383 goto next; 4384 } else { 4385 if (ce->engine != engine) 4386 goto next; 4387 } 4388 4389 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4390 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4391 continue; 4392 4393 intel_engine_set_hung_context(engine, ce); 4394 4395 /* Can only cope with one hang at a time... */ 4396 intel_context_put(ce); 4397 xa_lock(&guc->context_lookup); 4398 goto done; 4399 } 4400 next: 4401 intel_context_put(ce); 4402 xa_lock(&guc->context_lookup); 4403 } 4404 done: 4405 xa_unlock_irqrestore(&guc->context_lookup, flags); 4406 } 4407 4408 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4409 struct i915_request *hung_rq, 4410 struct drm_printer *m) 4411 { 4412 struct intel_guc *guc = &engine->gt->uc.guc; 4413 struct intel_context *ce; 4414 unsigned long index; 4415 unsigned long flags; 4416 4417 /* Reset called during driver load? GuC not yet initialised! */ 4418 if (unlikely(!guc_submission_initialized(guc))) 4419 return; 4420 4421 xa_lock_irqsave(&guc->context_lookup, flags); 4422 xa_for_each(&guc->context_lookup, index, ce) { 4423 if (!kref_get_unless_zero(&ce->ref)) 4424 continue; 4425 4426 xa_unlock(&guc->context_lookup); 4427 4428 if (!intel_context_is_pinned(ce)) 4429 goto next; 4430 4431 if (intel_engine_is_virtual(ce->engine)) { 4432 if (!(ce->engine->mask & engine->mask)) 4433 goto next; 4434 } else { 4435 if (ce->engine != engine) 4436 goto next; 4437 } 4438 4439 spin_lock(&ce->guc_state.lock); 4440 intel_engine_dump_active_requests(&ce->guc_state.requests, 4441 hung_rq, m); 4442 spin_unlock(&ce->guc_state.lock); 4443 4444 next: 4445 intel_context_put(ce); 4446 xa_lock(&guc->context_lookup); 4447 } 4448 xa_unlock_irqrestore(&guc->context_lookup, flags); 4449 } 4450 4451 void intel_guc_submission_print_info(struct intel_guc *guc, 4452 struct drm_printer *p) 4453 { 4454 struct i915_sched_engine *sched_engine = guc->sched_engine; 4455 struct rb_node *rb; 4456 unsigned long flags; 4457 4458 if (!sched_engine) 4459 return; 4460 4461 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4462 atomic_read(&guc->outstanding_submission_g2h)); 4463 drm_printf(p, "GuC tasklet count: %u\n\n", 4464 atomic_read(&sched_engine->tasklet.count)); 4465 4466 spin_lock_irqsave(&sched_engine->lock, flags); 4467 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4468 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4469 struct i915_priolist *pl = to_priolist(rb); 4470 struct i915_request *rq; 4471 4472 priolist_for_each_request(rq, pl) 4473 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4474 rq->context->guc_id.id, 4475 rq->fence.seqno); 4476 } 4477 spin_unlock_irqrestore(&sched_engine->lock, flags); 4478 drm_printf(p, "\n"); 4479 } 4480 4481 static inline void guc_log_context_priority(struct drm_printer *p, 4482 struct intel_context *ce) 4483 { 4484 int i; 4485 4486 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4487 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4488 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4489 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4490 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4491 i, ce->guc_state.prio_count[i]); 4492 } 4493 drm_printf(p, "\n"); 4494 } 4495 4496 static inline void guc_log_context(struct drm_printer *p, 4497 struct intel_context *ce) 4498 { 4499 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4500 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4501 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4502 ce->ring->head, 4503 ce->lrc_reg_state[CTX_RING_HEAD]); 4504 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4505 ce->ring->tail, 4506 ce->lrc_reg_state[CTX_RING_TAIL]); 4507 drm_printf(p, "\t\tContext Pin Count: %u\n", 4508 atomic_read(&ce->pin_count)); 4509 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4510 atomic_read(&ce->guc_id.ref)); 4511 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 4512 ce->guc_state.sched_state); 4513 } 4514 4515 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4516 struct drm_printer *p) 4517 { 4518 struct intel_context *ce; 4519 unsigned long index; 4520 unsigned long flags; 4521 4522 xa_lock_irqsave(&guc->context_lookup, flags); 4523 xa_for_each(&guc->context_lookup, index, ce) { 4524 GEM_BUG_ON(intel_context_is_child(ce)); 4525 4526 guc_log_context(p, ce); 4527 guc_log_context_priority(p, ce); 4528 4529 if (intel_context_is_parent(ce)) { 4530 struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); 4531 struct intel_context *child; 4532 4533 drm_printf(p, "\t\tNumber children: %u\n", 4534 ce->parallel.number_children); 4535 drm_printf(p, "\t\tWQI Head: %u\n", 4536 READ_ONCE(wq_desc->head)); 4537 drm_printf(p, "\t\tWQI Tail: %u\n", 4538 READ_ONCE(wq_desc->tail)); 4539 drm_printf(p, "\t\tWQI Status: %u\n\n", 4540 READ_ONCE(wq_desc->wq_status)); 4541 4542 if (ce->engine->emit_bb_start == 4543 emit_bb_start_parent_no_preempt_mid_batch) { 4544 u8 i; 4545 4546 drm_printf(p, "\t\tChildren Go: %u\n\n", 4547 get_children_go_value(ce)); 4548 for (i = 0; i < ce->parallel.number_children; ++i) 4549 drm_printf(p, "\t\tChildren Join: %u\n", 4550 get_children_join_value(ce, i)); 4551 } 4552 4553 for_each_child(ce, child) 4554 guc_log_context(p, child); 4555 } 4556 } 4557 xa_unlock_irqrestore(&guc->context_lookup, flags); 4558 } 4559 4560 static inline u32 get_children_go_addr(struct intel_context *ce) 4561 { 4562 GEM_BUG_ON(!intel_context_is_parent(ce)); 4563 4564 return i915_ggtt_offset(ce->state) + 4565 __get_parent_scratch_offset(ce) + 4566 offsetof(struct parent_scratch, go.semaphore); 4567 } 4568 4569 static inline u32 get_children_join_addr(struct intel_context *ce, 4570 u8 child_index) 4571 { 4572 GEM_BUG_ON(!intel_context_is_parent(ce)); 4573 4574 return i915_ggtt_offset(ce->state) + 4575 __get_parent_scratch_offset(ce) + 4576 offsetof(struct parent_scratch, join[child_index].semaphore); 4577 } 4578 4579 #define PARENT_GO_BB 1 4580 #define PARENT_GO_FINI_BREADCRUMB 0 4581 #define CHILD_GO_BB 1 4582 #define CHILD_GO_FINI_BREADCRUMB 0 4583 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4584 u64 offset, u32 len, 4585 const unsigned int flags) 4586 { 4587 struct intel_context *ce = rq->context; 4588 u32 *cs; 4589 u8 i; 4590 4591 GEM_BUG_ON(!intel_context_is_parent(ce)); 4592 4593 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 4594 if (IS_ERR(cs)) 4595 return PTR_ERR(cs); 4596 4597 /* Wait on children */ 4598 for (i = 0; i < ce->parallel.number_children; ++i) { 4599 *cs++ = (MI_SEMAPHORE_WAIT | 4600 MI_SEMAPHORE_GLOBAL_GTT | 4601 MI_SEMAPHORE_POLL | 4602 MI_SEMAPHORE_SAD_EQ_SDD); 4603 *cs++ = PARENT_GO_BB; 4604 *cs++ = get_children_join_addr(ce, i); 4605 *cs++ = 0; 4606 } 4607 4608 /* Turn off preemption */ 4609 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4610 *cs++ = MI_NOOP; 4611 4612 /* Tell children go */ 4613 cs = gen8_emit_ggtt_write(cs, 4614 CHILD_GO_BB, 4615 get_children_go_addr(ce), 4616 0); 4617 4618 /* Jump to batch */ 4619 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4620 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4621 *cs++ = lower_32_bits(offset); 4622 *cs++ = upper_32_bits(offset); 4623 *cs++ = MI_NOOP; 4624 4625 intel_ring_advance(rq, cs); 4626 4627 return 0; 4628 } 4629 4630 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4631 u64 offset, u32 len, 4632 const unsigned int flags) 4633 { 4634 struct intel_context *ce = rq->context; 4635 struct intel_context *parent = intel_context_to_parent(ce); 4636 u32 *cs; 4637 4638 GEM_BUG_ON(!intel_context_is_child(ce)); 4639 4640 cs = intel_ring_begin(rq, 12); 4641 if (IS_ERR(cs)) 4642 return PTR_ERR(cs); 4643 4644 /* Signal parent */ 4645 cs = gen8_emit_ggtt_write(cs, 4646 PARENT_GO_BB, 4647 get_children_join_addr(parent, 4648 ce->parallel.child_index), 4649 0); 4650 4651 /* Wait on parent for go */ 4652 *cs++ = (MI_SEMAPHORE_WAIT | 4653 MI_SEMAPHORE_GLOBAL_GTT | 4654 MI_SEMAPHORE_POLL | 4655 MI_SEMAPHORE_SAD_EQ_SDD); 4656 *cs++ = CHILD_GO_BB; 4657 *cs++ = get_children_go_addr(parent); 4658 *cs++ = 0; 4659 4660 /* Turn off preemption */ 4661 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4662 4663 /* Jump to batch */ 4664 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4665 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4666 *cs++ = lower_32_bits(offset); 4667 *cs++ = upper_32_bits(offset); 4668 4669 intel_ring_advance(rq, cs); 4670 4671 return 0; 4672 } 4673 4674 static u32 * 4675 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4676 u32 *cs) 4677 { 4678 struct intel_context *ce = rq->context; 4679 u8 i; 4680 4681 GEM_BUG_ON(!intel_context_is_parent(ce)); 4682 4683 /* Wait on children */ 4684 for (i = 0; i < ce->parallel.number_children; ++i) { 4685 *cs++ = (MI_SEMAPHORE_WAIT | 4686 MI_SEMAPHORE_GLOBAL_GTT | 4687 MI_SEMAPHORE_POLL | 4688 MI_SEMAPHORE_SAD_EQ_SDD); 4689 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4690 *cs++ = get_children_join_addr(ce, i); 4691 *cs++ = 0; 4692 } 4693 4694 /* Turn on preemption */ 4695 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4696 *cs++ = MI_NOOP; 4697 4698 /* Tell children go */ 4699 cs = gen8_emit_ggtt_write(cs, 4700 CHILD_GO_FINI_BREADCRUMB, 4701 get_children_go_addr(ce), 4702 0); 4703 4704 return cs; 4705 } 4706 4707 /* 4708 * If this true, a submission of multi-lrc requests had an error and the 4709 * requests need to be skipped. The front end (execuf IOCTL) should've called 4710 * i915_request_skip which squashes the BB but we still need to emit the fini 4711 * breadrcrumbs seqno write. At this point we don't know how many of the 4712 * requests in the multi-lrc submission were generated so we can't do the 4713 * handshake between the parent and children (e.g. if 4 requests should be 4714 * generated but 2nd hit an error only 1 would be seen by the GuC backend). 4715 * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error 4716 * has occurred on any of the requests in submission / relationship. 4717 */ 4718 static inline bool skip_handshake(struct i915_request *rq) 4719 { 4720 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4721 } 4722 4723 #define NON_SKIP_LEN 6 4724 static u32 * 4725 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4726 u32 *cs) 4727 { 4728 struct intel_context *ce = rq->context; 4729 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4730 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4731 4732 GEM_BUG_ON(!intel_context_is_parent(ce)); 4733 4734 if (unlikely(skip_handshake(rq))) { 4735 /* 4736 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 4737 * the NON_SKIP_LEN comes from the length of the emits below. 4738 */ 4739 memset(cs, 0, sizeof(u32) * 4740 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4741 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4742 } else { 4743 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 4744 } 4745 4746 /* Emit fini breadcrumb */ 4747 before_fini_breadcrumb_user_interrupt_cs = cs; 4748 cs = gen8_emit_ggtt_write(cs, 4749 rq->fence.seqno, 4750 i915_request_active_timeline(rq)->hwsp_offset, 4751 0); 4752 4753 /* User interrupt */ 4754 *cs++ = MI_USER_INTERRUPT; 4755 *cs++ = MI_NOOP; 4756 4757 /* Ensure our math for skip + emit is correct */ 4758 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4759 cs); 4760 GEM_BUG_ON(start_fini_breadcrumb_cs + 4761 ce->engine->emit_fini_breadcrumb_dw != cs); 4762 4763 rq->tail = intel_ring_offset(rq, cs); 4764 4765 return cs; 4766 } 4767 4768 static u32 * 4769 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4770 u32 *cs) 4771 { 4772 struct intel_context *ce = rq->context; 4773 struct intel_context *parent = intel_context_to_parent(ce); 4774 4775 GEM_BUG_ON(!intel_context_is_child(ce)); 4776 4777 /* Turn on preemption */ 4778 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4779 *cs++ = MI_NOOP; 4780 4781 /* Signal parent */ 4782 cs = gen8_emit_ggtt_write(cs, 4783 PARENT_GO_FINI_BREADCRUMB, 4784 get_children_join_addr(parent, 4785 ce->parallel.child_index), 4786 0); 4787 4788 /* Wait parent on for go */ 4789 *cs++ = (MI_SEMAPHORE_WAIT | 4790 MI_SEMAPHORE_GLOBAL_GTT | 4791 MI_SEMAPHORE_POLL | 4792 MI_SEMAPHORE_SAD_EQ_SDD); 4793 *cs++ = CHILD_GO_FINI_BREADCRUMB; 4794 *cs++ = get_children_go_addr(parent); 4795 *cs++ = 0; 4796 4797 return cs; 4798 } 4799 4800 static u32 * 4801 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4802 u32 *cs) 4803 { 4804 struct intel_context *ce = rq->context; 4805 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 4806 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 4807 4808 GEM_BUG_ON(!intel_context_is_child(ce)); 4809 4810 if (unlikely(skip_handshake(rq))) { 4811 /* 4812 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 4813 * the NON_SKIP_LEN comes from the length of the emits below. 4814 */ 4815 memset(cs, 0, sizeof(u32) * 4816 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 4817 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 4818 } else { 4819 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 4820 } 4821 4822 /* Emit fini breadcrumb */ 4823 before_fini_breadcrumb_user_interrupt_cs = cs; 4824 cs = gen8_emit_ggtt_write(cs, 4825 rq->fence.seqno, 4826 i915_request_active_timeline(rq)->hwsp_offset, 4827 0); 4828 4829 /* User interrupt */ 4830 *cs++ = MI_USER_INTERRUPT; 4831 *cs++ = MI_NOOP; 4832 4833 /* Ensure our math for skip + emit is correct */ 4834 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 4835 cs); 4836 GEM_BUG_ON(start_fini_breadcrumb_cs + 4837 ce->engine->emit_fini_breadcrumb_dw != cs); 4838 4839 rq->tail = intel_ring_offset(rq, cs); 4840 4841 return cs; 4842 } 4843 4844 #undef NON_SKIP_LEN 4845 4846 static struct intel_context * 4847 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 4848 unsigned long flags) 4849 { 4850 struct guc_virtual_engine *ve; 4851 struct intel_guc *guc; 4852 unsigned int n; 4853 int err; 4854 4855 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 4856 if (!ve) 4857 return ERR_PTR(-ENOMEM); 4858 4859 guc = &siblings[0]->gt->uc.guc; 4860 4861 ve->base.i915 = siblings[0]->i915; 4862 ve->base.gt = siblings[0]->gt; 4863 ve->base.uncore = siblings[0]->uncore; 4864 ve->base.id = -1; 4865 4866 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 4867 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4868 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4869 ve->base.saturated = ALL_ENGINES; 4870 4871 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 4872 4873 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 4874 4875 ve->base.cops = &virtual_guc_context_ops; 4876 ve->base.request_alloc = guc_request_alloc; 4877 ve->base.bump_serial = virtual_guc_bump_serial; 4878 4879 ve->base.submit_request = guc_submit_request; 4880 4881 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 4882 4883 intel_context_init(&ve->context, &ve->base); 4884 4885 for (n = 0; n < count; n++) { 4886 struct intel_engine_cs *sibling = siblings[n]; 4887 4888 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 4889 if (sibling->mask & ve->base.mask) { 4890 DRM_DEBUG("duplicate %s entry in load balancer\n", 4891 sibling->name); 4892 err = -EINVAL; 4893 goto err_put; 4894 } 4895 4896 ve->base.mask |= sibling->mask; 4897 ve->base.logical_mask |= sibling->logical_mask; 4898 4899 if (n != 0 && ve->base.class != sibling->class) { 4900 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 4901 sibling->class, ve->base.class); 4902 err = -EINVAL; 4903 goto err_put; 4904 } else if (n == 0) { 4905 ve->base.class = sibling->class; 4906 ve->base.uabi_class = sibling->uabi_class; 4907 snprintf(ve->base.name, sizeof(ve->base.name), 4908 "v%dx%d", ve->base.class, count); 4909 ve->base.context_size = sibling->context_size; 4910 4911 ve->base.add_active_request = 4912 sibling->add_active_request; 4913 ve->base.remove_active_request = 4914 sibling->remove_active_request; 4915 ve->base.emit_bb_start = sibling->emit_bb_start; 4916 ve->base.emit_flush = sibling->emit_flush; 4917 ve->base.emit_init_breadcrumb = 4918 sibling->emit_init_breadcrumb; 4919 ve->base.emit_fini_breadcrumb = 4920 sibling->emit_fini_breadcrumb; 4921 ve->base.emit_fini_breadcrumb_dw = 4922 sibling->emit_fini_breadcrumb_dw; 4923 ve->base.breadcrumbs = 4924 intel_breadcrumbs_get(sibling->breadcrumbs); 4925 4926 ve->base.flags |= sibling->flags; 4927 4928 ve->base.props.timeslice_duration_ms = 4929 sibling->props.timeslice_duration_ms; 4930 ve->base.props.preempt_timeout_ms = 4931 sibling->props.preempt_timeout_ms; 4932 } 4933 } 4934 4935 return &ve->context; 4936 4937 err_put: 4938 intel_context_put(&ve->context); 4939 return ERR_PTR(err); 4940 } 4941 4942 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 4943 { 4944 struct intel_engine_cs *engine; 4945 intel_engine_mask_t tmp, mask = ve->mask; 4946 4947 for_each_engine_masked(engine, ve->gt, mask, tmp) 4948 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 4949 return true; 4950 4951 return false; 4952 } 4953 4954 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 4955 #include "selftest_guc.c" 4956 #include "selftest_guc_multi_lrc.c" 4957 #endif 4958