1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2014 Intel Corporation 4 */ 5 6 #include <linux/circ_buf.h> 7 8 #include "gem/i915_gem_context.h" 9 #include "gt/gen8_engine_cs.h" 10 #include "gt/intel_breadcrumbs.h" 11 #include "gt/intel_context.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_heartbeat.h" 14 #include "gt/intel_gpu_commands.h" 15 #include "gt/intel_gt.h" 16 #include "gt/intel_gt_irq.h" 17 #include "gt/intel_gt_pm.h" 18 #include "gt/intel_gt_requests.h" 19 #include "gt/intel_lrc.h" 20 #include "gt/intel_lrc_reg.h" 21 #include "gt/intel_mocs.h" 22 #include "gt/intel_ring.h" 23 24 #include "intel_guc_submission.h" 25 26 #include "i915_drv.h" 27 #include "i915_trace.h" 28 29 /** 30 * DOC: GuC-based command submission 31 * 32 * The Scratch registers: 33 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes 34 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then 35 * triggers an interrupt on the GuC via another register write (0xC4C8). 36 * Firmware writes a success/fail code back to the action register after 37 * processes the request. The kernel driver polls waiting for this update and 38 * then proceeds. 39 * 40 * Command Transport buffers (CTBs): 41 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host 42 * - G2H) are a message interface between the i915 and GuC. 43 * 44 * Context registration: 45 * Before a context can be submitted it must be registered with the GuC via a 46 * H2G. A unique guc_id is associated with each context. The context is either 47 * registered at request creation time (normal operation) or at submission time 48 * (abnormal operation, e.g. after a reset). 49 * 50 * Context submission: 51 * The i915 updates the LRC tail value in memory. The i915 must enable the 52 * scheduling of the context within the GuC for the GuC to actually consider it. 53 * Therefore, the first time a disabled context is submitted we use a schedule 54 * enable H2G, while follow up submissions are done via the context submit H2G, 55 * which informs the GuC that a previously enabled context has new work 56 * available. 57 * 58 * Context unpin: 59 * To unpin a context a H2G is used to disable scheduling. When the 60 * corresponding G2H returns indicating the scheduling disable operation has 61 * completed it is safe to unpin the context. While a disable is in flight it 62 * isn't safe to resubmit the context so a fence is used to stall all future 63 * requests of that context until the G2H is returned. 64 * 65 * Context deregistration: 66 * Before a context can be destroyed or if we steal its guc_id we must 67 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't 68 * safe to submit anything to this guc_id until the deregister completes so a 69 * fence is used to stall all requests associated with this guc_id until the 70 * corresponding G2H returns indicating the guc_id has been deregistered. 71 * 72 * submission_state.guc_ids: 73 * Unique number associated with private GuC context data passed in during 74 * context registration / submission / deregistration. 64k available. Simple ida 75 * is used for allocation. 76 * 77 * Stealing guc_ids: 78 * If no guc_ids are available they can be stolen from another context at 79 * request creation time if that context is unpinned. If a guc_id can't be found 80 * we punt this problem to the user as we believe this is near impossible to hit 81 * during normal use cases. 82 * 83 * Locking: 84 * In the GuC submission code we have 3 basic spin locks which protect 85 * everything. Details about each below. 86 * 87 * sched_engine->lock 88 * This is the submission lock for all contexts that share an i915 schedule 89 * engine (sched_engine), thus only one of the contexts which share a 90 * sched_engine can be submitting at a time. Currently only one sched_engine is 91 * used for all of GuC submission but that could change in the future. 92 * 93 * guc->submission_state.lock 94 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts 95 * list. 96 * 97 * ce->guc_state.lock 98 * Protects everything under ce->guc_state. Ensures that a context is in the 99 * correct state before issuing a H2G. e.g. We don't issue a schedule disable 100 * on a disabled context (bad idea), we don't issue a schedule enable when a 101 * schedule disable is in flight, etc... Also protects list of inflight requests 102 * on the context and the priority management state. Lock is individual to each 103 * context. 104 * 105 * Lock ordering rules: 106 * sched_engine->lock -> ce->guc_state.lock 107 * guc->submission_state.lock -> ce->guc_state.lock 108 * 109 * Reset races: 110 * When a full GT reset is triggered it is assumed that some G2H responses to 111 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be 112 * fatal as we do certain operations upon receiving a G2H (e.g. destroy 113 * contexts, release guc_ids, etc...). When this occurs we can scrub the 114 * context state and cleanup appropriately, however this is quite racey. 115 * To avoid races, the reset code must disable submission before scrubbing for 116 * the missing G2H, while the submission code must check for submission being 117 * disabled and skip sending H2Gs and updating context states when it is. Both 118 * sides must also make sure to hold the relevant locks. 119 */ 120 121 /* GuC Virtual Engine */ 122 struct guc_virtual_engine { 123 struct intel_engine_cs base; 124 struct intel_context context; 125 }; 126 127 static struct intel_context * 128 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 129 unsigned long flags); 130 131 static struct intel_context * 132 guc_create_parallel(struct intel_engine_cs **engines, 133 unsigned int num_siblings, 134 unsigned int width); 135 136 #define GUC_REQUEST_SIZE 64 /* bytes */ 137 138 /* 139 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous 140 * per the GuC submission interface. A different allocation algorithm is used 141 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to 142 * partition the guc_id space. We believe the number of multi-lrc contexts in 143 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 144 * multi-lrc. 145 */ 146 #define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) 147 148 /* 149 * Below is a set of functions which control the GuC scheduling state which 150 * require a lock. 151 */ 152 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 153 #define SCHED_STATE_DESTROYED BIT(1) 154 #define SCHED_STATE_PENDING_DISABLE BIT(2) 155 #define SCHED_STATE_BANNED BIT(3) 156 #define SCHED_STATE_ENABLED BIT(4) 157 #define SCHED_STATE_PENDING_ENABLE BIT(5) 158 #define SCHED_STATE_REGISTERED BIT(6) 159 #define SCHED_STATE_BLOCKED_SHIFT 7 160 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 161 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 162 163 static inline void init_sched_state(struct intel_context *ce) 164 { 165 lockdep_assert_held(&ce->guc_state.lock); 166 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 167 } 168 169 __maybe_unused 170 static bool sched_state_is_init(struct intel_context *ce) 171 { 172 /* 173 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after 174 * suspend. 175 */ 176 return !(ce->guc_state.sched_state &= 177 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 178 } 179 180 static inline bool 181 context_wait_for_deregister_to_register(struct intel_context *ce) 182 { 183 return ce->guc_state.sched_state & 184 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 185 } 186 187 static inline void 188 set_context_wait_for_deregister_to_register(struct intel_context *ce) 189 { 190 lockdep_assert_held(&ce->guc_state.lock); 191 ce->guc_state.sched_state |= 192 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 193 } 194 195 static inline void 196 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 197 { 198 lockdep_assert_held(&ce->guc_state.lock); 199 ce->guc_state.sched_state &= 200 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 201 } 202 203 static inline bool 204 context_destroyed(struct intel_context *ce) 205 { 206 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 207 } 208 209 static inline void 210 set_context_destroyed(struct intel_context *ce) 211 { 212 lockdep_assert_held(&ce->guc_state.lock); 213 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 214 } 215 216 static inline bool context_pending_disable(struct intel_context *ce) 217 { 218 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 219 } 220 221 static inline void set_context_pending_disable(struct intel_context *ce) 222 { 223 lockdep_assert_held(&ce->guc_state.lock); 224 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 225 } 226 227 static inline void clr_context_pending_disable(struct intel_context *ce) 228 { 229 lockdep_assert_held(&ce->guc_state.lock); 230 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 231 } 232 233 static inline bool context_banned(struct intel_context *ce) 234 { 235 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 236 } 237 238 static inline void set_context_banned(struct intel_context *ce) 239 { 240 lockdep_assert_held(&ce->guc_state.lock); 241 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 242 } 243 244 static inline void clr_context_banned(struct intel_context *ce) 245 { 246 lockdep_assert_held(&ce->guc_state.lock); 247 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 248 } 249 250 static inline bool context_enabled(struct intel_context *ce) 251 { 252 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 253 } 254 255 static inline void set_context_enabled(struct intel_context *ce) 256 { 257 lockdep_assert_held(&ce->guc_state.lock); 258 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 259 } 260 261 static inline void clr_context_enabled(struct intel_context *ce) 262 { 263 lockdep_assert_held(&ce->guc_state.lock); 264 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 265 } 266 267 static inline bool context_pending_enable(struct intel_context *ce) 268 { 269 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 270 } 271 272 static inline void set_context_pending_enable(struct intel_context *ce) 273 { 274 lockdep_assert_held(&ce->guc_state.lock); 275 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 276 } 277 278 static inline void clr_context_pending_enable(struct intel_context *ce) 279 { 280 lockdep_assert_held(&ce->guc_state.lock); 281 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 282 } 283 284 static inline bool context_registered(struct intel_context *ce) 285 { 286 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 287 } 288 289 static inline void set_context_registered(struct intel_context *ce) 290 { 291 lockdep_assert_held(&ce->guc_state.lock); 292 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 293 } 294 295 static inline void clr_context_registered(struct intel_context *ce) 296 { 297 lockdep_assert_held(&ce->guc_state.lock); 298 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 299 } 300 301 static inline u32 context_blocked(struct intel_context *ce) 302 { 303 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 304 SCHED_STATE_BLOCKED_SHIFT; 305 } 306 307 static inline void incr_context_blocked(struct intel_context *ce) 308 { 309 lockdep_assert_held(&ce->guc_state.lock); 310 311 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 312 313 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 314 } 315 316 static inline void decr_context_blocked(struct intel_context *ce) 317 { 318 lockdep_assert_held(&ce->guc_state.lock); 319 320 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 321 322 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 323 } 324 325 static inline bool context_has_committed_requests(struct intel_context *ce) 326 { 327 return !!ce->guc_state.number_committed_requests; 328 } 329 330 static inline void incr_context_committed_requests(struct intel_context *ce) 331 { 332 lockdep_assert_held(&ce->guc_state.lock); 333 ++ce->guc_state.number_committed_requests; 334 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 335 } 336 337 static inline void decr_context_committed_requests(struct intel_context *ce) 338 { 339 lockdep_assert_held(&ce->guc_state.lock); 340 --ce->guc_state.number_committed_requests; 341 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 342 } 343 344 static struct intel_context * 345 request_to_scheduling_context(struct i915_request *rq) 346 { 347 return intel_context_to_parent(rq->context); 348 } 349 350 static inline bool context_guc_id_invalid(struct intel_context *ce) 351 { 352 return ce->guc_id.id == GUC_INVALID_LRC_ID; 353 } 354 355 static inline void set_context_guc_id_invalid(struct intel_context *ce) 356 { 357 ce->guc_id.id = GUC_INVALID_LRC_ID; 358 } 359 360 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 361 { 362 return &ce->engine->gt->uc.guc; 363 } 364 365 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 366 { 367 return rb_entry(rb, struct i915_priolist, node); 368 } 369 370 /* 371 * When using multi-lrc submission a scratch memory area is reserved in the 372 * parent's context state for the process descriptor, work queue, and handshake 373 * between the parent + children contexts to insert safe preemption points 374 * between each of the BBs. Currently the scratch area is sized to a page. 375 * 376 * The layout of this scratch area is below: 377 * 0 guc_process_desc 378 * + sizeof(struct guc_process_desc) child go 379 * + CACHELINE_BYTES child join[0] 380 * ... 381 * + CACHELINE_BYTES child join[n - 1] 382 * ... unused 383 * PARENT_SCRATCH_SIZE / 2 work queue start 384 * ... work queue 385 * PARENT_SCRATCH_SIZE - 1 work queue end 386 */ 387 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 388 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 389 390 struct sync_semaphore { 391 u32 semaphore; 392 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 393 }; 394 395 struct parent_scratch { 396 struct guc_process_desc pdesc; 397 398 struct sync_semaphore go; 399 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 400 401 u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - 402 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 403 404 u32 wq[WQ_SIZE / sizeof(u32)]; 405 }; 406 407 static u32 __get_parent_scratch_offset(struct intel_context *ce) 408 { 409 GEM_BUG_ON(!ce->parallel.guc.parent_page); 410 411 return ce->parallel.guc.parent_page * PAGE_SIZE; 412 } 413 414 static u32 __get_wq_offset(struct intel_context *ce) 415 { 416 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 417 418 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 419 } 420 421 static struct parent_scratch * 422 __get_parent_scratch(struct intel_context *ce) 423 { 424 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 425 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 426 427 /* 428 * Need to subtract LRC_STATE_OFFSET here as the 429 * parallel.guc.parent_page is the offset into ce->state while 430 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 431 */ 432 return (struct parent_scratch *) 433 (ce->lrc_reg_state + 434 ((__get_parent_scratch_offset(ce) - 435 LRC_STATE_OFFSET) / sizeof(u32))); 436 } 437 438 static struct guc_process_desc * 439 __get_process_desc(struct intel_context *ce) 440 { 441 struct parent_scratch *ps = __get_parent_scratch(ce); 442 443 return &ps->pdesc; 444 } 445 446 static u32 *get_wq_pointer(struct guc_process_desc *desc, 447 struct intel_context *ce, 448 u32 wqi_size) 449 { 450 /* 451 * Check for space in work queue. Caching a value of head pointer in 452 * intel_context structure in order reduce the number accesses to shared 453 * GPU memory which may be across a PCIe bus. 454 */ 455 #define AVAILABLE_SPACE \ 456 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 457 if (wqi_size > AVAILABLE_SPACE) { 458 ce->parallel.guc.wqi_head = READ_ONCE(desc->head); 459 460 if (wqi_size > AVAILABLE_SPACE) 461 return NULL; 462 } 463 #undef AVAILABLE_SPACE 464 465 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 466 } 467 468 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) 469 { 470 struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; 471 472 GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); 473 474 return &base[index]; 475 } 476 477 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 478 { 479 struct intel_context *ce = xa_load(&guc->context_lookup, id); 480 481 GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); 482 483 return ce; 484 } 485 486 static int guc_lrc_desc_pool_create(struct intel_guc *guc) 487 { 488 u32 size; 489 int ret; 490 491 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * 492 GUC_MAX_LRC_DESCRIPTORS); 493 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, 494 (void **)&guc->lrc_desc_pool_vaddr); 495 if (ret) 496 return ret; 497 498 return 0; 499 } 500 501 static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) 502 { 503 guc->lrc_desc_pool_vaddr = NULL; 504 i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); 505 } 506 507 static inline bool guc_submission_initialized(struct intel_guc *guc) 508 { 509 return !!guc->lrc_desc_pool_vaddr; 510 } 511 512 static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) 513 { 514 if (likely(guc_submission_initialized(guc))) { 515 struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); 516 unsigned long flags; 517 518 memset(desc, 0, sizeof(*desc)); 519 520 /* 521 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 522 * the lower level functions directly. 523 */ 524 xa_lock_irqsave(&guc->context_lookup, flags); 525 __xa_erase(&guc->context_lookup, id); 526 xa_unlock_irqrestore(&guc->context_lookup, flags); 527 } 528 } 529 530 static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) 531 { 532 return __get_context(guc, id); 533 } 534 535 static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, 536 struct intel_context *ce) 537 { 538 unsigned long flags; 539 540 /* 541 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 542 * lower level functions directly. 543 */ 544 xa_lock_irqsave(&guc->context_lookup, flags); 545 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 546 xa_unlock_irqrestore(&guc->context_lookup, flags); 547 } 548 549 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 550 { 551 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 552 wake_up_all(&guc->ct.wq); 553 } 554 555 static int guc_submission_send_busy_loop(struct intel_guc *guc, 556 const u32 *action, 557 u32 len, 558 u32 g2h_len_dw, 559 bool loop) 560 { 561 /* 562 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 563 * so we don't handle the case where we don't get a reply because we 564 * aborted the send due to the channel being busy. 565 */ 566 GEM_BUG_ON(g2h_len_dw && !loop); 567 568 if (g2h_len_dw) 569 atomic_inc(&guc->outstanding_submission_g2h); 570 571 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 572 } 573 574 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 575 atomic_t *wait_var, 576 bool interruptible, 577 long timeout) 578 { 579 const int state = interruptible ? 580 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 581 DEFINE_WAIT(wait); 582 583 might_sleep(); 584 GEM_BUG_ON(timeout < 0); 585 586 if (!atomic_read(wait_var)) 587 return 0; 588 589 if (!timeout) 590 return -ETIME; 591 592 for (;;) { 593 prepare_to_wait(&guc->ct.wq, &wait, state); 594 595 if (!atomic_read(wait_var)) 596 break; 597 598 if (signal_pending_state(state, current)) { 599 timeout = -EINTR; 600 break; 601 } 602 603 if (!timeout) { 604 timeout = -ETIME; 605 break; 606 } 607 608 timeout = io_schedule_timeout(timeout); 609 } 610 finish_wait(&guc->ct.wq, &wait); 611 612 return (timeout < 0) ? timeout : 0; 613 } 614 615 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 616 { 617 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 618 return 0; 619 620 return intel_guc_wait_for_pending_msg(guc, 621 &guc->outstanding_submission_g2h, 622 true, timeout); 623 } 624 625 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); 626 627 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 628 { 629 int err = 0; 630 struct intel_context *ce = request_to_scheduling_context(rq); 631 u32 action[3]; 632 int len = 0; 633 u32 g2h_len_dw = 0; 634 bool enabled; 635 636 lockdep_assert_held(&rq->engine->sched_engine->lock); 637 638 /* 639 * Corner case where requests were sitting in the priority list or a 640 * request resubmitted after the context was banned. 641 */ 642 if (unlikely(intel_context_is_banned(ce))) { 643 i915_request_put(i915_request_mark_eio(rq)); 644 intel_engine_signal_breadcrumbs(ce->engine); 645 return 0; 646 } 647 648 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 649 GEM_BUG_ON(context_guc_id_invalid(ce)); 650 651 spin_lock(&ce->guc_state.lock); 652 653 /* 654 * The request / context will be run on the hardware when scheduling 655 * gets enabled in the unblock. For multi-lrc we still submit the 656 * context to move the LRC tails. 657 */ 658 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 659 goto out; 660 661 enabled = context_enabled(ce) || context_blocked(ce); 662 663 if (!enabled) { 664 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 665 action[len++] = ce->guc_id.id; 666 action[len++] = GUC_CONTEXT_ENABLE; 667 set_context_pending_enable(ce); 668 intel_context_get(ce); 669 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 670 } else { 671 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 672 action[len++] = ce->guc_id.id; 673 } 674 675 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 676 if (!enabled && !err) { 677 trace_intel_context_sched_enable(ce); 678 atomic_inc(&guc->outstanding_submission_g2h); 679 set_context_enabled(ce); 680 681 /* 682 * Without multi-lrc KMD does the submission step (moving the 683 * lrc tail) so enabling scheduling is sufficient to submit the 684 * context. This isn't the case in multi-lrc submission as the 685 * GuC needs to move the tails, hence the need for another H2G 686 * to submit a multi-lrc context after enabling scheduling. 687 */ 688 if (intel_context_is_parent(ce)) { 689 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 690 err = intel_guc_send_nb(guc, action, len - 1, 0); 691 } 692 } else if (!enabled) { 693 clr_context_pending_enable(ce); 694 intel_context_put(ce); 695 } 696 if (likely(!err)) 697 trace_i915_request_guc_submit(rq); 698 699 out: 700 spin_unlock(&ce->guc_state.lock); 701 return err; 702 } 703 704 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 705 { 706 int ret = __guc_add_request(guc, rq); 707 708 if (unlikely(ret == -EBUSY)) { 709 guc->stalled_request = rq; 710 guc->submission_stall_reason = STALL_ADD_REQUEST; 711 } 712 713 return ret; 714 } 715 716 static inline void guc_set_lrc_tail(struct i915_request *rq) 717 { 718 rq->context->lrc_reg_state[CTX_RING_TAIL] = 719 intel_ring_set_tail(rq->ring, rq->tail); 720 } 721 722 static inline int rq_prio(const struct i915_request *rq) 723 { 724 return rq->sched.attr.priority; 725 } 726 727 static bool is_multi_lrc_rq(struct i915_request *rq) 728 { 729 return intel_context_is_parallel(rq->context); 730 } 731 732 static bool can_merge_rq(struct i915_request *rq, 733 struct i915_request *last) 734 { 735 return request_to_scheduling_context(rq) == 736 request_to_scheduling_context(last); 737 } 738 739 static u32 wq_space_until_wrap(struct intel_context *ce) 740 { 741 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 742 } 743 744 static void write_wqi(struct guc_process_desc *desc, 745 struct intel_context *ce, 746 u32 wqi_size) 747 { 748 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 749 750 /* 751 * Ensure WQI are visible before updating tail 752 */ 753 intel_guc_write_barrier(ce_to_guc(ce)); 754 755 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 756 (WQ_SIZE - 1); 757 WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); 758 } 759 760 static int guc_wq_noop_append(struct intel_context *ce) 761 { 762 struct guc_process_desc *desc = __get_process_desc(ce); 763 u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); 764 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 765 766 if (!wqi) 767 return -EBUSY; 768 769 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 770 771 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 772 FIELD_PREP(WQ_LEN_MASK, len_dw); 773 ce->parallel.guc.wqi_tail = 0; 774 775 return 0; 776 } 777 778 static int __guc_wq_item_append(struct i915_request *rq) 779 { 780 struct intel_context *ce = request_to_scheduling_context(rq); 781 struct intel_context *child; 782 struct guc_process_desc *desc = __get_process_desc(ce); 783 unsigned int wqi_size = (ce->parallel.number_children + 4) * 784 sizeof(u32); 785 u32 *wqi; 786 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 787 int ret; 788 789 /* Ensure context is in correct state updating work queue */ 790 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 791 GEM_BUG_ON(context_guc_id_invalid(ce)); 792 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 793 GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); 794 795 /* Insert NOOP if this work queue item will wrap the tail pointer. */ 796 if (wqi_size > wq_space_until_wrap(ce)) { 797 ret = guc_wq_noop_append(ce); 798 if (ret) 799 return ret; 800 } 801 802 wqi = get_wq_pointer(desc, ce, wqi_size); 803 if (!wqi) 804 return -EBUSY; 805 806 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 807 808 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 809 FIELD_PREP(WQ_LEN_MASK, len_dw); 810 *wqi++ = ce->lrc.lrca; 811 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 812 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 813 *wqi++ = 0; /* fence_id */ 814 for_each_child(ce, child) 815 *wqi++ = child->ring->tail / sizeof(u64); 816 817 write_wqi(desc, ce, wqi_size); 818 819 return 0; 820 } 821 822 static int guc_wq_item_append(struct intel_guc *guc, 823 struct i915_request *rq) 824 { 825 struct intel_context *ce = request_to_scheduling_context(rq); 826 int ret = 0; 827 828 if (likely(!intel_context_is_banned(ce))) { 829 ret = __guc_wq_item_append(rq); 830 831 if (unlikely(ret == -EBUSY)) { 832 guc->stalled_request = rq; 833 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 834 } 835 } 836 837 return ret; 838 } 839 840 static bool multi_lrc_submit(struct i915_request *rq) 841 { 842 struct intel_context *ce = request_to_scheduling_context(rq); 843 844 intel_ring_set_tail(rq->ring, rq->tail); 845 846 /* 847 * We expect the front end (execbuf IOCTL) to set this flag on the last 848 * request generated from a multi-BB submission. This indicates to the 849 * backend (GuC interface) that we should submit this context thus 850 * submitting all the requests generated in parallel. 851 */ 852 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 853 intel_context_is_banned(ce); 854 } 855 856 static int guc_dequeue_one_context(struct intel_guc *guc) 857 { 858 struct i915_sched_engine * const sched_engine = guc->sched_engine; 859 struct i915_request *last = NULL; 860 bool submit = false; 861 struct rb_node *rb; 862 int ret; 863 864 lockdep_assert_held(&sched_engine->lock); 865 866 if (guc->stalled_request) { 867 submit = true; 868 last = guc->stalled_request; 869 870 switch (guc->submission_stall_reason) { 871 case STALL_REGISTER_CONTEXT: 872 goto register_context; 873 case STALL_MOVE_LRC_TAIL: 874 goto move_lrc_tail; 875 case STALL_ADD_REQUEST: 876 goto add_request; 877 default: 878 MISSING_CASE(guc->submission_stall_reason); 879 } 880 } 881 882 while ((rb = rb_first_cached(&sched_engine->queue))) { 883 struct i915_priolist *p = to_priolist(rb); 884 struct i915_request *rq, *rn; 885 886 priolist_for_each_request_consume(rq, rn, p) { 887 if (last && !can_merge_rq(rq, last)) 888 goto register_context; 889 890 list_del_init(&rq->sched.link); 891 892 __i915_request_submit(rq); 893 894 trace_i915_request_in(rq, 0); 895 last = rq; 896 897 if (is_multi_lrc_rq(rq)) { 898 /* 899 * We need to coalesce all multi-lrc requests in 900 * a relationship into a single H2G. We are 901 * guaranteed that all of these requests will be 902 * submitted sequentially. 903 */ 904 if (multi_lrc_submit(rq)) { 905 submit = true; 906 goto register_context; 907 } 908 } else { 909 submit = true; 910 } 911 } 912 913 rb_erase_cached(&p->node, &sched_engine->queue); 914 i915_priolist_free(p); 915 } 916 917 register_context: 918 if (submit) { 919 struct intel_context *ce = request_to_scheduling_context(last); 920 921 if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) && 922 !intel_context_is_banned(ce))) { 923 ret = guc_lrc_desc_pin(ce, false); 924 if (unlikely(ret == -EPIPE)) { 925 goto deadlk; 926 } else if (ret == -EBUSY) { 927 guc->stalled_request = last; 928 guc->submission_stall_reason = 929 STALL_REGISTER_CONTEXT; 930 goto schedule_tasklet; 931 } else if (ret != 0) { 932 GEM_WARN_ON(ret); /* Unexpected */ 933 goto deadlk; 934 } 935 } 936 937 move_lrc_tail: 938 if (is_multi_lrc_rq(last)) { 939 ret = guc_wq_item_append(guc, last); 940 if (ret == -EBUSY) { 941 goto schedule_tasklet; 942 } else if (ret != 0) { 943 GEM_WARN_ON(ret); /* Unexpected */ 944 goto deadlk; 945 } 946 } else { 947 guc_set_lrc_tail(last); 948 } 949 950 add_request: 951 ret = guc_add_request(guc, last); 952 if (unlikely(ret == -EPIPE)) { 953 goto deadlk; 954 } else if (ret == -EBUSY) { 955 goto schedule_tasklet; 956 } else if (ret != 0) { 957 GEM_WARN_ON(ret); /* Unexpected */ 958 goto deadlk; 959 } 960 } 961 962 guc->stalled_request = NULL; 963 guc->submission_stall_reason = STALL_NONE; 964 return submit; 965 966 deadlk: 967 sched_engine->tasklet.callback = NULL; 968 tasklet_disable_nosync(&sched_engine->tasklet); 969 return false; 970 971 schedule_tasklet: 972 tasklet_schedule(&sched_engine->tasklet); 973 return false; 974 } 975 976 static void guc_submission_tasklet(struct tasklet_struct *t) 977 { 978 struct i915_sched_engine *sched_engine = 979 from_tasklet(sched_engine, t, tasklet); 980 unsigned long flags; 981 bool loop; 982 983 spin_lock_irqsave(&sched_engine->lock, flags); 984 985 do { 986 loop = guc_dequeue_one_context(sched_engine->private_data); 987 } while (loop); 988 989 i915_sched_engine_reset_on_empty(sched_engine); 990 991 spin_unlock_irqrestore(&sched_engine->lock, flags); 992 } 993 994 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 995 { 996 if (iir & GT_RENDER_USER_INTERRUPT) 997 intel_engine_signal_breadcrumbs(engine); 998 } 999 1000 static void __guc_context_destroy(struct intel_context *ce); 1001 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1002 static void guc_signal_context_fence(struct intel_context *ce); 1003 static void guc_cancel_context_requests(struct intel_context *ce); 1004 static void guc_blocked_fence_complete(struct intel_context *ce); 1005 1006 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1007 { 1008 struct intel_context *ce; 1009 unsigned long index, flags; 1010 bool pending_disable, pending_enable, deregister, destroyed, banned; 1011 1012 xa_lock_irqsave(&guc->context_lookup, flags); 1013 xa_for_each(&guc->context_lookup, index, ce) { 1014 /* 1015 * Corner case where the ref count on the object is zero but and 1016 * deregister G2H was lost. In this case we don't touch the ref 1017 * count and finish the destroy of the context. 1018 */ 1019 bool do_put = kref_get_unless_zero(&ce->ref); 1020 1021 xa_unlock(&guc->context_lookup); 1022 1023 spin_lock(&ce->guc_state.lock); 1024 1025 /* 1026 * Once we are at this point submission_disabled() is guaranteed 1027 * to be visible to all callers who set the below flags (see above 1028 * flush and flushes in reset_prepare). If submission_disabled() 1029 * is set, the caller shouldn't set these flags. 1030 */ 1031 1032 destroyed = context_destroyed(ce); 1033 pending_enable = context_pending_enable(ce); 1034 pending_disable = context_pending_disable(ce); 1035 deregister = context_wait_for_deregister_to_register(ce); 1036 banned = context_banned(ce); 1037 init_sched_state(ce); 1038 1039 spin_unlock(&ce->guc_state.lock); 1040 1041 GEM_BUG_ON(!do_put && !destroyed); 1042 1043 if (pending_enable || destroyed || deregister) { 1044 decr_outstanding_submission_g2h(guc); 1045 if (deregister) 1046 guc_signal_context_fence(ce); 1047 if (destroyed) { 1048 intel_gt_pm_put_async(guc_to_gt(guc)); 1049 release_guc_id(guc, ce); 1050 __guc_context_destroy(ce); 1051 } 1052 if (pending_enable || deregister) 1053 intel_context_put(ce); 1054 } 1055 1056 /* Not mutualy exclusive with above if statement. */ 1057 if (pending_disable) { 1058 guc_signal_context_fence(ce); 1059 if (banned) { 1060 guc_cancel_context_requests(ce); 1061 intel_engine_signal_breadcrumbs(ce->engine); 1062 } 1063 intel_context_sched_disable_unpin(ce); 1064 decr_outstanding_submission_g2h(guc); 1065 1066 spin_lock(&ce->guc_state.lock); 1067 guc_blocked_fence_complete(ce); 1068 spin_unlock(&ce->guc_state.lock); 1069 1070 intel_context_put(ce); 1071 } 1072 1073 if (do_put) 1074 intel_context_put(ce); 1075 xa_lock(&guc->context_lookup); 1076 } 1077 xa_unlock_irqrestore(&guc->context_lookup, flags); 1078 } 1079 1080 static inline bool 1081 submission_disabled(struct intel_guc *guc) 1082 { 1083 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1084 1085 return unlikely(!sched_engine || 1086 !__tasklet_is_enabled(&sched_engine->tasklet)); 1087 } 1088 1089 static void disable_submission(struct intel_guc *guc) 1090 { 1091 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1092 1093 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1094 GEM_BUG_ON(!guc->ct.enabled); 1095 __tasklet_disable_sync_once(&sched_engine->tasklet); 1096 sched_engine->tasklet.callback = NULL; 1097 } 1098 } 1099 1100 static void enable_submission(struct intel_guc *guc) 1101 { 1102 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1103 unsigned long flags; 1104 1105 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1106 sched_engine->tasklet.callback = guc_submission_tasklet; 1107 wmb(); /* Make sure callback visible */ 1108 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1109 __tasklet_enable(&sched_engine->tasklet)) { 1110 GEM_BUG_ON(!guc->ct.enabled); 1111 1112 /* And kick in case we missed a new request submission. */ 1113 tasklet_hi_schedule(&sched_engine->tasklet); 1114 } 1115 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1116 } 1117 1118 static void guc_flush_submissions(struct intel_guc *guc) 1119 { 1120 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1121 unsigned long flags; 1122 1123 spin_lock_irqsave(&sched_engine->lock, flags); 1124 spin_unlock_irqrestore(&sched_engine->lock, flags); 1125 } 1126 1127 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1128 1129 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1130 { 1131 int i; 1132 1133 if (unlikely(!guc_submission_initialized(guc))) { 1134 /* Reset called during driver load? GuC not yet initialised! */ 1135 return; 1136 } 1137 1138 intel_gt_park_heartbeats(guc_to_gt(guc)); 1139 disable_submission(guc); 1140 guc->interrupts.disable(guc); 1141 1142 /* Flush IRQ handler */ 1143 spin_lock_irq(&guc_to_gt(guc)->irq_lock); 1144 spin_unlock_irq(&guc_to_gt(guc)->irq_lock); 1145 1146 guc_flush_submissions(guc); 1147 guc_flush_destroyed_contexts(guc); 1148 1149 /* 1150 * Handle any outstanding G2Hs before reset. Call IRQ handler directly 1151 * each pass as interrupt have been disabled. We always scrub for 1152 * outstanding G2H as it is possible for outstanding_submission_g2h to 1153 * be incremented after the context state update. 1154 */ 1155 for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { 1156 intel_guc_to_host_event_handler(guc); 1157 #define wait_for_reset(guc, wait_var) \ 1158 intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) 1159 do { 1160 wait_for_reset(guc, &guc->outstanding_submission_g2h); 1161 } while (!list_empty(&guc->ct.requests.incoming)); 1162 } 1163 1164 scrub_guc_desc_for_outstanding_g2h(guc); 1165 } 1166 1167 static struct intel_engine_cs * 1168 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1169 { 1170 struct intel_engine_cs *engine; 1171 intel_engine_mask_t tmp, mask = ve->mask; 1172 unsigned int num_siblings = 0; 1173 1174 for_each_engine_masked(engine, ve->gt, mask, tmp) 1175 if (num_siblings++ == sibling) 1176 return engine; 1177 1178 return NULL; 1179 } 1180 1181 static inline struct intel_engine_cs * 1182 __context_to_physical_engine(struct intel_context *ce) 1183 { 1184 struct intel_engine_cs *engine = ce->engine; 1185 1186 if (intel_engine_is_virtual(engine)) 1187 engine = guc_virtual_get_sibling(engine, 0); 1188 1189 return engine; 1190 } 1191 1192 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1193 { 1194 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1195 1196 if (intel_context_is_banned(ce)) 1197 return; 1198 1199 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1200 1201 /* 1202 * We want a simple context + ring to execute the breadcrumb update. 1203 * We cannot rely on the context being intact across the GPU hang, 1204 * so clear it and rebuild just what we need for the breadcrumb. 1205 * All pending requests for this context will be zapped, and any 1206 * future request will be after userspace has had the opportunity 1207 * to recreate its own state. 1208 */ 1209 if (scrub) 1210 lrc_init_regs(ce, engine, true); 1211 1212 /* Rerun the request; its payload has been neutered (if guilty). */ 1213 lrc_update_regs(ce, engine, head); 1214 } 1215 1216 static void guc_reset_nop(struct intel_engine_cs *engine) 1217 { 1218 } 1219 1220 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1221 { 1222 } 1223 1224 static void 1225 __unwind_incomplete_requests(struct intel_context *ce) 1226 { 1227 struct i915_request *rq, *rn; 1228 struct list_head *pl; 1229 int prio = I915_PRIORITY_INVALID; 1230 struct i915_sched_engine * const sched_engine = 1231 ce->engine->sched_engine; 1232 unsigned long flags; 1233 1234 spin_lock_irqsave(&sched_engine->lock, flags); 1235 spin_lock(&ce->guc_state.lock); 1236 list_for_each_entry_safe_reverse(rq, rn, 1237 &ce->guc_state.requests, 1238 sched.link) { 1239 if (i915_request_completed(rq)) 1240 continue; 1241 1242 list_del_init(&rq->sched.link); 1243 __i915_request_unsubmit(rq); 1244 1245 /* Push the request back into the queue for later resubmission. */ 1246 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1247 if (rq_prio(rq) != prio) { 1248 prio = rq_prio(rq); 1249 pl = i915_sched_lookup_priolist(sched_engine, prio); 1250 } 1251 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1252 1253 list_add(&rq->sched.link, pl); 1254 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1255 } 1256 spin_unlock(&ce->guc_state.lock); 1257 spin_unlock_irqrestore(&sched_engine->lock, flags); 1258 } 1259 1260 static void __guc_reset_context(struct intel_context *ce, bool stalled) 1261 { 1262 bool local_stalled; 1263 struct i915_request *rq; 1264 unsigned long flags; 1265 u32 head; 1266 int i, number_children = ce->parallel.number_children; 1267 bool skip = false; 1268 struct intel_context *parent = ce; 1269 1270 GEM_BUG_ON(intel_context_is_child(ce)); 1271 1272 intel_context_get(ce); 1273 1274 /* 1275 * GuC will implicitly mark the context as non-schedulable when it sends 1276 * the reset notification. Make sure our state reflects this change. The 1277 * context will be marked enabled on resubmission. 1278 * 1279 * XXX: If the context is reset as a result of the request cancellation 1280 * this G2H is received after the schedule disable complete G2H which is 1281 * wrong as this creates a race between the request cancellation code 1282 * re-submitting the context and this G2H handler. This is a bug in the 1283 * GuC but can be worked around in the meantime but converting this to a 1284 * NOP if a pending enable is in flight as this indicates that a request 1285 * cancellation has occurred. 1286 */ 1287 spin_lock_irqsave(&ce->guc_state.lock, flags); 1288 if (likely(!context_pending_enable(ce))) 1289 clr_context_enabled(ce); 1290 else 1291 skip = true; 1292 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1293 if (unlikely(skip)) 1294 goto out_put; 1295 1296 /* 1297 * For each context in the relationship find the hanging request 1298 * resetting each context / request as needed 1299 */ 1300 for (i = 0; i < number_children + 1; ++i) { 1301 if (!intel_context_is_pinned(ce)) 1302 goto next_context; 1303 1304 local_stalled = false; 1305 rq = intel_context_find_active_request(ce); 1306 if (!rq) { 1307 head = ce->ring->tail; 1308 goto out_replay; 1309 } 1310 1311 if (i915_request_started(rq)) 1312 local_stalled = true; 1313 1314 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1315 head = intel_ring_wrap(ce->ring, rq->head); 1316 1317 __i915_request_reset(rq, local_stalled && stalled); 1318 out_replay: 1319 guc_reset_state(ce, head, local_stalled && stalled); 1320 next_context: 1321 if (i != number_children) 1322 ce = list_next_entry(ce, parallel.child_link); 1323 } 1324 1325 __unwind_incomplete_requests(parent); 1326 out_put: 1327 intel_context_put(parent); 1328 } 1329 1330 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) 1331 { 1332 struct intel_context *ce; 1333 unsigned long index; 1334 unsigned long flags; 1335 1336 if (unlikely(!guc_submission_initialized(guc))) { 1337 /* Reset called during driver load? GuC not yet initialised! */ 1338 return; 1339 } 1340 1341 xa_lock_irqsave(&guc->context_lookup, flags); 1342 xa_for_each(&guc->context_lookup, index, ce) { 1343 if (!kref_get_unless_zero(&ce->ref)) 1344 continue; 1345 1346 xa_unlock(&guc->context_lookup); 1347 1348 if (intel_context_is_pinned(ce) && 1349 !intel_context_is_child(ce)) 1350 __guc_reset_context(ce, stalled); 1351 1352 intel_context_put(ce); 1353 1354 xa_lock(&guc->context_lookup); 1355 } 1356 xa_unlock_irqrestore(&guc->context_lookup, flags); 1357 1358 /* GuC is blown away, drop all references to contexts */ 1359 xa_destroy(&guc->context_lookup); 1360 } 1361 1362 static void guc_cancel_context_requests(struct intel_context *ce) 1363 { 1364 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1365 struct i915_request *rq; 1366 unsigned long flags; 1367 1368 /* Mark all executing requests as skipped. */ 1369 spin_lock_irqsave(&sched_engine->lock, flags); 1370 spin_lock(&ce->guc_state.lock); 1371 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1372 i915_request_put(i915_request_mark_eio(rq)); 1373 spin_unlock(&ce->guc_state.lock); 1374 spin_unlock_irqrestore(&sched_engine->lock, flags); 1375 } 1376 1377 static void 1378 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1379 { 1380 struct i915_request *rq, *rn; 1381 struct rb_node *rb; 1382 unsigned long flags; 1383 1384 /* Can be called during boot if GuC fails to load */ 1385 if (!sched_engine) 1386 return; 1387 1388 /* 1389 * Before we call engine->cancel_requests(), we should have exclusive 1390 * access to the submission state. This is arranged for us by the 1391 * caller disabling the interrupt generation, the tasklet and other 1392 * threads that may then access the same state, giving us a free hand 1393 * to reset state. However, we still need to let lockdep be aware that 1394 * we know this state may be accessed in hardirq context, so we 1395 * disable the irq around this manipulation and we want to keep 1396 * the spinlock focused on its duties and not accidentally conflate 1397 * coverage to the submission's irq state. (Similarly, although we 1398 * shouldn't need to disable irq around the manipulation of the 1399 * submission's irq state, we also wish to remind ourselves that 1400 * it is irq state.) 1401 */ 1402 spin_lock_irqsave(&sched_engine->lock, flags); 1403 1404 /* Flush the queued requests to the timeline list (for retiring). */ 1405 while ((rb = rb_first_cached(&sched_engine->queue))) { 1406 struct i915_priolist *p = to_priolist(rb); 1407 1408 priolist_for_each_request_consume(rq, rn, p) { 1409 list_del_init(&rq->sched.link); 1410 1411 __i915_request_submit(rq); 1412 1413 i915_request_put(i915_request_mark_eio(rq)); 1414 } 1415 1416 rb_erase_cached(&p->node, &sched_engine->queue); 1417 i915_priolist_free(p); 1418 } 1419 1420 /* Remaining _unready_ requests will be nop'ed when submitted */ 1421 1422 sched_engine->queue_priority_hint = INT_MIN; 1423 sched_engine->queue = RB_ROOT_CACHED; 1424 1425 spin_unlock_irqrestore(&sched_engine->lock, flags); 1426 } 1427 1428 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1429 { 1430 struct intel_context *ce; 1431 unsigned long index; 1432 unsigned long flags; 1433 1434 xa_lock_irqsave(&guc->context_lookup, flags); 1435 xa_for_each(&guc->context_lookup, index, ce) { 1436 if (!kref_get_unless_zero(&ce->ref)) 1437 continue; 1438 1439 xa_unlock(&guc->context_lookup); 1440 1441 if (intel_context_is_pinned(ce) && 1442 !intel_context_is_child(ce)) 1443 guc_cancel_context_requests(ce); 1444 1445 intel_context_put(ce); 1446 1447 xa_lock(&guc->context_lookup); 1448 } 1449 xa_unlock_irqrestore(&guc->context_lookup, flags); 1450 1451 guc_cancel_sched_engine_requests(guc->sched_engine); 1452 1453 /* GuC is blown away, drop all references to contexts */ 1454 xa_destroy(&guc->context_lookup); 1455 } 1456 1457 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1458 { 1459 /* Reset called during driver load or during wedge? */ 1460 if (unlikely(!guc_submission_initialized(guc) || 1461 test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { 1462 return; 1463 } 1464 1465 /* 1466 * Technically possible for either of these values to be non-zero here, 1467 * but very unlikely + harmless. Regardless let's add a warn so we can 1468 * see in CI if this happens frequently / a precursor to taking down the 1469 * machine. 1470 */ 1471 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1472 atomic_set(&guc->outstanding_submission_g2h, 0); 1473 1474 intel_guc_global_policies_update(guc); 1475 enable_submission(guc); 1476 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1477 } 1478 1479 static void destroyed_worker_func(struct work_struct *w); 1480 1481 /* 1482 * Set up the memory resources to be shared with the GuC (via the GGTT) 1483 * at firmware loading time. 1484 */ 1485 int intel_guc_submission_init(struct intel_guc *guc) 1486 { 1487 int ret; 1488 1489 if (guc->lrc_desc_pool) 1490 return 0; 1491 1492 ret = guc_lrc_desc_pool_create(guc); 1493 if (ret) 1494 return ret; 1495 /* 1496 * Keep static analysers happy, let them know that we allocated the 1497 * vma after testing that it didn't exist earlier. 1498 */ 1499 GEM_BUG_ON(!guc->lrc_desc_pool); 1500 1501 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 1502 1503 spin_lock_init(&guc->submission_state.lock); 1504 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 1505 ida_init(&guc->submission_state.guc_ids); 1506 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 1507 INIT_WORK(&guc->submission_state.destroyed_worker, 1508 destroyed_worker_func); 1509 1510 guc->submission_state.guc_ids_bitmap = 1511 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); 1512 if (!guc->submission_state.guc_ids_bitmap) 1513 return -ENOMEM; 1514 1515 return 0; 1516 } 1517 1518 void intel_guc_submission_fini(struct intel_guc *guc) 1519 { 1520 if (!guc->lrc_desc_pool) 1521 return; 1522 1523 guc_flush_destroyed_contexts(guc); 1524 guc_lrc_desc_pool_destroy(guc); 1525 i915_sched_engine_put(guc->sched_engine); 1526 bitmap_free(guc->submission_state.guc_ids_bitmap); 1527 } 1528 1529 static inline void queue_request(struct i915_sched_engine *sched_engine, 1530 struct i915_request *rq, 1531 int prio) 1532 { 1533 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1534 list_add_tail(&rq->sched.link, 1535 i915_sched_lookup_priolist(sched_engine, prio)); 1536 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1537 tasklet_hi_schedule(&sched_engine->tasklet); 1538 } 1539 1540 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1541 struct i915_request *rq) 1542 { 1543 int ret = 0; 1544 1545 __i915_request_submit(rq); 1546 1547 trace_i915_request_in(rq, 0); 1548 1549 if (is_multi_lrc_rq(rq)) { 1550 if (multi_lrc_submit(rq)) { 1551 ret = guc_wq_item_append(guc, rq); 1552 if (!ret) 1553 ret = guc_add_request(guc, rq); 1554 } 1555 } else { 1556 guc_set_lrc_tail(rq); 1557 ret = guc_add_request(guc, rq); 1558 } 1559 1560 if (unlikely(ret == -EPIPE)) 1561 disable_submission(guc); 1562 1563 return ret; 1564 } 1565 1566 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1567 { 1568 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1569 struct intel_context *ce = request_to_scheduling_context(rq); 1570 1571 return submission_disabled(guc) || guc->stalled_request || 1572 !i915_sched_engine_is_empty(sched_engine) || 1573 !lrc_desc_registered(guc, ce->guc_id.id); 1574 } 1575 1576 static void guc_submit_request(struct i915_request *rq) 1577 { 1578 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1579 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1580 unsigned long flags; 1581 1582 /* Will be called from irq-context when using foreign fences. */ 1583 spin_lock_irqsave(&sched_engine->lock, flags); 1584 1585 if (need_tasklet(guc, rq)) 1586 queue_request(sched_engine, rq, rq_prio(rq)); 1587 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1588 tasklet_hi_schedule(&sched_engine->tasklet); 1589 1590 spin_unlock_irqrestore(&sched_engine->lock, flags); 1591 } 1592 1593 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1594 { 1595 int ret; 1596 1597 GEM_BUG_ON(intel_context_is_child(ce)); 1598 1599 if (intel_context_is_parent(ce)) 1600 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1601 NUMBER_MULTI_LRC_GUC_ID, 1602 order_base_2(ce->parallel.number_children 1603 + 1)); 1604 else 1605 ret = ida_simple_get(&guc->submission_state.guc_ids, 1606 NUMBER_MULTI_LRC_GUC_ID, 1607 GUC_MAX_LRC_DESCRIPTORS, 1608 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1609 __GFP_NOWARN); 1610 if (unlikely(ret < 0)) 1611 return ret; 1612 1613 ce->guc_id.id = ret; 1614 return 0; 1615 } 1616 1617 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1618 { 1619 GEM_BUG_ON(intel_context_is_child(ce)); 1620 1621 if (!context_guc_id_invalid(ce)) { 1622 if (intel_context_is_parent(ce)) 1623 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 1624 ce->guc_id.id, 1625 order_base_2(ce->parallel.number_children 1626 + 1)); 1627 else 1628 ida_simple_remove(&guc->submission_state.guc_ids, 1629 ce->guc_id.id); 1630 reset_lrc_desc(guc, ce->guc_id.id); 1631 set_context_guc_id_invalid(ce); 1632 } 1633 if (!list_empty(&ce->guc_id.link)) 1634 list_del_init(&ce->guc_id.link); 1635 } 1636 1637 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1638 { 1639 unsigned long flags; 1640 1641 spin_lock_irqsave(&guc->submission_state.lock, flags); 1642 __release_guc_id(guc, ce); 1643 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1644 } 1645 1646 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 1647 { 1648 struct intel_context *cn; 1649 1650 lockdep_assert_held(&guc->submission_state.lock); 1651 GEM_BUG_ON(intel_context_is_child(ce)); 1652 GEM_BUG_ON(intel_context_is_parent(ce)); 1653 1654 if (!list_empty(&guc->submission_state.guc_id_list)) { 1655 cn = list_first_entry(&guc->submission_state.guc_id_list, 1656 struct intel_context, 1657 guc_id.link); 1658 1659 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 1660 GEM_BUG_ON(context_guc_id_invalid(cn)); 1661 GEM_BUG_ON(intel_context_is_child(cn)); 1662 GEM_BUG_ON(intel_context_is_parent(cn)); 1663 1664 list_del_init(&cn->guc_id.link); 1665 ce->guc_id = cn->guc_id; 1666 1667 spin_lock(&ce->guc_state.lock); 1668 clr_context_registered(cn); 1669 spin_unlock(&ce->guc_state.lock); 1670 1671 set_context_guc_id_invalid(cn); 1672 1673 return 0; 1674 } else { 1675 return -EAGAIN; 1676 } 1677 } 1678 1679 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 1680 { 1681 int ret; 1682 1683 lockdep_assert_held(&guc->submission_state.lock); 1684 GEM_BUG_ON(intel_context_is_child(ce)); 1685 1686 ret = new_guc_id(guc, ce); 1687 if (unlikely(ret < 0)) { 1688 if (intel_context_is_parent(ce)) 1689 return -ENOSPC; 1690 1691 ret = steal_guc_id(guc, ce); 1692 if (ret < 0) 1693 return ret; 1694 } 1695 1696 if (intel_context_is_parent(ce)) { 1697 struct intel_context *child; 1698 int i = 1; 1699 1700 for_each_child(ce, child) 1701 child->guc_id.id = ce->guc_id.id + i++; 1702 } 1703 1704 return 0; 1705 } 1706 1707 #define PIN_GUC_ID_TRIES 4 1708 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 1709 { 1710 int ret = 0; 1711 unsigned long flags, tries = PIN_GUC_ID_TRIES; 1712 1713 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 1714 1715 try_again: 1716 spin_lock_irqsave(&guc->submission_state.lock, flags); 1717 1718 might_lock(&ce->guc_state.lock); 1719 1720 if (context_guc_id_invalid(ce)) { 1721 ret = assign_guc_id(guc, ce); 1722 if (ret) 1723 goto out_unlock; 1724 ret = 1; /* Indidcates newly assigned guc_id */ 1725 } 1726 if (!list_empty(&ce->guc_id.link)) 1727 list_del_init(&ce->guc_id.link); 1728 atomic_inc(&ce->guc_id.ref); 1729 1730 out_unlock: 1731 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1732 1733 /* 1734 * -EAGAIN indicates no guc_id are available, let's retire any 1735 * outstanding requests to see if that frees up a guc_id. If the first 1736 * retire didn't help, insert a sleep with the timeslice duration before 1737 * attempting to retire more requests. Double the sleep period each 1738 * subsequent pass before finally giving up. The sleep period has max of 1739 * 100ms and minimum of 1ms. 1740 */ 1741 if (ret == -EAGAIN && --tries) { 1742 if (PIN_GUC_ID_TRIES - tries > 1) { 1743 unsigned int timeslice_shifted = 1744 ce->engine->props.timeslice_duration_ms << 1745 (PIN_GUC_ID_TRIES - tries - 2); 1746 unsigned int max = min_t(unsigned int, 100, 1747 timeslice_shifted); 1748 1749 msleep(max_t(unsigned int, max, 1)); 1750 } 1751 intel_gt_retire_requests(guc_to_gt(guc)); 1752 goto try_again; 1753 } 1754 1755 return ret; 1756 } 1757 1758 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 1759 { 1760 unsigned long flags; 1761 1762 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 1763 GEM_BUG_ON(intel_context_is_child(ce)); 1764 1765 if (unlikely(context_guc_id_invalid(ce) || 1766 intel_context_is_parent(ce))) 1767 return; 1768 1769 spin_lock_irqsave(&guc->submission_state.lock, flags); 1770 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 1771 !atomic_read(&ce->guc_id.ref)) 1772 list_add_tail(&ce->guc_id.link, 1773 &guc->submission_state.guc_id_list); 1774 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1775 } 1776 1777 static int __guc_action_register_multi_lrc(struct intel_guc *guc, 1778 struct intel_context *ce, 1779 u32 guc_id, 1780 u32 offset, 1781 bool loop) 1782 { 1783 struct intel_context *child; 1784 u32 action[4 + MAX_ENGINE_INSTANCE]; 1785 int len = 0; 1786 1787 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 1788 1789 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 1790 action[len++] = guc_id; 1791 action[len++] = ce->parallel.number_children + 1; 1792 action[len++] = offset; 1793 for_each_child(ce, child) { 1794 offset += sizeof(struct guc_lrc_desc); 1795 action[len++] = offset; 1796 } 1797 1798 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 1799 } 1800 1801 static int __guc_action_register_context(struct intel_guc *guc, 1802 u32 guc_id, 1803 u32 offset, 1804 bool loop) 1805 { 1806 u32 action[] = { 1807 INTEL_GUC_ACTION_REGISTER_CONTEXT, 1808 guc_id, 1809 offset, 1810 }; 1811 1812 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 1813 0, loop); 1814 } 1815 1816 static int register_context(struct intel_context *ce, bool loop) 1817 { 1818 struct intel_guc *guc = ce_to_guc(ce); 1819 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + 1820 ce->guc_id.id * sizeof(struct guc_lrc_desc); 1821 int ret; 1822 1823 GEM_BUG_ON(intel_context_is_child(ce)); 1824 trace_intel_context_register(ce); 1825 1826 if (intel_context_is_parent(ce)) 1827 ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, 1828 offset, loop); 1829 else 1830 ret = __guc_action_register_context(guc, ce->guc_id.id, offset, 1831 loop); 1832 if (likely(!ret)) { 1833 unsigned long flags; 1834 1835 spin_lock_irqsave(&ce->guc_state.lock, flags); 1836 set_context_registered(ce); 1837 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1838 } 1839 1840 return ret; 1841 } 1842 1843 static int __guc_action_deregister_context(struct intel_guc *guc, 1844 u32 guc_id) 1845 { 1846 u32 action[] = { 1847 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 1848 guc_id, 1849 }; 1850 1851 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 1852 G2H_LEN_DW_DEREGISTER_CONTEXT, 1853 true); 1854 } 1855 1856 static int deregister_context(struct intel_context *ce, u32 guc_id) 1857 { 1858 struct intel_guc *guc = ce_to_guc(ce); 1859 1860 GEM_BUG_ON(intel_context_is_child(ce)); 1861 trace_intel_context_deregister(ce); 1862 1863 return __guc_action_deregister_context(guc, guc_id); 1864 } 1865 1866 static inline void clear_children_join_go_memory(struct intel_context *ce) 1867 { 1868 struct parent_scratch *ps = __get_parent_scratch(ce); 1869 int i; 1870 1871 ps->go.semaphore = 0; 1872 for (i = 0; i < ce->parallel.number_children + 1; ++i) 1873 ps->join[i].semaphore = 0; 1874 } 1875 1876 static inline u32 get_children_go_value(struct intel_context *ce) 1877 { 1878 return __get_parent_scratch(ce)->go.semaphore; 1879 } 1880 1881 static inline u32 get_children_join_value(struct intel_context *ce, 1882 u8 child_index) 1883 { 1884 return __get_parent_scratch(ce)->join[child_index].semaphore; 1885 } 1886 1887 static void guc_context_policy_init(struct intel_engine_cs *engine, 1888 struct guc_lrc_desc *desc) 1889 { 1890 desc->policy_flags = 0; 1891 1892 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 1893 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; 1894 1895 /* NB: For both of these, zero means disabled. */ 1896 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 1897 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 1898 } 1899 1900 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) 1901 { 1902 struct intel_engine_cs *engine = ce->engine; 1903 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 1904 struct intel_guc *guc = &engine->gt->uc.guc; 1905 u32 desc_idx = ce->guc_id.id; 1906 struct guc_lrc_desc *desc; 1907 bool context_registered; 1908 intel_wakeref_t wakeref; 1909 struct intel_context *child; 1910 int ret = 0; 1911 1912 GEM_BUG_ON(!engine->mask); 1913 GEM_BUG_ON(!sched_state_is_init(ce)); 1914 1915 /* 1916 * Ensure LRC + CT vmas are is same region as write barrier is done 1917 * based on CT vma region. 1918 */ 1919 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 1920 i915_gem_object_is_lmem(ce->ring->vma->obj)); 1921 1922 context_registered = lrc_desc_registered(guc, desc_idx); 1923 1924 reset_lrc_desc(guc, desc_idx); 1925 set_lrc_desc_registered(guc, desc_idx, ce); 1926 1927 desc = __get_lrc_desc(guc, desc_idx); 1928 desc->engine_class = engine_class_to_guc_class(engine->class); 1929 desc->engine_submit_mask = engine->logical_mask; 1930 desc->hw_context_desc = ce->lrc.lrca; 1931 desc->priority = ce->guc_state.prio; 1932 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 1933 guc_context_policy_init(engine, desc); 1934 1935 /* 1936 * If context is a parent, we need to register a process descriptor 1937 * describing a work queue and register all child contexts. 1938 */ 1939 if (intel_context_is_parent(ce)) { 1940 struct guc_process_desc *pdesc; 1941 1942 ce->parallel.guc.wqi_tail = 0; 1943 ce->parallel.guc.wqi_head = 0; 1944 1945 desc->process_desc = i915_ggtt_offset(ce->state) + 1946 __get_parent_scratch_offset(ce); 1947 desc->wq_addr = i915_ggtt_offset(ce->state) + 1948 __get_wq_offset(ce); 1949 desc->wq_size = WQ_SIZE; 1950 1951 pdesc = __get_process_desc(ce); 1952 memset(pdesc, 0, sizeof(*(pdesc))); 1953 pdesc->stage_id = ce->guc_id.id; 1954 pdesc->wq_base_addr = desc->wq_addr; 1955 pdesc->wq_size_bytes = desc->wq_size; 1956 pdesc->wq_status = WQ_STATUS_ACTIVE; 1957 1958 for_each_child(ce, child) { 1959 desc = __get_lrc_desc(guc, child->guc_id.id); 1960 1961 desc->engine_class = 1962 engine_class_to_guc_class(engine->class); 1963 desc->hw_context_desc = child->lrc.lrca; 1964 desc->priority = ce->guc_state.prio; 1965 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 1966 guc_context_policy_init(engine, desc); 1967 } 1968 1969 clear_children_join_go_memory(ce); 1970 } 1971 1972 /* 1973 * The context_lookup xarray is used to determine if the hardware 1974 * context is currently registered. There are two cases in which it 1975 * could be registered either the guc_id has been stolen from another 1976 * context or the lrc descriptor address of this context has changed. In 1977 * either case the context needs to be deregistered with the GuC before 1978 * registering this context. 1979 */ 1980 if (context_registered) { 1981 bool disabled; 1982 unsigned long flags; 1983 1984 trace_intel_context_steal_guc_id(ce); 1985 GEM_BUG_ON(!loop); 1986 1987 /* Seal race with Reset */ 1988 spin_lock_irqsave(&ce->guc_state.lock, flags); 1989 disabled = submission_disabled(guc); 1990 if (likely(!disabled)) { 1991 set_context_wait_for_deregister_to_register(ce); 1992 intel_context_get(ce); 1993 } 1994 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1995 if (unlikely(disabled)) { 1996 reset_lrc_desc(guc, desc_idx); 1997 return 0; /* Will get registered later */ 1998 } 1999 2000 /* 2001 * If stealing the guc_id, this ce has the same guc_id as the 2002 * context whose guc_id was stolen. 2003 */ 2004 with_intel_runtime_pm(runtime_pm, wakeref) 2005 ret = deregister_context(ce, ce->guc_id.id); 2006 if (unlikely(ret == -ENODEV)) 2007 ret = 0; /* Will get registered later */ 2008 } else { 2009 with_intel_runtime_pm(runtime_pm, wakeref) 2010 ret = register_context(ce, loop); 2011 if (unlikely(ret == -EBUSY)) { 2012 reset_lrc_desc(guc, desc_idx); 2013 } else if (unlikely(ret == -ENODEV)) { 2014 reset_lrc_desc(guc, desc_idx); 2015 ret = 0; /* Will get registered later */ 2016 } 2017 } 2018 2019 return ret; 2020 } 2021 2022 static int __guc_context_pre_pin(struct intel_context *ce, 2023 struct intel_engine_cs *engine, 2024 struct i915_gem_ww_ctx *ww, 2025 void **vaddr) 2026 { 2027 return lrc_pre_pin(ce, engine, ww, vaddr); 2028 } 2029 2030 static int __guc_context_pin(struct intel_context *ce, 2031 struct intel_engine_cs *engine, 2032 void *vaddr) 2033 { 2034 if (i915_ggtt_offset(ce->state) != 2035 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2036 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2037 2038 /* 2039 * GuC context gets pinned in guc_request_alloc. See that function for 2040 * explaination of why. 2041 */ 2042 2043 return lrc_pin(ce, engine, vaddr); 2044 } 2045 2046 static int guc_context_pre_pin(struct intel_context *ce, 2047 struct i915_gem_ww_ctx *ww, 2048 void **vaddr) 2049 { 2050 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2051 } 2052 2053 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2054 { 2055 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2056 2057 if (likely(!ret && !intel_context_is_barrier(ce))) 2058 intel_engine_pm_get(ce->engine); 2059 2060 return ret; 2061 } 2062 2063 static void guc_context_unpin(struct intel_context *ce) 2064 { 2065 struct intel_guc *guc = ce_to_guc(ce); 2066 2067 unpin_guc_id(guc, ce); 2068 lrc_unpin(ce); 2069 2070 if (likely(!intel_context_is_barrier(ce))) 2071 intel_engine_pm_put_async(ce->engine); 2072 } 2073 2074 static void guc_context_post_unpin(struct intel_context *ce) 2075 { 2076 lrc_post_unpin(ce); 2077 } 2078 2079 static void __guc_context_sched_enable(struct intel_guc *guc, 2080 struct intel_context *ce) 2081 { 2082 u32 action[] = { 2083 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2084 ce->guc_id.id, 2085 GUC_CONTEXT_ENABLE 2086 }; 2087 2088 trace_intel_context_sched_enable(ce); 2089 2090 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2091 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2092 } 2093 2094 static void __guc_context_sched_disable(struct intel_guc *guc, 2095 struct intel_context *ce, 2096 u16 guc_id) 2097 { 2098 u32 action[] = { 2099 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2100 guc_id, /* ce->guc_id.id not stable */ 2101 GUC_CONTEXT_DISABLE 2102 }; 2103 2104 GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); 2105 2106 GEM_BUG_ON(intel_context_is_child(ce)); 2107 trace_intel_context_sched_disable(ce); 2108 2109 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2110 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2111 } 2112 2113 static void guc_blocked_fence_complete(struct intel_context *ce) 2114 { 2115 lockdep_assert_held(&ce->guc_state.lock); 2116 2117 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2118 i915_sw_fence_complete(&ce->guc_state.blocked); 2119 } 2120 2121 static void guc_blocked_fence_reinit(struct intel_context *ce) 2122 { 2123 lockdep_assert_held(&ce->guc_state.lock); 2124 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2125 2126 /* 2127 * This fence is always complete unless a pending schedule disable is 2128 * outstanding. We arm the fence here and complete it when we receive 2129 * the pending schedule disable complete message. 2130 */ 2131 i915_sw_fence_fini(&ce->guc_state.blocked); 2132 i915_sw_fence_reinit(&ce->guc_state.blocked); 2133 i915_sw_fence_await(&ce->guc_state.blocked); 2134 i915_sw_fence_commit(&ce->guc_state.blocked); 2135 } 2136 2137 static u16 prep_context_pending_disable(struct intel_context *ce) 2138 { 2139 lockdep_assert_held(&ce->guc_state.lock); 2140 2141 set_context_pending_disable(ce); 2142 clr_context_enabled(ce); 2143 guc_blocked_fence_reinit(ce); 2144 intel_context_get(ce); 2145 2146 return ce->guc_id.id; 2147 } 2148 2149 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2150 { 2151 struct intel_guc *guc = ce_to_guc(ce); 2152 unsigned long flags; 2153 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2154 intel_wakeref_t wakeref; 2155 u16 guc_id; 2156 bool enabled; 2157 2158 GEM_BUG_ON(intel_context_is_child(ce)); 2159 2160 spin_lock_irqsave(&ce->guc_state.lock, flags); 2161 2162 incr_context_blocked(ce); 2163 2164 enabled = context_enabled(ce); 2165 if (unlikely(!enabled || submission_disabled(guc))) { 2166 if (enabled) 2167 clr_context_enabled(ce); 2168 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2169 return &ce->guc_state.blocked; 2170 } 2171 2172 /* 2173 * We add +2 here as the schedule disable complete CTB handler calls 2174 * intel_context_sched_disable_unpin (-2 to pin_count). 2175 */ 2176 atomic_add(2, &ce->pin_count); 2177 2178 guc_id = prep_context_pending_disable(ce); 2179 2180 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2181 2182 with_intel_runtime_pm(runtime_pm, wakeref) 2183 __guc_context_sched_disable(guc, ce, guc_id); 2184 2185 return &ce->guc_state.blocked; 2186 } 2187 2188 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2189 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2190 #define SCHED_STATE_NO_UNBLOCK \ 2191 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2192 SCHED_STATE_PENDING_DISABLE | \ 2193 SCHED_STATE_BANNED) 2194 2195 static bool context_cant_unblock(struct intel_context *ce) 2196 { 2197 lockdep_assert_held(&ce->guc_state.lock); 2198 2199 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2200 context_guc_id_invalid(ce) || 2201 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || 2202 !intel_context_is_pinned(ce); 2203 } 2204 2205 static void guc_context_unblock(struct intel_context *ce) 2206 { 2207 struct intel_guc *guc = ce_to_guc(ce); 2208 unsigned long flags; 2209 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2210 intel_wakeref_t wakeref; 2211 bool enable; 2212 2213 GEM_BUG_ON(context_enabled(ce)); 2214 GEM_BUG_ON(intel_context_is_child(ce)); 2215 2216 spin_lock_irqsave(&ce->guc_state.lock, flags); 2217 2218 if (unlikely(submission_disabled(guc) || 2219 context_cant_unblock(ce))) { 2220 enable = false; 2221 } else { 2222 enable = true; 2223 set_context_pending_enable(ce); 2224 set_context_enabled(ce); 2225 intel_context_get(ce); 2226 } 2227 2228 decr_context_blocked(ce); 2229 2230 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2231 2232 if (enable) { 2233 with_intel_runtime_pm(runtime_pm, wakeref) 2234 __guc_context_sched_enable(guc, ce); 2235 } 2236 } 2237 2238 static void guc_context_cancel_request(struct intel_context *ce, 2239 struct i915_request *rq) 2240 { 2241 struct intel_context *block_context = 2242 request_to_scheduling_context(rq); 2243 2244 if (i915_sw_fence_signaled(&rq->submit)) { 2245 struct i915_sw_fence *fence; 2246 2247 intel_context_get(ce); 2248 fence = guc_context_block(block_context); 2249 i915_sw_fence_wait(fence); 2250 if (!i915_request_completed(rq)) { 2251 __i915_request_skip(rq); 2252 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2253 true); 2254 } 2255 2256 /* 2257 * XXX: Racey if context is reset, see comment in 2258 * __guc_reset_context(). 2259 */ 2260 flush_work(&ce_to_guc(ce)->ct.requests.worker); 2261 2262 guc_context_unblock(block_context); 2263 intel_context_put(ce); 2264 } 2265 } 2266 2267 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2268 u16 guc_id, 2269 u32 preemption_timeout) 2270 { 2271 u32 action[] = { 2272 INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, 2273 guc_id, 2274 preemption_timeout 2275 }; 2276 2277 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2278 } 2279 2280 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) 2281 { 2282 struct intel_guc *guc = ce_to_guc(ce); 2283 struct intel_runtime_pm *runtime_pm = 2284 &ce->engine->gt->i915->runtime_pm; 2285 intel_wakeref_t wakeref; 2286 unsigned long flags; 2287 2288 GEM_BUG_ON(intel_context_is_child(ce)); 2289 2290 guc_flush_submissions(guc); 2291 2292 spin_lock_irqsave(&ce->guc_state.lock, flags); 2293 set_context_banned(ce); 2294 2295 if (submission_disabled(guc) || 2296 (!context_enabled(ce) && !context_pending_disable(ce))) { 2297 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2298 2299 guc_cancel_context_requests(ce); 2300 intel_engine_signal_breadcrumbs(ce->engine); 2301 } else if (!context_pending_disable(ce)) { 2302 u16 guc_id; 2303 2304 /* 2305 * We add +2 here as the schedule disable complete CTB handler 2306 * calls intel_context_sched_disable_unpin (-2 to pin_count). 2307 */ 2308 atomic_add(2, &ce->pin_count); 2309 2310 guc_id = prep_context_pending_disable(ce); 2311 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2312 2313 /* 2314 * In addition to disabling scheduling, set the preemption 2315 * timeout to the minimum value (1 us) so the banned context 2316 * gets kicked off the HW ASAP. 2317 */ 2318 with_intel_runtime_pm(runtime_pm, wakeref) { 2319 __guc_context_set_preemption_timeout(guc, guc_id, 1); 2320 __guc_context_sched_disable(guc, ce, guc_id); 2321 } 2322 } else { 2323 if (!context_guc_id_invalid(ce)) 2324 with_intel_runtime_pm(runtime_pm, wakeref) 2325 __guc_context_set_preemption_timeout(guc, 2326 ce->guc_id.id, 2327 1); 2328 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2329 } 2330 } 2331 2332 static void guc_context_sched_disable(struct intel_context *ce) 2333 { 2334 struct intel_guc *guc = ce_to_guc(ce); 2335 unsigned long flags; 2336 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 2337 intel_wakeref_t wakeref; 2338 u16 guc_id; 2339 2340 GEM_BUG_ON(intel_context_is_child(ce)); 2341 2342 spin_lock_irqsave(&ce->guc_state.lock, flags); 2343 2344 /* 2345 * We have to check if the context has been disabled by another thread, 2346 * check if submssion has been disabled to seal a race with reset and 2347 * finally check if any more requests have been committed to the 2348 * context ensursing that a request doesn't slip through the 2349 * 'context_pending_disable' fence. 2350 */ 2351 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 2352 context_has_committed_requests(ce))) { 2353 clr_context_enabled(ce); 2354 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2355 goto unpin; 2356 } 2357 guc_id = prep_context_pending_disable(ce); 2358 2359 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2360 2361 with_intel_runtime_pm(runtime_pm, wakeref) 2362 __guc_context_sched_disable(guc, ce, guc_id); 2363 2364 return; 2365 unpin: 2366 intel_context_sched_disable_unpin(ce); 2367 } 2368 2369 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 2370 { 2371 struct intel_guc *guc = ce_to_guc(ce); 2372 struct intel_gt *gt = guc_to_gt(guc); 2373 unsigned long flags; 2374 bool disabled; 2375 2376 lockdep_assert_held(&guc->submission_state.lock); 2377 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 2378 GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); 2379 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 2380 GEM_BUG_ON(context_enabled(ce)); 2381 2382 /* Seal race with Reset */ 2383 spin_lock_irqsave(&ce->guc_state.lock, flags); 2384 disabled = submission_disabled(guc); 2385 if (likely(!disabled)) { 2386 __intel_gt_pm_get(gt); 2387 set_context_destroyed(ce); 2388 clr_context_registered(ce); 2389 } 2390 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2391 if (unlikely(disabled)) { 2392 __release_guc_id(guc, ce); 2393 __guc_context_destroy(ce); 2394 return; 2395 } 2396 2397 deregister_context(ce, ce->guc_id.id); 2398 } 2399 2400 static void __guc_context_destroy(struct intel_context *ce) 2401 { 2402 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 2403 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 2404 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 2405 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 2406 GEM_BUG_ON(ce->guc_state.number_committed_requests); 2407 2408 lrc_fini(ce); 2409 intel_context_fini(ce); 2410 2411 if (intel_engine_is_virtual(ce->engine)) { 2412 struct guc_virtual_engine *ve = 2413 container_of(ce, typeof(*ve), context); 2414 2415 if (ve->base.breadcrumbs) 2416 intel_breadcrumbs_put(ve->base.breadcrumbs); 2417 2418 kfree(ve); 2419 } else { 2420 intel_context_free(ce); 2421 } 2422 } 2423 2424 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 2425 { 2426 struct intel_context *ce, *cn; 2427 unsigned long flags; 2428 2429 GEM_BUG_ON(!submission_disabled(guc) && 2430 guc_submission_initialized(guc)); 2431 2432 spin_lock_irqsave(&guc->submission_state.lock, flags); 2433 list_for_each_entry_safe(ce, cn, 2434 &guc->submission_state.destroyed_contexts, 2435 destroyed_link) { 2436 list_del_init(&ce->destroyed_link); 2437 __release_guc_id(guc, ce); 2438 __guc_context_destroy(ce); 2439 } 2440 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2441 } 2442 2443 static void deregister_destroyed_contexts(struct intel_guc *guc) 2444 { 2445 struct intel_context *ce, *cn; 2446 unsigned long flags; 2447 2448 spin_lock_irqsave(&guc->submission_state.lock, flags); 2449 list_for_each_entry_safe(ce, cn, 2450 &guc->submission_state.destroyed_contexts, 2451 destroyed_link) { 2452 list_del_init(&ce->destroyed_link); 2453 guc_lrc_desc_unpin(ce); 2454 } 2455 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2456 } 2457 2458 static void destroyed_worker_func(struct work_struct *w) 2459 { 2460 struct intel_guc *guc = container_of(w, struct intel_guc, 2461 submission_state.destroyed_worker); 2462 struct intel_gt *gt = guc_to_gt(guc); 2463 int tmp; 2464 2465 with_intel_gt_pm(gt, tmp) 2466 deregister_destroyed_contexts(guc); 2467 } 2468 2469 static void guc_context_destroy(struct kref *kref) 2470 { 2471 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2472 struct intel_guc *guc = ce_to_guc(ce); 2473 unsigned long flags; 2474 bool destroy; 2475 2476 /* 2477 * If the guc_id is invalid this context has been stolen and we can free 2478 * it immediately. Also can be freed immediately if the context is not 2479 * registered with the GuC or the GuC is in the middle of a reset. 2480 */ 2481 spin_lock_irqsave(&guc->submission_state.lock, flags); 2482 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 2483 !lrc_desc_registered(guc, ce->guc_id.id); 2484 if (likely(!destroy)) { 2485 if (!list_empty(&ce->guc_id.link)) 2486 list_del_init(&ce->guc_id.link); 2487 list_add_tail(&ce->destroyed_link, 2488 &guc->submission_state.destroyed_contexts); 2489 } else { 2490 __release_guc_id(guc, ce); 2491 } 2492 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2493 if (unlikely(destroy)) { 2494 __guc_context_destroy(ce); 2495 return; 2496 } 2497 2498 /* 2499 * We use a worker to issue the H2G to deregister the context as we can 2500 * take the GT PM for the first time which isn't allowed from an atomic 2501 * context. 2502 */ 2503 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 2504 } 2505 2506 static int guc_context_alloc(struct intel_context *ce) 2507 { 2508 return lrc_alloc(ce, ce->engine); 2509 } 2510 2511 static void guc_context_set_prio(struct intel_guc *guc, 2512 struct intel_context *ce, 2513 u8 prio) 2514 { 2515 u32 action[] = { 2516 INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, 2517 ce->guc_id.id, 2518 prio, 2519 }; 2520 2521 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 2522 prio > GUC_CLIENT_PRIORITY_NORMAL); 2523 lockdep_assert_held(&ce->guc_state.lock); 2524 2525 if (ce->guc_state.prio == prio || submission_disabled(guc) || 2526 !context_registered(ce)) { 2527 ce->guc_state.prio = prio; 2528 return; 2529 } 2530 2531 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2532 2533 ce->guc_state.prio = prio; 2534 trace_intel_context_set_prio(ce); 2535 } 2536 2537 static inline u8 map_i915_prio_to_guc_prio(int prio) 2538 { 2539 if (prio == I915_PRIORITY_NORMAL) 2540 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 2541 else if (prio < I915_PRIORITY_NORMAL) 2542 return GUC_CLIENT_PRIORITY_NORMAL; 2543 else if (prio < I915_PRIORITY_DISPLAY) 2544 return GUC_CLIENT_PRIORITY_HIGH; 2545 else 2546 return GUC_CLIENT_PRIORITY_KMD_HIGH; 2547 } 2548 2549 static inline void add_context_inflight_prio(struct intel_context *ce, 2550 u8 guc_prio) 2551 { 2552 lockdep_assert_held(&ce->guc_state.lock); 2553 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2554 2555 ++ce->guc_state.prio_count[guc_prio]; 2556 2557 /* Overflow protection */ 2558 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2559 } 2560 2561 static inline void sub_context_inflight_prio(struct intel_context *ce, 2562 u8 guc_prio) 2563 { 2564 lockdep_assert_held(&ce->guc_state.lock); 2565 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2566 2567 /* Underflow protection */ 2568 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2569 2570 --ce->guc_state.prio_count[guc_prio]; 2571 } 2572 2573 static inline void update_context_prio(struct intel_context *ce) 2574 { 2575 struct intel_guc *guc = &ce->engine->gt->uc.guc; 2576 int i; 2577 2578 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 2579 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 2580 2581 lockdep_assert_held(&ce->guc_state.lock); 2582 2583 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 2584 if (ce->guc_state.prio_count[i]) { 2585 guc_context_set_prio(guc, ce, i); 2586 break; 2587 } 2588 } 2589 } 2590 2591 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 2592 { 2593 /* Lower value is higher priority */ 2594 return new_guc_prio < old_guc_prio; 2595 } 2596 2597 static void add_to_context(struct i915_request *rq) 2598 { 2599 struct intel_context *ce = request_to_scheduling_context(rq); 2600 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 2601 2602 GEM_BUG_ON(intel_context_is_child(ce)); 2603 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 2604 2605 spin_lock(&ce->guc_state.lock); 2606 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 2607 2608 if (rq->guc_prio == GUC_PRIO_INIT) { 2609 rq->guc_prio = new_guc_prio; 2610 add_context_inflight_prio(ce, rq->guc_prio); 2611 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 2612 sub_context_inflight_prio(ce, rq->guc_prio); 2613 rq->guc_prio = new_guc_prio; 2614 add_context_inflight_prio(ce, rq->guc_prio); 2615 } 2616 update_context_prio(ce); 2617 2618 spin_unlock(&ce->guc_state.lock); 2619 } 2620 2621 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 2622 { 2623 lockdep_assert_held(&ce->guc_state.lock); 2624 2625 if (rq->guc_prio != GUC_PRIO_INIT && 2626 rq->guc_prio != GUC_PRIO_FINI) { 2627 sub_context_inflight_prio(ce, rq->guc_prio); 2628 update_context_prio(ce); 2629 } 2630 rq->guc_prio = GUC_PRIO_FINI; 2631 } 2632 2633 static void remove_from_context(struct i915_request *rq) 2634 { 2635 struct intel_context *ce = request_to_scheduling_context(rq); 2636 2637 GEM_BUG_ON(intel_context_is_child(ce)); 2638 2639 spin_lock_irq(&ce->guc_state.lock); 2640 2641 list_del_init(&rq->sched.link); 2642 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2643 2644 /* Prevent further __await_execution() registering a cb, then flush */ 2645 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2646 2647 guc_prio_fini(rq, ce); 2648 2649 decr_context_committed_requests(ce); 2650 2651 spin_unlock_irq(&ce->guc_state.lock); 2652 2653 atomic_dec(&ce->guc_id.ref); 2654 i915_request_notify_execute_cb_imm(rq); 2655 } 2656 2657 static const struct intel_context_ops guc_context_ops = { 2658 .alloc = guc_context_alloc, 2659 2660 .pre_pin = guc_context_pre_pin, 2661 .pin = guc_context_pin, 2662 .unpin = guc_context_unpin, 2663 .post_unpin = guc_context_post_unpin, 2664 2665 .ban = guc_context_ban, 2666 2667 .cancel_request = guc_context_cancel_request, 2668 2669 .enter = intel_context_enter_engine, 2670 .exit = intel_context_exit_engine, 2671 2672 .sched_disable = guc_context_sched_disable, 2673 2674 .reset = lrc_reset, 2675 .destroy = guc_context_destroy, 2676 2677 .create_virtual = guc_create_virtual, 2678 .create_parallel = guc_create_parallel, 2679 }; 2680 2681 static void submit_work_cb(struct irq_work *wrk) 2682 { 2683 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 2684 2685 might_lock(&rq->engine->sched_engine->lock); 2686 i915_sw_fence_complete(&rq->submit); 2687 } 2688 2689 static void __guc_signal_context_fence(struct intel_context *ce) 2690 { 2691 struct i915_request *rq, *rn; 2692 2693 lockdep_assert_held(&ce->guc_state.lock); 2694 2695 if (!list_empty(&ce->guc_state.fences)) 2696 trace_intel_context_fence_release(ce); 2697 2698 /* 2699 * Use an IRQ to ensure locking order of sched_engine->lock -> 2700 * ce->guc_state.lock is preserved. 2701 */ 2702 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 2703 guc_fence_link) { 2704 list_del(&rq->guc_fence_link); 2705 irq_work_queue(&rq->submit_work); 2706 } 2707 2708 INIT_LIST_HEAD(&ce->guc_state.fences); 2709 } 2710 2711 static void guc_signal_context_fence(struct intel_context *ce) 2712 { 2713 unsigned long flags; 2714 2715 GEM_BUG_ON(intel_context_is_child(ce)); 2716 2717 spin_lock_irqsave(&ce->guc_state.lock, flags); 2718 clr_context_wait_for_deregister_to_register(ce); 2719 __guc_signal_context_fence(ce); 2720 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2721 } 2722 2723 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 2724 { 2725 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 2726 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && 2727 !submission_disabled(ce_to_guc(ce)); 2728 } 2729 2730 static void guc_context_init(struct intel_context *ce) 2731 { 2732 const struct i915_gem_context *ctx; 2733 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 2734 2735 rcu_read_lock(); 2736 ctx = rcu_dereference(ce->gem_context); 2737 if (ctx) 2738 prio = ctx->sched.priority; 2739 rcu_read_unlock(); 2740 2741 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 2742 set_bit(CONTEXT_GUC_INIT, &ce->flags); 2743 } 2744 2745 static int guc_request_alloc(struct i915_request *rq) 2746 { 2747 struct intel_context *ce = request_to_scheduling_context(rq); 2748 struct intel_guc *guc = ce_to_guc(ce); 2749 unsigned long flags; 2750 int ret; 2751 2752 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 2753 2754 /* 2755 * Flush enough space to reduce the likelihood of waiting after 2756 * we start building the request - in which case we will just 2757 * have to repeat work. 2758 */ 2759 rq->reserved_space += GUC_REQUEST_SIZE; 2760 2761 /* 2762 * Note that after this point, we have committed to using 2763 * this request as it is being used to both track the 2764 * state of engine initialisation and liveness of the 2765 * golden renderstate above. Think twice before you try 2766 * to cancel/unwind this request now. 2767 */ 2768 2769 /* Unconditionally invalidate GPU caches and TLBs. */ 2770 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 2771 if (ret) 2772 return ret; 2773 2774 rq->reserved_space -= GUC_REQUEST_SIZE; 2775 2776 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 2777 guc_context_init(ce); 2778 2779 /* 2780 * Call pin_guc_id here rather than in the pinning step as with 2781 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 2782 * guc_id and creating horrible race conditions. This is especially bad 2783 * when guc_id are being stolen due to over subscription. By the time 2784 * this function is reached, it is guaranteed that the guc_id will be 2785 * persistent until the generated request is retired. Thus, sealing these 2786 * race conditions. It is still safe to fail here if guc_id are 2787 * exhausted and return -EAGAIN to the user indicating that they can try 2788 * again in the future. 2789 * 2790 * There is no need for a lock here as the timeline mutex ensures at 2791 * most one context can be executing this code path at once. The 2792 * guc_id_ref is incremented once for every request in flight and 2793 * decremented on each retire. When it is zero, a lock around the 2794 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 2795 */ 2796 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 2797 goto out; 2798 2799 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 2800 if (unlikely(ret < 0)) 2801 return ret; 2802 if (context_needs_register(ce, !!ret)) { 2803 ret = guc_lrc_desc_pin(ce, true); 2804 if (unlikely(ret)) { /* unwind */ 2805 if (ret == -EPIPE) { 2806 disable_submission(guc); 2807 goto out; /* GPU will be reset */ 2808 } 2809 atomic_dec(&ce->guc_id.ref); 2810 unpin_guc_id(guc, ce); 2811 return ret; 2812 } 2813 } 2814 2815 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2816 2817 out: 2818 /* 2819 * We block all requests on this context if a G2H is pending for a 2820 * schedule disable or context deregistration as the GuC will fail a 2821 * schedule enable or context registration if either G2H is pending 2822 * respectfully. Once a G2H returns, the fence is released that is 2823 * blocking these requests (see guc_signal_context_fence). 2824 */ 2825 spin_lock_irqsave(&ce->guc_state.lock, flags); 2826 if (context_wait_for_deregister_to_register(ce) || 2827 context_pending_disable(ce)) { 2828 init_irq_work(&rq->submit_work, submit_work_cb); 2829 i915_sw_fence_await(&rq->submit); 2830 2831 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 2832 } 2833 incr_context_committed_requests(ce); 2834 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2835 2836 return 0; 2837 } 2838 2839 static int guc_virtual_context_pre_pin(struct intel_context *ce, 2840 struct i915_gem_ww_ctx *ww, 2841 void **vaddr) 2842 { 2843 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2844 2845 return __guc_context_pre_pin(ce, engine, ww, vaddr); 2846 } 2847 2848 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 2849 { 2850 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2851 int ret = __guc_context_pin(ce, engine, vaddr); 2852 intel_engine_mask_t tmp, mask = ce->engine->mask; 2853 2854 if (likely(!ret)) 2855 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 2856 intel_engine_pm_get(engine); 2857 2858 return ret; 2859 } 2860 2861 static void guc_virtual_context_unpin(struct intel_context *ce) 2862 { 2863 intel_engine_mask_t tmp, mask = ce->engine->mask; 2864 struct intel_engine_cs *engine; 2865 struct intel_guc *guc = ce_to_guc(ce); 2866 2867 GEM_BUG_ON(context_enabled(ce)); 2868 GEM_BUG_ON(intel_context_is_barrier(ce)); 2869 2870 unpin_guc_id(guc, ce); 2871 lrc_unpin(ce); 2872 2873 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 2874 intel_engine_pm_put_async(engine); 2875 } 2876 2877 static void guc_virtual_context_enter(struct intel_context *ce) 2878 { 2879 intel_engine_mask_t tmp, mask = ce->engine->mask; 2880 struct intel_engine_cs *engine; 2881 2882 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 2883 intel_engine_pm_get(engine); 2884 2885 intel_timeline_enter(ce->timeline); 2886 } 2887 2888 static void guc_virtual_context_exit(struct intel_context *ce) 2889 { 2890 intel_engine_mask_t tmp, mask = ce->engine->mask; 2891 struct intel_engine_cs *engine; 2892 2893 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 2894 intel_engine_pm_put(engine); 2895 2896 intel_timeline_exit(ce->timeline); 2897 } 2898 2899 static int guc_virtual_context_alloc(struct intel_context *ce) 2900 { 2901 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2902 2903 return lrc_alloc(ce, engine); 2904 } 2905 2906 static const struct intel_context_ops virtual_guc_context_ops = { 2907 .alloc = guc_virtual_context_alloc, 2908 2909 .pre_pin = guc_virtual_context_pre_pin, 2910 .pin = guc_virtual_context_pin, 2911 .unpin = guc_virtual_context_unpin, 2912 .post_unpin = guc_context_post_unpin, 2913 2914 .ban = guc_context_ban, 2915 2916 .cancel_request = guc_context_cancel_request, 2917 2918 .enter = guc_virtual_context_enter, 2919 .exit = guc_virtual_context_exit, 2920 2921 .sched_disable = guc_context_sched_disable, 2922 2923 .destroy = guc_context_destroy, 2924 2925 .get_sibling = guc_virtual_get_sibling, 2926 }; 2927 2928 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 2929 { 2930 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2931 struct intel_guc *guc = ce_to_guc(ce); 2932 int ret; 2933 2934 GEM_BUG_ON(!intel_context_is_parent(ce)); 2935 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2936 2937 ret = pin_guc_id(guc, ce); 2938 if (unlikely(ret < 0)) 2939 return ret; 2940 2941 return __guc_context_pin(ce, engine, vaddr); 2942 } 2943 2944 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 2945 { 2946 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2947 2948 GEM_BUG_ON(!intel_context_is_child(ce)); 2949 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2950 2951 __intel_context_pin(ce->parallel.parent); 2952 return __guc_context_pin(ce, engine, vaddr); 2953 } 2954 2955 static void guc_parent_context_unpin(struct intel_context *ce) 2956 { 2957 struct intel_guc *guc = ce_to_guc(ce); 2958 2959 GEM_BUG_ON(context_enabled(ce)); 2960 GEM_BUG_ON(intel_context_is_barrier(ce)); 2961 GEM_BUG_ON(!intel_context_is_parent(ce)); 2962 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2963 2964 if (ce->parallel.last_rq) 2965 i915_request_put(ce->parallel.last_rq); 2966 unpin_guc_id(guc, ce); 2967 lrc_unpin(ce); 2968 } 2969 2970 static void guc_child_context_unpin(struct intel_context *ce) 2971 { 2972 GEM_BUG_ON(context_enabled(ce)); 2973 GEM_BUG_ON(intel_context_is_barrier(ce)); 2974 GEM_BUG_ON(!intel_context_is_child(ce)); 2975 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2976 2977 lrc_unpin(ce); 2978 } 2979 2980 static void guc_child_context_post_unpin(struct intel_context *ce) 2981 { 2982 GEM_BUG_ON(!intel_context_is_child(ce)); 2983 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 2984 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2985 2986 lrc_post_unpin(ce); 2987 intel_context_unpin(ce->parallel.parent); 2988 } 2989 2990 static void guc_child_context_destroy(struct kref *kref) 2991 { 2992 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2993 2994 __guc_context_destroy(ce); 2995 } 2996 2997 static const struct intel_context_ops virtual_parent_context_ops = { 2998 .alloc = guc_virtual_context_alloc, 2999 3000 .pre_pin = guc_context_pre_pin, 3001 .pin = guc_parent_context_pin, 3002 .unpin = guc_parent_context_unpin, 3003 .post_unpin = guc_context_post_unpin, 3004 3005 .ban = guc_context_ban, 3006 3007 .cancel_request = guc_context_cancel_request, 3008 3009 .enter = guc_virtual_context_enter, 3010 .exit = guc_virtual_context_exit, 3011 3012 .sched_disable = guc_context_sched_disable, 3013 3014 .destroy = guc_context_destroy, 3015 3016 .get_sibling = guc_virtual_get_sibling, 3017 }; 3018 3019 static const struct intel_context_ops virtual_child_context_ops = { 3020 .alloc = guc_virtual_context_alloc, 3021 3022 .pre_pin = guc_context_pre_pin, 3023 .pin = guc_child_context_pin, 3024 .unpin = guc_child_context_unpin, 3025 .post_unpin = guc_child_context_post_unpin, 3026 3027 .cancel_request = guc_context_cancel_request, 3028 3029 .enter = guc_virtual_context_enter, 3030 .exit = guc_virtual_context_exit, 3031 3032 .destroy = guc_child_context_destroy, 3033 3034 .get_sibling = guc_virtual_get_sibling, 3035 }; 3036 3037 /* 3038 * The below override of the breadcrumbs is enabled when the user configures a 3039 * context for parallel submission (multi-lrc, parent-child). 3040 * 3041 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3042 * safely preempt all the hw contexts configured for parallel submission 3043 * between each BB. The contract between the i915 and GuC is if the parent 3044 * context can be preempted, all the children can be preempted, and the GuC will 3045 * always try to preempt the parent before the children. A handshake between the 3046 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3047 * creating a window to preempt between each set of BBs. 3048 */ 3049 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3050 u64 offset, u32 len, 3051 const unsigned int flags); 3052 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3053 u64 offset, u32 len, 3054 const unsigned int flags); 3055 static u32 * 3056 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3057 u32 *cs); 3058 static u32 * 3059 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3060 u32 *cs); 3061 3062 static struct intel_context * 3063 guc_create_parallel(struct intel_engine_cs **engines, 3064 unsigned int num_siblings, 3065 unsigned int width) 3066 { 3067 struct intel_engine_cs **siblings = NULL; 3068 struct intel_context *parent = NULL, *ce, *err; 3069 int i, j; 3070 3071 siblings = kmalloc_array(num_siblings, 3072 sizeof(*siblings), 3073 GFP_KERNEL); 3074 if (!siblings) 3075 return ERR_PTR(-ENOMEM); 3076 3077 for (i = 0; i < width; ++i) { 3078 for (j = 0; j < num_siblings; ++j) 3079 siblings[j] = engines[i * num_siblings + j]; 3080 3081 ce = intel_engine_create_virtual(siblings, num_siblings, 3082 FORCE_VIRTUAL); 3083 if (IS_ERR(ce)) { 3084 err = ERR_CAST(ce); 3085 goto unwind; 3086 } 3087 3088 if (i == 0) { 3089 parent = ce; 3090 parent->ops = &virtual_parent_context_ops; 3091 } else { 3092 ce->ops = &virtual_child_context_ops; 3093 intel_context_bind_parent_child(parent, ce); 3094 } 3095 } 3096 3097 parent->parallel.fence_context = dma_fence_context_alloc(1); 3098 3099 parent->engine->emit_bb_start = 3100 emit_bb_start_parent_no_preempt_mid_batch; 3101 parent->engine->emit_fini_breadcrumb = 3102 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3103 parent->engine->emit_fini_breadcrumb_dw = 3104 12 + 4 * parent->parallel.number_children; 3105 for_each_child(parent, ce) { 3106 ce->engine->emit_bb_start = 3107 emit_bb_start_child_no_preempt_mid_batch; 3108 ce->engine->emit_fini_breadcrumb = 3109 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3110 ce->engine->emit_fini_breadcrumb_dw = 16; 3111 } 3112 3113 kfree(siblings); 3114 return parent; 3115 3116 unwind: 3117 if (parent) 3118 intel_context_put(parent); 3119 kfree(siblings); 3120 return err; 3121 } 3122 3123 static bool 3124 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3125 { 3126 struct intel_engine_cs *sibling; 3127 intel_engine_mask_t tmp, mask = b->engine_mask; 3128 bool result = false; 3129 3130 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3131 result |= intel_engine_irq_enable(sibling); 3132 3133 return result; 3134 } 3135 3136 static void 3137 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3138 { 3139 struct intel_engine_cs *sibling; 3140 intel_engine_mask_t tmp, mask = b->engine_mask; 3141 3142 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3143 intel_engine_irq_disable(sibling); 3144 } 3145 3146 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3147 { 3148 int i; 3149 3150 /* 3151 * In GuC submission mode we do not know which physical engine a request 3152 * will be scheduled on, this creates a problem because the breadcrumb 3153 * interrupt is per physical engine. To work around this we attach 3154 * requests and direct all breadcrumb interrupts to the first instance 3155 * of an engine per class. In addition all breadcrumb interrupts are 3156 * enabled / disabled across an engine class in unison. 3157 */ 3158 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3159 struct intel_engine_cs *sibling = 3160 engine->gt->engine_class[engine->class][i]; 3161 3162 if (sibling) { 3163 if (engine->breadcrumbs != sibling->breadcrumbs) { 3164 intel_breadcrumbs_put(engine->breadcrumbs); 3165 engine->breadcrumbs = 3166 intel_breadcrumbs_get(sibling->breadcrumbs); 3167 } 3168 break; 3169 } 3170 } 3171 3172 if (engine->breadcrumbs) { 3173 engine->breadcrumbs->engine_mask |= engine->mask; 3174 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3175 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3176 } 3177 } 3178 3179 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3180 int prio) 3181 { 3182 struct intel_context *ce = request_to_scheduling_context(rq); 3183 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3184 3185 /* Short circuit function */ 3186 if (prio < I915_PRIORITY_NORMAL || 3187 rq->guc_prio == GUC_PRIO_FINI || 3188 (rq->guc_prio != GUC_PRIO_INIT && 3189 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3190 return; 3191 3192 spin_lock(&ce->guc_state.lock); 3193 if (rq->guc_prio != GUC_PRIO_FINI) { 3194 if (rq->guc_prio != GUC_PRIO_INIT) 3195 sub_context_inflight_prio(ce, rq->guc_prio); 3196 rq->guc_prio = new_guc_prio; 3197 add_context_inflight_prio(ce, rq->guc_prio); 3198 update_context_prio(ce); 3199 } 3200 spin_unlock(&ce->guc_state.lock); 3201 } 3202 3203 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3204 { 3205 struct intel_context *ce = request_to_scheduling_context(rq); 3206 3207 spin_lock(&ce->guc_state.lock); 3208 guc_prio_fini(rq, ce); 3209 spin_unlock(&ce->guc_state.lock); 3210 } 3211 3212 static void sanitize_hwsp(struct intel_engine_cs *engine) 3213 { 3214 struct intel_timeline *tl; 3215 3216 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3217 intel_timeline_reset_seqno(tl); 3218 } 3219 3220 static void guc_sanitize(struct intel_engine_cs *engine) 3221 { 3222 /* 3223 * Poison residual state on resume, in case the suspend didn't! 3224 * 3225 * We have to assume that across suspend/resume (or other loss 3226 * of control) that the contents of our pinned buffers has been 3227 * lost, replaced by garbage. Since this doesn't always happen, 3228 * let's poison such state so that we more quickly spot when 3229 * we falsely assume it has been preserved. 3230 */ 3231 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3232 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3233 3234 /* 3235 * The kernel_context HWSP is stored in the status_page. As above, 3236 * that may be lost on resume/initialisation, and so we need to 3237 * reset the value in the HWSP. 3238 */ 3239 sanitize_hwsp(engine); 3240 3241 /* And scrub the dirty cachelines for the HWSP */ 3242 clflush_cache_range(engine->status_page.addr, PAGE_SIZE); 3243 3244 intel_engine_reset_pinned_contexts(engine); 3245 } 3246 3247 static void setup_hwsp(struct intel_engine_cs *engine) 3248 { 3249 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 3250 3251 ENGINE_WRITE_FW(engine, 3252 RING_HWS_PGA, 3253 i915_ggtt_offset(engine->status_page.vma)); 3254 } 3255 3256 static void start_engine(struct intel_engine_cs *engine) 3257 { 3258 ENGINE_WRITE_FW(engine, 3259 RING_MODE_GEN7, 3260 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 3261 3262 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 3263 ENGINE_POSTING_READ(engine, RING_MI_MODE); 3264 } 3265 3266 static int guc_resume(struct intel_engine_cs *engine) 3267 { 3268 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 3269 3270 intel_mocs_init_engine(engine); 3271 3272 intel_breadcrumbs_reset(engine->breadcrumbs); 3273 3274 setup_hwsp(engine); 3275 start_engine(engine); 3276 3277 return 0; 3278 } 3279 3280 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 3281 { 3282 return !sched_engine->tasklet.callback; 3283 } 3284 3285 static void guc_set_default_submission(struct intel_engine_cs *engine) 3286 { 3287 engine->submit_request = guc_submit_request; 3288 } 3289 3290 static inline void guc_kernel_context_pin(struct intel_guc *guc, 3291 struct intel_context *ce) 3292 { 3293 if (context_guc_id_invalid(ce)) 3294 pin_guc_id(guc, ce); 3295 guc_lrc_desc_pin(ce, true); 3296 } 3297 3298 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 3299 { 3300 struct intel_gt *gt = guc_to_gt(guc); 3301 struct intel_engine_cs *engine; 3302 enum intel_engine_id id; 3303 3304 /* make sure all descriptors are clean... */ 3305 xa_destroy(&guc->context_lookup); 3306 3307 /* 3308 * Some contexts might have been pinned before we enabled GuC 3309 * submission, so we need to add them to the GuC bookeeping. 3310 * Also, after a reset the of the GuC we want to make sure that the 3311 * information shared with GuC is properly reset. The kernel LRCs are 3312 * not attached to the gem_context, so they need to be added separately. 3313 * 3314 * Note: we purposefully do not check the return of guc_lrc_desc_pin, 3315 * because that function can only fail if a reset is just starting. This 3316 * is at the end of reset so presumably another reset isn't happening 3317 * and even it did this code would be run again. 3318 */ 3319 3320 for_each_engine(engine, gt, id) { 3321 struct intel_context *ce; 3322 3323 list_for_each_entry(ce, &engine->pinned_contexts_list, 3324 pinned_contexts_link) 3325 guc_kernel_context_pin(guc, ce); 3326 } 3327 } 3328 3329 static void guc_release(struct intel_engine_cs *engine) 3330 { 3331 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3332 3333 intel_engine_cleanup_common(engine); 3334 lrc_fini_wa_ctx(engine); 3335 } 3336 3337 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 3338 { 3339 struct intel_engine_cs *e; 3340 intel_engine_mask_t tmp, mask = engine->mask; 3341 3342 for_each_engine_masked(e, engine->gt, mask, tmp) 3343 e->serial++; 3344 } 3345 3346 static void guc_default_vfuncs(struct intel_engine_cs *engine) 3347 { 3348 /* Default vfuncs which can be overridden by each engine. */ 3349 3350 engine->resume = guc_resume; 3351 3352 engine->cops = &guc_context_ops; 3353 engine->request_alloc = guc_request_alloc; 3354 engine->add_active_request = add_to_context; 3355 engine->remove_active_request = remove_from_context; 3356 3357 engine->sched_engine->schedule = i915_schedule; 3358 3359 engine->reset.prepare = guc_reset_nop; 3360 engine->reset.rewind = guc_rewind_nop; 3361 engine->reset.cancel = guc_reset_nop; 3362 engine->reset.finish = guc_reset_nop; 3363 3364 engine->emit_flush = gen8_emit_flush_xcs; 3365 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3366 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3367 if (GRAPHICS_VER(engine->i915) >= 12) { 3368 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3369 engine->emit_flush = gen12_emit_flush_xcs; 3370 } 3371 engine->set_default_submission = guc_set_default_submission; 3372 3373 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3374 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3375 3376 /* 3377 * TODO: GuC supports timeslicing and semaphores as well, but they're 3378 * handled by the firmware so some minor tweaks are required before 3379 * enabling. 3380 * 3381 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3382 */ 3383 3384 engine->emit_bb_start = gen8_emit_bb_start; 3385 } 3386 3387 static void rcs_submission_override(struct intel_engine_cs *engine) 3388 { 3389 switch (GRAPHICS_VER(engine->i915)) { 3390 case 12: 3391 engine->emit_flush = gen12_emit_flush_rcs; 3392 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3393 break; 3394 case 11: 3395 engine->emit_flush = gen11_emit_flush_rcs; 3396 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3397 break; 3398 default: 3399 engine->emit_flush = gen8_emit_flush_rcs; 3400 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3401 break; 3402 } 3403 } 3404 3405 static inline void guc_default_irqs(struct intel_engine_cs *engine) 3406 { 3407 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 3408 intel_engine_set_irq_handler(engine, cs_irq_handler); 3409 } 3410 3411 static void guc_sched_engine_destroy(struct kref *kref) 3412 { 3413 struct i915_sched_engine *sched_engine = 3414 container_of(kref, typeof(*sched_engine), ref); 3415 struct intel_guc *guc = sched_engine->private_data; 3416 3417 guc->sched_engine = NULL; 3418 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 3419 kfree(sched_engine); 3420 } 3421 3422 int intel_guc_submission_setup(struct intel_engine_cs *engine) 3423 { 3424 struct drm_i915_private *i915 = engine->i915; 3425 struct intel_guc *guc = &engine->gt->uc.guc; 3426 3427 /* 3428 * The setup relies on several assumptions (e.g. irqs always enabled) 3429 * that are only valid on gen11+ 3430 */ 3431 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 3432 3433 if (!guc->sched_engine) { 3434 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 3435 if (!guc->sched_engine) 3436 return -ENOMEM; 3437 3438 guc->sched_engine->schedule = i915_schedule; 3439 guc->sched_engine->disabled = guc_sched_engine_disabled; 3440 guc->sched_engine->private_data = guc; 3441 guc->sched_engine->destroy = guc_sched_engine_destroy; 3442 guc->sched_engine->bump_inflight_request_prio = 3443 guc_bump_inflight_request_prio; 3444 guc->sched_engine->retire_inflight_request_prio = 3445 guc_retire_inflight_request_prio; 3446 tasklet_setup(&guc->sched_engine->tasklet, 3447 guc_submission_tasklet); 3448 } 3449 i915_sched_engine_put(engine->sched_engine); 3450 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 3451 3452 guc_default_vfuncs(engine); 3453 guc_default_irqs(engine); 3454 guc_init_breadcrumbs(engine); 3455 3456 if (engine->class == RENDER_CLASS) 3457 rcs_submission_override(engine); 3458 3459 lrc_init_wa_ctx(engine); 3460 3461 /* Finally, take ownership and responsibility for cleanup! */ 3462 engine->sanitize = guc_sanitize; 3463 engine->release = guc_release; 3464 3465 return 0; 3466 } 3467 3468 void intel_guc_submission_enable(struct intel_guc *guc) 3469 { 3470 guc_init_lrc_mapping(guc); 3471 } 3472 3473 void intel_guc_submission_disable(struct intel_guc *guc) 3474 { 3475 /* Note: By the time we're here, GuC may have already been reset */ 3476 } 3477 3478 static bool __guc_submission_supported(struct intel_guc *guc) 3479 { 3480 /* GuC submission is unavailable for pre-Gen11 */ 3481 return intel_guc_is_supported(guc) && 3482 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 3483 } 3484 3485 static bool __guc_submission_selected(struct intel_guc *guc) 3486 { 3487 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 3488 3489 if (!intel_guc_submission_is_supported(guc)) 3490 return false; 3491 3492 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 3493 } 3494 3495 void intel_guc_submission_init_early(struct intel_guc *guc) 3496 { 3497 guc->submission_supported = __guc_submission_supported(guc); 3498 guc->submission_selected = __guc_submission_selected(guc); 3499 } 3500 3501 static inline struct intel_context * 3502 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) 3503 { 3504 struct intel_context *ce; 3505 3506 if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { 3507 drm_err(&guc_to_gt(guc)->i915->drm, 3508 "Invalid desc_idx %u", desc_idx); 3509 return NULL; 3510 } 3511 3512 ce = __get_context(guc, desc_idx); 3513 if (unlikely(!ce)) { 3514 drm_err(&guc_to_gt(guc)->i915->drm, 3515 "Context is NULL, desc_idx %u", desc_idx); 3516 return NULL; 3517 } 3518 3519 if (unlikely(intel_context_is_child(ce))) { 3520 drm_err(&guc_to_gt(guc)->i915->drm, 3521 "Context is child, desc_idx %u", desc_idx); 3522 return NULL; 3523 } 3524 3525 return ce; 3526 } 3527 3528 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 3529 const u32 *msg, 3530 u32 len) 3531 { 3532 struct intel_context *ce; 3533 u32 desc_idx = msg[0]; 3534 3535 if (unlikely(len < 1)) { 3536 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3537 return -EPROTO; 3538 } 3539 3540 ce = g2h_context_lookup(guc, desc_idx); 3541 if (unlikely(!ce)) 3542 return -EPROTO; 3543 3544 trace_intel_context_deregister_done(ce); 3545 3546 #ifdef CONFIG_DRM_I915_SELFTEST 3547 if (unlikely(ce->drop_deregister)) { 3548 ce->drop_deregister = false; 3549 return 0; 3550 } 3551 #endif 3552 3553 if (context_wait_for_deregister_to_register(ce)) { 3554 struct intel_runtime_pm *runtime_pm = 3555 &ce->engine->gt->i915->runtime_pm; 3556 intel_wakeref_t wakeref; 3557 3558 /* 3559 * Previous owner of this guc_id has been deregistered, now safe 3560 * register this context. 3561 */ 3562 with_intel_runtime_pm(runtime_pm, wakeref) 3563 register_context(ce, true); 3564 guc_signal_context_fence(ce); 3565 intel_context_put(ce); 3566 } else if (context_destroyed(ce)) { 3567 /* Context has been destroyed */ 3568 intel_gt_pm_put_async(guc_to_gt(guc)); 3569 release_guc_id(guc, ce); 3570 __guc_context_destroy(ce); 3571 } 3572 3573 decr_outstanding_submission_g2h(guc); 3574 3575 return 0; 3576 } 3577 3578 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 3579 const u32 *msg, 3580 u32 len) 3581 { 3582 struct intel_context *ce; 3583 unsigned long flags; 3584 u32 desc_idx = msg[0]; 3585 3586 if (unlikely(len < 2)) { 3587 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3588 return -EPROTO; 3589 } 3590 3591 ce = g2h_context_lookup(guc, desc_idx); 3592 if (unlikely(!ce)) 3593 return -EPROTO; 3594 3595 if (unlikely(context_destroyed(ce) || 3596 (!context_pending_enable(ce) && 3597 !context_pending_disable(ce)))) { 3598 drm_err(&guc_to_gt(guc)->i915->drm, 3599 "Bad context sched_state 0x%x, desc_idx %u", 3600 ce->guc_state.sched_state, desc_idx); 3601 return -EPROTO; 3602 } 3603 3604 trace_intel_context_sched_done(ce); 3605 3606 if (context_pending_enable(ce)) { 3607 #ifdef CONFIG_DRM_I915_SELFTEST 3608 if (unlikely(ce->drop_schedule_enable)) { 3609 ce->drop_schedule_enable = false; 3610 return 0; 3611 } 3612 #endif 3613 3614 spin_lock_irqsave(&ce->guc_state.lock, flags); 3615 clr_context_pending_enable(ce); 3616 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3617 } else if (context_pending_disable(ce)) { 3618 bool banned; 3619 3620 #ifdef CONFIG_DRM_I915_SELFTEST 3621 if (unlikely(ce->drop_schedule_disable)) { 3622 ce->drop_schedule_disable = false; 3623 return 0; 3624 } 3625 #endif 3626 3627 /* 3628 * Unpin must be done before __guc_signal_context_fence, 3629 * otherwise a race exists between the requests getting 3630 * submitted + retired before this unpin completes resulting in 3631 * the pin_count going to zero and the context still being 3632 * enabled. 3633 */ 3634 intel_context_sched_disable_unpin(ce); 3635 3636 spin_lock_irqsave(&ce->guc_state.lock, flags); 3637 banned = context_banned(ce); 3638 clr_context_banned(ce); 3639 clr_context_pending_disable(ce); 3640 __guc_signal_context_fence(ce); 3641 guc_blocked_fence_complete(ce); 3642 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3643 3644 if (banned) { 3645 guc_cancel_context_requests(ce); 3646 intel_engine_signal_breadcrumbs(ce->engine); 3647 } 3648 } 3649 3650 decr_outstanding_submission_g2h(guc); 3651 intel_context_put(ce); 3652 3653 return 0; 3654 } 3655 3656 static void capture_error_state(struct intel_guc *guc, 3657 struct intel_context *ce) 3658 { 3659 struct intel_gt *gt = guc_to_gt(guc); 3660 struct drm_i915_private *i915 = gt->i915; 3661 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 3662 intel_wakeref_t wakeref; 3663 3664 intel_engine_set_hung_context(engine, ce); 3665 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 3666 i915_capture_error_state(gt, engine->mask); 3667 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 3668 } 3669 3670 static void guc_context_replay(struct intel_context *ce) 3671 { 3672 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 3673 3674 __guc_reset_context(ce, true); 3675 tasklet_hi_schedule(&sched_engine->tasklet); 3676 } 3677 3678 static void guc_handle_context_reset(struct intel_guc *guc, 3679 struct intel_context *ce) 3680 { 3681 trace_intel_context_reset(ce); 3682 3683 /* 3684 * XXX: Racey if request cancellation has occurred, see comment in 3685 * __guc_reset_context(). 3686 */ 3687 if (likely(!intel_context_is_banned(ce) && 3688 !context_blocked(ce))) { 3689 capture_error_state(guc, ce); 3690 guc_context_replay(ce); 3691 } 3692 } 3693 3694 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 3695 const u32 *msg, u32 len) 3696 { 3697 struct intel_context *ce; 3698 int desc_idx; 3699 3700 if (unlikely(len != 1)) { 3701 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3702 return -EPROTO; 3703 } 3704 3705 desc_idx = msg[0]; 3706 ce = g2h_context_lookup(guc, desc_idx); 3707 if (unlikely(!ce)) 3708 return -EPROTO; 3709 3710 guc_handle_context_reset(guc, ce); 3711 3712 return 0; 3713 } 3714 3715 static struct intel_engine_cs * 3716 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 3717 { 3718 struct intel_gt *gt = guc_to_gt(guc); 3719 u8 engine_class = guc_class_to_engine_class(guc_class); 3720 3721 /* Class index is checked in class converter */ 3722 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 3723 3724 return gt->engine_class[engine_class][instance]; 3725 } 3726 3727 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 3728 const u32 *msg, u32 len) 3729 { 3730 struct intel_engine_cs *engine; 3731 u8 guc_class, instance; 3732 u32 reason; 3733 3734 if (unlikely(len != 3)) { 3735 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3736 return -EPROTO; 3737 } 3738 3739 guc_class = msg[0]; 3740 instance = msg[1]; 3741 reason = msg[2]; 3742 3743 engine = guc_lookup_engine(guc, guc_class, instance); 3744 if (unlikely(!engine)) { 3745 drm_err(&guc_to_gt(guc)->i915->drm, 3746 "Invalid engine %d:%d", guc_class, instance); 3747 return -EPROTO; 3748 } 3749 3750 intel_gt_handle_error(guc_to_gt(guc), engine->mask, 3751 I915_ERROR_CAPTURE, 3752 "GuC failed to reset %s (reason=0x%08x)\n", 3753 engine->name, reason); 3754 3755 return 0; 3756 } 3757 3758 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 3759 { 3760 struct intel_guc *guc = &engine->gt->uc.guc; 3761 struct intel_context *ce; 3762 struct i915_request *rq; 3763 unsigned long index; 3764 unsigned long flags; 3765 3766 /* Reset called during driver load? GuC not yet initialised! */ 3767 if (unlikely(!guc_submission_initialized(guc))) 3768 return; 3769 3770 xa_lock_irqsave(&guc->context_lookup, flags); 3771 xa_for_each(&guc->context_lookup, index, ce) { 3772 if (!kref_get_unless_zero(&ce->ref)) 3773 continue; 3774 3775 xa_unlock(&guc->context_lookup); 3776 3777 if (!intel_context_is_pinned(ce)) 3778 goto next; 3779 3780 if (intel_engine_is_virtual(ce->engine)) { 3781 if (!(ce->engine->mask & engine->mask)) 3782 goto next; 3783 } else { 3784 if (ce->engine != engine) 3785 goto next; 3786 } 3787 3788 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 3789 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 3790 continue; 3791 3792 intel_engine_set_hung_context(engine, ce); 3793 3794 /* Can only cope with one hang at a time... */ 3795 intel_context_put(ce); 3796 xa_lock(&guc->context_lookup); 3797 goto done; 3798 } 3799 next: 3800 intel_context_put(ce); 3801 xa_lock(&guc->context_lookup); 3802 } 3803 done: 3804 xa_unlock_irqrestore(&guc->context_lookup, flags); 3805 } 3806 3807 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 3808 struct i915_request *hung_rq, 3809 struct drm_printer *m) 3810 { 3811 struct intel_guc *guc = &engine->gt->uc.guc; 3812 struct intel_context *ce; 3813 unsigned long index; 3814 unsigned long flags; 3815 3816 /* Reset called during driver load? GuC not yet initialised! */ 3817 if (unlikely(!guc_submission_initialized(guc))) 3818 return; 3819 3820 xa_lock_irqsave(&guc->context_lookup, flags); 3821 xa_for_each(&guc->context_lookup, index, ce) { 3822 if (!kref_get_unless_zero(&ce->ref)) 3823 continue; 3824 3825 xa_unlock(&guc->context_lookup); 3826 3827 if (!intel_context_is_pinned(ce)) 3828 goto next; 3829 3830 if (intel_engine_is_virtual(ce->engine)) { 3831 if (!(ce->engine->mask & engine->mask)) 3832 goto next; 3833 } else { 3834 if (ce->engine != engine) 3835 goto next; 3836 } 3837 3838 spin_lock(&ce->guc_state.lock); 3839 intel_engine_dump_active_requests(&ce->guc_state.requests, 3840 hung_rq, m); 3841 spin_unlock(&ce->guc_state.lock); 3842 3843 next: 3844 intel_context_put(ce); 3845 xa_lock(&guc->context_lookup); 3846 } 3847 xa_unlock_irqrestore(&guc->context_lookup, flags); 3848 } 3849 3850 void intel_guc_submission_print_info(struct intel_guc *guc, 3851 struct drm_printer *p) 3852 { 3853 struct i915_sched_engine *sched_engine = guc->sched_engine; 3854 struct rb_node *rb; 3855 unsigned long flags; 3856 3857 if (!sched_engine) 3858 return; 3859 3860 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 3861 atomic_read(&guc->outstanding_submission_g2h)); 3862 drm_printf(p, "GuC tasklet count: %u\n\n", 3863 atomic_read(&sched_engine->tasklet.count)); 3864 3865 spin_lock_irqsave(&sched_engine->lock, flags); 3866 drm_printf(p, "Requests in GuC submit tasklet:\n"); 3867 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 3868 struct i915_priolist *pl = to_priolist(rb); 3869 struct i915_request *rq; 3870 3871 priolist_for_each_request(rq, pl) 3872 drm_printf(p, "guc_id=%u, seqno=%llu\n", 3873 rq->context->guc_id.id, 3874 rq->fence.seqno); 3875 } 3876 spin_unlock_irqrestore(&sched_engine->lock, flags); 3877 drm_printf(p, "\n"); 3878 } 3879 3880 static inline void guc_log_context_priority(struct drm_printer *p, 3881 struct intel_context *ce) 3882 { 3883 int i; 3884 3885 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 3886 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 3887 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 3888 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 3889 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 3890 i, ce->guc_state.prio_count[i]); 3891 } 3892 drm_printf(p, "\n"); 3893 } 3894 3895 static inline void guc_log_context(struct drm_printer *p, 3896 struct intel_context *ce) 3897 { 3898 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 3899 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 3900 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 3901 ce->ring->head, 3902 ce->lrc_reg_state[CTX_RING_HEAD]); 3903 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 3904 ce->ring->tail, 3905 ce->lrc_reg_state[CTX_RING_TAIL]); 3906 drm_printf(p, "\t\tContext Pin Count: %u\n", 3907 atomic_read(&ce->pin_count)); 3908 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 3909 atomic_read(&ce->guc_id.ref)); 3910 drm_printf(p, "\t\tSchedule State: 0x%x\n\n", 3911 ce->guc_state.sched_state); 3912 } 3913 3914 void intel_guc_submission_print_context_info(struct intel_guc *guc, 3915 struct drm_printer *p) 3916 { 3917 struct intel_context *ce; 3918 unsigned long index; 3919 unsigned long flags; 3920 3921 xa_lock_irqsave(&guc->context_lookup, flags); 3922 xa_for_each(&guc->context_lookup, index, ce) { 3923 GEM_BUG_ON(intel_context_is_child(ce)); 3924 3925 guc_log_context(p, ce); 3926 guc_log_context_priority(p, ce); 3927 3928 if (intel_context_is_parent(ce)) { 3929 struct guc_process_desc *desc = __get_process_desc(ce); 3930 struct intel_context *child; 3931 3932 drm_printf(p, "\t\tNumber children: %u\n", 3933 ce->parallel.number_children); 3934 drm_printf(p, "\t\tWQI Head: %u\n", 3935 READ_ONCE(desc->head)); 3936 drm_printf(p, "\t\tWQI Tail: %u\n", 3937 READ_ONCE(desc->tail)); 3938 drm_printf(p, "\t\tWQI Status: %u\n\n", 3939 READ_ONCE(desc->wq_status)); 3940 3941 if (ce->engine->emit_bb_start == 3942 emit_bb_start_parent_no_preempt_mid_batch) { 3943 u8 i; 3944 3945 drm_printf(p, "\t\tChildren Go: %u\n\n", 3946 get_children_go_value(ce)); 3947 for (i = 0; i < ce->parallel.number_children; ++i) 3948 drm_printf(p, "\t\tChildren Join: %u\n", 3949 get_children_join_value(ce, i)); 3950 } 3951 3952 for_each_child(ce, child) 3953 guc_log_context(p, child); 3954 } 3955 } 3956 xa_unlock_irqrestore(&guc->context_lookup, flags); 3957 } 3958 3959 static inline u32 get_children_go_addr(struct intel_context *ce) 3960 { 3961 GEM_BUG_ON(!intel_context_is_parent(ce)); 3962 3963 return i915_ggtt_offset(ce->state) + 3964 __get_parent_scratch_offset(ce) + 3965 offsetof(struct parent_scratch, go.semaphore); 3966 } 3967 3968 static inline u32 get_children_join_addr(struct intel_context *ce, 3969 u8 child_index) 3970 { 3971 GEM_BUG_ON(!intel_context_is_parent(ce)); 3972 3973 return i915_ggtt_offset(ce->state) + 3974 __get_parent_scratch_offset(ce) + 3975 offsetof(struct parent_scratch, join[child_index].semaphore); 3976 } 3977 3978 #define PARENT_GO_BB 1 3979 #define PARENT_GO_FINI_BREADCRUMB 0 3980 #define CHILD_GO_BB 1 3981 #define CHILD_GO_FINI_BREADCRUMB 0 3982 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3983 u64 offset, u32 len, 3984 const unsigned int flags) 3985 { 3986 struct intel_context *ce = rq->context; 3987 u32 *cs; 3988 u8 i; 3989 3990 GEM_BUG_ON(!intel_context_is_parent(ce)); 3991 3992 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 3993 if (IS_ERR(cs)) 3994 return PTR_ERR(cs); 3995 3996 /* Wait on children */ 3997 for (i = 0; i < ce->parallel.number_children; ++i) { 3998 *cs++ = (MI_SEMAPHORE_WAIT | 3999 MI_SEMAPHORE_GLOBAL_GTT | 4000 MI_SEMAPHORE_POLL | 4001 MI_SEMAPHORE_SAD_EQ_SDD); 4002 *cs++ = PARENT_GO_BB; 4003 *cs++ = get_children_join_addr(ce, i); 4004 *cs++ = 0; 4005 } 4006 4007 /* Turn off preemption */ 4008 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4009 *cs++ = MI_NOOP; 4010 4011 /* Tell children go */ 4012 cs = gen8_emit_ggtt_write(cs, 4013 CHILD_GO_BB, 4014 get_children_go_addr(ce), 4015 0); 4016 4017 /* Jump to batch */ 4018 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4019 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4020 *cs++ = lower_32_bits(offset); 4021 *cs++ = upper_32_bits(offset); 4022 *cs++ = MI_NOOP; 4023 4024 intel_ring_advance(rq, cs); 4025 4026 return 0; 4027 } 4028 4029 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4030 u64 offset, u32 len, 4031 const unsigned int flags) 4032 { 4033 struct intel_context *ce = rq->context; 4034 struct intel_context *parent = intel_context_to_parent(ce); 4035 u32 *cs; 4036 4037 GEM_BUG_ON(!intel_context_is_child(ce)); 4038 4039 cs = intel_ring_begin(rq, 12); 4040 if (IS_ERR(cs)) 4041 return PTR_ERR(cs); 4042 4043 /* Signal parent */ 4044 cs = gen8_emit_ggtt_write(cs, 4045 PARENT_GO_BB, 4046 get_children_join_addr(parent, 4047 ce->parallel.child_index), 4048 0); 4049 4050 /* Wait on parent for go */ 4051 *cs++ = (MI_SEMAPHORE_WAIT | 4052 MI_SEMAPHORE_GLOBAL_GTT | 4053 MI_SEMAPHORE_POLL | 4054 MI_SEMAPHORE_SAD_EQ_SDD); 4055 *cs++ = CHILD_GO_BB; 4056 *cs++ = get_children_go_addr(parent); 4057 *cs++ = 0; 4058 4059 /* Turn off preemption */ 4060 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 4061 4062 /* Jump to batch */ 4063 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 4064 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 4065 *cs++ = lower_32_bits(offset); 4066 *cs++ = upper_32_bits(offset); 4067 4068 intel_ring_advance(rq, cs); 4069 4070 return 0; 4071 } 4072 4073 static u32 * 4074 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4075 u32 *cs) 4076 { 4077 struct intel_context *ce = rq->context; 4078 u8 i; 4079 4080 GEM_BUG_ON(!intel_context_is_parent(ce)); 4081 4082 /* Wait on children */ 4083 for (i = 0; i < ce->parallel.number_children; ++i) { 4084 *cs++ = (MI_SEMAPHORE_WAIT | 4085 MI_SEMAPHORE_GLOBAL_GTT | 4086 MI_SEMAPHORE_POLL | 4087 MI_SEMAPHORE_SAD_EQ_SDD); 4088 *cs++ = PARENT_GO_FINI_BREADCRUMB; 4089 *cs++ = get_children_join_addr(ce, i); 4090 *cs++ = 0; 4091 } 4092 4093 /* Turn on preemption */ 4094 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4095 *cs++ = MI_NOOP; 4096 4097 /* Tell children go */ 4098 cs = gen8_emit_ggtt_write(cs, 4099 CHILD_GO_FINI_BREADCRUMB, 4100 get_children_go_addr(ce), 4101 0); 4102 4103 return cs; 4104 } 4105 4106 /* 4107 * If this true, a submission of multi-lrc requests had an error and the 4108 * requests need to be skipped. The front end (execuf IOCTL) should've called 4109 * i915_request_skip which squashes the BB but we still need to emit the fini 4110 * breadrcrumbs seqno write. At this point we don't know how many of the 4111 * requests in the multi-lrc submission were generated so we can't do the 4112 * handshake between the parent and children (e.g. if 4 requests should be 4113 * generated but 2nd hit an error only 1 would be seen by the GuC backend). 4114 * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error 4115 * has occurred on any of the requests in submission / relationship. 4116 */ 4117 static inline bool skip_handshake(struct i915_request *rq) 4118 { 4119 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 4120 } 4121 4122 static u32 * 4123 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4124 u32 *cs) 4125 { 4126 struct intel_context *ce = rq->context; 4127 4128 GEM_BUG_ON(!intel_context_is_parent(ce)); 4129 4130 if (unlikely(skip_handshake(rq))) { 4131 /* 4132 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 4133 * the -6 comes from the length of the emits below. 4134 */ 4135 memset(cs, 0, sizeof(u32) * 4136 (ce->engine->emit_fini_breadcrumb_dw - 6)); 4137 cs += ce->engine->emit_fini_breadcrumb_dw - 6; 4138 } else { 4139 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 4140 } 4141 4142 /* Emit fini breadcrumb */ 4143 cs = gen8_emit_ggtt_write(cs, 4144 rq->fence.seqno, 4145 i915_request_active_timeline(rq)->hwsp_offset, 4146 0); 4147 4148 /* User interrupt */ 4149 *cs++ = MI_USER_INTERRUPT; 4150 *cs++ = MI_NOOP; 4151 4152 rq->tail = intel_ring_offset(rq, cs); 4153 4154 return cs; 4155 } 4156 4157 static u32 * 4158 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4159 u32 *cs) 4160 { 4161 struct intel_context *ce = rq->context; 4162 struct intel_context *parent = intel_context_to_parent(ce); 4163 4164 GEM_BUG_ON(!intel_context_is_child(ce)); 4165 4166 /* Turn on preemption */ 4167 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4168 *cs++ = MI_NOOP; 4169 4170 /* Signal parent */ 4171 cs = gen8_emit_ggtt_write(cs, 4172 PARENT_GO_FINI_BREADCRUMB, 4173 get_children_join_addr(parent, 4174 ce->parallel.child_index), 4175 0); 4176 4177 /* Wait parent on for go */ 4178 *cs++ = (MI_SEMAPHORE_WAIT | 4179 MI_SEMAPHORE_GLOBAL_GTT | 4180 MI_SEMAPHORE_POLL | 4181 MI_SEMAPHORE_SAD_EQ_SDD); 4182 *cs++ = CHILD_GO_FINI_BREADCRUMB; 4183 *cs++ = get_children_go_addr(parent); 4184 *cs++ = 0; 4185 4186 return cs; 4187 } 4188 4189 static u32 * 4190 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4191 u32 *cs) 4192 { 4193 struct intel_context *ce = rq->context; 4194 4195 GEM_BUG_ON(!intel_context_is_child(ce)); 4196 4197 if (unlikely(skip_handshake(rq))) { 4198 /* 4199 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 4200 * the -6 comes from the length of the emits below. 4201 */ 4202 memset(cs, 0, sizeof(u32) * 4203 (ce->engine->emit_fini_breadcrumb_dw - 6)); 4204 cs += ce->engine->emit_fini_breadcrumb_dw - 6; 4205 } else { 4206 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 4207 } 4208 4209 /* Emit fini breadcrumb */ 4210 cs = gen8_emit_ggtt_write(cs, 4211 rq->fence.seqno, 4212 i915_request_active_timeline(rq)->hwsp_offset, 4213 0); 4214 4215 /* User interrupt */ 4216 *cs++ = MI_USER_INTERRUPT; 4217 *cs++ = MI_NOOP; 4218 4219 rq->tail = intel_ring_offset(rq, cs); 4220 4221 return cs; 4222 } 4223 4224 static struct intel_context * 4225 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 4226 unsigned long flags) 4227 { 4228 struct guc_virtual_engine *ve; 4229 struct intel_guc *guc; 4230 unsigned int n; 4231 int err; 4232 4233 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 4234 if (!ve) 4235 return ERR_PTR(-ENOMEM); 4236 4237 guc = &siblings[0]->gt->uc.guc; 4238 4239 ve->base.i915 = siblings[0]->i915; 4240 ve->base.gt = siblings[0]->gt; 4241 ve->base.uncore = siblings[0]->uncore; 4242 ve->base.id = -1; 4243 4244 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 4245 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4246 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 4247 ve->base.saturated = ALL_ENGINES; 4248 4249 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 4250 4251 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 4252 4253 ve->base.cops = &virtual_guc_context_ops; 4254 ve->base.request_alloc = guc_request_alloc; 4255 ve->base.bump_serial = virtual_guc_bump_serial; 4256 4257 ve->base.submit_request = guc_submit_request; 4258 4259 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 4260 4261 intel_context_init(&ve->context, &ve->base); 4262 4263 for (n = 0; n < count; n++) { 4264 struct intel_engine_cs *sibling = siblings[n]; 4265 4266 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 4267 if (sibling->mask & ve->base.mask) { 4268 DRM_DEBUG("duplicate %s entry in load balancer\n", 4269 sibling->name); 4270 err = -EINVAL; 4271 goto err_put; 4272 } 4273 4274 ve->base.mask |= sibling->mask; 4275 ve->base.logical_mask |= sibling->logical_mask; 4276 4277 if (n != 0 && ve->base.class != sibling->class) { 4278 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 4279 sibling->class, ve->base.class); 4280 err = -EINVAL; 4281 goto err_put; 4282 } else if (n == 0) { 4283 ve->base.class = sibling->class; 4284 ve->base.uabi_class = sibling->uabi_class; 4285 snprintf(ve->base.name, sizeof(ve->base.name), 4286 "v%dx%d", ve->base.class, count); 4287 ve->base.context_size = sibling->context_size; 4288 4289 ve->base.add_active_request = 4290 sibling->add_active_request; 4291 ve->base.remove_active_request = 4292 sibling->remove_active_request; 4293 ve->base.emit_bb_start = sibling->emit_bb_start; 4294 ve->base.emit_flush = sibling->emit_flush; 4295 ve->base.emit_init_breadcrumb = 4296 sibling->emit_init_breadcrumb; 4297 ve->base.emit_fini_breadcrumb = 4298 sibling->emit_fini_breadcrumb; 4299 ve->base.emit_fini_breadcrumb_dw = 4300 sibling->emit_fini_breadcrumb_dw; 4301 ve->base.breadcrumbs = 4302 intel_breadcrumbs_get(sibling->breadcrumbs); 4303 4304 ve->base.flags |= sibling->flags; 4305 4306 ve->base.props.timeslice_duration_ms = 4307 sibling->props.timeslice_duration_ms; 4308 ve->base.props.preempt_timeout_ms = 4309 sibling->props.preempt_timeout_ms; 4310 } 4311 } 4312 4313 return &ve->context; 4314 4315 err_put: 4316 intel_context_put(&ve->context); 4317 return ERR_PTR(err); 4318 } 4319 4320 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 4321 { 4322 struct intel_engine_cs *engine; 4323 intel_engine_mask_t tmp, mask = ve->mask; 4324 4325 for_each_engine_masked(engine, ve->gt, mask, tmp) 4326 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 4327 return true; 4328 4329 return false; 4330 } 4331 4332 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 4333 #include "selftest_guc.c" 4334 #include "selftest_guc_multi_lrc.c" 4335 #endif 4336