1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2014 Intel Corporation 4 */ 5 6 #include <linux/circ_buf.h> 7 8 #include "gem/i915_gem_context.h" 9 #include "gt/gen8_engine_cs.h" 10 #include "gt/intel_breadcrumbs.h" 11 #include "gt/intel_context.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_heartbeat.h" 14 #include "gt/intel_gpu_commands.h" 15 #include "gt/intel_gt.h" 16 #include "gt/intel_gt_irq.h" 17 #include "gt/intel_gt_pm.h" 18 #include "gt/intel_gt_requests.h" 19 #include "gt/intel_lrc.h" 20 #include "gt/intel_lrc_reg.h" 21 #include "gt/intel_mocs.h" 22 #include "gt/intel_ring.h" 23 24 #include "intel_guc_submission.h" 25 26 #include "i915_drv.h" 27 #include "i915_trace.h" 28 29 /** 30 * DOC: GuC-based command submission 31 * 32 * The Scratch registers: 33 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes 34 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then 35 * triggers an interrupt on the GuC via another register write (0xC4C8). 36 * Firmware writes a success/fail code back to the action register after 37 * processes the request. The kernel driver polls waiting for this update and 38 * then proceeds. 39 * 40 * Command Transport buffers (CTBs): 41 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host 42 * - G2H) are a message interface between the i915 and GuC. 43 * 44 * Context registration: 45 * Before a context can be submitted it must be registered with the GuC via a 46 * H2G. A unique guc_id is associated with each context. The context is either 47 * registered at request creation time (normal operation) or at submission time 48 * (abnormal operation, e.g. after a reset). 49 * 50 * Context submission: 51 * The i915 updates the LRC tail value in memory. The i915 must enable the 52 * scheduling of the context within the GuC for the GuC to actually consider it. 53 * Therefore, the first time a disabled context is submitted we use a schedule 54 * enable H2G, while follow up submissions are done via the context submit H2G, 55 * which informs the GuC that a previously enabled context has new work 56 * available. 57 * 58 * Context unpin: 59 * To unpin a context a H2G is used to disable scheduling. When the 60 * corresponding G2H returns indicating the scheduling disable operation has 61 * completed it is safe to unpin the context. While a disable is in flight it 62 * isn't safe to resubmit the context so a fence is used to stall all future 63 * requests of that context until the G2H is returned. 64 * 65 * Context deregistration: 66 * Before a context can be destroyed or if we steal its guc_id we must 67 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't 68 * safe to submit anything to this guc_id until the deregister completes so a 69 * fence is used to stall all requests associated with this guc_id until the 70 * corresponding G2H returns indicating the guc_id has been deregistered. 71 * 72 * submission_state.guc_ids: 73 * Unique number associated with private GuC context data passed in during 74 * context registration / submission / deregistration. 64k available. Simple ida 75 * is used for allocation. 76 * 77 * Stealing guc_ids: 78 * If no guc_ids are available they can be stolen from another context at 79 * request creation time if that context is unpinned. If a guc_id can't be found 80 * we punt this problem to the user as we believe this is near impossible to hit 81 * during normal use cases. 
82 * 83 * Locking: 84 * In the GuC submission code we have 3 basic spin locks which protect 85 * everything. Details about each below. 86 * 87 * sched_engine->lock 88 * This is the submission lock for all contexts that share an i915 schedule 89 * engine (sched_engine), thus only one of the contexts which share a 90 * sched_engine can be submitting at a time. Currently only one sched_engine is 91 * used for all of GuC submission but that could change in the future. 92 * 93 * guc->submission_state.lock 94 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts 95 * list. 96 * 97 * ce->guc_state.lock 98 * Protects everything under ce->guc_state. Ensures that a context is in the 99 * correct state before issuing a H2G. e.g. We don't issue a schedule disable 100 * on a disabled context (bad idea), we don't issue a schedule enable when a 101 * schedule disable is in flight, etc... Also protects list of inflight requests 102 * on the context and the priority management state. Lock is individual to each 103 * context. 104 * 105 * Lock ordering rules: 106 * sched_engine->lock -> ce->guc_state.lock 107 * guc->submission_state.lock -> ce->guc_state.lock 108 * 109 * Reset races: 110 * When a full GT reset is triggered it is assumed that some G2H responses to 111 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be 112 * fatal as we do certain operations upon receiving a G2H (e.g. destroy 113 * contexts, release guc_ids, etc...). When this occurs we can scrub the 114 * context state and cleanup appropriately, however this is quite racey. 115 * To avoid races, the reset code must disable submission before scrubbing for 116 * the missing G2H, while the submission code must check for submission being 117 * disabled and skip sending H2Gs and updating context states when it is. Both 118 * sides must also make sure to hold the relevant locks. 119 */ 120 121 /* GuC Virtual Engine */ 122 struct guc_virtual_engine { 123 struct intel_engine_cs base; 124 struct intel_context context; 125 }; 126 127 static struct intel_context * 128 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 129 unsigned long flags); 130 131 static struct intel_context * 132 guc_create_parallel(struct intel_engine_cs **engines, 133 unsigned int num_siblings, 134 unsigned int width); 135 136 #define GUC_REQUEST_SIZE 64 /* bytes */ 137 138 /* 139 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous 140 * per the GuC submission interface. A different allocation algorithm is used 141 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to 142 * partition the guc_id space. We believe the number of multi-lrc contexts in 143 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 144 * multi-lrc. 145 */ 146 #define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16) 147 148 /* 149 * Below is a set of functions which control the GuC scheduling state which 150 * require a lock. 
151 */ 152 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 153 #define SCHED_STATE_DESTROYED BIT(1) 154 #define SCHED_STATE_PENDING_DISABLE BIT(2) 155 #define SCHED_STATE_BANNED BIT(3) 156 #define SCHED_STATE_ENABLED BIT(4) 157 #define SCHED_STATE_PENDING_ENABLE BIT(5) 158 #define SCHED_STATE_REGISTERED BIT(6) 159 #define SCHED_STATE_BLOCKED_SHIFT 7 160 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 161 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 162 163 static inline void init_sched_state(struct intel_context *ce) 164 { 165 lockdep_assert_held(&ce->guc_state.lock); 166 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 167 } 168 169 __maybe_unused 170 static bool sched_state_is_init(struct intel_context *ce) 171 { 172 /* 173 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after 174 * suspend. 175 */ 176 return !(ce->guc_state.sched_state &= 177 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); 178 } 179 180 static inline bool 181 context_wait_for_deregister_to_register(struct intel_context *ce) 182 { 183 return ce->guc_state.sched_state & 184 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 185 } 186 187 static inline void 188 set_context_wait_for_deregister_to_register(struct intel_context *ce) 189 { 190 lockdep_assert_held(&ce->guc_state.lock); 191 ce->guc_state.sched_state |= 192 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 193 } 194 195 static inline void 196 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 197 { 198 lockdep_assert_held(&ce->guc_state.lock); 199 ce->guc_state.sched_state &= 200 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 201 } 202 203 static inline bool 204 context_destroyed(struct intel_context *ce) 205 { 206 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 207 } 208 209 static inline void 210 set_context_destroyed(struct intel_context *ce) 211 { 212 lockdep_assert_held(&ce->guc_state.lock); 213 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 214 } 215 216 static inline bool context_pending_disable(struct intel_context *ce) 217 { 218 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 219 } 220 221 static inline void set_context_pending_disable(struct intel_context *ce) 222 { 223 lockdep_assert_held(&ce->guc_state.lock); 224 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 225 } 226 227 static inline void clr_context_pending_disable(struct intel_context *ce) 228 { 229 lockdep_assert_held(&ce->guc_state.lock); 230 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 231 } 232 233 static inline bool context_banned(struct intel_context *ce) 234 { 235 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 236 } 237 238 static inline void set_context_banned(struct intel_context *ce) 239 { 240 lockdep_assert_held(&ce->guc_state.lock); 241 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 242 } 243 244 static inline void clr_context_banned(struct intel_context *ce) 245 { 246 lockdep_assert_held(&ce->guc_state.lock); 247 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 248 } 249 250 static inline bool context_enabled(struct intel_context *ce) 251 { 252 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 253 } 254 255 static inline void set_context_enabled(struct intel_context *ce) 256 { 257 lockdep_assert_held(&ce->guc_state.lock); 258 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 259 } 260 261 static inline void clr_context_enabled(struct intel_context *ce) 262 { 263 lockdep_assert_held(&ce->guc_state.lock); 264 
ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 265 } 266 267 static inline bool context_pending_enable(struct intel_context *ce) 268 { 269 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 270 } 271 272 static inline void set_context_pending_enable(struct intel_context *ce) 273 { 274 lockdep_assert_held(&ce->guc_state.lock); 275 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 276 } 277 278 static inline void clr_context_pending_enable(struct intel_context *ce) 279 { 280 lockdep_assert_held(&ce->guc_state.lock); 281 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 282 } 283 284 static inline bool context_registered(struct intel_context *ce) 285 { 286 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 287 } 288 289 static inline void set_context_registered(struct intel_context *ce) 290 { 291 lockdep_assert_held(&ce->guc_state.lock); 292 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 293 } 294 295 static inline void clr_context_registered(struct intel_context *ce) 296 { 297 lockdep_assert_held(&ce->guc_state.lock); 298 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 299 } 300 301 static inline u32 context_blocked(struct intel_context *ce) 302 { 303 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 304 SCHED_STATE_BLOCKED_SHIFT; 305 } 306 307 static inline void incr_context_blocked(struct intel_context *ce) 308 { 309 lockdep_assert_held(&ce->guc_state.lock); 310 311 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 312 313 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 314 } 315 316 static inline void decr_context_blocked(struct intel_context *ce) 317 { 318 lockdep_assert_held(&ce->guc_state.lock); 319 320 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 321 322 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 323 } 324 325 static inline bool context_has_committed_requests(struct intel_context *ce) 326 { 327 return !!ce->guc_state.number_committed_requests; 328 } 329 330 static inline void incr_context_committed_requests(struct intel_context *ce) 331 { 332 lockdep_assert_held(&ce->guc_state.lock); 333 ++ce->guc_state.number_committed_requests; 334 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 335 } 336 337 static inline void decr_context_committed_requests(struct intel_context *ce) 338 { 339 lockdep_assert_held(&ce->guc_state.lock); 340 --ce->guc_state.number_committed_requests; 341 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); 342 } 343 344 static struct intel_context * 345 request_to_scheduling_context(struct i915_request *rq) 346 { 347 return intel_context_to_parent(rq->context); 348 } 349 350 static inline bool context_guc_id_invalid(struct intel_context *ce) 351 { 352 return ce->guc_id.id == GUC_INVALID_LRC_ID; 353 } 354 355 static inline void set_context_guc_id_invalid(struct intel_context *ce) 356 { 357 ce->guc_id.id = GUC_INVALID_LRC_ID; 358 } 359 360 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 361 { 362 return &ce->engine->gt->uc.guc; 363 } 364 365 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 366 { 367 return rb_entry(rb, struct i915_priolist, node); 368 } 369 370 /* 371 * When using multi-lrc submission a scratch memory area is reserved in the 372 * parent's context state for the process descriptor, work queue, and handshake 373 * between the parent + children contexts to insert safe preemption points 374 * between each of the BBs. Currently the scratch area is sized to a page. 
375 * 376 * The layout of this scratch area is below: 377 * 0 guc_process_desc 378 * + sizeof(struct guc_process_desc) child go 379 * + CACHELINE_BYTES child join[0] 380 * ... 381 * + CACHELINE_BYTES child join[n - 1] 382 * ... unused 383 * PARENT_SCRATCH_SIZE / 2 work queue start 384 * ... work queue 385 * PARENT_SCRATCH_SIZE - 1 work queue end 386 */ 387 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 388 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 389 390 struct sync_semaphore { 391 u32 semaphore; 392 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 393 }; 394 395 struct parent_scratch { 396 struct guc_process_desc pdesc; 397 398 struct sync_semaphore go; 399 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 400 401 u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - 402 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 403 404 u32 wq[WQ_SIZE / sizeof(u32)]; 405 }; 406 407 static u32 __get_parent_scratch_offset(struct intel_context *ce) 408 { 409 GEM_BUG_ON(!ce->parallel.guc.parent_page); 410 411 return ce->parallel.guc.parent_page * PAGE_SIZE; 412 } 413 414 static u32 __get_wq_offset(struct intel_context *ce) 415 { 416 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 417 418 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 419 } 420 421 static struct parent_scratch * 422 __get_parent_scratch(struct intel_context *ce) 423 { 424 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 425 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 426 427 /* 428 * Need to subtract LRC_STATE_OFFSET here as the 429 * parallel.guc.parent_page is the offset into ce->state while 430 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 431 */ 432 return (struct parent_scratch *) 433 (ce->lrc_reg_state + 434 ((__get_parent_scratch_offset(ce) - 435 LRC_STATE_OFFSET) / sizeof(u32))); 436 } 437 438 static struct guc_process_desc * 439 __get_process_desc(struct intel_context *ce) 440 { 441 struct parent_scratch *ps = __get_parent_scratch(ce); 442 443 return &ps->pdesc; 444 } 445 446 static u32 *get_wq_pointer(struct guc_process_desc *desc, 447 struct intel_context *ce, 448 u32 wqi_size) 449 { 450 /* 451 * Check for space in work queue. Caching a value of head pointer in 452 * intel_context structure in order reduce the number accesses to shared 453 * GPU memory which may be across a PCIe bus. 
454 */ 455 #define AVAILABLE_SPACE \ 456 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 457 if (wqi_size > AVAILABLE_SPACE) { 458 ce->parallel.guc.wqi_head = READ_ONCE(desc->head); 459 460 if (wqi_size > AVAILABLE_SPACE) 461 return NULL; 462 } 463 #undef AVAILABLE_SPACE 464 465 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 466 } 467 468 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) 469 { 470 struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; 471 472 GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); 473 474 return &base[index]; 475 } 476 477 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 478 { 479 struct intel_context *ce = xa_load(&guc->context_lookup, id); 480 481 GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); 482 483 return ce; 484 } 485 486 static int guc_lrc_desc_pool_create(struct intel_guc *guc) 487 { 488 u32 size; 489 int ret; 490 491 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * 492 GUC_MAX_LRC_DESCRIPTORS); 493 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, 494 (void **)&guc->lrc_desc_pool_vaddr); 495 if (ret) 496 return ret; 497 498 return 0; 499 } 500 501 static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) 502 { 503 guc->lrc_desc_pool_vaddr = NULL; 504 i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); 505 } 506 507 static inline bool guc_submission_initialized(struct intel_guc *guc) 508 { 509 return !!guc->lrc_desc_pool_vaddr; 510 } 511 512 static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) 513 { 514 if (likely(guc_submission_initialized(guc))) { 515 struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); 516 unsigned long flags; 517 518 memset(desc, 0, sizeof(*desc)); 519 520 /* 521 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 522 * the lower level functions directly. 523 */ 524 xa_lock_irqsave(&guc->context_lookup, flags); 525 __xa_erase(&guc->context_lookup, id); 526 xa_unlock_irqrestore(&guc->context_lookup, flags); 527 } 528 } 529 530 static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) 531 { 532 return __get_context(guc, id); 533 } 534 535 static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, 536 struct intel_context *ce) 537 { 538 unsigned long flags; 539 540 /* 541 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 542 * lower level functions directly. 543 */ 544 xa_lock_irqsave(&guc->context_lookup, flags); 545 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 546 xa_unlock_irqrestore(&guc->context_lookup, flags); 547 } 548 549 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 550 { 551 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 552 wake_up_all(&guc->ct.wq); 553 } 554 555 static int guc_submission_send_busy_loop(struct intel_guc *guc, 556 const u32 *action, 557 u32 len, 558 u32 g2h_len_dw, 559 bool loop) 560 { 561 /* 562 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 563 * so we don't handle the case where we don't get a reply because we 564 * aborted the send due to the channel being busy. 565 */ 566 GEM_BUG_ON(g2h_len_dw && !loop); 567 568 if (g2h_len_dw) 569 atomic_inc(&guc->outstanding_submission_g2h); 570 571 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 572 } 573 574 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 575 atomic_t *wait_var, 576 bool interruptible, 577 long timeout) 578 { 579 const int state = interruptible ? 
580 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 581 DEFINE_WAIT(wait); 582 583 might_sleep(); 584 GEM_BUG_ON(timeout < 0); 585 586 if (!atomic_read(wait_var)) 587 return 0; 588 589 if (!timeout) 590 return -ETIME; 591 592 for (;;) { 593 prepare_to_wait(&guc->ct.wq, &wait, state); 594 595 if (!atomic_read(wait_var)) 596 break; 597 598 if (signal_pending_state(state, current)) { 599 timeout = -EINTR; 600 break; 601 } 602 603 if (!timeout) { 604 timeout = -ETIME; 605 break; 606 } 607 608 timeout = io_schedule_timeout(timeout); 609 } 610 finish_wait(&guc->ct.wq, &wait); 611 612 return (timeout < 0) ? timeout : 0; 613 } 614 615 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 616 { 617 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 618 return 0; 619 620 return intel_guc_wait_for_pending_msg(guc, 621 &guc->outstanding_submission_g2h, 622 true, timeout); 623 } 624 625 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); 626 627 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 628 { 629 int err = 0; 630 struct intel_context *ce = request_to_scheduling_context(rq); 631 u32 action[3]; 632 int len = 0; 633 u32 g2h_len_dw = 0; 634 bool enabled; 635 636 lockdep_assert_held(&rq->engine->sched_engine->lock); 637 638 /* 639 * Corner case where requests were sitting in the priority list or a 640 * request resubmitted after the context was banned. 641 */ 642 if (unlikely(intel_context_is_banned(ce))) { 643 i915_request_put(i915_request_mark_eio(rq)); 644 intel_engine_signal_breadcrumbs(ce->engine); 645 return 0; 646 } 647 648 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 649 GEM_BUG_ON(context_guc_id_invalid(ce)); 650 651 spin_lock(&ce->guc_state.lock); 652 653 /* 654 * The request / context will be run on the hardware when scheduling 655 * gets enabled in the unblock. For multi-lrc we still submit the 656 * context to move the LRC tails. 657 */ 658 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 659 goto out; 660 661 enabled = context_enabled(ce) || context_blocked(ce); 662 663 if (!enabled) { 664 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 665 action[len++] = ce->guc_id.id; 666 action[len++] = GUC_CONTEXT_ENABLE; 667 set_context_pending_enable(ce); 668 intel_context_get(ce); 669 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 670 } else { 671 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 672 action[len++] = ce->guc_id.id; 673 } 674 675 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 676 if (!enabled && !err) { 677 trace_intel_context_sched_enable(ce); 678 atomic_inc(&guc->outstanding_submission_g2h); 679 set_context_enabled(ce); 680 681 /* 682 * Without multi-lrc KMD does the submission step (moving the 683 * lrc tail) so enabling scheduling is sufficient to submit the 684 * context. This isn't the case in multi-lrc submission as the 685 * GuC needs to move the tails, hence the need for another H2G 686 * to submit a multi-lrc context after enabling scheduling. 
687 */ 688 if (intel_context_is_parent(ce)) { 689 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 690 err = intel_guc_send_nb(guc, action, len - 1, 0); 691 } 692 } else if (!enabled) { 693 clr_context_pending_enable(ce); 694 intel_context_put(ce); 695 } 696 if (likely(!err)) 697 trace_i915_request_guc_submit(rq); 698 699 out: 700 spin_unlock(&ce->guc_state.lock); 701 return err; 702 } 703 704 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 705 { 706 int ret = __guc_add_request(guc, rq); 707 708 if (unlikely(ret == -EBUSY)) { 709 guc->stalled_request = rq; 710 guc->submission_stall_reason = STALL_ADD_REQUEST; 711 } 712 713 return ret; 714 } 715 716 static inline void guc_set_lrc_tail(struct i915_request *rq) 717 { 718 rq->context->lrc_reg_state[CTX_RING_TAIL] = 719 intel_ring_set_tail(rq->ring, rq->tail); 720 } 721 722 static inline int rq_prio(const struct i915_request *rq) 723 { 724 return rq->sched.attr.priority; 725 } 726 727 static bool is_multi_lrc_rq(struct i915_request *rq) 728 { 729 return intel_context_is_parallel(rq->context); 730 } 731 732 static bool can_merge_rq(struct i915_request *rq, 733 struct i915_request *last) 734 { 735 return request_to_scheduling_context(rq) == 736 request_to_scheduling_context(last); 737 } 738 739 static u32 wq_space_until_wrap(struct intel_context *ce) 740 { 741 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 742 } 743 744 static void write_wqi(struct guc_process_desc *desc, 745 struct intel_context *ce, 746 u32 wqi_size) 747 { 748 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 749 750 /* 751 * Ensure WQI are visible before updating tail 752 */ 753 intel_guc_write_barrier(ce_to_guc(ce)); 754 755 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 756 (WQ_SIZE - 1); 757 WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); 758 } 759 760 static int guc_wq_noop_append(struct intel_context *ce) 761 { 762 struct guc_process_desc *desc = __get_process_desc(ce); 763 u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); 764 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 765 766 if (!wqi) 767 return -EBUSY; 768 769 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 770 771 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 772 FIELD_PREP(WQ_LEN_MASK, len_dw); 773 ce->parallel.guc.wqi_tail = 0; 774 775 return 0; 776 } 777 778 static int __guc_wq_item_append(struct i915_request *rq) 779 { 780 struct intel_context *ce = request_to_scheduling_context(rq); 781 struct intel_context *child; 782 struct guc_process_desc *desc = __get_process_desc(ce); 783 unsigned int wqi_size = (ce->parallel.number_children + 4) * 784 sizeof(u32); 785 u32 *wqi; 786 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 787 int ret; 788 789 /* Ensure context is in correct state updating work queue */ 790 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 791 GEM_BUG_ON(context_guc_id_invalid(ce)); 792 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 793 GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); 794 795 /* Insert NOOP if this work queue item will wrap the tail pointer. 
*/ 796 if (wqi_size > wq_space_until_wrap(ce)) { 797 ret = guc_wq_noop_append(ce); 798 if (ret) 799 return ret; 800 } 801 802 wqi = get_wq_pointer(desc, ce, wqi_size); 803 if (!wqi) 804 return -EBUSY; 805 806 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 807 808 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 809 FIELD_PREP(WQ_LEN_MASK, len_dw); 810 *wqi++ = ce->lrc.lrca; 811 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 812 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 813 *wqi++ = 0; /* fence_id */ 814 for_each_child(ce, child) 815 *wqi++ = child->ring->tail / sizeof(u64); 816 817 write_wqi(desc, ce, wqi_size); 818 819 return 0; 820 } 821 822 static int guc_wq_item_append(struct intel_guc *guc, 823 struct i915_request *rq) 824 { 825 struct intel_context *ce = request_to_scheduling_context(rq); 826 int ret = 0; 827 828 if (likely(!intel_context_is_banned(ce))) { 829 ret = __guc_wq_item_append(rq); 830 831 if (unlikely(ret == -EBUSY)) { 832 guc->stalled_request = rq; 833 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 834 } 835 } 836 837 return ret; 838 } 839 840 static bool multi_lrc_submit(struct i915_request *rq) 841 { 842 struct intel_context *ce = request_to_scheduling_context(rq); 843 844 intel_ring_set_tail(rq->ring, rq->tail); 845 846 /* 847 * We expect the front end (execbuf IOCTL) to set this flag on the last 848 * request generated from a multi-BB submission. This indicates to the 849 * backend (GuC interface) that we should submit this context thus 850 * submitting all the requests generated in parallel. 851 */ 852 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 853 intel_context_is_banned(ce); 854 } 855 856 static int guc_dequeue_one_context(struct intel_guc *guc) 857 { 858 struct i915_sched_engine * const sched_engine = guc->sched_engine; 859 struct i915_request *last = NULL; 860 bool submit = false; 861 struct rb_node *rb; 862 int ret; 863 864 lockdep_assert_held(&sched_engine->lock); 865 866 if (guc->stalled_request) { 867 submit = true; 868 last = guc->stalled_request; 869 870 switch (guc->submission_stall_reason) { 871 case STALL_REGISTER_CONTEXT: 872 goto register_context; 873 case STALL_MOVE_LRC_TAIL: 874 goto move_lrc_tail; 875 case STALL_ADD_REQUEST: 876 goto add_request; 877 default: 878 MISSING_CASE(guc->submission_stall_reason); 879 } 880 } 881 882 while ((rb = rb_first_cached(&sched_engine->queue))) { 883 struct i915_priolist *p = to_priolist(rb); 884 struct i915_request *rq, *rn; 885 886 priolist_for_each_request_consume(rq, rn, p) { 887 if (last && !can_merge_rq(rq, last)) 888 goto register_context; 889 890 list_del_init(&rq->sched.link); 891 892 __i915_request_submit(rq); 893 894 trace_i915_request_in(rq, 0); 895 last = rq; 896 897 if (is_multi_lrc_rq(rq)) { 898 /* 899 * We need to coalesce all multi-lrc requests in 900 * a relationship into a single H2G. We are 901 * guaranteed that all of these requests will be 902 * submitted sequentially. 
903 */ 904 if (multi_lrc_submit(rq)) { 905 submit = true; 906 goto register_context; 907 } 908 } else { 909 submit = true; 910 } 911 } 912 913 rb_erase_cached(&p->node, &sched_engine->queue); 914 i915_priolist_free(p); 915 } 916 917 register_context: 918 if (submit) { 919 struct intel_context *ce = request_to_scheduling_context(last); 920 921 if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) && 922 !intel_context_is_banned(ce))) { 923 ret = guc_lrc_desc_pin(ce, false); 924 if (unlikely(ret == -EPIPE)) { 925 goto deadlk; 926 } else if (ret == -EBUSY) { 927 guc->stalled_request = last; 928 guc->submission_stall_reason = 929 STALL_REGISTER_CONTEXT; 930 goto schedule_tasklet; 931 } else if (ret != 0) { 932 GEM_WARN_ON(ret); /* Unexpected */ 933 goto deadlk; 934 } 935 } 936 937 move_lrc_tail: 938 if (is_multi_lrc_rq(last)) { 939 ret = guc_wq_item_append(guc, last); 940 if (ret == -EBUSY) { 941 goto schedule_tasklet; 942 } else if (ret != 0) { 943 GEM_WARN_ON(ret); /* Unexpected */ 944 goto deadlk; 945 } 946 } else { 947 guc_set_lrc_tail(last); 948 } 949 950 add_request: 951 ret = guc_add_request(guc, last); 952 if (unlikely(ret == -EPIPE)) { 953 goto deadlk; 954 } else if (ret == -EBUSY) { 955 goto schedule_tasklet; 956 } else if (ret != 0) { 957 GEM_WARN_ON(ret); /* Unexpected */ 958 goto deadlk; 959 } 960 } 961 962 guc->stalled_request = NULL; 963 guc->submission_stall_reason = STALL_NONE; 964 return submit; 965 966 deadlk: 967 sched_engine->tasklet.callback = NULL; 968 tasklet_disable_nosync(&sched_engine->tasklet); 969 return false; 970 971 schedule_tasklet: 972 tasklet_schedule(&sched_engine->tasklet); 973 return false; 974 } 975 976 static void guc_submission_tasklet(struct tasklet_struct *t) 977 { 978 struct i915_sched_engine *sched_engine = 979 from_tasklet(sched_engine, t, tasklet); 980 unsigned long flags; 981 bool loop; 982 983 spin_lock_irqsave(&sched_engine->lock, flags); 984 985 do { 986 loop = guc_dequeue_one_context(sched_engine->private_data); 987 } while (loop); 988 989 i915_sched_engine_reset_on_empty(sched_engine); 990 991 spin_unlock_irqrestore(&sched_engine->lock, flags); 992 } 993 994 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 995 { 996 if (iir & GT_RENDER_USER_INTERRUPT) 997 intel_engine_signal_breadcrumbs(engine); 998 } 999 1000 static void __guc_context_destroy(struct intel_context *ce); 1001 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1002 static void guc_signal_context_fence(struct intel_context *ce); 1003 static void guc_cancel_context_requests(struct intel_context *ce); 1004 static void guc_blocked_fence_complete(struct intel_context *ce); 1005 1006 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1007 { 1008 struct intel_context *ce; 1009 unsigned long index, flags; 1010 bool pending_disable, pending_enable, deregister, destroyed, banned; 1011 1012 xa_lock_irqsave(&guc->context_lookup, flags); 1013 xa_for_each(&guc->context_lookup, index, ce) { 1014 /* 1015 * Corner case where the ref count on the object is zero but and 1016 * deregister G2H was lost. In this case we don't touch the ref 1017 * count and finish the destroy of the context. 1018 */ 1019 bool do_put = kref_get_unless_zero(&ce->ref); 1020 1021 xa_unlock(&guc->context_lookup); 1022 1023 spin_lock(&ce->guc_state.lock); 1024 1025 /* 1026 * Once we are at this point submission_disabled() is guaranteed 1027 * to be visible to all callers who set the below flags (see above 1028 * flush and flushes in reset_prepare). 
If submission_disabled() 1029 * is set, the caller shouldn't set these flags. 1030 */ 1031 1032 destroyed = context_destroyed(ce); 1033 pending_enable = context_pending_enable(ce); 1034 pending_disable = context_pending_disable(ce); 1035 deregister = context_wait_for_deregister_to_register(ce); 1036 banned = context_banned(ce); 1037 init_sched_state(ce); 1038 1039 spin_unlock(&ce->guc_state.lock); 1040 1041 GEM_BUG_ON(!do_put && !destroyed); 1042 1043 if (pending_enable || destroyed || deregister) { 1044 decr_outstanding_submission_g2h(guc); 1045 if (deregister) 1046 guc_signal_context_fence(ce); 1047 if (destroyed) { 1048 intel_gt_pm_put_async(guc_to_gt(guc)); 1049 release_guc_id(guc, ce); 1050 __guc_context_destroy(ce); 1051 } 1052 if (pending_enable || deregister) 1053 intel_context_put(ce); 1054 } 1055 1056 /* Not mutualy exclusive with above if statement. */ 1057 if (pending_disable) { 1058 guc_signal_context_fence(ce); 1059 if (banned) { 1060 guc_cancel_context_requests(ce); 1061 intel_engine_signal_breadcrumbs(ce->engine); 1062 } 1063 intel_context_sched_disable_unpin(ce); 1064 decr_outstanding_submission_g2h(guc); 1065 1066 spin_lock(&ce->guc_state.lock); 1067 guc_blocked_fence_complete(ce); 1068 spin_unlock(&ce->guc_state.lock); 1069 1070 intel_context_put(ce); 1071 } 1072 1073 if (do_put) 1074 intel_context_put(ce); 1075 xa_lock(&guc->context_lookup); 1076 } 1077 xa_unlock_irqrestore(&guc->context_lookup, flags); 1078 } 1079 1080 static inline bool 1081 submission_disabled(struct intel_guc *guc) 1082 { 1083 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1084 1085 return unlikely(!sched_engine || 1086 !__tasklet_is_enabled(&sched_engine->tasklet)); 1087 } 1088 1089 static void disable_submission(struct intel_guc *guc) 1090 { 1091 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1092 1093 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1094 GEM_BUG_ON(!guc->ct.enabled); 1095 __tasklet_disable_sync_once(&sched_engine->tasklet); 1096 sched_engine->tasklet.callback = NULL; 1097 } 1098 } 1099 1100 static void enable_submission(struct intel_guc *guc) 1101 { 1102 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1103 unsigned long flags; 1104 1105 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1106 sched_engine->tasklet.callback = guc_submission_tasklet; 1107 wmb(); /* Make sure callback visible */ 1108 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1109 __tasklet_enable(&sched_engine->tasklet)) { 1110 GEM_BUG_ON(!guc->ct.enabled); 1111 1112 /* And kick in case we missed a new request submission. */ 1113 tasklet_hi_schedule(&sched_engine->tasklet); 1114 } 1115 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1116 } 1117 1118 static void guc_flush_submissions(struct intel_guc *guc) 1119 { 1120 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1121 unsigned long flags; 1122 1123 spin_lock_irqsave(&sched_engine->lock, flags); 1124 spin_unlock_irqrestore(&sched_engine->lock, flags); 1125 } 1126 1127 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1128 1129 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1130 { 1131 int i; 1132 1133 if (unlikely(!guc_submission_initialized(guc))) { 1134 /* Reset called during driver load? GuC not yet initialised! 
*/ 1135 return; 1136 } 1137 1138 intel_gt_park_heartbeats(guc_to_gt(guc)); 1139 disable_submission(guc); 1140 guc->interrupts.disable(guc); 1141 1142 /* Flush IRQ handler */ 1143 spin_lock_irq(&guc_to_gt(guc)->irq_lock); 1144 spin_unlock_irq(&guc_to_gt(guc)->irq_lock); 1145 1146 guc_flush_submissions(guc); 1147 guc_flush_destroyed_contexts(guc); 1148 1149 /* 1150 * Handle any outstanding G2Hs before reset. Call IRQ handler directly 1151 * each pass as interrupt have been disabled. We always scrub for 1152 * outstanding G2H as it is possible for outstanding_submission_g2h to 1153 * be incremented after the context state update. 1154 */ 1155 for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { 1156 intel_guc_to_host_event_handler(guc); 1157 #define wait_for_reset(guc, wait_var) \ 1158 intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) 1159 do { 1160 wait_for_reset(guc, &guc->outstanding_submission_g2h); 1161 } while (!list_empty(&guc->ct.requests.incoming)); 1162 } 1163 1164 scrub_guc_desc_for_outstanding_g2h(guc); 1165 } 1166 1167 static struct intel_engine_cs * 1168 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1169 { 1170 struct intel_engine_cs *engine; 1171 intel_engine_mask_t tmp, mask = ve->mask; 1172 unsigned int num_siblings = 0; 1173 1174 for_each_engine_masked(engine, ve->gt, mask, tmp) 1175 if (num_siblings++ == sibling) 1176 return engine; 1177 1178 return NULL; 1179 } 1180 1181 static inline struct intel_engine_cs * 1182 __context_to_physical_engine(struct intel_context *ce) 1183 { 1184 struct intel_engine_cs *engine = ce->engine; 1185 1186 if (intel_engine_is_virtual(engine)) 1187 engine = guc_virtual_get_sibling(engine, 0); 1188 1189 return engine; 1190 } 1191 1192 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1193 { 1194 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1195 1196 if (intel_context_is_banned(ce)) 1197 return; 1198 1199 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1200 1201 /* 1202 * We want a simple context + ring to execute the breadcrumb update. 1203 * We cannot rely on the context being intact across the GPU hang, 1204 * so clear it and rebuild just what we need for the breadcrumb. 1205 * All pending requests for this context will be zapped, and any 1206 * future request will be after userspace has had the opportunity 1207 * to recreate its own state. 1208 */ 1209 if (scrub) 1210 lrc_init_regs(ce, engine, true); 1211 1212 /* Rerun the request; its payload has been neutered (if guilty). */ 1213 lrc_update_regs(ce, engine, head); 1214 } 1215 1216 static void guc_reset_nop(struct intel_engine_cs *engine) 1217 { 1218 } 1219 1220 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1221 { 1222 } 1223 1224 static void 1225 __unwind_incomplete_requests(struct intel_context *ce) 1226 { 1227 struct i915_request *rq, *rn; 1228 struct list_head *pl; 1229 int prio = I915_PRIORITY_INVALID; 1230 struct i915_sched_engine * const sched_engine = 1231 ce->engine->sched_engine; 1232 unsigned long flags; 1233 1234 spin_lock_irqsave(&sched_engine->lock, flags); 1235 spin_lock(&ce->guc_state.lock); 1236 list_for_each_entry_safe_reverse(rq, rn, 1237 &ce->guc_state.requests, 1238 sched.link) { 1239 if (i915_request_completed(rq)) 1240 continue; 1241 1242 list_del_init(&rq->sched.link); 1243 __i915_request_unsubmit(rq); 1244 1245 /* Push the request back into the queue for later resubmission. 
*/ 1246 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1247 if (rq_prio(rq) != prio) { 1248 prio = rq_prio(rq); 1249 pl = i915_sched_lookup_priolist(sched_engine, prio); 1250 } 1251 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1252 1253 list_add(&rq->sched.link, pl); 1254 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1255 } 1256 spin_unlock(&ce->guc_state.lock); 1257 spin_unlock_irqrestore(&sched_engine->lock, flags); 1258 } 1259 1260 static void __guc_reset_context(struct intel_context *ce, bool stalled) 1261 { 1262 bool local_stalled; 1263 struct i915_request *rq; 1264 unsigned long flags; 1265 u32 head; 1266 int i, number_children = ce->parallel.number_children; 1267 bool skip = false; 1268 struct intel_context *parent = ce; 1269 1270 GEM_BUG_ON(intel_context_is_child(ce)); 1271 1272 intel_context_get(ce); 1273 1274 /* 1275 * GuC will implicitly mark the context as non-schedulable when it sends 1276 * the reset notification. Make sure our state reflects this change. The 1277 * context will be marked enabled on resubmission. 1278 * 1279 * XXX: If the context is reset as a result of the request cancellation 1280 * this G2H is received after the schedule disable complete G2H which is 1281 * wrong as this creates a race between the request cancellation code 1282 * re-submitting the context and this G2H handler. This is a bug in the 1283 * GuC but can be worked around in the meantime but converting this to a 1284 * NOP if a pending enable is in flight as this indicates that a request 1285 * cancellation has occurred. 1286 */ 1287 spin_lock_irqsave(&ce->guc_state.lock, flags); 1288 if (likely(!context_pending_enable(ce))) 1289 clr_context_enabled(ce); 1290 else 1291 skip = true; 1292 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1293 if (unlikely(skip)) 1294 goto out_put; 1295 1296 /* 1297 * For each context in the relationship find the hanging request 1298 * resetting each context / request as needed 1299 */ 1300 for (i = 0; i < number_children + 1; ++i) { 1301 if (!intel_context_is_pinned(ce)) 1302 goto next_context; 1303 1304 local_stalled = false; 1305 rq = intel_context_find_active_request(ce); 1306 if (!rq) { 1307 head = ce->ring->tail; 1308 goto out_replay; 1309 } 1310 1311 if (i915_request_started(rq)) 1312 local_stalled = true; 1313 1314 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1315 head = intel_ring_wrap(ce->ring, rq->head); 1316 1317 __i915_request_reset(rq, local_stalled && stalled); 1318 out_replay: 1319 guc_reset_state(ce, head, local_stalled && stalled); 1320 next_context: 1321 if (i != number_children) 1322 ce = list_next_entry(ce, parallel.child_link); 1323 } 1324 1325 __unwind_incomplete_requests(parent); 1326 out_put: 1327 intel_context_put(parent); 1328 } 1329 1330 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) 1331 { 1332 struct intel_context *ce; 1333 unsigned long index; 1334 unsigned long flags; 1335 1336 if (unlikely(!guc_submission_initialized(guc))) { 1337 /* Reset called during driver load? GuC not yet initialised! 
*/ 1338 return; 1339 } 1340 1341 xa_lock_irqsave(&guc->context_lookup, flags); 1342 xa_for_each(&guc->context_lookup, index, ce) { 1343 if (!kref_get_unless_zero(&ce->ref)) 1344 continue; 1345 1346 xa_unlock(&guc->context_lookup); 1347 1348 if (intel_context_is_pinned(ce) && 1349 !intel_context_is_child(ce)) 1350 __guc_reset_context(ce, stalled); 1351 1352 intel_context_put(ce); 1353 1354 xa_lock(&guc->context_lookup); 1355 } 1356 xa_unlock_irqrestore(&guc->context_lookup, flags); 1357 1358 /* GuC is blown away, drop all references to contexts */ 1359 xa_destroy(&guc->context_lookup); 1360 } 1361 1362 static void guc_cancel_context_requests(struct intel_context *ce) 1363 { 1364 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1365 struct i915_request *rq; 1366 unsigned long flags; 1367 1368 /* Mark all executing requests as skipped. */ 1369 spin_lock_irqsave(&sched_engine->lock, flags); 1370 spin_lock(&ce->guc_state.lock); 1371 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1372 i915_request_put(i915_request_mark_eio(rq)); 1373 spin_unlock(&ce->guc_state.lock); 1374 spin_unlock_irqrestore(&sched_engine->lock, flags); 1375 } 1376 1377 static void 1378 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1379 { 1380 struct i915_request *rq, *rn; 1381 struct rb_node *rb; 1382 unsigned long flags; 1383 1384 /* Can be called during boot if GuC fails to load */ 1385 if (!sched_engine) 1386 return; 1387 1388 /* 1389 * Before we call engine->cancel_requests(), we should have exclusive 1390 * access to the submission state. This is arranged for us by the 1391 * caller disabling the interrupt generation, the tasklet and other 1392 * threads that may then access the same state, giving us a free hand 1393 * to reset state. However, we still need to let lockdep be aware that 1394 * we know this state may be accessed in hardirq context, so we 1395 * disable the irq around this manipulation and we want to keep 1396 * the spinlock focused on its duties and not accidentally conflate 1397 * coverage to the submission's irq state. (Similarly, although we 1398 * shouldn't need to disable irq around the manipulation of the 1399 * submission's irq state, we also wish to remind ourselves that 1400 * it is irq state.) 1401 */ 1402 spin_lock_irqsave(&sched_engine->lock, flags); 1403 1404 /* Flush the queued requests to the timeline list (for retiring). 
*/ 1405 while ((rb = rb_first_cached(&sched_engine->queue))) { 1406 struct i915_priolist *p = to_priolist(rb); 1407 1408 priolist_for_each_request_consume(rq, rn, p) { 1409 list_del_init(&rq->sched.link); 1410 1411 __i915_request_submit(rq); 1412 1413 i915_request_put(i915_request_mark_eio(rq)); 1414 } 1415 1416 rb_erase_cached(&p->node, &sched_engine->queue); 1417 i915_priolist_free(p); 1418 } 1419 1420 /* Remaining _unready_ requests will be nop'ed when submitted */ 1421 1422 sched_engine->queue_priority_hint = INT_MIN; 1423 sched_engine->queue = RB_ROOT_CACHED; 1424 1425 spin_unlock_irqrestore(&sched_engine->lock, flags); 1426 } 1427 1428 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1429 { 1430 struct intel_context *ce; 1431 unsigned long index; 1432 unsigned long flags; 1433 1434 xa_lock_irqsave(&guc->context_lookup, flags); 1435 xa_for_each(&guc->context_lookup, index, ce) { 1436 if (!kref_get_unless_zero(&ce->ref)) 1437 continue; 1438 1439 xa_unlock(&guc->context_lookup); 1440 1441 if (intel_context_is_pinned(ce) && 1442 !intel_context_is_child(ce)) 1443 guc_cancel_context_requests(ce); 1444 1445 intel_context_put(ce); 1446 1447 xa_lock(&guc->context_lookup); 1448 } 1449 xa_unlock_irqrestore(&guc->context_lookup, flags); 1450 1451 guc_cancel_sched_engine_requests(guc->sched_engine); 1452 1453 /* GuC is blown away, drop all references to contexts */ 1454 xa_destroy(&guc->context_lookup); 1455 } 1456 1457 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1458 { 1459 /* Reset called during driver load or during wedge? */ 1460 if (unlikely(!guc_submission_initialized(guc) || 1461 test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { 1462 return; 1463 } 1464 1465 /* 1466 * Technically possible for either of these values to be non-zero here, 1467 * but very unlikely + harmless. Regardless let's add a warn so we can 1468 * see in CI if this happens frequently / a precursor to taking down the 1469 * machine. 1470 */ 1471 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1472 atomic_set(&guc->outstanding_submission_g2h, 0); 1473 1474 intel_guc_global_policies_update(guc); 1475 enable_submission(guc); 1476 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1477 } 1478 1479 static void destroyed_worker_func(struct work_struct *w); 1480 1481 /* 1482 * Set up the memory resources to be shared with the GuC (via the GGTT) 1483 * at firmware loading time. 1484 */ 1485 int intel_guc_submission_init(struct intel_guc *guc) 1486 { 1487 int ret; 1488 1489 if (guc->lrc_desc_pool) 1490 return 0; 1491 1492 ret = guc_lrc_desc_pool_create(guc); 1493 if (ret) 1494 return ret; 1495 /* 1496 * Keep static analysers happy, let them know that we allocated the 1497 * vma after testing that it didn't exist earlier. 
1498 */ 1499 GEM_BUG_ON(!guc->lrc_desc_pool); 1500 1501 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 1502 1503 spin_lock_init(&guc->submission_state.lock); 1504 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 1505 ida_init(&guc->submission_state.guc_ids); 1506 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 1507 INIT_WORK(&guc->submission_state.destroyed_worker, 1508 destroyed_worker_func); 1509 1510 guc->submission_state.guc_ids_bitmap = 1511 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL); 1512 if (!guc->submission_state.guc_ids_bitmap) 1513 return -ENOMEM; 1514 1515 return 0; 1516 } 1517 1518 void intel_guc_submission_fini(struct intel_guc *guc) 1519 { 1520 if (!guc->lrc_desc_pool) 1521 return; 1522 1523 guc_flush_destroyed_contexts(guc); 1524 guc_lrc_desc_pool_destroy(guc); 1525 i915_sched_engine_put(guc->sched_engine); 1526 bitmap_free(guc->submission_state.guc_ids_bitmap); 1527 } 1528 1529 static inline void queue_request(struct i915_sched_engine *sched_engine, 1530 struct i915_request *rq, 1531 int prio) 1532 { 1533 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1534 list_add_tail(&rq->sched.link, 1535 i915_sched_lookup_priolist(sched_engine, prio)); 1536 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1537 tasklet_hi_schedule(&sched_engine->tasklet); 1538 } 1539 1540 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1541 struct i915_request *rq) 1542 { 1543 int ret = 0; 1544 1545 __i915_request_submit(rq); 1546 1547 trace_i915_request_in(rq, 0); 1548 1549 if (is_multi_lrc_rq(rq)) { 1550 if (multi_lrc_submit(rq)) { 1551 ret = guc_wq_item_append(guc, rq); 1552 if (!ret) 1553 ret = guc_add_request(guc, rq); 1554 } 1555 } else { 1556 guc_set_lrc_tail(rq); 1557 ret = guc_add_request(guc, rq); 1558 } 1559 1560 if (unlikely(ret == -EPIPE)) 1561 disable_submission(guc); 1562 1563 return ret; 1564 } 1565 1566 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1567 { 1568 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1569 struct intel_context *ce = request_to_scheduling_context(rq); 1570 1571 return submission_disabled(guc) || guc->stalled_request || 1572 !i915_sched_engine_is_empty(sched_engine) || 1573 !lrc_desc_registered(guc, ce->guc_id.id); 1574 } 1575 1576 static void guc_submit_request(struct i915_request *rq) 1577 { 1578 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1579 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1580 unsigned long flags; 1581 1582 /* Will be called from irq-context when using foreign fences. 
*/ 1583 spin_lock_irqsave(&sched_engine->lock, flags); 1584 1585 if (need_tasklet(guc, rq)) 1586 queue_request(sched_engine, rq, rq_prio(rq)); 1587 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1588 tasklet_hi_schedule(&sched_engine->tasklet); 1589 1590 spin_unlock_irqrestore(&sched_engine->lock, flags); 1591 } 1592 1593 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1594 { 1595 int ret; 1596 1597 GEM_BUG_ON(intel_context_is_child(ce)); 1598 1599 if (intel_context_is_parent(ce)) 1600 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 1601 NUMBER_MULTI_LRC_GUC_ID, 1602 order_base_2(ce->parallel.number_children 1603 + 1)); 1604 else 1605 ret = ida_simple_get(&guc->submission_state.guc_ids, 1606 NUMBER_MULTI_LRC_GUC_ID, 1607 GUC_MAX_LRC_DESCRIPTORS, 1608 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 1609 __GFP_NOWARN); 1610 if (unlikely(ret < 0)) 1611 return ret; 1612 1613 ce->guc_id.id = ret; 1614 return 0; 1615 } 1616 1617 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1618 { 1619 GEM_BUG_ON(intel_context_is_child(ce)); 1620 1621 if (!context_guc_id_invalid(ce)) { 1622 if (intel_context_is_parent(ce)) 1623 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 1624 ce->guc_id.id, 1625 order_base_2(ce->parallel.number_children 1626 + 1)); 1627 else 1628 ida_simple_remove(&guc->submission_state.guc_ids, 1629 ce->guc_id.id); 1630 reset_lrc_desc(guc, ce->guc_id.id); 1631 set_context_guc_id_invalid(ce); 1632 } 1633 if (!list_empty(&ce->guc_id.link)) 1634 list_del_init(&ce->guc_id.link); 1635 } 1636 1637 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 1638 { 1639 unsigned long flags; 1640 1641 spin_lock_irqsave(&guc->submission_state.lock, flags); 1642 __release_guc_id(guc, ce); 1643 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1644 } 1645 1646 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 1647 { 1648 struct intel_context *cn; 1649 1650 lockdep_assert_held(&guc->submission_state.lock); 1651 GEM_BUG_ON(intel_context_is_child(ce)); 1652 GEM_BUG_ON(intel_context_is_parent(ce)); 1653 1654 if (!list_empty(&guc->submission_state.guc_id_list)) { 1655 cn = list_first_entry(&guc->submission_state.guc_id_list, 1656 struct intel_context, 1657 guc_id.link); 1658 1659 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 1660 GEM_BUG_ON(context_guc_id_invalid(cn)); 1661 GEM_BUG_ON(intel_context_is_child(cn)); 1662 GEM_BUG_ON(intel_context_is_parent(cn)); 1663 1664 list_del_init(&cn->guc_id.link); 1665 ce->guc_id = cn->guc_id; 1666 1667 spin_lock(&ce->guc_state.lock); 1668 clr_context_registered(cn); 1669 spin_unlock(&ce->guc_state.lock); 1670 1671 set_context_guc_id_invalid(cn); 1672 1673 return 0; 1674 } else { 1675 return -EAGAIN; 1676 } 1677 } 1678 1679 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 1680 { 1681 int ret; 1682 1683 lockdep_assert_held(&guc->submission_state.lock); 1684 GEM_BUG_ON(intel_context_is_child(ce)); 1685 1686 ret = new_guc_id(guc, ce); 1687 if (unlikely(ret < 0)) { 1688 if (intel_context_is_parent(ce)) 1689 return -ENOSPC; 1690 1691 ret = steal_guc_id(guc, ce); 1692 if (ret < 0) 1693 return ret; 1694 } 1695 1696 if (intel_context_is_parent(ce)) { 1697 struct intel_context *child; 1698 int i = 1; 1699 1700 for_each_child(ce, child) 1701 child->guc_id.id = ce->guc_id.id + i++; 1702 } 1703 1704 return 0; 1705 } 1706 1707 #define PIN_GUC_ID_TRIES 4 1708 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 1709 { 
1710 int ret = 0; 1711 unsigned long flags, tries = PIN_GUC_ID_TRIES; 1712 1713 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 1714 1715 try_again: 1716 spin_lock_irqsave(&guc->submission_state.lock, flags); 1717 1718 might_lock(&ce->guc_state.lock); 1719 1720 if (context_guc_id_invalid(ce)) { 1721 ret = assign_guc_id(guc, ce); 1722 if (ret) 1723 goto out_unlock; 1724 ret = 1; /* Indidcates newly assigned guc_id */ 1725 } 1726 if (!list_empty(&ce->guc_id.link)) 1727 list_del_init(&ce->guc_id.link); 1728 atomic_inc(&ce->guc_id.ref); 1729 1730 out_unlock: 1731 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1732 1733 /* 1734 * -EAGAIN indicates no guc_id are available, let's retire any 1735 * outstanding requests to see if that frees up a guc_id. If the first 1736 * retire didn't help, insert a sleep with the timeslice duration before 1737 * attempting to retire more requests. Double the sleep period each 1738 * subsequent pass before finally giving up. The sleep period has max of 1739 * 100ms and minimum of 1ms. 1740 */ 1741 if (ret == -EAGAIN && --tries) { 1742 if (PIN_GUC_ID_TRIES - tries > 1) { 1743 unsigned int timeslice_shifted = 1744 ce->engine->props.timeslice_duration_ms << 1745 (PIN_GUC_ID_TRIES - tries - 2); 1746 unsigned int max = min_t(unsigned int, 100, 1747 timeslice_shifted); 1748 1749 msleep(max_t(unsigned int, max, 1)); 1750 } 1751 intel_gt_retire_requests(guc_to_gt(guc)); 1752 goto try_again; 1753 } 1754 1755 return ret; 1756 } 1757 1758 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 1759 { 1760 unsigned long flags; 1761 1762 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 1763 GEM_BUG_ON(intel_context_is_child(ce)); 1764 1765 if (unlikely(context_guc_id_invalid(ce) || 1766 intel_context_is_parent(ce))) 1767 return; 1768 1769 spin_lock_irqsave(&guc->submission_state.lock, flags); 1770 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 1771 !atomic_read(&ce->guc_id.ref)) 1772 list_add_tail(&ce->guc_id.link, 1773 &guc->submission_state.guc_id_list); 1774 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 1775 } 1776 1777 static int __guc_action_register_multi_lrc(struct intel_guc *guc, 1778 struct intel_context *ce, 1779 u32 guc_id, 1780 u32 offset, 1781 bool loop) 1782 { 1783 struct intel_context *child; 1784 u32 action[4 + MAX_ENGINE_INSTANCE]; 1785 int len = 0; 1786 1787 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 1788 1789 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 1790 action[len++] = guc_id; 1791 action[len++] = ce->parallel.number_children + 1; 1792 action[len++] = offset; 1793 for_each_child(ce, child) { 1794 offset += sizeof(struct guc_lrc_desc); 1795 action[len++] = offset; 1796 } 1797 1798 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 1799 } 1800 1801 static int __guc_action_register_context(struct intel_guc *guc, 1802 u32 guc_id, 1803 u32 offset, 1804 bool loop) 1805 { 1806 u32 action[] = { 1807 INTEL_GUC_ACTION_REGISTER_CONTEXT, 1808 guc_id, 1809 offset, 1810 }; 1811 1812 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 1813 0, loop); 1814 } 1815 1816 static int register_context(struct intel_context *ce, bool loop) 1817 { 1818 struct intel_guc *guc = ce_to_guc(ce); 1819 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + 1820 ce->guc_id.id * sizeof(struct guc_lrc_desc); 1821 int ret; 1822 1823 GEM_BUG_ON(intel_context_is_child(ce)); 1824 trace_intel_context_register(ce); 1825 1826 if (intel_context_is_parent(ce)) 1827 
ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, 1828 offset, loop); 1829 else 1830 ret = __guc_action_register_context(guc, ce->guc_id.id, offset, 1831 loop); 1832 if (likely(!ret)) { 1833 unsigned long flags; 1834 1835 spin_lock_irqsave(&ce->guc_state.lock, flags); 1836 set_context_registered(ce); 1837 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1838 } 1839 1840 return ret; 1841 } 1842 1843 static int __guc_action_deregister_context(struct intel_guc *guc, 1844 u32 guc_id) 1845 { 1846 u32 action[] = { 1847 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 1848 guc_id, 1849 }; 1850 1851 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 1852 G2H_LEN_DW_DEREGISTER_CONTEXT, 1853 true); 1854 } 1855 1856 static int deregister_context(struct intel_context *ce, u32 guc_id) 1857 { 1858 struct intel_guc *guc = ce_to_guc(ce); 1859 1860 GEM_BUG_ON(intel_context_is_child(ce)); 1861 trace_intel_context_deregister(ce); 1862 1863 return __guc_action_deregister_context(guc, guc_id); 1864 } 1865 1866 static inline void clear_children_join_go_memory(struct intel_context *ce) 1867 { 1868 struct parent_scratch *ps = __get_parent_scratch(ce); 1869 int i; 1870 1871 ps->go.semaphore = 0; 1872 for (i = 0; i < ce->parallel.number_children + 1; ++i) 1873 ps->join[i].semaphore = 0; 1874 } 1875 1876 static inline u32 get_children_go_value(struct intel_context *ce) 1877 { 1878 return __get_parent_scratch(ce)->go.semaphore; 1879 } 1880 1881 static inline u32 get_children_join_value(struct intel_context *ce, 1882 u8 child_index) 1883 { 1884 return __get_parent_scratch(ce)->join[child_index].semaphore; 1885 } 1886 1887 static void guc_context_policy_init(struct intel_engine_cs *engine, 1888 struct guc_lrc_desc *desc) 1889 { 1890 desc->policy_flags = 0; 1891 1892 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 1893 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; 1894 1895 /* NB: For both of these, zero means disabled. */ 1896 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 1897 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 1898 } 1899 1900 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) 1901 { 1902 struct intel_engine_cs *engine = ce->engine; 1903 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 1904 struct intel_guc *guc = &engine->gt->uc.guc; 1905 u32 desc_idx = ce->guc_id.id; 1906 struct guc_lrc_desc *desc; 1907 bool context_registered; 1908 intel_wakeref_t wakeref; 1909 struct intel_context *child; 1910 int ret = 0; 1911 1912 GEM_BUG_ON(!engine->mask); 1913 GEM_BUG_ON(!sched_state_is_init(ce)); 1914 1915 /* 1916 * Ensure LRC + CT vmas are is same region as write barrier is done 1917 * based on CT vma region. 1918 */ 1919 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 1920 i915_gem_object_is_lmem(ce->ring->vma->obj)); 1921 1922 context_registered = lrc_desc_registered(guc, desc_idx); 1923 1924 reset_lrc_desc(guc, desc_idx); 1925 set_lrc_desc_registered(guc, desc_idx, ce); 1926 1927 desc = __get_lrc_desc(guc, desc_idx); 1928 desc->engine_class = engine_class_to_guc_class(engine->class); 1929 desc->engine_submit_mask = engine->logical_mask; 1930 desc->hw_context_desc = ce->lrc.lrca; 1931 desc->priority = ce->guc_state.prio; 1932 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 1933 guc_context_policy_init(engine, desc); 1934 1935 /* 1936 * If context is a parent, we need to register a process descriptor 1937 * describing a work queue and register all child contexts. 
	 */
	if (intel_context_is_parent(ce)) {
		struct guc_process_desc *pdesc;

		ce->parallel.guc.wqi_tail = 0;
		ce->parallel.guc.wqi_head = 0;

		desc->process_desc = i915_ggtt_offset(ce->state) +
			__get_parent_scratch_offset(ce);
		desc->wq_addr = i915_ggtt_offset(ce->state) +
			__get_wq_offset(ce);
		desc->wq_size = WQ_SIZE;

		pdesc = __get_process_desc(ce);
		memset(pdesc, 0, sizeof(*(pdesc)));
		pdesc->stage_id = ce->guc_id.id;
		pdesc->wq_base_addr = desc->wq_addr;
		pdesc->wq_size_bytes = desc->wq_size;
		pdesc->wq_status = WQ_STATUS_ACTIVE;

		for_each_child(ce, child) {
			desc = __get_lrc_desc(guc, child->guc_id.id);

			desc->engine_class =
				engine_class_to_guc_class(engine->class);
			desc->hw_context_desc = child->lrc.lrca;
			desc->priority = ce->guc_state.prio;
			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
			guc_context_policy_init(engine, desc);
		}

		clear_children_join_go_memory(ce);
	}

	/*
	 * The context_lookup xarray is used to determine if the hardware
	 * context is currently registered. There are two cases in which it
	 * could be registered: either the guc_id has been stolen from another
	 * context, or the lrc descriptor address of this context has changed.
	 * In either case the context needs to be deregistered with the GuC
	 * before registering this context.
	 */
	if (context_registered) {
		bool disabled;
		unsigned long flags;

		trace_intel_context_steal_guc_id(ce);
		GEM_BUG_ON(!loop);

		/* Seal race with Reset */
		spin_lock_irqsave(&ce->guc_state.lock, flags);
		disabled = submission_disabled(guc);
		if (likely(!disabled)) {
			set_context_wait_for_deregister_to_register(ce);
			intel_context_get(ce);
		}
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		if (unlikely(disabled)) {
			reset_lrc_desc(guc, desc_idx);
			return 0;	/* Will get registered later */
		}

		/*
		 * If stealing the guc_id, this ce has the same guc_id as the
		 * context whose guc_id was stolen.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref)
			ret = deregister_context(ce, ce->guc_id.id);
		if (unlikely(ret == -ENODEV))
			ret = 0;	/* Will get registered later */
	} else {
		with_intel_runtime_pm(runtime_pm, wakeref)
			ret = register_context(ce, loop);
		if (unlikely(ret == -EBUSY)) {
			reset_lrc_desc(guc, desc_idx);
		} else if (unlikely(ret == -ENODEV)) {
			reset_lrc_desc(guc, desc_idx);
			ret = 0;	/* Will get registered later */
		}
	}

	return ret;
}

static int __guc_context_pre_pin(struct intel_context *ce,
				 struct intel_engine_cs *engine,
				 struct i915_gem_ww_ctx *ww,
				 void **vaddr)
{
	return lrc_pre_pin(ce, engine, ww, vaddr);
}

static int __guc_context_pin(struct intel_context *ce,
			     struct intel_engine_cs *engine,
			     void *vaddr)
{
	if (i915_ggtt_offset(ce->state) !=
	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

	/*
	 * The GuC context gets pinned in guc_request_alloc. See that function
	 * for an explanation of why.
2041 */ 2042 2043 return lrc_pin(ce, engine, vaddr); 2044 } 2045 2046 static int guc_context_pre_pin(struct intel_context *ce, 2047 struct i915_gem_ww_ctx *ww, 2048 void **vaddr) 2049 { 2050 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2051 } 2052 2053 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2054 { 2055 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2056 2057 if (likely(!ret && !intel_context_is_barrier(ce))) 2058 intel_engine_pm_get(ce->engine); 2059 2060 return ret; 2061 } 2062 2063 static void guc_context_unpin(struct intel_context *ce) 2064 { 2065 struct intel_guc *guc = ce_to_guc(ce); 2066 2067 unpin_guc_id(guc, ce); 2068 lrc_unpin(ce); 2069 2070 if (likely(!intel_context_is_barrier(ce))) 2071 intel_engine_pm_put_async(ce->engine); 2072 } 2073 2074 static void guc_context_post_unpin(struct intel_context *ce) 2075 { 2076 lrc_post_unpin(ce); 2077 } 2078 2079 static void __guc_context_sched_enable(struct intel_guc *guc, 2080 struct intel_context *ce) 2081 { 2082 u32 action[] = { 2083 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2084 ce->guc_id.id, 2085 GUC_CONTEXT_ENABLE 2086 }; 2087 2088 trace_intel_context_sched_enable(ce); 2089 2090 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2091 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2092 } 2093 2094 static void __guc_context_sched_disable(struct intel_guc *guc, 2095 struct intel_context *ce, 2096 u16 guc_id) 2097 { 2098 u32 action[] = { 2099 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2100 guc_id, /* ce->guc_id.id not stable */ 2101 GUC_CONTEXT_DISABLE 2102 }; 2103 2104 GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); 2105 2106 GEM_BUG_ON(intel_context_is_child(ce)); 2107 trace_intel_context_sched_disable(ce); 2108 2109 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2110 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2111 } 2112 2113 static void guc_blocked_fence_complete(struct intel_context *ce) 2114 { 2115 lockdep_assert_held(&ce->guc_state.lock); 2116 2117 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2118 i915_sw_fence_complete(&ce->guc_state.blocked); 2119 } 2120 2121 static void guc_blocked_fence_reinit(struct intel_context *ce) 2122 { 2123 lockdep_assert_held(&ce->guc_state.lock); 2124 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2125 2126 /* 2127 * This fence is always complete unless a pending schedule disable is 2128 * outstanding. We arm the fence here and complete it when we receive 2129 * the pending schedule disable complete message. 
2130 */ 2131 i915_sw_fence_fini(&ce->guc_state.blocked); 2132 i915_sw_fence_reinit(&ce->guc_state.blocked); 2133 i915_sw_fence_await(&ce->guc_state.blocked); 2134 i915_sw_fence_commit(&ce->guc_state.blocked); 2135 } 2136 2137 static u16 prep_context_pending_disable(struct intel_context *ce) 2138 { 2139 lockdep_assert_held(&ce->guc_state.lock); 2140 2141 set_context_pending_disable(ce); 2142 clr_context_enabled(ce); 2143 guc_blocked_fence_reinit(ce); 2144 intel_context_get(ce); 2145 2146 return ce->guc_id.id; 2147 } 2148 2149 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2150 { 2151 struct intel_guc *guc = ce_to_guc(ce); 2152 unsigned long flags; 2153 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2154 intel_wakeref_t wakeref; 2155 u16 guc_id; 2156 bool enabled; 2157 2158 GEM_BUG_ON(intel_context_is_child(ce)); 2159 2160 spin_lock_irqsave(&ce->guc_state.lock, flags); 2161 2162 incr_context_blocked(ce); 2163 2164 enabled = context_enabled(ce); 2165 if (unlikely(!enabled || submission_disabled(guc))) { 2166 if (enabled) 2167 clr_context_enabled(ce); 2168 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2169 return &ce->guc_state.blocked; 2170 } 2171 2172 /* 2173 * We add +2 here as the schedule disable complete CTB handler calls 2174 * intel_context_sched_disable_unpin (-2 to pin_count). 2175 */ 2176 atomic_add(2, &ce->pin_count); 2177 2178 guc_id = prep_context_pending_disable(ce); 2179 2180 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2181 2182 with_intel_runtime_pm(runtime_pm, wakeref) 2183 __guc_context_sched_disable(guc, ce, guc_id); 2184 2185 return &ce->guc_state.blocked; 2186 } 2187 2188 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2189 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2190 #define SCHED_STATE_NO_UNBLOCK \ 2191 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2192 SCHED_STATE_PENDING_DISABLE | \ 2193 SCHED_STATE_BANNED) 2194 2195 static bool context_cant_unblock(struct intel_context *ce) 2196 { 2197 lockdep_assert_held(&ce->guc_state.lock); 2198 2199 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2200 context_guc_id_invalid(ce) || 2201 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || 2202 !intel_context_is_pinned(ce); 2203 } 2204 2205 static void guc_context_unblock(struct intel_context *ce) 2206 { 2207 struct intel_guc *guc = ce_to_guc(ce); 2208 unsigned long flags; 2209 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2210 intel_wakeref_t wakeref; 2211 bool enable; 2212 2213 GEM_BUG_ON(context_enabled(ce)); 2214 GEM_BUG_ON(intel_context_is_child(ce)); 2215 2216 spin_lock_irqsave(&ce->guc_state.lock, flags); 2217 2218 if (unlikely(submission_disabled(guc) || 2219 context_cant_unblock(ce))) { 2220 enable = false; 2221 } else { 2222 enable = true; 2223 set_context_pending_enable(ce); 2224 set_context_enabled(ce); 2225 intel_context_get(ce); 2226 } 2227 2228 decr_context_blocked(ce); 2229 2230 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2231 2232 if (enable) { 2233 with_intel_runtime_pm(runtime_pm, wakeref) 2234 __guc_context_sched_enable(guc, ce); 2235 } 2236 } 2237 2238 static void guc_context_cancel_request(struct intel_context *ce, 2239 struct i915_request *rq) 2240 { 2241 struct intel_context *block_context = 2242 request_to_scheduling_context(rq); 2243 2244 if (i915_sw_fence_signaled(&rq->submit)) { 2245 struct i915_sw_fence *fence; 2246 2247 intel_context_get(ce); 2248 fence = guc_context_block(block_context); 2249 i915_sw_fence_wait(fence); 2250 if (!i915_request_completed(rq)) { 2251 
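			/*
			 * The request was submitted but has not completed:
			 * squash its payload and rewind the context state back
			 * to the head of this request so the cancellation
			 * takes effect on the HW.
			 */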
			__i915_request_skip(rq);
			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
					true);
		}

		/*
		 * XXX: Racey if context is reset, see comment in
		 * __guc_reset_context().
		 */
		flush_work(&ce_to_guc(ce)->ct.requests.worker);

		guc_context_unblock(block_context);
		intel_context_put(ce);
	}
}

static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
						 u16 guc_id,
						 u32 preemption_timeout)
{
	u32 action[] = {
		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
		guc_id,
		preemption_timeout
	};

	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
}

static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	struct intel_guc *guc = ce_to_guc(ce);
	struct intel_runtime_pm *runtime_pm =
		&ce->engine->gt->i915->runtime_pm;
	intel_wakeref_t wakeref;
	unsigned long flags;

	GEM_BUG_ON(intel_context_is_child(ce));

	guc_flush_submissions(guc);

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	set_context_banned(ce);

	if (submission_disabled(guc) ||
	    (!context_enabled(ce) && !context_pending_disable(ce))) {
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		guc_cancel_context_requests(ce);
		intel_engine_signal_breadcrumbs(ce->engine);
	} else if (!context_pending_disable(ce)) {
		u16 guc_id;

		/*
		 * We add +2 here as the schedule disable complete CTB handler
		 * calls intel_context_sched_disable_unpin (-2 to pin_count).
		 */
		atomic_add(2, &ce->pin_count);

		guc_id = prep_context_pending_disable(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		/*
		 * In addition to disabling scheduling, set the preemption
		 * timeout to the minimum value (1 us) so the banned context
		 * gets kicked off the HW ASAP.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref) {
			__guc_context_set_preemption_timeout(guc, guc_id, 1);
			__guc_context_sched_disable(guc, ce, guc_id);
		}
	} else {
		if (!context_guc_id_invalid(ce))
			with_intel_runtime_pm(runtime_pm, wakeref)
				__guc_context_set_preemption_timeout(guc,
								     ce->guc_id.id,
								     1);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	}
}

static void guc_context_sched_disable(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long flags;
	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
	intel_wakeref_t wakeref;
	u16 guc_id;

	GEM_BUG_ON(intel_context_is_child(ce));

	spin_lock_irqsave(&ce->guc_state.lock, flags);

	/*
	 * We have to check if the context has been disabled by another thread,
	 * check if submission has been disabled to seal a race with reset and
	 * finally check if any more requests have been committed to the
	 * context, ensuring that a request doesn't slip through the
	 * 'context_pending_disable' fence.
2350 */ 2351 if (unlikely(!context_enabled(ce) || submission_disabled(guc) || 2352 context_has_committed_requests(ce))) { 2353 clr_context_enabled(ce); 2354 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2355 goto unpin; 2356 } 2357 guc_id = prep_context_pending_disable(ce); 2358 2359 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2360 2361 with_intel_runtime_pm(runtime_pm, wakeref) 2362 __guc_context_sched_disable(guc, ce, guc_id); 2363 2364 return; 2365 unpin: 2366 intel_context_sched_disable_unpin(ce); 2367 } 2368 2369 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 2370 { 2371 struct intel_guc *guc = ce_to_guc(ce); 2372 struct intel_gt *gt = guc_to_gt(guc); 2373 unsigned long flags; 2374 bool disabled; 2375 2376 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 2377 GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); 2378 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 2379 GEM_BUG_ON(context_enabled(ce)); 2380 2381 /* Seal race with Reset */ 2382 spin_lock_irqsave(&ce->guc_state.lock, flags); 2383 disabled = submission_disabled(guc); 2384 if (likely(!disabled)) { 2385 __intel_gt_pm_get(gt); 2386 set_context_destroyed(ce); 2387 clr_context_registered(ce); 2388 } 2389 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2390 if (unlikely(disabled)) { 2391 release_guc_id(guc, ce); 2392 __guc_context_destroy(ce); 2393 return; 2394 } 2395 2396 deregister_context(ce, ce->guc_id.id); 2397 } 2398 2399 static void __guc_context_destroy(struct intel_context *ce) 2400 { 2401 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 2402 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 2403 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 2404 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 2405 GEM_BUG_ON(ce->guc_state.number_committed_requests); 2406 2407 lrc_fini(ce); 2408 intel_context_fini(ce); 2409 2410 if (intel_engine_is_virtual(ce->engine)) { 2411 struct guc_virtual_engine *ve = 2412 container_of(ce, typeof(*ve), context); 2413 2414 if (ve->base.breadcrumbs) 2415 intel_breadcrumbs_put(ve->base.breadcrumbs); 2416 2417 kfree(ve); 2418 } else { 2419 intel_context_free(ce); 2420 } 2421 } 2422 2423 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 2424 { 2425 struct intel_context *ce, *cn; 2426 unsigned long flags; 2427 2428 GEM_BUG_ON(!submission_disabled(guc) && 2429 guc_submission_initialized(guc)); 2430 2431 spin_lock_irqsave(&guc->submission_state.lock, flags); 2432 list_for_each_entry_safe(ce, cn, 2433 &guc->submission_state.destroyed_contexts, 2434 destroyed_link) { 2435 list_del_init(&ce->destroyed_link); 2436 __release_guc_id(guc, ce); 2437 __guc_context_destroy(ce); 2438 } 2439 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2440 } 2441 2442 static void deregister_destroyed_contexts(struct intel_guc *guc) 2443 { 2444 struct intel_context *ce, *cn; 2445 unsigned long flags; 2446 2447 spin_lock_irqsave(&guc->submission_state.lock, flags); 2448 list_for_each_entry_safe(ce, cn, 2449 &guc->submission_state.destroyed_contexts, 2450 destroyed_link) { 2451 list_del_init(&ce->destroyed_link); 2452 guc_lrc_desc_unpin(ce); 2453 } 2454 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2455 } 2456 2457 static void destroyed_worker_func(struct work_struct *w) 2458 { 2459 struct intel_guc *guc = container_of(w, struct intel_guc, 2460 submission_state.destroyed_worker); 2461 struct intel_gt *gt = guc_to_gt(guc); 2462 int tmp; 2463 2464 with_intel_gt_pm(gt, tmp) 2465 deregister_destroyed_contexts(guc); 2466 } 
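/*
 * Final kref release for a GuC submission context. Contexts unknown to the
 * GuC (invalid guc_id, descriptor not registered, or submission disabled by
 * an in-progress reset) are freed immediately; otherwise the context is moved
 * to the destroyed_contexts list and deregistered from destroyed_worker_func()
 * above, as taking GT PM from this (possibly atomic) path is not allowed.
 */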
2467 2468 static void guc_context_destroy(struct kref *kref) 2469 { 2470 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2471 struct intel_guc *guc = ce_to_guc(ce); 2472 unsigned long flags; 2473 bool destroy; 2474 2475 /* 2476 * If the guc_id is invalid this context has been stolen and we can free 2477 * it immediately. Also can be freed immediately if the context is not 2478 * registered with the GuC or the GuC is in the middle of a reset. 2479 */ 2480 spin_lock_irqsave(&guc->submission_state.lock, flags); 2481 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 2482 !lrc_desc_registered(guc, ce->guc_id.id); 2483 if (likely(!destroy)) { 2484 if (!list_empty(&ce->guc_id.link)) 2485 list_del_init(&ce->guc_id.link); 2486 list_add_tail(&ce->destroyed_link, 2487 &guc->submission_state.destroyed_contexts); 2488 } else { 2489 __release_guc_id(guc, ce); 2490 } 2491 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2492 if (unlikely(destroy)) { 2493 __guc_context_destroy(ce); 2494 return; 2495 } 2496 2497 /* 2498 * We use a worker to issue the H2G to deregister the context as we can 2499 * take the GT PM for the first time which isn't allowed from an atomic 2500 * context. 2501 */ 2502 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 2503 } 2504 2505 static int guc_context_alloc(struct intel_context *ce) 2506 { 2507 return lrc_alloc(ce, ce->engine); 2508 } 2509 2510 static void guc_context_set_prio(struct intel_guc *guc, 2511 struct intel_context *ce, 2512 u8 prio) 2513 { 2514 u32 action[] = { 2515 INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, 2516 ce->guc_id.id, 2517 prio, 2518 }; 2519 2520 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 2521 prio > GUC_CLIENT_PRIORITY_NORMAL); 2522 lockdep_assert_held(&ce->guc_state.lock); 2523 2524 if (ce->guc_state.prio == prio || submission_disabled(guc) || 2525 !context_registered(ce)) { 2526 ce->guc_state.prio = prio; 2527 return; 2528 } 2529 2530 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2531 2532 ce->guc_state.prio = prio; 2533 trace_intel_context_set_prio(ce); 2534 } 2535 2536 static inline u8 map_i915_prio_to_guc_prio(int prio) 2537 { 2538 if (prio == I915_PRIORITY_NORMAL) 2539 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 2540 else if (prio < I915_PRIORITY_NORMAL) 2541 return GUC_CLIENT_PRIORITY_NORMAL; 2542 else if (prio < I915_PRIORITY_DISPLAY) 2543 return GUC_CLIENT_PRIORITY_HIGH; 2544 else 2545 return GUC_CLIENT_PRIORITY_KMD_HIGH; 2546 } 2547 2548 static inline void add_context_inflight_prio(struct intel_context *ce, 2549 u8 guc_prio) 2550 { 2551 lockdep_assert_held(&ce->guc_state.lock); 2552 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2553 2554 ++ce->guc_state.prio_count[guc_prio]; 2555 2556 /* Overflow protection */ 2557 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2558 } 2559 2560 static inline void sub_context_inflight_prio(struct intel_context *ce, 2561 u8 guc_prio) 2562 { 2563 lockdep_assert_held(&ce->guc_state.lock); 2564 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 2565 2566 /* Underflow protection */ 2567 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 2568 2569 --ce->guc_state.prio_count[guc_prio]; 2570 } 2571 2572 static inline void update_context_prio(struct intel_context *ce) 2573 { 2574 struct intel_guc *guc = &ce->engine->gt->uc.guc; 2575 int i; 2576 2577 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 2578 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 2579 2580 
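	/*
	 * Lower numerical GuC priority values are higher priority (KMD_HIGH
	 * is 0), so apply the first band that still has requests in flight.
	 */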
lockdep_assert_held(&ce->guc_state.lock); 2581 2582 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 2583 if (ce->guc_state.prio_count[i]) { 2584 guc_context_set_prio(guc, ce, i); 2585 break; 2586 } 2587 } 2588 } 2589 2590 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 2591 { 2592 /* Lower value is higher priority */ 2593 return new_guc_prio < old_guc_prio; 2594 } 2595 2596 static void add_to_context(struct i915_request *rq) 2597 { 2598 struct intel_context *ce = request_to_scheduling_context(rq); 2599 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 2600 2601 GEM_BUG_ON(intel_context_is_child(ce)); 2602 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 2603 2604 spin_lock(&ce->guc_state.lock); 2605 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 2606 2607 if (rq->guc_prio == GUC_PRIO_INIT) { 2608 rq->guc_prio = new_guc_prio; 2609 add_context_inflight_prio(ce, rq->guc_prio); 2610 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 2611 sub_context_inflight_prio(ce, rq->guc_prio); 2612 rq->guc_prio = new_guc_prio; 2613 add_context_inflight_prio(ce, rq->guc_prio); 2614 } 2615 update_context_prio(ce); 2616 2617 spin_unlock(&ce->guc_state.lock); 2618 } 2619 2620 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 2621 { 2622 lockdep_assert_held(&ce->guc_state.lock); 2623 2624 if (rq->guc_prio != GUC_PRIO_INIT && 2625 rq->guc_prio != GUC_PRIO_FINI) { 2626 sub_context_inflight_prio(ce, rq->guc_prio); 2627 update_context_prio(ce); 2628 } 2629 rq->guc_prio = GUC_PRIO_FINI; 2630 } 2631 2632 static void remove_from_context(struct i915_request *rq) 2633 { 2634 struct intel_context *ce = request_to_scheduling_context(rq); 2635 2636 GEM_BUG_ON(intel_context_is_child(ce)); 2637 2638 spin_lock_irq(&ce->guc_state.lock); 2639 2640 list_del_init(&rq->sched.link); 2641 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2642 2643 /* Prevent further __await_execution() registering a cb, then flush */ 2644 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2645 2646 guc_prio_fini(rq, ce); 2647 2648 decr_context_committed_requests(ce); 2649 2650 spin_unlock_irq(&ce->guc_state.lock); 2651 2652 atomic_dec(&ce->guc_id.ref); 2653 i915_request_notify_execute_cb_imm(rq); 2654 } 2655 2656 static const struct intel_context_ops guc_context_ops = { 2657 .alloc = guc_context_alloc, 2658 2659 .pre_pin = guc_context_pre_pin, 2660 .pin = guc_context_pin, 2661 .unpin = guc_context_unpin, 2662 .post_unpin = guc_context_post_unpin, 2663 2664 .ban = guc_context_ban, 2665 2666 .cancel_request = guc_context_cancel_request, 2667 2668 .enter = intel_context_enter_engine, 2669 .exit = intel_context_exit_engine, 2670 2671 .sched_disable = guc_context_sched_disable, 2672 2673 .reset = lrc_reset, 2674 .destroy = guc_context_destroy, 2675 2676 .create_virtual = guc_create_virtual, 2677 .create_parallel = guc_create_parallel, 2678 }; 2679 2680 static void submit_work_cb(struct irq_work *wrk) 2681 { 2682 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 2683 2684 might_lock(&rq->engine->sched_engine->lock); 2685 i915_sw_fence_complete(&rq->submit); 2686 } 2687 2688 static void __guc_signal_context_fence(struct intel_context *ce) 2689 { 2690 struct i915_request *rq, *rn; 2691 2692 lockdep_assert_held(&ce->guc_state.lock); 2693 2694 if (!list_empty(&ce->guc_state.fences)) 2695 trace_intel_context_fence_release(ce); 2696 2697 /* 2698 * Use an IRQ to ensure locking order of sched_engine->lock -> 2699 * ce->guc_state.lock is preserved. 
2700 */ 2701 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 2702 guc_fence_link) { 2703 list_del(&rq->guc_fence_link); 2704 irq_work_queue(&rq->submit_work); 2705 } 2706 2707 INIT_LIST_HEAD(&ce->guc_state.fences); 2708 } 2709 2710 static void guc_signal_context_fence(struct intel_context *ce) 2711 { 2712 unsigned long flags; 2713 2714 GEM_BUG_ON(intel_context_is_child(ce)); 2715 2716 spin_lock_irqsave(&ce->guc_state.lock, flags); 2717 clr_context_wait_for_deregister_to_register(ce); 2718 __guc_signal_context_fence(ce); 2719 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2720 } 2721 2722 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 2723 { 2724 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 2725 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && 2726 !submission_disabled(ce_to_guc(ce)); 2727 } 2728 2729 static void guc_context_init(struct intel_context *ce) 2730 { 2731 const struct i915_gem_context *ctx; 2732 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 2733 2734 rcu_read_lock(); 2735 ctx = rcu_dereference(ce->gem_context); 2736 if (ctx) 2737 prio = ctx->sched.priority; 2738 rcu_read_unlock(); 2739 2740 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 2741 set_bit(CONTEXT_GUC_INIT, &ce->flags); 2742 } 2743 2744 static int guc_request_alloc(struct i915_request *rq) 2745 { 2746 struct intel_context *ce = request_to_scheduling_context(rq); 2747 struct intel_guc *guc = ce_to_guc(ce); 2748 unsigned long flags; 2749 int ret; 2750 2751 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 2752 2753 /* 2754 * Flush enough space to reduce the likelihood of waiting after 2755 * we start building the request - in which case we will just 2756 * have to repeat work. 2757 */ 2758 rq->reserved_space += GUC_REQUEST_SIZE; 2759 2760 /* 2761 * Note that after this point, we have committed to using 2762 * this request as it is being used to both track the 2763 * state of engine initialisation and liveness of the 2764 * golden renderstate above. Think twice before you try 2765 * to cancel/unwind this request now. 2766 */ 2767 2768 /* Unconditionally invalidate GPU caches and TLBs. */ 2769 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 2770 if (ret) 2771 return ret; 2772 2773 rq->reserved_space -= GUC_REQUEST_SIZE; 2774 2775 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 2776 guc_context_init(ce); 2777 2778 /* 2779 * Call pin_guc_id here rather than in the pinning step as with 2780 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 2781 * guc_id and creating horrible race conditions. This is especially bad 2782 * when guc_id are being stolen due to over subscription. By the time 2783 * this function is reached, it is guaranteed that the guc_id will be 2784 * persistent until the generated request is retired. Thus, sealing these 2785 * race conditions. It is still safe to fail here if guc_id are 2786 * exhausted and return -EAGAIN to the user indicating that they can try 2787 * again in the future. 2788 * 2789 * There is no need for a lock here as the timeline mutex ensures at 2790 * most one context can be executing this code path at once. The 2791 * guc_id_ref is incremented once for every request in flight and 2792 * decremented on each retire. When it is zero, a lock around the 2793 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 
	 */
	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
		goto out;

	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
	if (unlikely(ret < 0))
		return ret;
	if (context_needs_register(ce, !!ret)) {
		ret = guc_lrc_desc_pin(ce, true);
		if (unlikely(ret)) {	/* unwind */
			if (ret == -EPIPE) {
				disable_submission(guc);
				goto out;	/* GPU will be reset */
			}
			atomic_dec(&ce->guc_id.ref);
			unpin_guc_id(guc, ce);
			return ret;
		}
	}

	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

out:
	/*
	 * We block all requests on this context if a G2H is pending for a
	 * schedule disable or context deregistration as the GuC will fail a
	 * schedule enable or context registration if either G2H is pending
	 * respectively. Once a G2H returns, the fence blocking these requests
	 * is released (see guc_signal_context_fence).
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (context_wait_for_deregister_to_register(ce) ||
	    context_pending_disable(ce)) {
		init_irq_work(&rq->submit_work, submit_work_cb);
		i915_sw_fence_await(&rq->submit);

		list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
	}
	incr_context_committed_requests(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	return 0;
}

static int guc_virtual_context_pre_pin(struct intel_context *ce,
				       struct i915_gem_ww_ctx *ww,
				       void **vaddr)
{
	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);

	return __guc_context_pre_pin(ce, engine, ww, vaddr);
}

static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
{
	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
	int ret = __guc_context_pin(ce, engine, vaddr);
	intel_engine_mask_t tmp, mask = ce->engine->mask;

	if (likely(!ret))
		for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
			intel_engine_pm_get(engine);

	return ret;
}

static void guc_virtual_context_unpin(struct intel_context *ce)
{
	intel_engine_mask_t tmp, mask = ce->engine->mask;
	struct intel_engine_cs *engine;
	struct intel_guc *guc = ce_to_guc(ce);

	GEM_BUG_ON(context_enabled(ce));
	GEM_BUG_ON(intel_context_is_barrier(ce));

	unpin_guc_id(guc, ce);
	lrc_unpin(ce);

	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
		intel_engine_pm_put_async(engine);
}

static void guc_virtual_context_enter(struct intel_context *ce)
{
	intel_engine_mask_t tmp, mask = ce->engine->mask;
	struct intel_engine_cs *engine;

	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
		intel_engine_pm_get(engine);

	intel_timeline_enter(ce->timeline);
}

static void guc_virtual_context_exit(struct intel_context *ce)
{
	intel_engine_mask_t tmp, mask = ce->engine->mask;
	struct intel_engine_cs *engine;

	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
		intel_engine_pm_put(engine);

	intel_timeline_exit(ce->timeline);
}

static int guc_virtual_context_alloc(struct intel_context *ce)
{
	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);

	return lrc_alloc(ce, engine);
}

static const struct intel_context_ops virtual_guc_context_ops = {
	.alloc = guc_virtual_context_alloc,
2907 2908 .pre_pin = guc_virtual_context_pre_pin, 2909 .pin = guc_virtual_context_pin, 2910 .unpin = guc_virtual_context_unpin, 2911 .post_unpin = guc_context_post_unpin, 2912 2913 .ban = guc_context_ban, 2914 2915 .cancel_request = guc_context_cancel_request, 2916 2917 .enter = guc_virtual_context_enter, 2918 .exit = guc_virtual_context_exit, 2919 2920 .sched_disable = guc_context_sched_disable, 2921 2922 .destroy = guc_context_destroy, 2923 2924 .get_sibling = guc_virtual_get_sibling, 2925 }; 2926 2927 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 2928 { 2929 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2930 struct intel_guc *guc = ce_to_guc(ce); 2931 int ret; 2932 2933 GEM_BUG_ON(!intel_context_is_parent(ce)); 2934 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2935 2936 ret = pin_guc_id(guc, ce); 2937 if (unlikely(ret < 0)) 2938 return ret; 2939 2940 return __guc_context_pin(ce, engine, vaddr); 2941 } 2942 2943 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 2944 { 2945 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 2946 2947 GEM_BUG_ON(!intel_context_is_child(ce)); 2948 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2949 2950 __intel_context_pin(ce->parallel.parent); 2951 return __guc_context_pin(ce, engine, vaddr); 2952 } 2953 2954 static void guc_parent_context_unpin(struct intel_context *ce) 2955 { 2956 struct intel_guc *guc = ce_to_guc(ce); 2957 2958 GEM_BUG_ON(context_enabled(ce)); 2959 GEM_BUG_ON(intel_context_is_barrier(ce)); 2960 GEM_BUG_ON(!intel_context_is_parent(ce)); 2961 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2962 2963 if (ce->parallel.last_rq) 2964 i915_request_put(ce->parallel.last_rq); 2965 unpin_guc_id(guc, ce); 2966 lrc_unpin(ce); 2967 } 2968 2969 static void guc_child_context_unpin(struct intel_context *ce) 2970 { 2971 GEM_BUG_ON(context_enabled(ce)); 2972 GEM_BUG_ON(intel_context_is_barrier(ce)); 2973 GEM_BUG_ON(!intel_context_is_child(ce)); 2974 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2975 2976 lrc_unpin(ce); 2977 } 2978 2979 static void guc_child_context_post_unpin(struct intel_context *ce) 2980 { 2981 GEM_BUG_ON(!intel_context_is_child(ce)); 2982 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 2983 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 2984 2985 lrc_post_unpin(ce); 2986 intel_context_unpin(ce->parallel.parent); 2987 } 2988 2989 static void guc_child_context_destroy(struct kref *kref) 2990 { 2991 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2992 2993 __guc_context_destroy(ce); 2994 } 2995 2996 static const struct intel_context_ops virtual_parent_context_ops = { 2997 .alloc = guc_virtual_context_alloc, 2998 2999 .pre_pin = guc_context_pre_pin, 3000 .pin = guc_parent_context_pin, 3001 .unpin = guc_parent_context_unpin, 3002 .post_unpin = guc_context_post_unpin, 3003 3004 .ban = guc_context_ban, 3005 3006 .cancel_request = guc_context_cancel_request, 3007 3008 .enter = guc_virtual_context_enter, 3009 .exit = guc_virtual_context_exit, 3010 3011 .sched_disable = guc_context_sched_disable, 3012 3013 .destroy = guc_context_destroy, 3014 3015 .get_sibling = guc_virtual_get_sibling, 3016 }; 3017 3018 static const struct intel_context_ops virtual_child_context_ops = { 3019 .alloc = guc_virtual_context_alloc, 3020 3021 .pre_pin = guc_context_pre_pin, 3022 .pin = guc_child_context_pin, 3023 .unpin = guc_child_context_unpin, 3024 .post_unpin = guc_child_context_post_unpin, 3025 3026 .cancel_request = 
guc_context_cancel_request, 3027 3028 .enter = guc_virtual_context_enter, 3029 .exit = guc_virtual_context_exit, 3030 3031 .destroy = guc_child_context_destroy, 3032 3033 .get_sibling = guc_virtual_get_sibling, 3034 }; 3035 3036 /* 3037 * The below override of the breadcrumbs is enabled when the user configures a 3038 * context for parallel submission (multi-lrc, parent-child). 3039 * 3040 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3041 * safely preempt all the hw contexts configured for parallel submission 3042 * between each BB. The contract between the i915 and GuC is if the parent 3043 * context can be preempted, all the children can be preempted, and the GuC will 3044 * always try to preempt the parent before the children. A handshake between the 3045 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3046 * creating a window to preempt between each set of BBs. 3047 */ 3048 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3049 u64 offset, u32 len, 3050 const unsigned int flags); 3051 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3052 u64 offset, u32 len, 3053 const unsigned int flags); 3054 static u32 * 3055 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3056 u32 *cs); 3057 static u32 * 3058 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3059 u32 *cs); 3060 3061 static struct intel_context * 3062 guc_create_parallel(struct intel_engine_cs **engines, 3063 unsigned int num_siblings, 3064 unsigned int width) 3065 { 3066 struct intel_engine_cs **siblings = NULL; 3067 struct intel_context *parent = NULL, *ce, *err; 3068 int i, j; 3069 3070 siblings = kmalloc_array(num_siblings, 3071 sizeof(*siblings), 3072 GFP_KERNEL); 3073 if (!siblings) 3074 return ERR_PTR(-ENOMEM); 3075 3076 for (i = 0; i < width; ++i) { 3077 for (j = 0; j < num_siblings; ++j) 3078 siblings[j] = engines[i * num_siblings + j]; 3079 3080 ce = intel_engine_create_virtual(siblings, num_siblings, 3081 FORCE_VIRTUAL); 3082 if (!ce) { 3083 err = ERR_PTR(-ENOMEM); 3084 goto unwind; 3085 } 3086 3087 if (i == 0) { 3088 parent = ce; 3089 parent->ops = &virtual_parent_context_ops; 3090 } else { 3091 ce->ops = &virtual_child_context_ops; 3092 intel_context_bind_parent_child(parent, ce); 3093 } 3094 } 3095 3096 parent->parallel.fence_context = dma_fence_context_alloc(1); 3097 3098 parent->engine->emit_bb_start = 3099 emit_bb_start_parent_no_preempt_mid_batch; 3100 parent->engine->emit_fini_breadcrumb = 3101 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3102 parent->engine->emit_fini_breadcrumb_dw = 3103 12 + 4 * parent->parallel.number_children; 3104 for_each_child(parent, ce) { 3105 ce->engine->emit_bb_start = 3106 emit_bb_start_child_no_preempt_mid_batch; 3107 ce->engine->emit_fini_breadcrumb = 3108 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3109 ce->engine->emit_fini_breadcrumb_dw = 16; 3110 } 3111 3112 kfree(siblings); 3113 return parent; 3114 3115 unwind: 3116 if (parent) 3117 intel_context_put(parent); 3118 kfree(siblings); 3119 return err; 3120 } 3121 3122 static bool 3123 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3124 { 3125 struct intel_engine_cs *sibling; 3126 intel_engine_mask_t tmp, mask = b->engine_mask; 3127 bool result = false; 3128 3129 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3130 result |= intel_engine_irq_enable(sibling); 3131 3132 return result; 3133 } 3134 3135 static void 3136 
guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3137 { 3138 struct intel_engine_cs *sibling; 3139 intel_engine_mask_t tmp, mask = b->engine_mask; 3140 3141 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3142 intel_engine_irq_disable(sibling); 3143 } 3144 3145 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3146 { 3147 int i; 3148 3149 /* 3150 * In GuC submission mode we do not know which physical engine a request 3151 * will be scheduled on, this creates a problem because the breadcrumb 3152 * interrupt is per physical engine. To work around this we attach 3153 * requests and direct all breadcrumb interrupts to the first instance 3154 * of an engine per class. In addition all breadcrumb interrupts are 3155 * enabled / disabled across an engine class in unison. 3156 */ 3157 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3158 struct intel_engine_cs *sibling = 3159 engine->gt->engine_class[engine->class][i]; 3160 3161 if (sibling) { 3162 if (engine->breadcrumbs != sibling->breadcrumbs) { 3163 intel_breadcrumbs_put(engine->breadcrumbs); 3164 engine->breadcrumbs = 3165 intel_breadcrumbs_get(sibling->breadcrumbs); 3166 } 3167 break; 3168 } 3169 } 3170 3171 if (engine->breadcrumbs) { 3172 engine->breadcrumbs->engine_mask |= engine->mask; 3173 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3174 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3175 } 3176 } 3177 3178 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3179 int prio) 3180 { 3181 struct intel_context *ce = request_to_scheduling_context(rq); 3182 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3183 3184 /* Short circuit function */ 3185 if (prio < I915_PRIORITY_NORMAL || 3186 rq->guc_prio == GUC_PRIO_FINI || 3187 (rq->guc_prio != GUC_PRIO_INIT && 3188 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 3189 return; 3190 3191 spin_lock(&ce->guc_state.lock); 3192 if (rq->guc_prio != GUC_PRIO_FINI) { 3193 if (rq->guc_prio != GUC_PRIO_INIT) 3194 sub_context_inflight_prio(ce, rq->guc_prio); 3195 rq->guc_prio = new_guc_prio; 3196 add_context_inflight_prio(ce, rq->guc_prio); 3197 update_context_prio(ce); 3198 } 3199 spin_unlock(&ce->guc_state.lock); 3200 } 3201 3202 static void guc_retire_inflight_request_prio(struct i915_request *rq) 3203 { 3204 struct intel_context *ce = request_to_scheduling_context(rq); 3205 3206 spin_lock(&ce->guc_state.lock); 3207 guc_prio_fini(rq, ce); 3208 spin_unlock(&ce->guc_state.lock); 3209 } 3210 3211 static void sanitize_hwsp(struct intel_engine_cs *engine) 3212 { 3213 struct intel_timeline *tl; 3214 3215 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 3216 intel_timeline_reset_seqno(tl); 3217 } 3218 3219 static void guc_sanitize(struct intel_engine_cs *engine) 3220 { 3221 /* 3222 * Poison residual state on resume, in case the suspend didn't! 3223 * 3224 * We have to assume that across suspend/resume (or other loss 3225 * of control) that the contents of our pinned buffers has been 3226 * lost, replaced by garbage. Since this doesn't always happen, 3227 * let's poison such state so that we more quickly spot when 3228 * we falsely assume it has been preserved. 3229 */ 3230 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 3231 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 3232 3233 /* 3234 * The kernel_context HWSP is stored in the status_page. As above, 3235 * that may be lost on resume/initialisation, and so we need to 3236 * reset the value in the HWSP. 
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}

static void setup_hwsp(struct intel_engine_cs *engine)
{
	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

	ENGINE_WRITE_FW(engine,
			RING_HWS_PGA,
			i915_ggtt_offset(engine->status_page.vma));
}

static void start_engine(struct intel_engine_cs *engine)
{
	ENGINE_WRITE_FW(engine,
			RING_MODE_GEN7,
			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
	ENGINE_POSTING_READ(engine, RING_MI_MODE);
}

static int guc_resume(struct intel_engine_cs *engine)
{
	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);

	intel_mocs_init_engine(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	setup_hwsp(engine);
	start_engine(engine);

	return 0;
}

static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
{
	return !sched_engine->tasklet.callback;
}

static void guc_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = guc_submit_request;
}

static inline void guc_kernel_context_pin(struct intel_guc *guc,
					  struct intel_context *ce)
{
	if (context_guc_id_invalid(ce))
		pin_guc_id(guc, ce);
	guc_lrc_desc_pin(ce, true);
}

static inline void guc_init_lrc_mapping(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* make sure all descriptors are clean... */
	xa_destroy(&guc->context_lookup);

	/*
	 * Some contexts might have been pinned before we enabled GuC
	 * submission, so we need to add them to the GuC bookkeeping.
	 * Also, after a reset of the GuC we want to make sure that the
	 * information shared with the GuC is properly reset. The kernel LRCs
	 * are not attached to the gem_context, so they need to be added
	 * separately.
	 *
	 * Note: we purposefully do not check the return of guc_lrc_desc_pin,
	 * because that function can only fail if a reset is just starting.
	 * This is at the end of reset so presumably another reset isn't
	 * happening and even if it did this code would be run again.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;

		list_for_each_entry(ce, &engine->pinned_contexts_list,
				    pinned_contexts_link)
			guc_kernel_context_pin(guc, ce);
	}
}

static void guc_release(struct intel_engine_cs *engine)
{
	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */

	intel_engine_cleanup_common(engine);
	lrc_fini_wa_ctx(engine);
}

static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
{
	struct intel_engine_cs *e;
	intel_engine_mask_t tmp, mask = engine->mask;

	for_each_engine_masked(e, engine->gt, mask, tmp)
		e->serial++;
}

static void guc_default_vfuncs(struct intel_engine_cs *engine)
{
	/* Default vfuncs which can be overridden by each engine.
*/ 3348 3349 engine->resume = guc_resume; 3350 3351 engine->cops = &guc_context_ops; 3352 engine->request_alloc = guc_request_alloc; 3353 engine->add_active_request = add_to_context; 3354 engine->remove_active_request = remove_from_context; 3355 3356 engine->sched_engine->schedule = i915_schedule; 3357 3358 engine->reset.prepare = guc_reset_nop; 3359 engine->reset.rewind = guc_rewind_nop; 3360 engine->reset.cancel = guc_reset_nop; 3361 engine->reset.finish = guc_reset_nop; 3362 3363 engine->emit_flush = gen8_emit_flush_xcs; 3364 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3365 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3366 if (GRAPHICS_VER(engine->i915) >= 12) { 3367 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3368 engine->emit_flush = gen12_emit_flush_xcs; 3369 } 3370 engine->set_default_submission = guc_set_default_submission; 3371 3372 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3373 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3374 3375 /* 3376 * TODO: GuC supports timeslicing and semaphores as well, but they're 3377 * handled by the firmware so some minor tweaks are required before 3378 * enabling. 3379 * 3380 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3381 */ 3382 3383 engine->emit_bb_start = gen8_emit_bb_start; 3384 } 3385 3386 static void rcs_submission_override(struct intel_engine_cs *engine) 3387 { 3388 switch (GRAPHICS_VER(engine->i915)) { 3389 case 12: 3390 engine->emit_flush = gen12_emit_flush_rcs; 3391 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3392 break; 3393 case 11: 3394 engine->emit_flush = gen11_emit_flush_rcs; 3395 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3396 break; 3397 default: 3398 engine->emit_flush = gen8_emit_flush_rcs; 3399 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3400 break; 3401 } 3402 } 3403 3404 static inline void guc_default_irqs(struct intel_engine_cs *engine) 3405 { 3406 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 3407 intel_engine_set_irq_handler(engine, cs_irq_handler); 3408 } 3409 3410 static void guc_sched_engine_destroy(struct kref *kref) 3411 { 3412 struct i915_sched_engine *sched_engine = 3413 container_of(kref, typeof(*sched_engine), ref); 3414 struct intel_guc *guc = sched_engine->private_data; 3415 3416 guc->sched_engine = NULL; 3417 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 3418 kfree(sched_engine); 3419 } 3420 3421 int intel_guc_submission_setup(struct intel_engine_cs *engine) 3422 { 3423 struct drm_i915_private *i915 = engine->i915; 3424 struct intel_guc *guc = &engine->gt->uc.guc; 3425 3426 /* 3427 * The setup relies on several assumptions (e.g. 
irqs always enabled) 3428 * that are only valid on gen11+ 3429 */ 3430 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 3431 3432 if (!guc->sched_engine) { 3433 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 3434 if (!guc->sched_engine) 3435 return -ENOMEM; 3436 3437 guc->sched_engine->schedule = i915_schedule; 3438 guc->sched_engine->disabled = guc_sched_engine_disabled; 3439 guc->sched_engine->private_data = guc; 3440 guc->sched_engine->destroy = guc_sched_engine_destroy; 3441 guc->sched_engine->bump_inflight_request_prio = 3442 guc_bump_inflight_request_prio; 3443 guc->sched_engine->retire_inflight_request_prio = 3444 guc_retire_inflight_request_prio; 3445 tasklet_setup(&guc->sched_engine->tasklet, 3446 guc_submission_tasklet); 3447 } 3448 i915_sched_engine_put(engine->sched_engine); 3449 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 3450 3451 guc_default_vfuncs(engine); 3452 guc_default_irqs(engine); 3453 guc_init_breadcrumbs(engine); 3454 3455 if (engine->class == RENDER_CLASS) 3456 rcs_submission_override(engine); 3457 3458 lrc_init_wa_ctx(engine); 3459 3460 /* Finally, take ownership and responsibility for cleanup! */ 3461 engine->sanitize = guc_sanitize; 3462 engine->release = guc_release; 3463 3464 return 0; 3465 } 3466 3467 void intel_guc_submission_enable(struct intel_guc *guc) 3468 { 3469 guc_init_lrc_mapping(guc); 3470 } 3471 3472 void intel_guc_submission_disable(struct intel_guc *guc) 3473 { 3474 /* Note: By the time we're here, GuC may have already been reset */ 3475 } 3476 3477 static bool __guc_submission_supported(struct intel_guc *guc) 3478 { 3479 /* GuC submission is unavailable for pre-Gen11 */ 3480 return intel_guc_is_supported(guc) && 3481 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 3482 } 3483 3484 static bool __guc_submission_selected(struct intel_guc *guc) 3485 { 3486 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 3487 3488 if (!intel_guc_submission_is_supported(guc)) 3489 return false; 3490 3491 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 3492 } 3493 3494 void intel_guc_submission_init_early(struct intel_guc *guc) 3495 { 3496 guc->submission_supported = __guc_submission_supported(guc); 3497 guc->submission_selected = __guc_submission_selected(guc); 3498 } 3499 3500 static inline struct intel_context * 3501 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) 3502 { 3503 struct intel_context *ce; 3504 3505 if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { 3506 drm_err(&guc_to_gt(guc)->i915->drm, 3507 "Invalid desc_idx %u", desc_idx); 3508 return NULL; 3509 } 3510 3511 ce = __get_context(guc, desc_idx); 3512 if (unlikely(!ce)) { 3513 drm_err(&guc_to_gt(guc)->i915->drm, 3514 "Context is NULL, desc_idx %u", desc_idx); 3515 return NULL; 3516 } 3517 3518 if (unlikely(intel_context_is_child(ce))) { 3519 drm_err(&guc_to_gt(guc)->i915->drm, 3520 "Context is child, desc_idx %u", desc_idx); 3521 return NULL; 3522 } 3523 3524 return ce; 3525 } 3526 3527 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 3528 const u32 *msg, 3529 u32 len) 3530 { 3531 struct intel_context *ce; 3532 u32 desc_idx = msg[0]; 3533 3534 if (unlikely(len < 1)) { 3535 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3536 return -EPROTO; 3537 } 3538 3539 ce = g2h_context_lookup(guc, desc_idx); 3540 if (unlikely(!ce)) 3541 return -EPROTO; 3542 3543 trace_intel_context_deregister_done(ce); 3544 3545 #ifdef CONFIG_DRM_I915_SELFTEST 3546 if (unlikely(ce->drop_deregister)) { 3547 ce->drop_deregister = false; 3548 return 0; 3549 } 3550 
#endif

	if (context_wait_for_deregister_to_register(ce)) {
		struct intel_runtime_pm *runtime_pm =
			&ce->engine->gt->i915->runtime_pm;
		intel_wakeref_t wakeref;

		/*
		 * Previous owner of this guc_id has been deregistered, now
		 * it is safe to register this context.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref)
			register_context(ce, true);
		guc_signal_context_fence(ce);
		intel_context_put(ce);
	} else if (context_destroyed(ce)) {
		/* Context has been destroyed */
		intel_gt_pm_put_async(guc_to_gt(guc));
		release_guc_id(guc, ce);
		__guc_context_destroy(ce);
	}

	decr_outstanding_submission_g2h(guc);

	return 0;
}

int intel_guc_sched_done_process_msg(struct intel_guc *guc,
				     const u32 *msg,
				     u32 len)
{
	struct intel_context *ce;
	unsigned long flags;
	u32 desc_idx = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	ce = g2h_context_lookup(guc, desc_idx);
	if (unlikely(!ce))
		return -EPROTO;

	if (unlikely(context_destroyed(ce) ||
		     (!context_pending_enable(ce) &&
		      !context_pending_disable(ce)))) {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Bad context sched_state 0x%x, desc_idx %u",
			ce->guc_state.sched_state, desc_idx);
		return -EPROTO;
	}

	trace_intel_context_sched_done(ce);

	if (context_pending_enable(ce)) {
#ifdef CONFIG_DRM_I915_SELFTEST
		if (unlikely(ce->drop_schedule_enable)) {
			ce->drop_schedule_enable = false;
			return 0;
		}
#endif

		spin_lock_irqsave(&ce->guc_state.lock, flags);
		clr_context_pending_enable(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	} else if (context_pending_disable(ce)) {
		bool banned;

#ifdef CONFIG_DRM_I915_SELFTEST
		if (unlikely(ce->drop_schedule_disable)) {
			ce->drop_schedule_disable = false;
			return 0;
		}
#endif

		/*
		 * Unpin must be done before __guc_signal_context_fence,
		 * otherwise a race exists between the requests getting
		 * submitted + retired before this unpin completes, resulting
		 * in the pin_count going to zero and the context still being
		 * enabled.
3632 */ 3633 intel_context_sched_disable_unpin(ce); 3634 3635 spin_lock_irqsave(&ce->guc_state.lock, flags); 3636 banned = context_banned(ce); 3637 clr_context_banned(ce); 3638 clr_context_pending_disable(ce); 3639 __guc_signal_context_fence(ce); 3640 guc_blocked_fence_complete(ce); 3641 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3642 3643 if (banned) { 3644 guc_cancel_context_requests(ce); 3645 intel_engine_signal_breadcrumbs(ce->engine); 3646 } 3647 } 3648 3649 decr_outstanding_submission_g2h(guc); 3650 intel_context_put(ce); 3651 3652 return 0; 3653 } 3654 3655 static void capture_error_state(struct intel_guc *guc, 3656 struct intel_context *ce) 3657 { 3658 struct intel_gt *gt = guc_to_gt(guc); 3659 struct drm_i915_private *i915 = gt->i915; 3660 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 3661 intel_wakeref_t wakeref; 3662 3663 intel_engine_set_hung_context(engine, ce); 3664 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 3665 i915_capture_error_state(gt, engine->mask); 3666 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 3667 } 3668 3669 static void guc_context_replay(struct intel_context *ce) 3670 { 3671 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 3672 3673 __guc_reset_context(ce, true); 3674 tasklet_hi_schedule(&sched_engine->tasklet); 3675 } 3676 3677 static void guc_handle_context_reset(struct intel_guc *guc, 3678 struct intel_context *ce) 3679 { 3680 trace_intel_context_reset(ce); 3681 3682 /* 3683 * XXX: Racey if request cancellation has occurred, see comment in 3684 * __guc_reset_context(). 3685 */ 3686 if (likely(!intel_context_is_banned(ce) && 3687 !context_blocked(ce))) { 3688 capture_error_state(guc, ce); 3689 guc_context_replay(ce); 3690 } 3691 } 3692 3693 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 3694 const u32 *msg, u32 len) 3695 { 3696 struct intel_context *ce; 3697 int desc_idx; 3698 3699 if (unlikely(len != 1)) { 3700 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3701 return -EPROTO; 3702 } 3703 3704 desc_idx = msg[0]; 3705 ce = g2h_context_lookup(guc, desc_idx); 3706 if (unlikely(!ce)) 3707 return -EPROTO; 3708 3709 guc_handle_context_reset(guc, ce); 3710 3711 return 0; 3712 } 3713 3714 static struct intel_engine_cs * 3715 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 3716 { 3717 struct intel_gt *gt = guc_to_gt(guc); 3718 u8 engine_class = guc_class_to_engine_class(guc_class); 3719 3720 /* Class index is checked in class converter */ 3721 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 3722 3723 return gt->engine_class[engine_class][instance]; 3724 } 3725 3726 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 3727 const u32 *msg, u32 len) 3728 { 3729 struct intel_engine_cs *engine; 3730 u8 guc_class, instance; 3731 u32 reason; 3732 3733 if (unlikely(len != 3)) { 3734 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 3735 return -EPROTO; 3736 } 3737 3738 guc_class = msg[0]; 3739 instance = msg[1]; 3740 reason = msg[2]; 3741 3742 engine = guc_lookup_engine(guc, guc_class, instance); 3743 if (unlikely(!engine)) { 3744 drm_err(&guc_to_gt(guc)->i915->drm, 3745 "Invalid engine %d:%d", guc_class, instance); 3746 return -EPROTO; 3747 } 3748 3749 intel_gt_handle_error(guc_to_gt(guc), engine->mask, 3750 I915_ERROR_CAPTURE, 3751 "GuC failed to reset %s (reason=0x%08x)\n", 3752 engine->name, reason); 3753 3754 return 0; 3755 } 3756 3757 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 3758 { 3759 struct 
intel_guc *guc = &engine->gt->uc.guc; 3760 struct intel_context *ce; 3761 struct i915_request *rq; 3762 unsigned long index; 3763 unsigned long flags; 3764 3765 /* Reset called during driver load? GuC not yet initialised! */ 3766 if (unlikely(!guc_submission_initialized(guc))) 3767 return; 3768 3769 xa_lock_irqsave(&guc->context_lookup, flags); 3770 xa_for_each(&guc->context_lookup, index, ce) { 3771 if (!kref_get_unless_zero(&ce->ref)) 3772 continue; 3773 3774 xa_unlock(&guc->context_lookup); 3775 3776 if (!intel_context_is_pinned(ce)) 3777 goto next; 3778 3779 if (intel_engine_is_virtual(ce->engine)) { 3780 if (!(ce->engine->mask & engine->mask)) 3781 goto next; 3782 } else { 3783 if (ce->engine != engine) 3784 goto next; 3785 } 3786 3787 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 3788 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 3789 continue; 3790 3791 intel_engine_set_hung_context(engine, ce); 3792 3793 /* Can only cope with one hang at a time... */ 3794 intel_context_put(ce); 3795 xa_lock(&guc->context_lookup); 3796 goto done; 3797 } 3798 next: 3799 intel_context_put(ce); 3800 xa_lock(&guc->context_lookup); 3801 } 3802 done: 3803 xa_unlock_irqrestore(&guc->context_lookup, flags); 3804 } 3805 3806 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 3807 struct i915_request *hung_rq, 3808 struct drm_printer *m) 3809 { 3810 struct intel_guc *guc = &engine->gt->uc.guc; 3811 struct intel_context *ce; 3812 unsigned long index; 3813 unsigned long flags; 3814 3815 /* Reset called during driver load? GuC not yet initialised! */ 3816 if (unlikely(!guc_submission_initialized(guc))) 3817 return; 3818 3819 xa_lock_irqsave(&guc->context_lookup, flags); 3820 xa_for_each(&guc->context_lookup, index, ce) { 3821 if (!kref_get_unless_zero(&ce->ref)) 3822 continue; 3823 3824 xa_unlock(&guc->context_lookup); 3825 3826 if (!intel_context_is_pinned(ce)) 3827 goto next; 3828 3829 if (intel_engine_is_virtual(ce->engine)) { 3830 if (!(ce->engine->mask & engine->mask)) 3831 goto next; 3832 } else { 3833 if (ce->engine != engine) 3834 goto next; 3835 } 3836 3837 spin_lock(&ce->guc_state.lock); 3838 intel_engine_dump_active_requests(&ce->guc_state.requests, 3839 hung_rq, m); 3840 spin_unlock(&ce->guc_state.lock); 3841 3842 next: 3843 intel_context_put(ce); 3844 xa_lock(&guc->context_lookup); 3845 } 3846 xa_unlock_irqrestore(&guc->context_lookup, flags); 3847 } 3848 3849 void intel_guc_submission_print_info(struct intel_guc *guc, 3850 struct drm_printer *p) 3851 { 3852 struct i915_sched_engine *sched_engine = guc->sched_engine; 3853 struct rb_node *rb; 3854 unsigned long flags; 3855 3856 if (!sched_engine) 3857 return; 3858 3859 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 3860 atomic_read(&guc->outstanding_submission_g2h)); 3861 drm_printf(p, "GuC tasklet count: %u\n\n", 3862 atomic_read(&sched_engine->tasklet.count)); 3863 3864 spin_lock_irqsave(&sched_engine->lock, flags); 3865 drm_printf(p, "Requests in GuC submit tasklet:\n"); 3866 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 3867 struct i915_priolist *pl = to_priolist(rb); 3868 struct i915_request *rq; 3869 3870 priolist_for_each_request(rq, pl) 3871 drm_printf(p, "guc_id=%u, seqno=%llu\n", 3872 rq->context->guc_id.id, 3873 rq->fence.seqno); 3874 } 3875 spin_unlock_irqrestore(&sched_engine->lock, flags); 3876 drm_printf(p, "\n"); 3877 } 3878 3879 static inline void guc_log_context_priority(struct drm_printer *p, 3880 struct intel_context *ce) 3881 { 3882 int i; 3883 
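	/*
	 * Dump the context's current GuC priority and the number of in-flight
	 * requests in each priority band (index 0 == KMD_HIGH == highest).
	 */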
	drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
	drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
	for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
	     i < GUC_CLIENT_PRIORITY_NUM; ++i) {
		drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
			   i, ce->guc_state.prio_count[i]);
	}
	drm_printf(p, "\n");
}

static inline void guc_log_context(struct drm_printer *p,
				   struct intel_context *ce)
{
	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
		   ce->ring->head,
		   ce->lrc_reg_state[CTX_RING_HEAD]);
	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
		   ce->ring->tail,
		   ce->lrc_reg_state[CTX_RING_TAIL]);
	drm_printf(p, "\t\tContext Pin Count: %u\n",
		   atomic_read(&ce->pin_count));
	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
		   atomic_read(&ce->guc_id.ref));
	drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
		   ce->guc_state.sched_state);
}

void intel_guc_submission_print_context_info(struct intel_guc *guc,
					     struct drm_printer *p)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		GEM_BUG_ON(intel_context_is_child(ce));

		guc_log_context(p, ce);
		guc_log_context_priority(p, ce);

		if (intel_context_is_parent(ce)) {
			struct guc_process_desc *desc = __get_process_desc(ce);
			struct intel_context *child;

			drm_printf(p, "\t\tNumber children: %u\n",
				   ce->parallel.number_children);
			drm_printf(p, "\t\tWQI Head: %u\n",
				   READ_ONCE(desc->head));
			drm_printf(p, "\t\tWQI Tail: %u\n",
				   READ_ONCE(desc->tail));
			drm_printf(p, "\t\tWQI Status: %u\n\n",
				   READ_ONCE(desc->wq_status));

			if (ce->engine->emit_bb_start ==
			    emit_bb_start_parent_no_preempt_mid_batch) {
				u8 i;

				drm_printf(p, "\t\tChildren Go: %u\n\n",
					   get_children_go_value(ce));
				for (i = 0; i < ce->parallel.number_children; ++i)
					drm_printf(p, "\t\tChildren Join: %u\n",
						   get_children_join_value(ce, i));
			}

			for_each_child(ce, child)
				guc_log_context(p, child);
		}
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline u32 get_children_go_addr(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_parent(ce));

	return i915_ggtt_offset(ce->state) +
		__get_parent_scratch_offset(ce) +
		offsetof(struct parent_scratch, go.semaphore);
}

static inline u32 get_children_join_addr(struct intel_context *ce,
					 u8 child_index)
{
	GEM_BUG_ON(!intel_context_is_parent(ce));

	return i915_ggtt_offset(ce->state) +
		__get_parent_scratch_offset(ce) +
		offsetof(struct parent_scratch, join[child_index].semaphore);
}

/*
 * The parent and child batches handshake through semaphores in the parent's
 * scratch page: each child signals its join semaphore, the parent waits for
 * all of them and then releases every child at once via the shared go
 * semaphore. The _BB values gate entry into the batches (with preemption
 * disabled across the batch), while the _FINI_BREADCRUMB values repeat the
 * handshake in the fini breadcrumbs (where preemption is re-enabled).
 */
#define PARENT_GO_BB 1
#define PARENT_GO_FINI_BREADCRUMB 0
#define CHILD_GO_BB 1
#define CHILD_GO_FINI_BREADCRUMB 0
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
						     u64 offset, u32 len,
						     const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	u32 *cs;
	u8 i;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Wait on children */
	for (i = 0; i < ce->parallel.number_children; ++i) {
		*cs++ = (MI_SEMAPHORE_WAIT |
			 MI_SEMAPHORE_GLOBAL_GTT |
			 MI_SEMAPHORE_POLL |
			 MI_SEMAPHORE_SAD_EQ_SDD);
		*cs++ = PARENT_GO_BB;
		*cs++ = get_children_join_addr(ce, i);
		*cs++ = 0;
	}

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	/* Tell children go */
	cs = gen8_emit_ggtt_write(cs,
				  CHILD_GO_BB,
				  get_children_go_addr(ce),
				  0);

	/* Jump to batch */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
						    u64 offset, u32 len,
						    const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);
	u32 *cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_BB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_BB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Jump to batch */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						   u32 *cs)
{
	struct intel_context *ce = rq->context;
	u8 i;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	/* Wait on children */
	for (i = 0; i < ce->parallel.number_children; ++i) {
		*cs++ = (MI_SEMAPHORE_WAIT |
			 MI_SEMAPHORE_GLOBAL_GTT |
			 MI_SEMAPHORE_POLL |
			 MI_SEMAPHORE_SAD_EQ_SDD);
		*cs++ = PARENT_GO_FINI_BREADCRUMB;
		*cs++ = get_children_join_addr(ce, i);
		*cs++ = 0;
	}

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Tell children go */
	cs = gen8_emit_ggtt_write(cs,
				  CHILD_GO_FINI_BREADCRUMB,
				  get_children_go_addr(ce),
				  0);

	return cs;
}

/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB, but we still need to emit the fini
 * breadcrumb seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but the 2nd hit an error only 1 would be seen by the GuC backend).
 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
 * has occurred on any of the requests in submission / relationship.
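 * Such requests are flagged with I915_FENCE_FLAG_SKIP_PARALLEL, which is what
 * skip_handshake() below tests.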
 */
static inline bool skip_handshake(struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}

static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						 u32 *cs)
{
	struct intel_context *ce = rq->context;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
		 * the -6 comes from the length of the emits below (4 dwords
		 * for the seqno write, plus MI_USER_INTERRUPT and MI_NOOP).
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - 6));
		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
	} else {
		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						  u32 *cs)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);

	GEM_BUG_ON(!intel_context_is_child(ce));

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_FINI_BREADCRUMB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_FINI_BREADCRUMB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	return cs;
}

static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
		 * the -6 comes from the length of the emits below.
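		 * (4 dwords for the seqno write, plus MI_USER_INTERRUPT and
		 * MI_NOOP.)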
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - 6));
		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags)
{
	struct guc_virtual_engine *ve;
	struct intel_guc *guc;
	unsigned int n;
	int err;

	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	guc = &siblings[0]->gt->uc.guc;

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

	ve->base.cops = &virtual_guc_context_ops;
	ve->base.request_alloc = guc_request_alloc;
	ve->base.bump_serial = virtual_guc_bump_serial;

	ve->base.submit_request = guc_submit_request;

	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

	intel_context_init(&ve->context, &ve->base);

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		if (n != 0 && ve->base.class != sibling->class) {
			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
				  sibling->class, ve->base.class);
			err = -EINVAL;
			goto err_put;
		} else if (n == 0) {
			ve->base.class = sibling->class;
			ve->base.uabi_class = sibling->uabi_class;
			snprintf(ve->base.name, sizeof(ve->base.name),
				 "v%dx%d", ve->base.class, count);
			ve->base.context_size = sibling->context_size;

			ve->base.add_active_request =
				sibling->add_active_request;
			ve->base.remove_active_request =
				sibling->remove_active_request;
			ve->base.emit_bb_start = sibling->emit_bb_start;
			ve->base.emit_flush = sibling->emit_flush;
			ve->base.emit_init_breadcrumb =
				sibling->emit_init_breadcrumb;
			ve->base.emit_fini_breadcrumb =
				sibling->emit_fini_breadcrumb;
			ve->base.emit_fini_breadcrumb_dw =
				sibling->emit_fini_breadcrumb_dw;
			ve->base.breadcrumbs =
				intel_breadcrumbs_get(sibling->breadcrumbs);

			ve->base.flags |= sibling->flags;

			ve->base.props.timeslice_duration_ms =
				sibling->props.timeslice_duration_ms;
			ve->base.props.preempt_timeout_ms =
				sibling->props.preempt_timeout_ms;
		}
	}

	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

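/*
 * A virtual engine is considered to have a heartbeat if any of the physical
 * engines it may be scheduled onto has a non-zero heartbeat interval.
 */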
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (READ_ONCE(engine->props.heartbeat_interval_ms))
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#endif