1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #ifndef __INTEL_ENGINE_TYPES__ 8 #define __INTEL_ENGINE_TYPES__ 9 10 #include <linux/average.h> 11 #include <linux/hashtable.h> 12 #include <linux/irq_work.h> 13 #include <linux/kref.h> 14 #include <linux/list.h> 15 #include <linux/llist.h> 16 #include <linux/rbtree.h> 17 #include <linux/timer.h> 18 #include <linux/types.h> 19 #include <linux/workqueue.h> 20 21 #include "i915_gem.h" 22 #include "i915_pmu.h" 23 #include "i915_priolist_types.h" 24 #include "i915_selftest.h" 25 #include "intel_sseu.h" 26 #include "intel_timeline_types.h" 27 #include "intel_uncore.h" 28 #include "intel_wakeref.h" 29 #include "intel_workarounds_types.h" 30 31 /* Legacy HW Engine ID */ 32 33 #define RCS0_HW 0 34 #define VCS0_HW 1 35 #define BCS0_HW 2 36 #define VECS0_HW 3 37 #define VCS1_HW 4 38 #define VCS2_HW 6 39 #define VCS3_HW 7 40 #define VECS1_HW 12 41 42 /* Gen11+ HW Engine class + instance */ 43 #define RENDER_CLASS 0 44 #define VIDEO_DECODE_CLASS 1 45 #define VIDEO_ENHANCEMENT_CLASS 2 46 #define COPY_ENGINE_CLASS 3 47 #define OTHER_CLASS 4 48 #define MAX_ENGINE_CLASS 4 49 #define MAX_ENGINE_INSTANCE 3 50 51 #define I915_MAX_SLICES 3 52 #define I915_MAX_SUBSLICES 8 53 54 #define I915_CMD_HASH_ORDER 9 55 56 struct dma_fence; 57 struct drm_i915_gem_object; 58 struct drm_i915_reg_table; 59 struct i915_gem_context; 60 struct i915_request; 61 struct i915_sched_attr; 62 struct intel_gt; 63 struct intel_ring; 64 struct intel_uncore; 65 66 typedef u8 intel_engine_mask_t; 67 #define ALL_ENGINES ((intel_engine_mask_t)~0ul) 68 69 struct intel_hw_status_page { 70 struct i915_vma *vma; 71 u32 *addr; 72 }; 73 74 struct intel_instdone { 75 u32 instdone; 76 /* The following exist only in the RCS engine */ 77 u32 slice_common; 78 u32 slice_common_extra[2]; 79 u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 80 u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 81 }; 82 83 /* 84 * we use a single page to load ctx workarounds so all of these 85 * values are referred in terms of dwords 86 * 87 * struct i915_wa_ctx_bb: 88 * offset: specifies batch starting position, also helpful in case 89 * if we want to have multiple batches at different offsets based on 90 * some criteria. It is not a requirement at the moment but provides 91 * an option for future use. 92 * size: size of the batch in DWORDS 93 */ 94 struct i915_ctx_workarounds { 95 struct i915_wa_ctx_bb { 96 u32 offset; 97 u32 size; 98 } indirect_ctx, per_ctx; 99 struct i915_vma *vma; 100 }; 101 102 #define I915_MAX_VCS 4 103 #define I915_MAX_VECS 2 104 105 /* 106 * Engine IDs definitions. 107 * Keep instances of the same type engine together. 108 */ 109 enum intel_engine_id { 110 RCS0 = 0, 111 BCS0, 112 VCS0, 113 VCS1, 114 VCS2, 115 VCS3, 116 #define _VCS(n) (VCS0 + (n)) 117 VECS0, 118 VECS1, 119 #define _VECS(n) (VECS0 + (n)) 120 I915_NUM_ENGINES 121 #define INVALID_ENGINE ((enum intel_engine_id)-1) 122 }; 123 124 /* A simple estimator for the round-trip latency of an engine */ 125 DECLARE_EWMA(_engine_latency, 6, 4) 126 127 struct st_preempt_hang { 128 struct completion completion; 129 unsigned int count; 130 }; 131 132 /** 133 * struct intel_engine_execlists - execlist submission queue and port state 134 * 135 * The struct intel_engine_execlists represents the combined logical state of 136 * driver and the hardware state for execlist mode of submission. 137 */ 138 struct intel_engine_execlists { 139 /** 140 * @tasklet: softirq tasklet for bottom handler 141 */ 142 struct tasklet_struct tasklet; 143 144 /** 145 * @timer: kick the current context if its timeslice expires 146 */ 147 struct timer_list timer; 148 149 /** 150 * @preempt: reset the current context if it fails to give way 151 */ 152 struct timer_list preempt; 153 154 /** 155 * @default_priolist: priority list for I915_PRIORITY_NORMAL 156 */ 157 struct i915_priolist default_priolist; 158 159 /** 160 * @ccid: identifier for contexts submitted to this engine 161 */ 162 u32 ccid; 163 164 /** 165 * @yield: CCID at the time of the last semaphore-wait interrupt. 166 * 167 * Instead of leaving a semaphore busy-spinning on an engine, we would 168 * like to switch to another ready context, i.e. yielding the semaphore 169 * timeslice. 170 */ 171 u32 yield; 172 173 /** 174 * @error_interrupt: CS Master EIR 175 * 176 * The CS generates an interrupt when it detects an error. We capture 177 * the first error interrupt, record the EIR and schedule the tasklet. 178 * In the tasklet, we process the pending CS events to ensure we have 179 * the guilty request, and then reset the engine. 180 * 181 * Low 16b are used by HW, with the upper 16b used as the enabling mask. 182 * Reserve the upper 16b for tracking internal errors. 183 */ 184 u32 error_interrupt; 185 #define ERROR_CSB BIT(31) 186 187 /** 188 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset 189 */ 190 u32 reset_ccid; 191 192 /** 193 * @no_priolist: priority lists disabled 194 */ 195 bool no_priolist; 196 197 /** 198 * @submit_reg: gen-specific execlist submission register 199 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to 200 * the ExecList Submission Queue Contents register array for Gen11+ 201 */ 202 u32 __iomem *submit_reg; 203 204 /** 205 * @ctrl_reg: the enhanced execlists control register, used to load the 206 * submit queue on the HW and to request preemptions to idle 207 */ 208 u32 __iomem *ctrl_reg; 209 210 #define EXECLIST_MAX_PORTS 2 211 /** 212 * @active: the currently known context executing on HW 213 */ 214 struct i915_request * const *active; 215 /** 216 * @inflight: the set of contexts submitted and acknowleged by HW 217 * 218 * The set of inflight contexts is managed by reading CS events 219 * from the HW. On a context-switch event (not preemption), we 220 * know the HW has transitioned from port0 to port1, and we 221 * advance our inflight/active tracking accordingly. 222 */ 223 struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */]; 224 /** 225 * @pending: the next set of contexts submitted to ELSP 226 * 227 * We store the array of contexts that we submit to HW (via ELSP) and 228 * promote them to the inflight array once HW has signaled the 229 * preemption or idle-to-active event. 230 */ 231 struct i915_request *pending[EXECLIST_MAX_PORTS + 1]; 232 233 /** 234 * @port_mask: number of execlist ports - 1 235 */ 236 unsigned int port_mask; 237 238 /** 239 * @switch_priority_hint: Second context priority. 240 * 241 * We submit multiple contexts to the HW simultaneously and would 242 * like to occasionally switch between them to emulate timeslicing. 243 * To know when timeslicing is suitable, we track the priority of 244 * the context submitted second. 245 */ 246 int switch_priority_hint; 247 248 /** 249 * @queue_priority_hint: Highest pending priority. 250 * 251 * When we add requests into the queue, or adjust the priority of 252 * executing requests, we compute the maximum priority of those 253 * pending requests. We can then use this value to determine if 254 * we need to preempt the executing requests to service the queue. 255 * However, since the we may have recorded the priority of an inflight 256 * request we wanted to preempt but since completed, at the time of 257 * dequeuing the priority hint may no longer may match the highest 258 * available request priority. 259 */ 260 int queue_priority_hint; 261 262 /** 263 * @queue: queue of requests, in priority lists 264 */ 265 struct rb_root_cached queue; 266 struct rb_root_cached virtual; 267 268 /** 269 * @csb_write: control register for Context Switch buffer 270 * 271 * Note this register may be either mmio or HWSP shadow. 272 */ 273 u32 *csb_write; 274 275 /** 276 * @csb_status: status array for Context Switch buffer 277 * 278 * Note these register may be either mmio or HWSP shadow. 279 */ 280 u32 *csb_status; 281 282 /** 283 * @csb_size: context status buffer FIFO size 284 */ 285 u8 csb_size; 286 287 /** 288 * @csb_head: context status buffer head 289 */ 290 u8 csb_head; 291 292 I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) 293 }; 294 295 #define INTEL_ENGINE_CS_MAX_NAME 8 296 297 struct intel_engine_cs { 298 struct drm_i915_private *i915; 299 struct intel_gt *gt; 300 struct intel_uncore *uncore; 301 char name[INTEL_ENGINE_CS_MAX_NAME]; 302 303 enum intel_engine_id id; 304 enum intel_engine_id legacy_idx; 305 306 unsigned int hw_id; 307 unsigned int guc_id; 308 309 intel_engine_mask_t mask; 310 311 u8 class; 312 u8 instance; 313 314 u16 uabi_class; 315 u16 uabi_instance; 316 317 u32 uabi_capabilities; 318 u32 context_size; 319 u32 mmio_base; 320 321 /* 322 * Some w/a require forcewake to be held (which prevents RC6) while 323 * a particular engine is active. If so, we set fw_domain to which 324 * domains need to be held for the duration of request activity, 325 * and 0 if none. We try to limit the duration of the hold as much 326 * as possible. 327 */ 328 enum forcewake_domains fw_domain; 329 atomic_t fw_active; 330 331 unsigned long context_tag; 332 333 struct rb_node uabi_node; 334 335 struct intel_sseu sseu; 336 337 struct { 338 spinlock_t lock; 339 struct list_head requests; 340 struct list_head hold; /* ready requests, but on hold */ 341 } active; 342 343 /* keep a request in reserve for a [pm] barrier under oom */ 344 struct i915_request *request_pool; 345 346 struct llist_head barrier_tasks; 347 348 struct intel_context *kernel_context; /* pinned */ 349 350 intel_engine_mask_t saturated; /* submitting semaphores too late? */ 351 352 struct { 353 struct delayed_work work; 354 struct i915_request *systole; 355 unsigned long blocked; 356 } heartbeat; 357 358 unsigned long serial; 359 360 unsigned long wakeref_serial; 361 struct intel_wakeref wakeref; 362 struct file *default_state; 363 364 struct { 365 struct intel_ring *ring; 366 struct intel_timeline *timeline; 367 } legacy; 368 369 /* 370 * We track the average duration of the idle pulse on parking the 371 * engine to keep an estimate of the how the fast the engine is 372 * under ideal conditions. 373 */ 374 struct ewma__engine_latency latency; 375 376 /* Rather than have every client wait upon all user interrupts, 377 * with the herd waking after every interrupt and each doing the 378 * heavyweight seqno dance, we delegate the task (of being the 379 * bottom-half of the user interrupt) to the first client. After 380 * every interrupt, we wake up one client, who does the heavyweight 381 * coherent seqno read and either goes back to sleep (if incomplete), 382 * or wakes up all the completed clients in parallel, before then 383 * transferring the bottom-half status to the next client in the queue. 384 * 385 * Compared to walking the entire list of waiters in a single dedicated 386 * bottom-half, we reduce the latency of the first waiter by avoiding 387 * a context switch, but incur additional coherent seqno reads when 388 * following the chain of request breadcrumbs. Since it is most likely 389 * that we have a single client waiting on each seqno, then reducing 390 * the overhead of waking that client is much preferred. 391 */ 392 struct intel_breadcrumbs { 393 spinlock_t irq_lock; 394 struct list_head signalers; 395 396 struct list_head signaled_requests; 397 398 struct irq_work irq_work; /* for use from inside irq_lock */ 399 400 unsigned int irq_enabled; 401 402 bool irq_armed; 403 } breadcrumbs; 404 405 struct intel_engine_pmu { 406 /** 407 * @enable: Bitmask of enable sample events on this engine. 408 * 409 * Bits correspond to sample event types, for instance 410 * I915_SAMPLE_QUEUED is bit 0 etc. 411 */ 412 u32 enable; 413 /** 414 * @enable_count: Reference count for the enabled samplers. 415 * 416 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 417 */ 418 unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; 419 /** 420 * @sample: Counter values for sampling events. 421 * 422 * Our internal timer stores the current counters in this field. 423 * 424 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 425 */ 426 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; 427 } pmu; 428 429 struct intel_hw_status_page status_page; 430 struct i915_ctx_workarounds wa_ctx; 431 struct i915_wa_list ctx_wa_list; 432 struct i915_wa_list wa_list; 433 struct i915_wa_list whitelist; 434 435 u32 irq_keep_mask; /* always keep these interrupts */ 436 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 437 void (*irq_enable)(struct intel_engine_cs *engine); 438 void (*irq_disable)(struct intel_engine_cs *engine); 439 440 void (*sanitize)(struct intel_engine_cs *engine); 441 int (*resume)(struct intel_engine_cs *engine); 442 443 struct { 444 void (*prepare)(struct intel_engine_cs *engine); 445 446 void (*rewind)(struct intel_engine_cs *engine, bool stalled); 447 void (*cancel)(struct intel_engine_cs *engine); 448 449 void (*finish)(struct intel_engine_cs *engine); 450 } reset; 451 452 void (*park)(struct intel_engine_cs *engine); 453 void (*unpark)(struct intel_engine_cs *engine); 454 455 void (*set_default_submission)(struct intel_engine_cs *engine); 456 457 const struct intel_context_ops *cops; 458 459 int (*request_alloc)(struct i915_request *rq); 460 461 int (*emit_flush)(struct i915_request *request, u32 mode); 462 #define EMIT_INVALIDATE BIT(0) 463 #define EMIT_FLUSH BIT(1) 464 #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) 465 int (*emit_bb_start)(struct i915_request *rq, 466 u64 offset, u32 length, 467 unsigned int dispatch_flags); 468 #define I915_DISPATCH_SECURE BIT(0) 469 #define I915_DISPATCH_PINNED BIT(1) 470 int (*emit_init_breadcrumb)(struct i915_request *rq); 471 u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, 472 u32 *cs); 473 unsigned int emit_fini_breadcrumb_dw; 474 475 /* Pass the request to the hardware queue (e.g. directly into 476 * the legacy ringbuffer or to the end of an execlist). 477 * 478 * This is called from an atomic context with irqs disabled; must 479 * be irq safe. 480 */ 481 void (*submit_request)(struct i915_request *rq); 482 483 /* 484 * Called on signaling of a SUBMIT_FENCE, passing along the signaling 485 * request down to the bonded pairs. 486 */ 487 void (*bond_execute)(struct i915_request *rq, 488 struct dma_fence *signal); 489 490 /* 491 * Call when the priority on a request has changed and it and its 492 * dependencies may need rescheduling. Note the request itself may 493 * not be ready to run! 494 */ 495 void (*schedule)(struct i915_request *request, 496 const struct i915_sched_attr *attr); 497 498 void (*release)(struct intel_engine_cs *engine); 499 500 struct intel_engine_execlists execlists; 501 502 /* 503 * Keep track of completed timelines on this engine for early 504 * retirement with the goal of quickly enabling powersaving as 505 * soon as the engine is idle. 506 */ 507 struct intel_timeline *retire; 508 struct work_struct retire_work; 509 510 /* status_notifier: list of callbacks for context-switch changes */ 511 struct atomic_notifier_head context_status_notifier; 512 513 #define I915_ENGINE_USING_CMD_PARSER BIT(0) 514 #define I915_ENGINE_SUPPORTS_STATS BIT(1) 515 #define I915_ENGINE_HAS_PREEMPTION BIT(2) 516 #define I915_ENGINE_HAS_SEMAPHORES BIT(3) 517 #define I915_ENGINE_HAS_TIMESLICES BIT(4) 518 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) 519 #define I915_ENGINE_IS_VIRTUAL BIT(6) 520 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) 521 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) 522 unsigned int flags; 523 524 /* 525 * Table of commands the command parser needs to know about 526 * for this engine. 527 */ 528 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); 529 530 /* 531 * Table of registers allowed in commands that read/write registers. 532 */ 533 const struct drm_i915_reg_table *reg_tables; 534 int reg_table_count; 535 536 /* 537 * Returns the bitmask for the length field of the specified command. 538 * Return 0 for an unrecognized/invalid command. 539 * 540 * If the command parser finds an entry for a command in the engine's 541 * cmd_tables, it gets the command's length based on the table entry. 542 * If not, it calls this function to determine the per-engine length 543 * field encoding for the command (i.e. different opcode ranges use 544 * certain bits to encode the command length in the header). 545 */ 546 u32 (*get_cmd_length_mask)(u32 cmd_header); 547 548 struct { 549 /** 550 * @active: Number of contexts currently scheduled in. 551 */ 552 atomic_t active; 553 554 /** 555 * @lock: Lock protecting the below fields. 556 */ 557 seqlock_t lock; 558 559 /** 560 * @total: Total time this engine was busy. 561 * 562 * Accumulated time not counting the most recent block in cases 563 * where engine is currently busy (active > 0). 564 */ 565 ktime_t total; 566 567 /** 568 * @start: Timestamp of the last idle to active transition. 569 * 570 * Idle is defined as active == 0, active is active > 0. 571 */ 572 ktime_t start; 573 574 /** 575 * @rps: Utilisation at last RPS sampling. 576 */ 577 ktime_t rps; 578 } stats; 579 580 struct { 581 unsigned long heartbeat_interval_ms; 582 unsigned long max_busywait_duration_ns; 583 unsigned long preempt_timeout_ms; 584 unsigned long stop_timeout_ms; 585 unsigned long timeslice_duration_ms; 586 } props, defaults; 587 }; 588 589 static inline bool 590 intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) 591 { 592 return engine->flags & I915_ENGINE_USING_CMD_PARSER; 593 } 594 595 static inline bool 596 intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) 597 { 598 return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; 599 } 600 601 static inline bool 602 intel_engine_supports_stats(const struct intel_engine_cs *engine) 603 { 604 return engine->flags & I915_ENGINE_SUPPORTS_STATS; 605 } 606 607 static inline bool 608 intel_engine_has_preemption(const struct intel_engine_cs *engine) 609 { 610 return engine->flags & I915_ENGINE_HAS_PREEMPTION; 611 } 612 613 static inline bool 614 intel_engine_has_semaphores(const struct intel_engine_cs *engine) 615 { 616 return engine->flags & I915_ENGINE_HAS_SEMAPHORES; 617 } 618 619 static inline bool 620 intel_engine_has_timeslices(const struct intel_engine_cs *engine) 621 { 622 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 623 return false; 624 625 return engine->flags & I915_ENGINE_HAS_TIMESLICES; 626 } 627 628 static inline bool 629 intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) 630 { 631 return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; 632 } 633 634 static inline bool 635 intel_engine_is_virtual(const struct intel_engine_cs *engine) 636 { 637 return engine->flags & I915_ENGINE_IS_VIRTUAL; 638 } 639 640 static inline bool 641 intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) 642 { 643 return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; 644 } 645 646 #define instdone_has_slice(dev_priv___, sseu___, slice___) \ 647 ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) 648 649 #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ 650 (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ 651 intel_sseu_has_subslice(sseu__, 0, subslice__)) 652 653 #define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ 654 for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ 655 (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ 656 (slice_) += ((subslice_) == 0)) \ 657 for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ 658 (instdone_has_subslice(dev_priv_, sseu_, slice_, \ 659 subslice_))) 660 #endif /* __INTEL_ENGINE_TYPES_H__ */ 661