1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #ifndef __INTEL_ENGINE_TYPES__ 8 #define __INTEL_ENGINE_TYPES__ 9 10 #include <linux/average.h> 11 #include <linux/hashtable.h> 12 #include <linux/irq_work.h> 13 #include <linux/kref.h> 14 #include <linux/list.h> 15 #include <linux/llist.h> 16 #include <linux/rbtree.h> 17 #include <linux/timer.h> 18 #include <linux/types.h> 19 #include <linux/workqueue.h> 20 21 #include "i915_gem.h" 22 #include "i915_pmu.h" 23 #include "i915_priolist_types.h" 24 #include "i915_selftest.h" 25 #include "intel_breadcrumbs_types.h" 26 #include "intel_sseu.h" 27 #include "intel_timeline_types.h" 28 #include "intel_uncore.h" 29 #include "intel_wakeref.h" 30 #include "intel_workarounds_types.h" 31 32 /* Legacy HW Engine ID */ 33 34 #define RCS0_HW 0 35 #define VCS0_HW 1 36 #define BCS0_HW 2 37 #define VECS0_HW 3 38 #define VCS1_HW 4 39 #define VCS2_HW 6 40 #define VCS3_HW 7 41 #define VECS1_HW 12 42 43 /* Gen11+ HW Engine class + instance */ 44 #define RENDER_CLASS 0 45 #define VIDEO_DECODE_CLASS 1 46 #define VIDEO_ENHANCEMENT_CLASS 2 47 #define COPY_ENGINE_CLASS 3 48 #define OTHER_CLASS 4 49 #define MAX_ENGINE_CLASS 4 50 #define MAX_ENGINE_INSTANCE 3 51 52 #define I915_MAX_SLICES 3 53 #define I915_MAX_SUBSLICES 8 54 55 #define I915_CMD_HASH_ORDER 9 56 57 struct dma_fence; 58 struct drm_i915_gem_object; 59 struct drm_i915_reg_table; 60 struct i915_gem_context; 61 struct i915_request; 62 struct i915_sched_attr; 63 struct intel_gt; 64 struct intel_ring; 65 struct intel_uncore; 66 67 typedef u8 intel_engine_mask_t; 68 #define ALL_ENGINES ((intel_engine_mask_t)~0ul) 69 70 struct intel_hw_status_page { 71 struct i915_vma *vma; 72 u32 *addr; 73 }; 74 75 struct intel_instdone { 76 u32 instdone; 77 /* The following exist only in the RCS engine */ 78 u32 slice_common; 79 u32 slice_common_extra[2]; 80 u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 81 u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 82 }; 83 84 /* 85 * we use a single page to load ctx workarounds so all of these 86 * values are referred in terms of dwords 87 * 88 * struct i915_wa_ctx_bb: 89 * offset: specifies batch starting position, also helpful in case 90 * if we want to have multiple batches at different offsets based on 91 * some criteria. It is not a requirement at the moment but provides 92 * an option for future use. 93 * size: size of the batch in DWORDS 94 */ 95 struct i915_ctx_workarounds { 96 struct i915_wa_ctx_bb { 97 u32 offset; 98 u32 size; 99 } indirect_ctx, per_ctx; 100 struct i915_vma *vma; 101 }; 102 103 #define I915_MAX_VCS 4 104 #define I915_MAX_VECS 2 105 106 /* 107 * Engine IDs definitions. 108 * Keep instances of the same type engine together. 109 */ 110 enum intel_engine_id { 111 RCS0 = 0, 112 BCS0, 113 VCS0, 114 VCS1, 115 VCS2, 116 VCS3, 117 #define _VCS(n) (VCS0 + (n)) 118 VECS0, 119 VECS1, 120 #define _VECS(n) (VECS0 + (n)) 121 I915_NUM_ENGINES 122 #define INVALID_ENGINE ((enum intel_engine_id)-1) 123 }; 124 125 /* A simple estimator for the round-trip latency of an engine */ 126 DECLARE_EWMA(_engine_latency, 6, 4) 127 128 struct st_preempt_hang { 129 struct completion completion; 130 unsigned int count; 131 }; 132 133 /** 134 * struct intel_engine_execlists - execlist submission queue and port state 135 * 136 * The struct intel_engine_execlists represents the combined logical state of 137 * driver and the hardware state for execlist mode of submission. 138 */ 139 struct intel_engine_execlists { 140 /** 141 * @tasklet: softirq tasklet for bottom handler 142 */ 143 struct tasklet_struct tasklet; 144 145 /** 146 * @timer: kick the current context if its timeslice expires 147 */ 148 struct timer_list timer; 149 150 /** 151 * @preempt: reset the current context if it fails to give way 152 */ 153 struct timer_list preempt; 154 155 /** 156 * @default_priolist: priority list for I915_PRIORITY_NORMAL 157 */ 158 struct i915_priolist default_priolist; 159 160 /** 161 * @ccid: identifier for contexts submitted to this engine 162 */ 163 u32 ccid; 164 165 /** 166 * @yield: CCID at the time of the last semaphore-wait interrupt. 167 * 168 * Instead of leaving a semaphore busy-spinning on an engine, we would 169 * like to switch to another ready context, i.e. yielding the semaphore 170 * timeslice. 171 */ 172 u32 yield; 173 174 /** 175 * @error_interrupt: CS Master EIR 176 * 177 * The CS generates an interrupt when it detects an error. We capture 178 * the first error interrupt, record the EIR and schedule the tasklet. 179 * In the tasklet, we process the pending CS events to ensure we have 180 * the guilty request, and then reset the engine. 181 * 182 * Low 16b are used by HW, with the upper 16b used as the enabling mask. 183 * Reserve the upper 16b for tracking internal errors. 184 */ 185 u32 error_interrupt; 186 #define ERROR_CSB BIT(31) 187 188 /** 189 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset 190 */ 191 u32 reset_ccid; 192 193 /** 194 * @no_priolist: priority lists disabled 195 */ 196 bool no_priolist; 197 198 /** 199 * @submit_reg: gen-specific execlist submission register 200 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to 201 * the ExecList Submission Queue Contents register array for Gen11+ 202 */ 203 u32 __iomem *submit_reg; 204 205 /** 206 * @ctrl_reg: the enhanced execlists control register, used to load the 207 * submit queue on the HW and to request preemptions to idle 208 */ 209 u32 __iomem *ctrl_reg; 210 211 #define EXECLIST_MAX_PORTS 2 212 /** 213 * @active: the currently known context executing on HW 214 */ 215 struct i915_request * const *active; 216 /** 217 * @inflight: the set of contexts submitted and acknowleged by HW 218 * 219 * The set of inflight contexts is managed by reading CS events 220 * from the HW. On a context-switch event (not preemption), we 221 * know the HW has transitioned from port0 to port1, and we 222 * advance our inflight/active tracking accordingly. 223 */ 224 struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */]; 225 /** 226 * @pending: the next set of contexts submitted to ELSP 227 * 228 * We store the array of contexts that we submit to HW (via ELSP) and 229 * promote them to the inflight array once HW has signaled the 230 * preemption or idle-to-active event. 231 */ 232 struct i915_request *pending[EXECLIST_MAX_PORTS + 1]; 233 234 /** 235 * @port_mask: number of execlist ports - 1 236 */ 237 unsigned int port_mask; 238 239 /** 240 * @switch_priority_hint: Second context priority. 241 * 242 * We submit multiple contexts to the HW simultaneously and would 243 * like to occasionally switch between them to emulate timeslicing. 244 * To know when timeslicing is suitable, we track the priority of 245 * the context submitted second. 246 */ 247 int switch_priority_hint; 248 249 /** 250 * @queue_priority_hint: Highest pending priority. 251 * 252 * When we add requests into the queue, or adjust the priority of 253 * executing requests, we compute the maximum priority of those 254 * pending requests. We can then use this value to determine if 255 * we need to preempt the executing requests to service the queue. 256 * However, since the we may have recorded the priority of an inflight 257 * request we wanted to preempt but since completed, at the time of 258 * dequeuing the priority hint may no longer may match the highest 259 * available request priority. 260 */ 261 int queue_priority_hint; 262 263 /** 264 * @queue: queue of requests, in priority lists 265 */ 266 struct rb_root_cached queue; 267 struct rb_root_cached virtual; 268 269 /** 270 * @csb_write: control register for Context Switch buffer 271 * 272 * Note this register may be either mmio or HWSP shadow. 273 */ 274 u32 *csb_write; 275 276 /** 277 * @csb_status: status array for Context Switch buffer 278 * 279 * Note these register may be either mmio or HWSP shadow. 280 */ 281 u64 *csb_status; 282 283 /** 284 * @csb_size: context status buffer FIFO size 285 */ 286 u8 csb_size; 287 288 /** 289 * @csb_head: context status buffer head 290 */ 291 u8 csb_head; 292 293 I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) 294 }; 295 296 #define INTEL_ENGINE_CS_MAX_NAME 8 297 298 struct intel_engine_cs { 299 struct drm_i915_private *i915; 300 struct intel_gt *gt; 301 struct intel_uncore *uncore; 302 char name[INTEL_ENGINE_CS_MAX_NAME]; 303 304 enum intel_engine_id id; 305 enum intel_engine_id legacy_idx; 306 307 unsigned int hw_id; 308 unsigned int guc_id; 309 310 intel_engine_mask_t mask; 311 312 u8 class; 313 u8 instance; 314 315 u16 uabi_class; 316 u16 uabi_instance; 317 318 u32 uabi_capabilities; 319 u32 context_size; 320 u32 mmio_base; 321 322 /* 323 * Some w/a require forcewake to be held (which prevents RC6) while 324 * a particular engine is active. If so, we set fw_domain to which 325 * domains need to be held for the duration of request activity, 326 * and 0 if none. We try to limit the duration of the hold as much 327 * as possible. 328 */ 329 enum forcewake_domains fw_domain; 330 atomic_t fw_active; 331 332 unsigned long context_tag; 333 334 struct rb_node uabi_node; 335 336 struct intel_sseu sseu; 337 338 struct { 339 spinlock_t lock; 340 struct list_head requests; 341 struct list_head hold; /* ready requests, but on hold */ 342 } active; 343 344 /* keep a request in reserve for a [pm] barrier under oom */ 345 struct i915_request *request_pool; 346 347 struct llist_head barrier_tasks; 348 349 struct intel_context *kernel_context; /* pinned */ 350 351 intel_engine_mask_t saturated; /* submitting semaphores too late? */ 352 353 struct { 354 struct delayed_work work; 355 struct i915_request *systole; 356 unsigned long blocked; 357 } heartbeat; 358 359 unsigned long serial; 360 361 unsigned long wakeref_serial; 362 struct intel_wakeref wakeref; 363 struct file *default_state; 364 365 struct { 366 struct intel_ring *ring; 367 struct intel_timeline *timeline; 368 } legacy; 369 370 /* 371 * We track the average duration of the idle pulse on parking the 372 * engine to keep an estimate of the how the fast the engine is 373 * under ideal conditions. 374 */ 375 struct ewma__engine_latency latency; 376 377 /* Keep track of all the seqno used, a trail of breadcrumbs */ 378 struct intel_breadcrumbs *breadcrumbs; 379 380 struct intel_engine_pmu { 381 /** 382 * @enable: Bitmask of enable sample events on this engine. 383 * 384 * Bits correspond to sample event types, for instance 385 * I915_SAMPLE_QUEUED is bit 0 etc. 386 */ 387 u32 enable; 388 /** 389 * @enable_count: Reference count for the enabled samplers. 390 * 391 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 392 */ 393 unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; 394 /** 395 * @sample: Counter values for sampling events. 396 * 397 * Our internal timer stores the current counters in this field. 398 * 399 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 400 */ 401 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; 402 } pmu; 403 404 struct intel_hw_status_page status_page; 405 struct i915_ctx_workarounds wa_ctx; 406 struct i915_wa_list ctx_wa_list; 407 struct i915_wa_list wa_list; 408 struct i915_wa_list whitelist; 409 410 u32 irq_keep_mask; /* always keep these interrupts */ 411 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 412 void (*irq_enable)(struct intel_engine_cs *engine); 413 void (*irq_disable)(struct intel_engine_cs *engine); 414 415 void (*sanitize)(struct intel_engine_cs *engine); 416 int (*resume)(struct intel_engine_cs *engine); 417 418 struct { 419 void (*prepare)(struct intel_engine_cs *engine); 420 421 void (*rewind)(struct intel_engine_cs *engine, bool stalled); 422 void (*cancel)(struct intel_engine_cs *engine); 423 424 void (*finish)(struct intel_engine_cs *engine); 425 } reset; 426 427 void (*park)(struct intel_engine_cs *engine); 428 void (*unpark)(struct intel_engine_cs *engine); 429 430 void (*set_default_submission)(struct intel_engine_cs *engine); 431 432 const struct intel_context_ops *cops; 433 434 int (*request_alloc)(struct i915_request *rq); 435 436 int (*emit_flush)(struct i915_request *request, u32 mode); 437 #define EMIT_INVALIDATE BIT(0) 438 #define EMIT_FLUSH BIT(1) 439 #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) 440 int (*emit_bb_start)(struct i915_request *rq, 441 u64 offset, u32 length, 442 unsigned int dispatch_flags); 443 #define I915_DISPATCH_SECURE BIT(0) 444 #define I915_DISPATCH_PINNED BIT(1) 445 int (*emit_init_breadcrumb)(struct i915_request *rq); 446 u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, 447 u32 *cs); 448 unsigned int emit_fini_breadcrumb_dw; 449 450 /* Pass the request to the hardware queue (e.g. directly into 451 * the legacy ringbuffer or to the end of an execlist). 452 * 453 * This is called from an atomic context with irqs disabled; must 454 * be irq safe. 455 */ 456 void (*submit_request)(struct i915_request *rq); 457 458 /* 459 * Called on signaling of a SUBMIT_FENCE, passing along the signaling 460 * request down to the bonded pairs. 461 */ 462 void (*bond_execute)(struct i915_request *rq, 463 struct dma_fence *signal); 464 465 /* 466 * Call when the priority on a request has changed and it and its 467 * dependencies may need rescheduling. Note the request itself may 468 * not be ready to run! 469 */ 470 void (*schedule)(struct i915_request *request, 471 const struct i915_sched_attr *attr); 472 473 void (*release)(struct intel_engine_cs *engine); 474 475 struct intel_engine_execlists execlists; 476 477 /* 478 * Keep track of completed timelines on this engine for early 479 * retirement with the goal of quickly enabling powersaving as 480 * soon as the engine is idle. 481 */ 482 struct intel_timeline *retire; 483 struct work_struct retire_work; 484 485 /* status_notifier: list of callbacks for context-switch changes */ 486 struct atomic_notifier_head context_status_notifier; 487 488 #define I915_ENGINE_USING_CMD_PARSER BIT(0) 489 #define I915_ENGINE_SUPPORTS_STATS BIT(1) 490 #define I915_ENGINE_HAS_PREEMPTION BIT(2) 491 #define I915_ENGINE_HAS_SEMAPHORES BIT(3) 492 #define I915_ENGINE_HAS_TIMESLICES BIT(4) 493 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) 494 #define I915_ENGINE_IS_VIRTUAL BIT(6) 495 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) 496 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) 497 unsigned int flags; 498 499 /* 500 * Table of commands the command parser needs to know about 501 * for this engine. 502 */ 503 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); 504 505 /* 506 * Table of registers allowed in commands that read/write registers. 507 */ 508 const struct drm_i915_reg_table *reg_tables; 509 int reg_table_count; 510 511 /* 512 * Returns the bitmask for the length field of the specified command. 513 * Return 0 for an unrecognized/invalid command. 514 * 515 * If the command parser finds an entry for a command in the engine's 516 * cmd_tables, it gets the command's length based on the table entry. 517 * If not, it calls this function to determine the per-engine length 518 * field encoding for the command (i.e. different opcode ranges use 519 * certain bits to encode the command length in the header). 520 */ 521 u32 (*get_cmd_length_mask)(u32 cmd_header); 522 523 struct { 524 /** 525 * @active: Number of contexts currently scheduled in. 526 */ 527 atomic_t active; 528 529 /** 530 * @lock: Lock protecting the below fields. 531 */ 532 seqlock_t lock; 533 534 /** 535 * @total: Total time this engine was busy. 536 * 537 * Accumulated time not counting the most recent block in cases 538 * where engine is currently busy (active > 0). 539 */ 540 ktime_t total; 541 542 /** 543 * @start: Timestamp of the last idle to active transition. 544 * 545 * Idle is defined as active == 0, active is active > 0. 546 */ 547 ktime_t start; 548 549 /** 550 * @rps: Utilisation at last RPS sampling. 551 */ 552 ktime_t rps; 553 } stats; 554 555 struct { 556 unsigned long heartbeat_interval_ms; 557 unsigned long max_busywait_duration_ns; 558 unsigned long preempt_timeout_ms; 559 unsigned long stop_timeout_ms; 560 unsigned long timeslice_duration_ms; 561 } props, defaults; 562 }; 563 564 static inline bool 565 intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) 566 { 567 return engine->flags & I915_ENGINE_USING_CMD_PARSER; 568 } 569 570 static inline bool 571 intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) 572 { 573 return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; 574 } 575 576 static inline bool 577 intel_engine_supports_stats(const struct intel_engine_cs *engine) 578 { 579 return engine->flags & I915_ENGINE_SUPPORTS_STATS; 580 } 581 582 static inline bool 583 intel_engine_has_preemption(const struct intel_engine_cs *engine) 584 { 585 return engine->flags & I915_ENGINE_HAS_PREEMPTION; 586 } 587 588 static inline bool 589 intel_engine_has_semaphores(const struct intel_engine_cs *engine) 590 { 591 return engine->flags & I915_ENGINE_HAS_SEMAPHORES; 592 } 593 594 static inline bool 595 intel_engine_has_timeslices(const struct intel_engine_cs *engine) 596 { 597 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 598 return false; 599 600 return engine->flags & I915_ENGINE_HAS_TIMESLICES; 601 } 602 603 static inline bool 604 intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) 605 { 606 return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; 607 } 608 609 static inline bool 610 intel_engine_is_virtual(const struct intel_engine_cs *engine) 611 { 612 return engine->flags & I915_ENGINE_IS_VIRTUAL; 613 } 614 615 static inline bool 616 intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) 617 { 618 return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; 619 } 620 621 #define instdone_has_slice(dev_priv___, sseu___, slice___) \ 622 ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) 623 624 #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ 625 (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ 626 intel_sseu_has_subslice(sseu__, 0, subslice__)) 627 628 #define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ 629 for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ 630 (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ 631 (slice_) += ((subslice_) == 0)) \ 632 for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ 633 (instdone_has_subslice(dev_priv_, sseu_, slice_, \ 634 subslice_))) 635 #endif /* __INTEL_ENGINE_TYPES_H__ */ 636