/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_breadcrumbs_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES		3
#define I915_MAX_SUBSLICES	8

#define I915_CMD_HASH_ORDER	9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_ring;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct list_head timelines;
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 slice_common_extra[2];
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 * offset: specifies the batch starting position, also helpful if we
 * want to have multiple batches at different offsets based on some
 * criteria. It is not a requirement at the moment but provides an
 * option for future use.
 * size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)
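
/*
 * Illustrative sketch (not part of this header): DECLARE_EWMA() above
 * generates the ewma__engine_latency_{init,add,read}() helpers, so a
 * caller feeding in a hypothetical latency sample "delay" might do:
 *
 *	struct ewma__engine_latency latency;
 *
 *	ewma__engine_latency_init(&latency);
 *	ewma__engine_latency_add(&latency, delay);
 *	estimate = ewma__engine_latency_read(&latency);
 *
 * Each new sample is blended in with a weight of 1/4: the second
 * DECLARE_EWMA() parameter gives the bits of fixed-point precision,
 * the third the reciprocal weight of a new sample.
 */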

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @ccid: identifier for contexts submitted to this engine
	 */
	u32 ccid;

	/**
	 * @yield: CCID at the time of the last semaphore-wait interrupt.
	 *
	 * Instead of leaving a semaphore busy-spinning on an engine, we would
	 * like to switch to another ready context, i.e. yielding the semaphore
	 * timeslice.
	 */
	u32 yield;

	/**
	 * @error_interrupt: CS Master EIR
	 *
	 * The CS generates an interrupt when it detects an error. We capture
	 * the first error interrupt, record the EIR and schedule the tasklet.
	 * In the tasklet, we process the pending CS events to ensure we have
	 * the guilty request, and then reset the engine.
	 *
	 * Low 16b are used by HW, with the upper 16b used as the enabling
	 * mask. We reserve the upper 16b for tracking internal errors.
	 */
	u32 error_interrupt;
#define ERROR_CSB	BIT(31)
#define ERROR_PREEMPT	BIT(30)

	/**
	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
	 */
	u32 reset_ccid;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
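
	/*
	 * Illustrative sketch of the promotion described above (simplified,
	 * not the actual submission code): on the CS event acknowledging
	 * the submission, @pending becomes the new @inflight set, where
	 * execlists_num_ports() is assumed to return @port_mask + 1:
	 *
	 *	memcpy(execlists->inflight, execlists->pending,
	 *	       execlists_num_ports(execlists) *
	 *	       sizeof(*execlists->pending));
	 *	smp_wmb(); // publish the flip before exposing it
	 *	WRITE_ONCE(execlists->active, execlists->inflight);
	 */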

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request we wanted to preempt but that has since completed, at
	 * the time of dequeuing the priority hint may no longer match the
	 * highest available request priority.
	 */
	int queue_priority_hint;
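
	/*
	 * Illustrative sketch (simplified, not the driver's actual
	 * preemption logic): the hint gives a cheap test before doing any
	 * work, with rq_prio() standing in for a helper that returns the
	 * scheduling priority of the currently executing request:
	 *
	 *	if (execlists->queue_priority_hint <= rq_prio(active_rq))
	 *		return false; // nothing queued outranks the HW
	 *	return true; // worth kicking the tasklet to preempt
	 */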

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u64 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
	 * domains need to be held for the duration of request activity,
	 * and 0 if none. We try to limit the duration of the hold as much
	 * as possible.
	 */
	enum forcewake_domains fw_domain;
	unsigned int fw_active;

	unsigned long context_tag;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
		struct list_head hold; /* ready requests, but on hold */
	} active;

	/* keep a request in reserve for a [pm] barrier under oom */
	struct i915_request *request_pool;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
		unsigned long blocked;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct file *default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is under
	 * ideal conditions.
	 */
	struct ewma__engine_latency latency;

	/* Keep track of all the seqno used, a trail of breadcrumbs */
	struct intel_breadcrumbs *breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enabled sampling events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	void (*sanitize)(struct intel_engine_cs *engine);
	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/*
	 * Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	void (*release)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER	BIT(0)
#define I915_ENGINE_SUPPORTS_STATS	BIT(1)
#define I915_ENGINE_HAS_PREEMPTION	BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES	BIT(3)
#define I915_ENGINE_HAS_TIMESLICES	BIT(4)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(5)
#define I915_ENGINE_IS_VIRTUAL		BIT(6)
#define I915_ENGINE_HAS_RELATIVE_MMIO	BIT(7)
#define I915_ENGINE_REQUIRES_CMD_PARSER	BIT(8)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;

		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqcount_t lock;

		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in
		 * cases where the engine is currently busy (active > 0).
		 */
		ktime_t total;

		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;

		/**
		 * @rps: Utilisation at last RPS sampling.
		 */
		ktime_t rps;
	} stats;
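
	/*
	 * Illustrative sketch of how the @stats fields above combine
	 * (simplified from the driver's busy-time query): while busy,
	 * the open interval since @start is added on top of @total:
	 *
	 *	busy = stats->total;
	 *	if (stats->active)
	 *		busy = ktime_add(busy,
	 *				 ktime_sub(ktime_get(), stats->start));
	 *
	 * Read under the @lock seqcount for a consistent snapshot.
	 */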

	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long max_busywait_duration_ns;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props, defaults;

	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return false;

	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						   subslice_)))
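
/*
 * Illustrative sketch (not part of this header): the iterator above
 * visits every populated slice/subslice pair, so dumping the sampler
 * values from a struct intel_instdone might look like this, with i915
 * and sseu assumed to point at the device and its sseu info:
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
 *		pr_info("sampler[%d][%d] = 0x%08x\n",
 *			slice, subslice,
 *			instdone->sampler[slice][subslice]);
 */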

#endif /* __INTEL_ENGINE_TYPES__ */