/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	7

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define I915_CMD_HASH_ORDER 9

/* Forward declarations: these types are only referenced by pointer here. */
struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct i915_sched_engine;
struct intel_gt;
struct intel_ring;
struct intel_uncore;
struct intel_breadcrumbs;

/* Bitmask type used to select sets of engines (see ALL_ENGINES, engine->mask) */
typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

/*
 * Per-engine hardware status page: the backing VMA, its CPU mapping (addr)
 * and the list of timelines using it.
 */
struct intel_hw_status_page {
	struct list_head timelines;
	struct i915_vma *vma;
	u32 *addr;
};

/* Snapshot of the per-engine INSTDONE debug registers */
struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 slice_common_extra[2];
	u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
	u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];

	/* Added in XeHPG */
	u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
};

/*
 * we use a single page to load ctx workarounds so all of these
 * values are referenced in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    if we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

/* Maximum instance counts, matching the VCS/VECS ids in the enum below */
#define I915_MAX_VCS	8
#define I915_MAX_VECS	4

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
	VCS4,
	VCS5,
	VCS6,
	VCS7,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
	VECS2,
	VECS3,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

/* Selftest bookkeeping for injected preemption hangs (see preempt_hang below) */
struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @ccid: identifier for contexts submitted to this engine
	 */
	u32 ccid;

	/**
	 * @yield: CCID at the time of the last semaphore-wait interrupt.
	 *
	 * Instead of leaving a semaphore busy-spinning on an engine, we would
	 * like to switch to another ready context, i.e. yielding the semaphore
	 * timeslice.
	 */
	u32 yield;

	/**
	 * @error_interrupt: CS Master EIR
	 *
	 * The CS generates an interrupt when it detects an error. We capture
	 * the first error interrupt, record the EIR and schedule the tasklet.
	 * In the tasklet, we process the pending CS events to ensure we have
	 * the guilty request, and then reset the engine.
	 *
	 * Low 16b are used by HW, with the upper 16b used as the enabling mask.
	 * Reserve the upper 16b for tracking internal errors.
	 */
	u32 error_interrupt;
#define ERROR_CSB	BIT(31)
#define ERROR_PREEMPT	BIT(30)

	/**
	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
	 */
	u32 reset_ccid;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @virtual: Queue of requests on a virtual engine, sorted by priority.
	 * Each RB entry is a struct i915_priolist containing a list of requests
	 * of the same priority.
	 */
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u64 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

/* Maximum length of an engine name, see intel_engine_cs.name */
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int guc_id;

	intel_engine_mask_t mask;
	/**
	 * @logical_mask: logical mask of engine, reported to user space via
	 * query IOCTL and used to communicate with the GuC in logical space.
	 * The logical instance of a physical engine can change based on product
	 * and fusing.
	 */
	intel_engine_mask_t logical_mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
	 * domains need to be held for the duration of request activity,
	 * and 0 if none. We try to limit the duration of the hold as much
	 * as possible.
	 */
	enum forcewake_domains fw_domain;
	unsigned int fw_active;

	unsigned long context_tag;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct i915_sched_engine *sched_engine;

	/* keep a request in reserve for a [pm] barrier under oom */
	struct i915_request *request_pool;

	struct intel_context *hung_ce;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	/**
	 * pinned_contexts_list: List of pinned contexts. This list is only
	 * assumed to be manipulated during driver load- or unload time and
	 * does therefore not have any additional protection.
	 */
	struct list_head pinned_contexts_list;

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
		unsigned long blocked;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct file *default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is
	 * under ideal conditions.
	 */
	struct ewma__engine_latency latency;

	/* Keep track of all the seqno used, a trail of breadcrumbs */
	struct intel_breadcrumbs *breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);
	void (*irq_handler)(struct intel_engine_cs *engine, u16 iir);

	void (*sanitize)(struct intel_engine_cs *engine);
	int (*resume)(struct intel_engine_cs *engine);

	/* Backend hooks for the stages of an engine reset */
	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*bump_serial)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	void (*release)(struct intel_engine_cs *engine);

	/*
	 * Add / remove request from engine active tracking
	 */
	void (*add_active_request)(struct i915_request *rq);
	void (*remove_active_request)(struct i915_request *rq);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
#define I915_ENGINE_IS_VIRTUAL       BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;

		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqcount_t lock;

		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;

		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;

		/**
		 * @rps: Utilisation at last RPS sampling.
		 */
		ktime_t rps;
	} stats;

	/* Tunables (props) and their original values (defaults) */
	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long max_busywait_duration_ns;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props, defaults;

	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};

/* Helpers testing the capability bits in engine->flags */

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
	/* Timeslicing is compiled out when the duration Kconfig is 0 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return false;

	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

/*
 * Does @sseu___ have @slice___? On GRAPHICS_VER 7 there is only slice 0,
 * otherwise consult the sseu slice_mask.
 */
#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

/*
 * Does @sseu__ have @subslice__ within @slice__? On GRAPHICS_VER 7 only
 * subslice 0 exists, otherwise ask the sseu helper.
 */
#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

/*
 * Iterate over every (slice, subslice) pair present in @sseu_, skipping
 * fused-off combinations. Note @slice_ and @subslice_ are evaluated
 * multiple times per step; pass plain lvalues only.
 */
#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						    subslice_)))

/*
 * XeHP variant: walk a flat DSS index @iter_ and derive the (gslice, dss)
 * coordinates from it, visiting only subslices present in @sseu_.
 */
#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
	     (iter_) < GEN_MAX_SUBSLICES; \
	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))

#endif /* __INTEL_ENGINE_TYPES__ */