/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES_H__
#define __INTEL_ENGINE_TYPES_H__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	7

#define I915_MAX_SLICES		3
#define I915_MAX_SUBSLICES	8

#define I915_CMD_HASH_ORDER	9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct i915_sched_engine;
struct intel_gt;
struct intel_ring;
struct intel_uncore;
struct intel_breadcrumbs;

typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct list_head timelines;
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 slice_common_extra[2];
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referenced in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	8
#define I915_MAX_VECS	4

/*
 * Engine IDs definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
	VCS4,
	VCS5,
	VCS6,
	VCS7,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
	VECS2,
	VECS3,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
};
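
/*
 * Illustrative sketch, not driver code: DECLARE_EWMA() above generates
 * struct ewma__engine_latency plus the ewma__engine_latency_init/add/read()
 * helpers from <linux/average.h>. A caller feeding the estimator a new
 * round-trip sample and reading back the weighted mean might look like:
 */
static inline unsigned long
__example_engine_latency(struct ewma__engine_latency *avg, unsigned long sample)
{
	ewma__engine_latency_add(avg, sample);	/* fold in the new sample */
	return ewma__engine_latency_read(avg);	/* current weighted average */
}
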
/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @ccid: identifier for contexts submitted to this engine
	 */
	u32 ccid;

	/**
	 * @yield: CCID at the time of the last semaphore-wait interrupt.
	 *
	 * Instead of leaving a semaphore busy-spinning on an engine, we would
	 * like to switch to another ready context, i.e. yielding the semaphore
	 * timeslice.
	 */
	u32 yield;

	/**
	 * @error_interrupt: CS Master EIR
	 *
	 * The CS generates an interrupt when it detects an error. We capture
	 * the first error interrupt, record the EIR and schedule the tasklet.
	 * In the tasklet, we process the pending CS events to ensure we have
	 * the guilty request, and then reset the engine.
	 *
	 * Low 16b are used by HW, with the upper 16b used as the enabling mask.
	 * Reserve the upper 16b for tracking internal errors.
	 */
	u32 error_interrupt;
#define ERROR_CSB	BIT(31)
#define ERROR_PREEMPT	BIT(30)

	/**
	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
	 */
	u32 reset_ccid;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @virtual: Queue of requests on a virtual engine, sorted by priority.
	 * Each RB entry is a struct i915_priolist containing a list of requests
	 * of the same priority.
	 */
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u64 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
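
/*
 * Illustrative sketch, not driver code: both inflight[] and pending[] above
 * are NULL-terminated by their sentinel slot, so the requests occupying the
 * ports can be walked without consulting port_mask:
 */
static inline unsigned int
__example_num_inflight(const struct intel_engine_execlists *el)
{
	struct i915_request * const *port;
	unsigned int count = 0;

	for (port = el->inflight; *port; port++)
		count++;	/* stop at the NULL sentinel */

	return count;
}
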
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
	 * domains need to be held for the duration of request activity,
	 * and 0 if none. We try to limit the duration of the hold as much
	 * as possible.
	 */
	enum forcewake_domains fw_domain;
	unsigned int fw_active;

	unsigned long context_tag;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct i915_sched_engine *sched_engine;

	/* keep a request in reserve for a [pm] barrier under oom */
	struct i915_request *request_pool;

	struct intel_context *hung_ce;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
		unsigned long blocked;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct file *default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is
	 * under ideal conditions.
	 */
	struct ewma__engine_latency latency;

	/* Keep track of all the seqno used, a trail of breadcrumbs */
	struct intel_breadcrumbs *breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);
	void (*irq_handler)(struct intel_engine_cs *engine, u16 iir);

	void (*sanitize)(struct intel_engine_cs *engine);
	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*bump_serial)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	void (*release)(struct intel_engine_cs *engine);

	/*
	 * Add / remove request from engine active tracking
	 */
	void (*add_active_request)(struct i915_request *rq);
	void (*remove_active_request)(struct i915_request *rq);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER		BIT(0)
#define I915_ENGINE_SUPPORTS_STATS		BIT(1)
#define I915_ENGINE_HAS_PREEMPTION		BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES		BIT(3)
#define I915_ENGINE_HAS_TIMESLICES		BIT(4)
#define I915_ENGINE_IS_VIRTUAL			BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO		BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER		BIT(7)
#define I915_ENGINE_WANT_FORCED_PREEMPTION	BIT(8)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
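
	/*
	 * Illustrative sketch, not driver code: given the mask returned
	 * above, a caller would decode a command's length (in dwords)
	 * roughly as
	 *
	 *	u32 mask = engine->get_cmd_length_mask(cmd_header);
	 *
	 *	if (!mask)
	 *		return -EINVAL;	// unrecognized command
	 *	length = (cmd_header & mask) + bias;
	 *
	 * where "bias" stands in for whatever fixed offset the opcode
	 * range defines (hypothetical here).
	 */
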
	struct {
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;

		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqcount_t lock;

		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where the engine is currently busy (active > 0).
		 */
		ktime_t total;

		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, busy as active > 0.
		 */
		ktime_t start;

		/**
		 * @rps: Utilisation at last RPS sampling.
		 */
		ktime_t rps;
	} stats;
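
	/*
	 * Illustrative sketch, not necessarily the driver's implementation:
	 * with the accounting above, the instantaneous busy time is the
	 * accumulated total plus the currently open busy period, sampled
	 * consistently under the seqcount:
	 *
	 *	unsigned int seq;
	 *	ktime_t busy;
	 *
	 *	do {
	 *		seq = read_seqcount_begin(&stats->lock);
	 *		busy = stats->total;
	 *		if (stats->active)
	 *			busy = ktime_add(busy, ktime_sub(ktime_get(),
	 *							 stats->start));
	 *	} while (read_seqcount_retry(&stats->lock, seq));
	 */
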
	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long max_busywait_duration_ns;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props, defaults;

	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return false;

	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						   subslice_)))
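
/*
 * Illustrative usage sketch, not driver code: the iterator above visits
 * every (slice, subslice) pair populated in the sseu masks, e.g. to fill
 * struct intel_instdone during state readout:
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
 *		instdone->sampler[slice][subslice] =
 *			read_sampler(i915, slice, subslice);
 *
 * where read_sampler() stands in for a hypothetical register accessor.
 */
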
#endif /* __INTEL_ENGINE_TYPES_H__ */