/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_breadcrumbs_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES		3
#define I915_MAX_SUBSLICES	8

#define I915_CMD_HASH_ORDER	9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_ring;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct list_head timelines;
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 slice_common_extra[2];
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position; also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine ID definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
};
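/*
 * Editor's illustration, not driver code: a minimal sketch of how the
 * _VCS() helper and intel_engine_mask_t compose. The function name is
 * hypothetical; it only shows that _VCS(n) maps a VCS instance onto
 * the global engine id space, with one mask bit per id.
 */
static inline intel_engine_mask_t example_all_vcs_mask(void)
{
	intel_engine_mask_t mask = 0;
	int n;

	/* set the bit for every possible video decode engine instance */
	for (n = 0; n < I915_MAX_VCS; n++)
		mask |= BIT(_VCS(n));

	return mask; /* BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) */
}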
/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @ccid: identifier for contexts submitted to this engine
	 */
	u32 ccid;

	/**
	 * @yield: CCID at the time of the last semaphore-wait interrupt.
	 *
	 * Instead of leaving a semaphore busy-spinning on an engine, we would
	 * like to switch to another ready context, i.e. yielding the
	 * semaphore timeslice.
	 */
	u32 yield;

	/**
	 * @error_interrupt: CS Master EIR
	 *
	 * The CS generates an interrupt when it detects an error. We capture
	 * the first error interrupt, record the EIR and schedule the tasklet.
	 * In the tasklet, we process the pending CS events to ensure we have
	 * the guilty request, and then reset the engine.
	 *
	 * Low 16b are used by HW, with the upper 16b used as the enabling
	 * mask. Reserve the upper 16b for tracking internal errors.
	 */
	u32 error_interrupt;
#define ERROR_CSB	BIT(31)
#define ERROR_PREEMPT	BIT(30)

	/**
	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
	 */
	u32 reset_ccid;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request we wanted to preempt but which has since completed, the
	 * priority hint at the time of dequeuing may no longer match the
	 * highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u64 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8
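/*
 * Editor's illustration, not driver code: @pending is written with a
 * trailing NULL sentinel (as @inflight is), and the ELSP exposes
 * port_mask + 1 ports, so counting the contexts last submitted to HW
 * can be sketched as below. The helper name is hypothetical.
 */
static inline unsigned int
example_num_pending(const struct intel_engine_execlists * const execlists)
{
	unsigned int n = 0;

	/* walk pending[] up to the NULL sentinel or the last port */
	while (n <= execlists->port_mask && execlists->pending[n])
		n++;

	return n;
}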
struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
	 * domains need to be held for the duration of request activity,
	 * and 0 if none. We try to limit the duration of the hold as much
	 * as possible.
	 */
	enum forcewake_domains fw_domain;
	unsigned int fw_active;

	unsigned long context_tag;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
		struct list_head hold; /* ready requests, but on hold */
	} active;

	/* keep a request in reserve for a [pm] barrier under oom */
	struct i915_request *request_pool;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
		unsigned long blocked;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct file *default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is under
	 * ideal conditions.
	 */
	struct ewma__engine_latency latency;
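	/*
	 * Editor's note, an illustrative sketch: DECLARE_EWMA() above
	 * generates the accessors for this member, so the estimate is
	 * fed and read as (the duration value shown is schematic):
	 *
	 *	ewma__engine_latency_add(&engine->latency, duration_ns);
	 *	avg = ewma__engine_latency_read(&engine->latency);
	 */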
	/* Keep track of all the seqno used, a trail of breadcrumbs */
	struct intel_breadcrumbs *breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	void (*sanitize)(struct intel_engine_cs *engine);
	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;
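	/*
	 * Editor's note, an illustrative sketch of the EMIT_* mode bits
	 * above (the call is schematic, not driver code): a full barrier,
	 * i.e. both an invalidation and a flush, would be requested as
	 *
	 *	err = engine->emit_flush(rq, EMIT_BARRIER);
	 *
	 * since EMIT_BARRIER is EMIT_INVALIDATE | EMIT_FLUSH.
	 */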
	/*
	 * Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Called when the priority of a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	void (*release)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER		BIT(0)
#define I915_ENGINE_SUPPORTS_STATS		BIT(1)
#define I915_ENGINE_HAS_PREEMPTION		BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES		BIT(3)
#define I915_ENGINE_HAS_TIMESLICES		BIT(4)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(5)
#define I915_ENGINE_IS_VIRTUAL			BIT(6)
#define I915_ENGINE_HAS_RELATIVE_MMIO		BIT(7)
#define I915_ENGINE_REQUIRES_CMD_PARSER		BIT(8)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command,
	 * or 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;

		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqcount_t lock;

		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time, not counting the most recent block in
		 * cases where the engine is currently busy (active > 0).
		 */
		ktime_t total;

		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active as active > 0.
		 */
		ktime_t start;

		/**
		 * @rps: Utilisation at last RPS sampling.
		 */
		ktime_t rps;
	} stats;
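	/*
	 * Editor's note, a sketch of the accounting the fields above imply
	 * (illustrative, not driver code; sampled under the @lock seqcount):
	 *
	 *	busy = stats.total;
	 *	if (stats.active)
	 *		busy = ktime_add(busy,
	 *				 ktime_sub(ktime_get(), stats.start));
	 */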
	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long max_busywait_duration_ns;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props, defaults;

	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return false;

	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						   subslice_)))

#endif /* __INTEL_ENGINE_TYPES__ */