/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
	int (*hw_init)(struct msm_gpu *gpu);
	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
			struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	unsigned long (*gpu_busy)(struct msm_gpu *gpu);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};

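/*
 * A minimal, hypothetical sketch of how a generation-specific backend might
 * populate the vtable above (the my_gen_*() callbacks are placeholder names
 * for illustration only, not functions that exist anywhere in the driver):
 *
 *	static const struct msm_gpu_funcs my_gen_funcs = {
 *		.get_param   = my_gen_get_param,
 *		.hw_init     = my_gen_hw_init,
 *		.pm_suspend  = my_gen_pm_suspend,
 *		.pm_resume   = my_gen_pm_resume,
 *		.submit      = my_gen_submit,
 *		.flush       = my_gen_flush,
 *		.irq         = my_gen_irq,
 *		.active_ring = my_gen_active_ring,
 *		.recover     = my_gen_recover,
 *		.destroy     = my_gen_destroy,
 *	};
 */
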
/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/**
	 * busy_cycles:
	 *
	 * Used by implementation of gpu->gpu_busy() to track the last
	 * busy counter value, for calculating elapsed busy cycles since
	 * last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/**
	 * idle_freq:
	 *
	 * Shadow frequency used while the GPU is idle.  From the PoV of
	 * the devfreq governor, we are continuing to sample busyness and
	 * adjust frequency while the GPU is idle, but we use this shadow
	 * value as the GPU is actually clamped to minimum frequency while
	 * it is inactive.
	 */
	unsigned long idle_freq;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;
};

struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/*
	 * List of GEM active objects on this gpu.  Protected by
	 * msm_drm_private::mm_lock
	 */
	struct list_head active_list;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/* number of GPU hangs (for all contexts) */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection:
	 */
#define DRM_MSM_INACTIVE_PERIOD  66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* Enable clamping to idle freq when inactive: */
	bool clamp_to_idle;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	struct thermal_cooling_device *cooling;
};

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))

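/*
 * Worked example (informational only): with the sizes defined above,
 *
 *	ilog2(MSM_GPU_RINGBUFFER_SZ / 8)      = ilog2(SZ_32K / 8) = ilog2(4096) = 12
 *	ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8) = ilog2(32 / 8)     = ilog2(4)    = 2
 *
 * ie. the BUFSZ and BLKSZ fields in MSM_GPU_RB_CNTL_DEFAULT are programmed
 * as log2 of the respective size divided by 8, which is exactly what the
 * macro above expands to.
 */
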
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (ring->seqno > ring->memptrs->fence)
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the
 * child class that actually enables the perf counter, but the msm_gpu
 * base class will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};

/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};

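/*
 * Purely illustrative sketch (the real lookup, including lazy creation of
 * the entities, lives elsewhere in the driver): one plausible way to index
 * the flat entities[] table above by ring number and scheduler priority.
 * The helper name is hypothetical and not part of the driver's API.
 */
static inline struct drm_sched_entity *
msm_file_private_entity_sketch(struct msm_file_private *ctx,
		unsigned int ring_nr, enum drm_sched_priority sched_prio)
{
	/* assume NR_SCHED_PRIORITIES consecutive entries per ring */
	return ctx->entities[ring_nr * NR_SCHED_PRIORITIES + sched_prio];
}
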
/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu:        the gpu instance
 * @prio:       the userspace priority level
 * @ring_nr:    [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}

/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables do jobs submitted to this submitqueue
 *             use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @lock:      submitqueue lock
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}

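/*
 * Usage sketch for the accessors above.  The register names below are
 * hypothetical placeholders (not defined by this driver), and 'reg' is a
 * dword offset, hence the (reg << 2) byte-offset conversion in the helpers:
 *
 *	gpu_write(gpu, REG_EXAMPLE_CTRL, 0x1);
 *	status = gpu_read(gpu, REG_EXAMPLE_STATUS);
 *	gpu_rmw(gpu, REG_EXAMPLE_CTRL, 0xf0, 0x30);
 *
 * where gpu_rmw() performs a read-modify-write, updating only the masked
 * bits of the register.
 */
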
static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned. We could do a series of validity checks if we
	 * wanted to, but is a readq() really that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is latched
	 * when the lo is read, so make sure to read the lo first to trigger
	 * that.
	 */
	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
	struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->dev->struct_mutex);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->dev->struct_mutex);

	return state;
}

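/*
 * Illustrative usage sketch only: take a reference on the most recent crash
 * state, consume it, then drop the reference via msm_gpu_crashstate_put()
 * below (which in turn calls the backend's gpu_state_put()):
 *
 *	struct msm_gpu_state *state = msm_gpu_crashstate_get(gpu);
 *
 *	if (state) {
 *		... inspect/dump state ...
 *		msm_gpu_crashstate_put(gpu);
 *	}
 */
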
static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->dev->struct_mutex);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->dev->struct_mutex);
}

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))


#endif /* __MSM_GPU_H__ */