/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;
struct msm_file_private;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t *value, uint32_t *len);
	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t value, uint32_t len);
	int (*hw_init)(struct msm_gpu *gpu);
	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
			struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	/* note: gpu_busy() can assume that we have been pm_resumed */
	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	/* note: gpu_set_freq() can assume that we have been pm_resumed */
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			     bool suspended);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};
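
/*
 * A minimal, hypothetical backend would wire the vtable up roughly like
 * this (illustrative sketch only -- the foo_gpu_* names are made up; the
 * real implementations live in the adreno backends, e.g. a6xx_gpu.c):
 *
 *	static const struct msm_gpu_funcs foo_gpu_funcs = {
 *		.hw_init = foo_gpu_hw_init,
 *		.pm_suspend = foo_gpu_pm_suspend,
 *		.pm_resume = foo_gpu_pm_resume,
 *		.submit = foo_gpu_submit,
 *		.flush = foo_gpu_flush,
 *		.irq = foo_gpu_irq,
 *		.active_ring = foo_gpu_active_ring,
 *		.recover = foo_gpu_recover,
 *		.destroy = foo_gpu_destroy,
 *	};
 */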

/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * A PM QoS constraint to limit max freq while the GPU is idle.
	 */
	struct dev_pm_qos_request idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/** average_status: Averaged status for the current sampling window. */
	struct devfreq_dev_status average_status;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq constraint after the boost period
	 * has elapsed.
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};
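
/*
 * Rough sketch of how the fields above are used on the devfreq
 * get_dev_status() path (illustrative only; the real logic lives in
 * msm_gpu_devfreq.c):
 *
 *	mutex_lock(&df->lock);
 *	cycles = gpu->funcs->gpu_busy(gpu, &sample_rate);
 *	busy_cycles = cycles - df->busy_cycles;   // delta since last sample
 *	df->busy_cycles = cycles;
 *	df->time = ktime_get();
 *	mutex_unlock(&df->lock);
 *
 * i.e. gpu_busy() returns a free-running cycle counter; only the delta
 * between two sampling periods (scaled by sample_rate) is meaningful.
 */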

struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/**
	 * sysprof_active:
	 *
	 * The count of contexts that have enabled system profiling.
	 */
	refcount_t sysprof_active;

	/**
	 * cur_ctx_seqno:
	 *
	 * The ctx->seqno value of the last context to submit rendering,
	 * and the one with current pgtables installed (for generations
	 * that support per-context pgtables).  Tracked by seqno rather
	 * than pointer value to avoid dangling pointers, and cases where
	 * a ctx can be freed and a new one created with the same address.
	 */
	int cur_ctx_seqno;

	/**
	 * lock:
	 *
	 * General lock for serializing all the gpu things.
	 *
	 * TODO move to per-ring locking where feasible (ie. submit/retire
	 * path, etc)
	 */
	struct mutex lock;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock.
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection: */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* Enable clamping to idle freq when inactive: */
	bool clamp_to_idle;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	struct thermal_cooling_device *cooling;

	/* To poll for cx gdsc collapse during gpu recovery */
	struct reset_control *cx_collapse;
};
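
/*
 * Sketch of the intended active_submits accounting on the submit path
 * (illustrative only; the real code is in msm_gpu.c):
 *
 *	mutex_lock(&gpu->active_lock);
 *	if (gpu->active_submits++ == 0)
 *		msm_devfreq_active(gpu);
 *	mutex_unlock(&gpu->active_lock);
 *
 * with the mirror-image decrement (and msm_devfreq_idle()) on retire.
 */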

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	if (!adreno_smmu)
		return NULL;

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}
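
/*
 * dev_to_gpu() maps the GPU platform device back to the msm_gpu instance
 * via the adreno_smmu_priv drvdata.  A hypothetical caller (sketch only,
 * e.g. a runtime PM callback) would look like:
 *
 *	static int foo_gpu_runtime_suspend(struct device *dev)
 *	{
 *		struct msm_gpu *gpu = dev_to_gpu(dev);
 *
 *		if (!gpu)
 *			return 0;   // drvdata not yet set during probe
 *
 *		return gpu->funcs->pm_suspend(gpu);
 *	}
 */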

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
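
/*
 * For reference, with the values above these work out to compile-time
 * constants: BUFSZ = ilog2(32768 / 8) = 12 and BLKSZ = ilog2(32 / 8) = 2.
 */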

static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter..  but msm_gpu base class
 * will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};

/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/** comm: Overridden task comm, see MSM_PARAM_COMM */
	char *comm;

	/** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */
	char *cmdline;

	/**
	 * elapsed_ns:
	 *
	 * The total (cumulative) elapsed time the GPU was busy with
	 * rendering from this context, in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles elapsed attributed to this
	 * context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};
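
/*
 * The entities table above is conceptually a 2D array indexed by ring and
 * scheduler priority; a lookup is expected to be along the lines of
 * (sketch only, see msm_submitqueue.c for the real code):
 *
 *	idx = (ring_nr * NR_SCHED_PRIORITIES) + sched_prio;
 *	entity = ctx->entities[idx];
 */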

/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu:        the gpu instance
 * @prio:       the userspace priority level
 * @ring_nr:    [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}
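
/*
 * Worked example: assuming NR_SCHED_PRIORITIES == 3 and nr_rings == 4, the
 * 12 userspace priority levels map as follows (0 is highest priority):
 *
 *	userspace_prio 0  ->  ring 0, sched_prio 2 (highest)
 *	userspace_prio 1  ->  ring 0, sched_prio 1
 *	userspace_prio 2  ->  ring 0, sched_prio 0
 *	userspace_prio 3  ->  ring 1, sched_prio 2
 *	...
 *	userspace_prio 11 ->  ring 3, sched_prio 0 (lowest)
 *
 * userspace_prio 12 and up would fail the rn >= gpu->nr_rings check.
 */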

/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:         userspace id for the submitqueue, unique within the drm_file
 * @flags:      userspace flags for the submitqueue, specified at creation
 *              (currently unused)
 * @ring_nr:    the ringbuffer used by this submitqueue, which is determined
 *              by the submitqueue's priority
 * @faults:     the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *              checking)
 * @ctx:        the per-drm_file context associated with the submitqueue (ie.
 *              which set of pgtables do the jobs associated with the
 *              submitqueue use)
 * @node:       node in the context's list of submitqueues
 * @fence_idr:  maps fence-id to dma_fence for userspace visible fence
 *              seqno, protected by submitqueue lock
 * @idr_lock:   for serializing access to fence_idr
 * @lock:       submitqueue lock for serializing submits on a queue
 * @ref:        reference count
 * @entity:     the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct mutex idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;
	char name[32];
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}
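
/*
 * Note: the register offsets passed to the accessors above are dword
 * indices, while gpu->mmio is byte addressed, hence the "<< 2".  A
 * hypothetical use (REG_FOO / FOO_* are made-up names here):
 *
 *	gpu_rmw(gpu, REG_FOO, FOO_ENABLE_MASK, FOO_ENABLE);
 *
 * which reads REG_FOO, clears the mask bits, ORs in the enable bit, and
 * writes the result back.
 */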

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned. We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is
	 * latched when the lo is read, so make sure to read the lo first
	 * to trigger that.
	 */
	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
		struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}
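
/*
 * Typical usage of the refcount helpers (sketch only): anything that
 * stashes a pointer to the context beyond the lifetime of the ioctl takes
 * a reference, and drops it when done, e.g.:
 *
 *	queue->ctx = msm_file_private_get(ctx);
 *	...
 *	msm_file_private_put(queue->ctx);
 */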

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->lock);

	return state;
}

static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->lock);
}
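
/*
 * A backend's probe path is expected to set up its config and hand off to
 * msm_gpu_init(), along the lines of (hypothetical sketch; the ioname and
 * foo_gpu_* names are assumptions here):
 *
 *	struct msm_gpu_config config = {
 *		.ioname = "kgsl_3d0_reg_memory",
 *		.nr_rings = 1,
 *	};
 *
 *	ret = msm_gpu_init(drm, pdev, gpu, &foo_gpu_funcs, "foo", &config);
 */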

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))

#endif /* __MSM_GPU_H__ */