/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;
struct msm_file_private;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *  + z180_gpu
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t *value, uint32_t *len);
	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t value, uint32_t len);
	int (*hw_init)(struct msm_gpu *gpu);

	/**
	 * @ucode_load: Optional hook to upload fw to GEM objs
	 */
	int (*ucode_load)(struct msm_gpu *gpu);

	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
			struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	/* note: gpu_busy() can assume that we have been pm_resumed */
	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	/* note: gpu_set_freq() can assume that we have been pm_resumed */
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			     bool suspended);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);

	/**
	 * progress: Has the GPU made progress?
	 *
	 * Return true if GPU position in cmdstream has advanced (or changed)
	 * since the last call.  To avoid false negatives, this should account
	 * for cmdstream that is buffered in this FIFO upstream of the CP fw.
	 */
	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};
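
/*
 * Illustrative sketch only: a backend implements these hooks and hands the
 * table to msm_gpu_init() (declared near the end of this header).  The
 * foo_* callbacks below are hypothetical names, not from any real backend:
 *
 *	static const struct msm_gpu_funcs foo_gpu_funcs = {
 *		.get_param = foo_get_param,
 *		.hw_init = foo_hw_init,
 *		.pm_suspend = foo_pm_suspend,
 *		.pm_resume = foo_pm_resume,
 *		.submit = foo_submit,
 *		.flush = foo_flush,
 *		.irq = foo_irq,
 *		.active_ring = foo_active_ring,
 *		.recover = foo_recover,
 *		.destroy = foo_destroy,
 *	};
 */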

/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * Shadow frequency used while the GPU is idle.  From the PoV of
	 * the devfreq governor, we are continuing to sample busyness and
	 * adjust frequency while the GPU is idle, but we use this shadow
	 * value as the GPU is actually clamped to minimum frequency while
	 * it is inactive.
	 */
	unsigned long idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq constraint after the boost period
	 * has elapsed
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};
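
/*
 * Illustrative sketch only: the intended call flow around submits, using
 * the helpers declared at the bottom of this header.  On the first
 * unretired submit the driver calls msm_devfreq_active(), which unclamps
 * the GPU and restores idle_freq; when the last submit retires it calls
 * msm_devfreq_idle(), which schedules idle_work to clamp the GPU to
 * minimum frequency while shadowing the governor-chosen value in
 * idle_freq:
 *
 *	msm_devfreq_active(gpu);	// idle -> active transition
 *	...
 *	msm_devfreq_idle(gpu);		// active -> idle transition
 */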
165af5b4fffSRob Clark
1667198e6b0SRob Clark struct msm_gpu {
1677198e6b0SRob Clark const char *name;
1687198e6b0SRob Clark struct drm_device *dev;
169eeb75474SRob Clark struct platform_device *pdev;
1707198e6b0SRob Clark const struct msm_gpu_funcs *funcs;
1717198e6b0SRob Clark
1729cba4056SRob Clark struct adreno_smmu_priv adreno_smmu;
1739cba4056SRob Clark
17470c70f09SRob Clark /* performance counters (hw & sw): */
17570c70f09SRob Clark spinlock_t perf_lock;
17670c70f09SRob Clark bool perfcntr_active;
17770c70f09SRob Clark struct {
17870c70f09SRob Clark bool active;
17970c70f09SRob Clark ktime_t time;
18070c70f09SRob Clark } last_sample;
18170c70f09SRob Clark uint32_t totaltime, activetime; /* sw counters */
18270c70f09SRob Clark uint32_t last_cntrs[5]; /* hw counters */
18370c70f09SRob Clark const struct msm_gpu_perfcntr *perfcntrs;
18470c70f09SRob Clark uint32_t num_perfcntrs;
18570c70f09SRob Clark
186f97decacSJordan Crouse struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
187f97decacSJordan Crouse int nr_rings;
1887198e6b0SRob Clark
1891d054c9bSRob Clark /**
19090f45c42SRob Clark * sysprof_active:
19190f45c42SRob Clark *
19290f45c42SRob Clark * The count of contexts that have enabled system profiling.
19390f45c42SRob Clark */
19490f45c42SRob Clark refcount_t sysprof_active;
19590f45c42SRob Clark
19690f45c42SRob Clark /**
1971d054c9bSRob Clark * cur_ctx_seqno:
1981d054c9bSRob Clark *
1991d054c9bSRob Clark * The ctx->seqno value of the last context to submit rendering,
2001d054c9bSRob Clark * and the one with current pgtables installed (for generations
2011d054c9bSRob Clark * that support per-context pgtables). Tracked by seqno rather
2021d054c9bSRob Clark * than pointer value to avoid dangling pointers, and cases where
2031d054c9bSRob Clark * a ctx can be freed and a new one created with the same address.
2041d054c9bSRob Clark */
2051d054c9bSRob Clark int cur_ctx_seqno;
2061d054c9bSRob Clark
2079bc95570SRob Clark /**
208c28e2f2bSRob Clark * lock:
209c28e2f2bSRob Clark *
210c28e2f2bSRob Clark * General lock for serializing all the gpu things.
211c28e2f2bSRob Clark *
212c28e2f2bSRob Clark * TODO move to per-ring locking where feasible (ie. submit/retire
213c28e2f2bSRob Clark * path, etc)
214c28e2f2bSRob Clark */
215c28e2f2bSRob Clark struct mutex lock;
216c28e2f2bSRob Clark
217c28e2f2bSRob Clark /**
2189bc95570SRob Clark * active_submits:
2199bc95570SRob Clark *
2209bc95570SRob Clark * The number of submitted but not yet retired submits, used to
2219bc95570SRob Clark * determine transitions between active and idle.
2229bc95570SRob Clark *
223c28e2f2bSRob Clark * Protected by active_lock
2249bc95570SRob Clark */
2259bc95570SRob Clark int active_submits;
2269bc95570SRob Clark
	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection:
	 */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	/**
	 * @allow_relocs: allow relocs in SUBMIT ioctl
	 *
	 * Mesa won't use relocs for driver version 1.4.0 and later.  This
	 * switch-over happened early enough in mesa a6xx bringup that we
	 * can disallow relocs for a6xx and newer.
	 */
	bool allow_relocs;

	struct thermal_cooling_device *cooling;
};
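
/*
 * Illustrative sketch only (not lifted from any particular generation):
 * a backend's submit path can compare cur_ctx_seqno against the
 * submitting context to decide whether a pagetable switch needs to be
 * emitted:
 *
 *	if (submit->queue->ctx->seqno != gpu->cur_ctx_seqno) {
 *		// emit cmds to install the context's pgtables
 *	}
 */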

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	if (!adreno_smmu)
		return NULL;

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
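
/*
 * Worked expansion of the above: both fields are programmed as log2 of
 * the size divided by 8, so BUFSZ gets ilog2(SZ_32K / 8) = ilog2(4096) = 12
 * and BLKSZ gets ilog2(32 / 8) = ilog2(4) = 2.
 */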

static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter..  but msm_gpu base class
 * will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};

/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
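
/*
 * With the drm_sched_priority enum values at the time of writing
 * (DRM_SCHED_PRIORITY_MIN == 0, DRM_SCHED_PRIORITY_HIGH == 2), this
 * evaluates to 3 scheduler priority levels per ring.
 */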

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/**
	 * comm: Overridden task comm, see MSM_PARAM_COMM
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *comm;

	/**
	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *cmdline;

	/**
	 * elapsed_ns:
	 *
	 * The total (cumulative) elapsed time GPU was busy with rendering
	 * from this context in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles elapsed attributed to this
	 * context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};

/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu:        the gpu instance
 * @prio:       the userspace priority level
 * @ring_nr:    [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}
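
/*
 * Worked example, assuming NR_SCHED_PRIORITIES evaluates to 3 (see above):
 * with nr_rings == 4 there are 12 userspace priority levels, 0..11, with
 * 0 the highest.  A userspace prio of 4 gives rn = 4 / 3 = 1 and
 * sp = 4 % 3 = 1, which inverts to sched_prio = 3 - 1 - 1 = 1, ie. the
 * middle of the three scheduler priorities on ring 1.
 */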

/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *             checking)
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables the jobs submitted via this queue use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @idr_lock:  for serializing access to fence_idr
 * @lock:      submitqueue lock for serializing submits on a queue
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct spinlock idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;
	char name[32];
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned. We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is latched
	 * when the lo is read, so make sure to read the lo first to trigger
	 * that
	 */
	val = (u64) msm_readl(gpu->mmio + (reg << 2));
	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
}
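
/*
 * Illustrative use only: these accessors take dword register offsets
 * rather than byte offsets (hence the "<< 2" above), and the 64-bit
 * variants assume the hi dword lives at reg + 1.  The REG_FOO_* names
 * here are made up:
 *
 *	gpu_write(gpu, REG_FOO_CTRL, 1);
 *	cycles = gpu_read64(gpu, REG_FOO_PERFCTR_LO);
 */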

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
	struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->lock);

	return state;
}

static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->lock);
}

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
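
/*
 * Illustrative use only, based on how kernel-owned buffers (eg. the
 * ringbuffers) are allocated; the exact helper and flags vary by caller:
 *
 *	msm_gem_kernel_new(gpu->dev, size,
 *		check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY),
 *		gpu->aspace, &bo, &iova);
 */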

#endif /* __MSM_GPU_H__ */