xref: /openbmc/linux/drivers/gpu/drm/msm/msm_gpu.h (revision 42854f8d)
1caab277bSThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-only */
27198e6b0SRob Clark /*
37198e6b0SRob Clark  * Copyright (C) 2013 Red Hat
47198e6b0SRob Clark  * Author: Rob Clark <robdclark@gmail.com>
57198e6b0SRob Clark  */
67198e6b0SRob Clark 
77198e6b0SRob Clark #ifndef __MSM_GPU_H__
87198e6b0SRob Clark #define __MSM_GPU_H__
97198e6b0SRob Clark 
109cba4056SRob Clark #include <linux/adreno-smmu-priv.h>
117198e6b0SRob Clark #include <linux/clk.h>
1278f815c1SChia-I Wu #include <linux/devfreq.h>
13fcf9d0b7SJordan Crouse #include <linux/interconnect.h>
141f60d114SSharat Masetty #include <linux/pm_opp.h>
157198e6b0SRob Clark #include <linux/regulator/consumer.h>
167198e6b0SRob Clark 
177198e6b0SRob Clark #include "msm_drv.h"
18ca762a8aSRob Clark #include "msm_fence.h"
197198e6b0SRob Clark #include "msm_ringbuffer.h"
20604234f3SJordan Crouse #include "msm_gem.h"
217198e6b0SRob Clark 
227198e6b0SRob Clark struct msm_gem_submit;
2370c70f09SRob Clark struct msm_gpu_perfcntr;
24e00e473dSJordan Crouse struct msm_gpu_state;
2510199333SRob Clark struct msm_file_private;
267198e6b0SRob Clark 
/* Per-instance hw config passed in at GPU init time */
struct msm_gpu_config {
	const char *ioname;		/* name of the MMIO region to map — presumably matched against platform resources; TODO confirm consumer */
	unsigned int nr_rings;		/* number of ringbuffers (bounded by MSM_GPU_MAX_RINGS, see msm_gpu::rb) */
};
315770fc7aSJordan Crouse 
327198e6b0SRob Clark /* So far, with hardware that I've seen to date, we can have:
337198e6b0SRob Clark  *  + zero, one, or two z180 2d cores
347198e6b0SRob Clark  *  + a3xx or a2xx 3d core, which share a common CP (the firmware
357198e6b0SRob Clark  *    for the CP seems to implement some different PM4 packet types
367198e6b0SRob Clark  *    but the basics of cmdstream submission are the same)
377198e6b0SRob Clark  *
387198e6b0SRob Clark  * Which means that the eventual complete "class" hierarchy, once
397198e6b0SRob Clark  * support for all past and present hw is in place, becomes:
407198e6b0SRob Clark  *  + msm_gpu
417198e6b0SRob Clark  *    + adreno_gpu
427198e6b0SRob Clark  *      + a3xx_gpu
437198e6b0SRob Clark  *      + a2xx_gpu
447198e6b0SRob Clark  *    + z180_gpu
457198e6b0SRob Clark  */
467198e6b0SRob Clark struct msm_gpu_funcs {
47f98f915bSRob Clark 	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
484bfba716SRob Clark 			 uint32_t param, uint64_t *value, uint32_t *len);
49f7ddbf55SRob Clark 	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
504bfba716SRob Clark 			 uint32_t param, uint64_t value, uint32_t len);
517198e6b0SRob Clark 	int (*hw_init)(struct msm_gpu *gpu);
528ead9678SRob Clark 
538ead9678SRob Clark 	/**
548ead9678SRob Clark 	 * @ucode_load: Optional hook to upload fw to GEM objs
558ead9678SRob Clark 	 */
568ead9678SRob Clark 	int (*ucode_load)(struct msm_gpu *gpu);
578ead9678SRob Clark 
587198e6b0SRob Clark 	int (*pm_suspend)(struct msm_gpu *gpu);
597198e6b0SRob Clark 	int (*pm_resume)(struct msm_gpu *gpu);
6015eb9ad0SJordan Crouse 	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
61f97decacSJordan Crouse 	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
627198e6b0SRob Clark 	irqreturn_t (*irq)(struct msm_gpu *irq);
63f97decacSJordan Crouse 	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
64bd6f82d8SRob Clark 	void (*recover)(struct msm_gpu *gpu);
657198e6b0SRob Clark 	void (*destroy)(struct msm_gpu *gpu);
66c878a628SArnd Bergmann #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
677198e6b0SRob Clark 	/* show GPU status in debugfs: */
684f776f45SJordan Crouse 	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
69c0fec7f5SJordan Crouse 			struct drm_printer *p);
70331dc0bcSRob Clark 	/* for generation specific debugfs: */
717ce84471SWambui Karuga 	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
727198e6b0SRob Clark #endif
736694482aSDouglas Anderson 	/* note: gpu_busy() can assume that we have been pm_resumed */
7415c41198SChia-I Wu 	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
75e00e473dSJordan Crouse 	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
76c0fec7f5SJordan Crouse 	int (*gpu_state_put)(struct msm_gpu_state *state);
77de0a3d09SSharat Masetty 	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
786694482aSDouglas Anderson 	/* note: gpu_set_freq() can assume that we have been pm_resumed */
796694482aSDouglas Anderson 	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
806694482aSDouglas Anderson 			     bool suspended);
81ccac7ce3SJordan Crouse 	struct msm_gem_address_space *(*create_address_space)
82ccac7ce3SJordan Crouse 		(struct msm_gpu *gpu, struct platform_device *pdev);
83933415e2SJordan Crouse 	struct msm_gem_address_space *(*create_private_address_space)
84933415e2SJordan Crouse 		(struct msm_gpu *gpu);
858907afb4SJordan Crouse 	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
86d73b1d02SRob Clark 
87d73b1d02SRob Clark 	/**
88d73b1d02SRob Clark 	 * progress: Has the GPU made progress?
89d73b1d02SRob Clark 	 *
90d73b1d02SRob Clark 	 * Return true if GPU position in cmdstream has advanced (or changed)
91d73b1d02SRob Clark 	 * since the last call.  To avoid false negatives, this should account
92d73b1d02SRob Clark 	 * for cmdstream that is buffered in this FIFO upstream of the CP fw.
93d73b1d02SRob Clark 	 */
94d73b1d02SRob Clark 	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
957198e6b0SRob Clark };
967198e6b0SRob Clark 
/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;		/* pagetable base (TTBR0) captured at fault time */
	unsigned long iova;	/* the faulting GPU address */
	int flags;		/* iommu fault flags */
	const char *type;	/* human-readable fault type string */
	const char *block;	/* hw block the fault is attributed to — TODO confirm where this is derived */
};
105e25e92e0SRob Clark 
/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * Shadow frequency used while the GPU is idle.  From the PoV of
	 * the devfreq governor, we are continuing to sample busyness and
	 * adjust frequency while the GPU is idle, but we use this shadow
	 * value as the GPU is actually clamped to minimum frequency while
	 * it is inactive.
	 */
	unsigned long idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle: */
	ktime_t idle_time;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq constraint after the boost period has
	 * elapsed
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};
165af5b4fffSRob Clark 
struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	/* Generation-specific function table (see struct msm_gpu_funcs) */
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	/* Ringbuffers; nr_rings is how many are actually in use */
	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/**
	 * sysprof_active:
	 *
	 * The count of contexts that have enabled system profiling.
	 */
	refcount_t sysprof_active;

	/**
	 * cur_ctx_seqno:
	 *
	 * The ctx->seqno value of the last context to submit rendering,
	 * and the one with current pgtables installed (for generations
	 * that support per-context pgtables).  Tracked by seqno rather
	 * than pointer value to avoid dangling pointers, and cases where
	 * a ctx can be freed and a new one created with the same address.
	 */
	int cur_ctx_seqno;

	/**
	 * lock:
	 *
	 * General lock for serializing all the gpu things.
	 *
	 * TODO move to per-ring locking where feasible (ie. submit/retire
	 * path, etc)
	 */
	struct mutex lock;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	/* Register MMIO mapping and interrupt line: */
	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection:
	 */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	/* Most recently captured crash state, if any (refcounted) */
	struct msm_gpu_state *crashstate;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	/**
	 * @allow_relocs: allow relocs in SUBMIT ioctl
	 *
	 * Mesa won't use relocs for driver version 1.4.0 and later.  This
	 * switch-over happened early enough in mesa a6xx bringup that we
	 * can disallow relocs for a6xx and newer.
	 */
	bool allow_relocs;

	struct thermal_cooling_device *cooling;
};
2997198e6b0SRob Clark 
dev_to_gpu(struct device * dev)30069a9313bSRob Clark static inline struct msm_gpu *dev_to_gpu(struct device *dev)
30169a9313bSRob Clark {
3029cba4056SRob Clark 	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
30376efc245SAkhil P Oommen 
30476efc245SAkhil P Oommen 	if (!adreno_smmu)
30576efc245SAkhil P Oommen 		return NULL;
30676efc245SAkhil P Oommen 
3079cba4056SRob Clark 	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
30869a9313bSRob Clark }
30969a9313bSRob Clark 
310f97decacSJordan Crouse /* It turns out that all targets use the same ringbuffer size */
311f97decacSJordan Crouse #define MSM_GPU_RINGBUFFER_SZ SZ_32K
3124d87fc32SJordan Crouse #define MSM_GPU_RINGBUFFER_BLKSIZE 32
3134d87fc32SJordan Crouse 
3144d87fc32SJordan Crouse #define MSM_GPU_RB_CNTL_DEFAULT \
3154d87fc32SJordan Crouse 		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
3164d87fc32SJordan Crouse 		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
317f97decacSJordan Crouse 
msm_gpu_active(struct msm_gpu * gpu)31837d77c3aSRob Clark static inline bool msm_gpu_active(struct msm_gpu *gpu)
31937d77c3aSRob Clark {
320f97decacSJordan Crouse 	int i;
321f97decacSJordan Crouse 
322f97decacSJordan Crouse 	for (i = 0; i < gpu->nr_rings; i++) {
323f97decacSJordan Crouse 		struct msm_ringbuffer *ring = gpu->rb[i];
324f97decacSJordan Crouse 
325f9d5355fSRob Clark 		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
326f97decacSJordan Crouse 			return true;
327f97decacSJordan Crouse 	}
328f97decacSJordan Crouse 
329f97decacSJordan Crouse 	return false;
33037d77c3aSRob Clark }
33137d77c3aSRob Clark 
33270c70f09SRob Clark /* Perf-Counters:
33370c70f09SRob Clark  * The select_reg and select_val are just there for the benefit of the child
33470c70f09SRob Clark  * class that actually enables the perf counter..  but msm_gpu base class
33570c70f09SRob Clark  * will handle sampling/displaying the counters.
33670c70f09SRob Clark  */
33770c70f09SRob Clark 
struct msm_gpu_perfcntr {
	uint32_t select_reg;	/* register the child class programs select_val into */
	uint32_t sample_reg;	/* register the counter value is sampled from */
	uint32_t select_val;	/* which event to count (hw-generation specific) */
	const char *name;	/* name used when displaying the counter */
};
34470c70f09SRob Clark 
345fc40e5e1SRob Clark /*
346fc40e5e1SRob Clark  * The number of priority levels provided by drm gpu scheduler.  The
347fc40e5e1SRob Clark  * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
348fc40e5e1SRob Clark  * cases, so we don't use it (no need for kernel generated jobs).
349fc40e5e1SRob Clark  */
350fc40e5e1SRob Clark #define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
351fc40e5e1SRob Clark 
/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 *
 * The remaining members are documented inline below.
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/**
	 * comm: Overridden task comm, see MSM_PARAM_COMM
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *comm;

	/**
	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *cmdline;

	/**
	 * elapsed:
	 *
	 * The total (cumulative) elapsed time GPU was busy with rendering
	 * from this context in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles elapsed attributed to this
	 * context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};
4324cd82aa3SRob Clark 
4334cd82aa3SRob Clark /**
434fc40e5e1SRob Clark  * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
435fc40e5e1SRob Clark  *
436fc40e5e1SRob Clark  * @gpu:        the gpu instance
437fc40e5e1SRob Clark  * @prio:       the userspace priority level
438fc40e5e1SRob Clark  * @ring_nr:    [out] the ringbuffer the userspace priority maps to
439fc40e5e1SRob Clark  * @sched_prio: [out] the gpu scheduler priority level which the userspace
440fc40e5e1SRob Clark  *              priority maps to
441fc40e5e1SRob Clark  *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
445fc40e5e1SRob Clark  * UABI is that lower numerical values are higher priority.  So mapping the
446fc40e5e1SRob Clark  * single userspace priority level into ring_nr and sched_prio takes some
447fc40e5e1SRob Clark  * care.  The userspace provided priority (when a submitqueue is created)
448fc40e5e1SRob Clark  * is mapped to ring nr and scheduler priority as such:
449fc40e5e1SRob Clark  *
450fc40e5e1SRob Clark  *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
451fc40e5e1SRob Clark  *   sched_prio = NR_SCHED_PRIORITIES -
452fc40e5e1SRob Clark  *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
453fc40e5e1SRob Clark  *
454fc40e5e1SRob Clark  * This allows generations without preemption (nr_rings==1) to have some
455fc40e5e1SRob Clark  * amount of prioritization, and provides more priority levels for gens
456fc40e5e1SRob Clark  * that do have preemption.
457fc40e5e1SRob Clark  */
msm_gpu_convert_priority(struct msm_gpu * gpu,int prio,unsigned * ring_nr,enum drm_sched_priority * sched_prio)458fc40e5e1SRob Clark static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
459fc40e5e1SRob Clark 		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
460fc40e5e1SRob Clark {
461fc40e5e1SRob Clark 	unsigned rn, sp;
462fc40e5e1SRob Clark 
463fc40e5e1SRob Clark 	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);
464fc40e5e1SRob Clark 
465fc40e5e1SRob Clark 	/* invert sched priority to map to higher-numeric-is-higher-
466fc40e5e1SRob Clark 	 * priority convention
467fc40e5e1SRob Clark 	 */
468fc40e5e1SRob Clark 	sp = NR_SCHED_PRIORITIES - sp - 1;
469fc40e5e1SRob Clark 
470fc40e5e1SRob Clark 	if (rn >= gpu->nr_rings)
471fc40e5e1SRob Clark 		return -EINVAL;
472fc40e5e1SRob Clark 
473fc40e5e1SRob Clark 	*ring_nr = rn;
474fc40e5e1SRob Clark 	*sched_prio = sp;
475fc40e5e1SRob Clark 
476fc40e5e1SRob Clark 	return 0;
477fc40e5e1SRob Clark }
478fc40e5e1SRob Clark 
/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *             checking)
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables the jobs submitted to the submitqueue
 *             use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @idr_lock:  for serializing access to fence_idr
 * @lock:      submitqueue lock for serializing submits on a queue
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct spinlock idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};
518f7de1545SJordan Crouse 
/* Snapshot of a single buffer object captured into a msm_gpu_state */
struct msm_gpu_state_bo {
	u64 iova;	/* GPU address of the BO */
	size_t size;	/* size of the captured data */
	void *data;	/* copy of the BO contents, if captured */
	bool encoded;	/* data is stored in an encoded form — NOTE(review): confirm scheme in crashdump code */
	char name[32];	/* name reported for this BO in the dump */
};
526cdb95931SJordan Crouse 
/* Captured GPU state, produced by funcs->gpu_state_get() and refcounted
 * via gpu_state_put() (see also msm_gpu::crashstate).
 */
struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;		/* when the state was captured */

	/* Per-ring snapshot: */
	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;		/* ringbuffer contents */
		int data_size;
		bool encoded;		/* NOTE(review): presumably same encoding as bos[].encoded — confirm */
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;		/* register dump — NOTE(review): presumably offset/value pairs; confirm against capture code */

	u32 rbbm_status;

	/* NOTE(review): presumably comm/cmdline of the context blamed for the hang — confirm */
	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};
555e00e473dSJordan Crouse 
/* Write a 32-bit GPU register; reg is a dword offset (hence the << 2) */
static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}
5607198e6b0SRob Clark 
/* Read a 32-bit GPU register; reg is a dword offset (hence the << 2) */
static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}
5657198e6b0SRob Clark 
/* Read-modify-write of a GPU register — presumably clears 'mask' bits then
 * ORs in 'or' (see msm_rmw() for the exact semantics); reg is a dword offset.
 */
static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}
570ae53a829SJordan Crouse 
gpu_read64(struct msm_gpu * gpu,u32 reg)571cade05b2SRob Clark static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
572ae53a829SJordan Crouse {
573ae53a829SJordan Crouse 	u64 val;
574ae53a829SJordan Crouse 
575ae53a829SJordan Crouse 	/*
576ae53a829SJordan Crouse 	 * Why not a readq here? Two reasons: 1) many of the LO registers are
577ae53a829SJordan Crouse 	 * not quad word aligned and 2) the GPU hardware designers have a bit
578ae53a829SJordan Crouse 	 * of a history of putting registers where they fit, especially in
579ae53a829SJordan Crouse 	 * spins. The longer a GPU family goes the higher the chance that
580ae53a829SJordan Crouse 	 * we'll get burned.  We could do a series of validity checks if we
581ae53a829SJordan Crouse 	 * wanted to, but really is a readq() that much better? Nah.
582ae53a829SJordan Crouse 	 */
583ae53a829SJordan Crouse 
584ae53a829SJordan Crouse 	/*
585ae53a829SJordan Crouse 	 * For some lo/hi registers (like perfcounters), the hi value is latched
586ae53a829SJordan Crouse 	 * when the lo is read, so make sure to read the lo first to trigger
587ae53a829SJordan Crouse 	 * that
588ae53a829SJordan Crouse 	 */
589cade05b2SRob Clark 	val = (u64) msm_readl(gpu->mmio + (reg << 2));
590cade05b2SRob Clark 	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);
591ae53a829SJordan Crouse 
592ae53a829SJordan Crouse 	return val;
593ae53a829SJordan Crouse }
594ae53a829SJordan Crouse 
static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
	/* Two 32-bit writes rather than a writeq — see gpu_read64() */
	void __iomem *addr = gpu->mmio + (reg << 2);

	msm_writel(lower_32_bits(val), addr);
	msm_writel(upper_32_bits(val), addr + 4);
}
601ae53a829SJordan Crouse 
/* GPU runtime power management entry points */
int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

/* Emit per-file GPU usage stats into @p (drm fdinfo support) */
void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

/*
 * Submitqueue management: per-file-context queues that submits are
 * targeted at.  Queues are refcounted; msm_submitqueue_destroy() is the
 * kref release callback (see msm_submitqueue_put() below).
 */
int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

/* Enable/disable system-profiling mode for a file context */
int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
/* kref release callback for msm_file_private (use msm_file_private_put()) */
void __msm_file_private_destroy(struct kref *kref);
msm_file_private_put(struct msm_file_private * ctx)6254cd82aa3SRob Clark static inline void msm_file_private_put(struct msm_file_private *ctx)
6264cd82aa3SRob Clark {
6274cd82aa3SRob Clark 	kref_put(&ctx->ref, __msm_file_private_destroy);
6284cd82aa3SRob Clark }
6294cd82aa3SRob Clark 
msm_file_private_get(struct msm_file_private * ctx)6304cd82aa3SRob Clark static inline struct msm_file_private *msm_file_private_get(
6314cd82aa3SRob Clark 	struct msm_file_private *ctx)
6324cd82aa3SRob Clark {
6334cd82aa3SRob Clark 	kref_get(&ctx->ref);
6344cd82aa3SRob Clark 	return ctx;
6354cd82aa3SRob Clark }
6364cd82aa3SRob Clark 
/* devfreq integration: DVFS setup/teardown and activity hints */
void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
/* Temporarily boost GPU frequency by @factor (e.g. on input events) */
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

/* Performance-counter sampling (see struct msm_gpu_perfcntr) */
void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

/* Retire completed submits / queue a new submit to the ringbuffer */
void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

/* Create a per-process GPU address space for @task */
struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

/* Adreno back-end probe/registration hooks */
struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);
6677198e6b0SRob Clark 
msm_submitqueue_put(struct msm_gpu_submitqueue * queue)668f7de1545SJordan Crouse static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
669f7de1545SJordan Crouse {
670f7de1545SJordan Crouse 	if (queue)
671f7de1545SJordan Crouse 		kref_put(&queue->ref, msm_submitqueue_destroy);
672f7de1545SJordan Crouse }
673f7de1545SJordan Crouse 
msm_gpu_crashstate_get(struct msm_gpu * gpu)674c0fec7f5SJordan Crouse static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
675c0fec7f5SJordan Crouse {
676c0fec7f5SJordan Crouse 	struct msm_gpu_state *state = NULL;
677c0fec7f5SJordan Crouse 
678c28e2f2bSRob Clark 	mutex_lock(&gpu->lock);
679c0fec7f5SJordan Crouse 
680c0fec7f5SJordan Crouse 	if (gpu->crashstate) {
681c0fec7f5SJordan Crouse 		kref_get(&gpu->crashstate->ref);
682c0fec7f5SJordan Crouse 		state = gpu->crashstate;
683c0fec7f5SJordan Crouse 	}
684c0fec7f5SJordan Crouse 
685c28e2f2bSRob Clark 	mutex_unlock(&gpu->lock);
686c0fec7f5SJordan Crouse 
687c0fec7f5SJordan Crouse 	return state;
688c0fec7f5SJordan Crouse }
689c0fec7f5SJordan Crouse 
msm_gpu_crashstate_put(struct msm_gpu * gpu)690c0fec7f5SJordan Crouse static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
691c0fec7f5SJordan Crouse {
692c28e2f2bSRob Clark 	mutex_lock(&gpu->lock);
693c0fec7f5SJordan Crouse 
694c0fec7f5SJordan Crouse 	if (gpu->crashstate) {
695c0fec7f5SJordan Crouse 		if (gpu->funcs->gpu_state_put(gpu->crashstate))
696c0fec7f5SJordan Crouse 			gpu->crashstate = NULL;
697c0fec7f5SJordan Crouse 	}
698c0fec7f5SJordan Crouse 
699c28e2f2bSRob Clark 	mutex_unlock(&gpu->lock);
700c0fec7f5SJordan Crouse }
701c0fec7f5SJordan Crouse 
/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges.  On hardware without hw_apriv the flags are
 * passed through unchanged.
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
708604234f3SJordan Crouse 
709604234f3SJordan Crouse 
7107198e6b0SRob Clark #endif /* __MSM_GPU_H__ */
711