xref: /openbmc/linux/drivers/gpu/drm/msm/msm_gpu.h (revision 76efc2453d0e8e5d6692ef69981b183ad674edea)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;
struct msm_file_private;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t *value, uint32_t *len);
	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t value, uint32_t len);
	int (*hw_init)(struct msm_gpu *gpu);
	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
			struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	/* note: gpu_busy() can assume that we have been pm_resumed */
	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	/* note: gpu_set_freq() can assume that we have been pm_resumed */
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			     bool suspended);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};

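/*
 * A per-generation backend wires up these hooks in a table like the
 * following minimal sketch (hypothetical function names for illustration;
 * the real tables live in the adreno code, e.g. a3xx_gpu.c, and fill in
 * many more of the hooks above):
 *
 *   static const struct msm_gpu_funcs funcs = {
 *           .get_param = adreno_get_param,
 *           .hw_init = a3xx_hw_init,
 *           .submit = a3xx_submit,
 *           .flush = adreno_flush,
 *           .irq = a3xx_irq,
 *           .destroy = a3xx_destroy,
 *   };
 */
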
/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * A PM QoS constraint to limit max freq while the GPU is idle.
	 */
	struct dev_pm_qos_request idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/**
	 * average_status: Devfreq status averaged over a longer sampling
	 * window, to smooth out short-lived busy/idle swings.
	 */
	struct devfreq_dev_status average_status;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq constraint after the boost period
	 * has elapsed.
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};

struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;    /* sw counters */
	uint32_t last_cntrs[5];            /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/**
	 * sysprof_active:
	 *
	 * The count of contexts that have enabled system profiling.
	 */
	refcount_t sysprof_active;

	/**
	 * cur_ctx_seqno:
	 *
	 * The ctx->seqno value of the last context to submit rendering,
	 * and the one with current pgtables installed (for generations
	 * that support per-context pgtables).  Tracked by seqno rather
	 * than pointer value to avoid dangling pointers, and cases where
	 * a ctx can be freed and a new one created with the same address.
	 */
	int cur_ctx_seqno;

	/**
	 * lock:
	 *
	 * General lock for serializing all the gpu things.
	 *
	 * TODO move to per-ring locking where feasible (ie. submit/retire
	 * path, etc)
	 */
	struct mutex lock;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection: */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* Enable clamping to idle freq when inactive: */
	bool clamp_to_idle;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	struct thermal_cooling_device *cooling;

	/* To poll for cx gdsc collapse during gpu recovery */
	struct reset_control *cx_collapse;
};

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	if (!adreno_smmu)
		return NULL;

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}

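/*
 * The drvdata of the GPU platform device is the adreno_smmu_priv, which
 * may not be installed yet when a callback runs early in probe, hence the
 * NULL check above.  A minimal sketch of a caller (hypothetical function
 * name, assuming a runtime-PM callback bound to the GPU device):
 *
 *   static int adreno_runtime_resume(struct device *dev)
 *   {
 *           struct msm_gpu *gpu = dev_to_gpu(dev);
 *
 *           if (!gpu)
 *                   return -ENODEV;
 *
 *           return gpu->funcs->pm_resume(gpu);
 *   }
 */
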
/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))

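/*
 * Worked example: both fields are encoded as log2 of a size in 8-byte
 * units, so with the values above BUFSZ = ilog2(32768 / 8) = 12 and
 * BLKSZ = ilog2(32 / 8) = 2.
 */
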
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the
 * child class that actually enables the perf counter, but the msm_gpu
 * base class will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};

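/*
 * A hypothetical sketch of a counter description (register and event
 * names invented for illustration; the real tables live in the
 * per-generation code):
 *
 *   static const struct msm_gpu_perfcntr perfcntrs[] = {
 *           { .select_reg = REG_AXXX_PERFCOUNTER0_SELECT,
 *             .sample_reg = REG_AXXX_PERFCOUNTER0_LO,
 *             .select_val = PERF_SOME_EVENT,
 *             .name = "some-event",
 *           },
 *   };
 */
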
/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock:    synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid:      counter incremented each time a submitqueue is created,
 *                used to assign &msm_gpu_submitqueue.id
 * @aspace:       the per-process GPU address-space
 * @ref:          reference count
 * @seqno:        unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/** comm: Overridden task comm, see MSM_PARAM_COMM */
	char *comm;

	/** cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE */
	char *cmdline;

	/**
	 * elapsed_ns:
	 *
	 * The total (cumulative) elapsed time GPU was busy with rendering
	 * from this context in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles elapsed attributed to this
	 * context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};

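/*
 * Note: assuming the slot layout used by the submitqueue code, a
 * (ring_nr, sched_prio) pair indexes the flattened entities table
 * above as:
 *
 *   idx = (ring_nr * NR_SCHED_PRIORITIES) + sched_prio;
 *
 * so the entries for one ring are adjacent, one per scheduler priority
 * level.
 */
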
/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu:        the gpu instance
 * @prio:       the userspace priority level
 * @ring_nr:    [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}

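/*
 * Worked example, assuming NR_SCHED_PRIORITIES == 3 and nr_rings == 4:
 * userspace priority 0 (the highest) maps to ring_nr 0 with sched_prio
 * 3 - 0 - 1 = 2, while userspace priority 4 maps to ring_nr 4 / 3 = 1
 * with sched_prio 3 - (4 % 3) - 1 = 1.
 */
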
/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *             checking)
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables do jobs submitted to this queue use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @idr_lock:  for serializing access to fence_idr
 * @lock:      submitqueue lock for serializing submits on a queue
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct mutex idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;
	char name[32];
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned.  We could do a series of validity checks if we
	 * wanted to, but really, is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is
	 * latched when the lo is read, so make sure to read the lo first to
	 * trigger that.
	 */
	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
}

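/*
 * Example (hypothetical register names): a 64-bit counter exposed as a
 * LO/HI register pair would be cleared and later sampled as:
 *
 *   gpu_write64(gpu, REG_CNTR_LO, REG_CNTR_HI, 0);
 *   ...
 *   u64 cycles = gpu_read64(gpu, REG_CNTR_LO, REG_CNTR_HI);
 */
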
int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
	struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->lock);

	return state;
}

static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->lock);
}

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))

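/*
 * For example (a sketch, assuming the msm_gem_kernel_new() helper used
 * elsewhere in the driver), a kernel-internal buffer that must be
 * privileged on a650 and newer would be allocated as:
 *
 *   ptr = msm_gem_kernel_new(drm, size, check_apriv(gpu, MSM_BO_WC),
 *                            gpu->aspace, &bo, &iova);
 */
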
#endif /* __MSM_GPU_H__ */