/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/clk.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;

/*
 * Per-device configuration handed to msm_gpu_init() by the generation
 * specific code: resource names, the GPU virtual address range, and the
 * number of ringbuffers to create.
 *
 * NOTE(review): ioname/irqname appear to be platform resource names used
 * for mmio/irq lookup — confirm against msm_gpu_init().
 */
struct msm_gpu_config {
	const char *ioname;
	const char *irqname;
	uint64_t va_start;	/* start of GPU virtual address space */
	uint64_t va_end;	/* end of GPU virtual address space */
	unsigned int nr_rings;	/* how many of rb[] to use (<= MSM_GPU_MAX_RINGS) */
};

/* So far, with hardware that I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */

/*
 * Per-generation backend operations.  Every hook takes the base msm_gpu
 * object; the generation specific code upcasts to its own type.
 */
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
	int (*hw_init)(struct msm_gpu *gpu);
	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit,
			struct msm_file_private *ctx);
	/* NOTE(review): the (*irq) parameter is the gpu object despite being
	 * named "irq" — consider renaming to "gpu" in a follow-up.
	 */
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *irq);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#ifdef CONFIG_DEBUG_FS
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct seq_file *m);
	/* for generation specific debugfs: */
	int (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	int (*gpu_busy)(struct msm_gpu *gpu, uint64_t *value);
	/* capture / release a hang-state snapshot (see struct msm_gpu_state): */
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	void (*gpu_state_put)(struct msm_gpu_state *state);
};

/*
 * Base "class" for all GPU generations; per the hierarchy comment above it
 * is embedded in the generation specific structures (adreno_gpu, etc).
 */
struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;	/* NOTE(review): presumably protects the perfcntr
				 * sampling state below — confirm in msm_gpu.c */
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;	/* sw counters */
	uint32_t last_cntrs[5];		/* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	/* only rb[0..nr_rings-1] are valid (see msm_gpu_active()): */
	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/* list of GEM active objects: */
	struct list_head active_list;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/* worker for handling active-list retiring: */
	struct work_struct retire_work;

	void __iomem *mmio;	/* register window, dword-indexed accessors below */
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk **grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection:
	 */
#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
	struct timer_list hangcheck_timer;
	struct work_struct recover_work;

	/* NOTE(review): looks like the backing GEM object for the per-ring
	 * memptrs — confirm against msm_gpu.c / msm_ringbuffer.c */
	struct drm_gem_object *memptrs_bo;

	struct {
		struct devfreq *devfreq;
		/* last sample used for load estimation — TODO confirm: */
		u64 busy_cycles;
		ktime_t time;
	} devfreq;
};

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

/* buffer and block sizes are encoded as log2(bytes / 8): */
#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))

/*
 * Returns true while any ring still has outstanding work, i.e. its
 * ring->seqno is ahead of ring->memptrs->fence.  Presumably seqno counts
 * submissions and memptrs->fence tracks what the GPU has completed — see
 * msm_ringbuffer.h to confirm.
 */
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (ring->seqno > ring->memptrs->fence)
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter.. but msm_gpu base class
 * will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;	/* register programmed with select_val (by child class) */
	uint32_t sample_reg;	/* register sampled by the base class */
	uint32_t select_val;
	const char *name;
};

/*
 * A per-file submit queue; reference counted, released via
 * msm_submitqueue_put() below.
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 prio;
	int faults;
	struct list_head node;
	struct kref ref;
};

/*
 * Snapshot of GPU state captured by funcs->gpu_state_get() (e.g. for hang
 * diagnostics) and released via funcs->gpu_state_put().
 */
struct msm_gpu_state {
	/* NOTE(review): struct timeval is y2038-unsafe; newer kernels moved
	 * such timestamps to ktime_t / timespec64. */
	struct timeval time;

	/* per-ring position/fence state at capture time: */
	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;	/* NOTE(review): presumably (offset, value) pairs — confirm */

	u32 rbbm_status;
};

/* mmio accessors: 'reg' is a dword register index, hence the << 2 to bytes */

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

/* read-modify-write: clear the bits in 'mask', then OR in 'or' */
static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	uint32_t val = gpu_read(gpu, reg);

	val &= ~mask;
	gpu_write(gpu, reg, val | or);
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins. The longer a GPU family goes the higher the chance that
	 * we'll get burned. We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better? Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is latched
	 * when the lo is read, so make sure to read the lo first to trigger
	 * that
	 */
	val = (u64) msm_readl(gpu->mmio + (lo << 2));
	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

/*
 * Drop a submitqueue reference (NULL-safe).  The final put invokes
 * msm_submitqueue_destroy(), declared in another header.
 */
static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

#endif /* __MSM_GPU_H__ */