/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

struct adreno_info {
	struct adreno_rev rev;
	uint32_t revn;
	const char *name;
	const char *pm4fw, *pfpfw;
	uint32_t gmem;
};

#define ANY_ID 0xff

static const struct adreno_info gpulist[] = {
	{
		.rev   = ADRENO_REV(3, 0, 5, ANY_ID),
		.revn  = 305,
		.name  = "A305",
		.pm4fw = "a300_pm4.fw",
		.pfpfw = "a300_pfp.fw",
		.gmem  = SZ_256K,
	}, {
		.rev   = ADRENO_REV(3, 2, ANY_ID, ANY_ID),
		.revn  = 320,
		.name  = "A320",
		.pm4fw = "a300_pm4.fw",
		.pfpfw = "a300_pfp.fw",
		.gmem  = SZ_512K,
	}, {
		.rev   = ADRENO_REV(3, 3, 0, ANY_ID),
		.revn  = 330,
		.name  = "A330",
		.pm4fw = "a330_pm4.fw",
		.pfpfw = "a330_pfp.fw",
		.gmem  = SZ_1M,
	},
};

MODULE_FIRMWARE("a300_pm4.fw");
MODULE_FIRMWARE("a300_pfp.fw");
MODULE_FIRMWARE("a330_pm4.fw");
MODULE_FIRMWARE("a330_pfp.fw");

#define RB_SIZE    SZ_32K
#define RB_BLKSIZE 16

int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_CHIP_ID:
		/* chip-id is packed one byte per rev field, core in the msb:
		 * core.major.minor.patchid
		 */
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

/* GPU (iova) address of a member of the shared rbmemptrs buffer: */
#define rbmemptr(adreno_gpu, member)  \
	((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	DBG("%s", gpu->name);

	/* Setup REG_CP_RB_CNTL: */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));

	/* Setup ringbuffer address: */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
	gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr));

	/* Setup scratch/timestamp: */
	gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence));

	gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1);

	return 0;
}

static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	return ring->cur - ring->start;
}

uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}

void adreno_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct drm_device *dev = gpu->dev;
	int ret;

	gpu->funcs->pm_suspend(gpu);

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* reset completed fence seqno, just discard anything pending: */
	adreno_gpu->memptrs->fence = gpu->submitted_fence;
	adreno_gpu->memptrs->rptr  = 0;
	adreno_gpu->memptrs->wptr  = 0;

	gpu->funcs->pm_resume(gpu);
	ret = gpu->funcs->hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i, ibs = 0;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
			/* fall-thru */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/* on a320, at least, we seem to need to pad things out to an
	 * even number of qwords to avoid issue w/ CP hanging on wrap-
	 * around:
	 */
	if (ibs % 2)
		OUT_PKT2(ring);

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence);

	if (adreno_is_a3xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);

	return 0;
}

void adreno_flush(struct msm_gpu *gpu)
{
	uint32_t wptr = get_wptr(gpu->rb);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr);
}

void adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
	seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
	seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	printk("rptr: %d\n", adreno_gpu->memptrs->rptr);
	printk("wptr: %d\n", adreno_gpu->memptrs->wptr);
	printk("rb wptr: %d\n", get_wptr(gpu->rb));
}

static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = adreno_gpu->memptrs->rptr;
	/* one dword is always left unused, so wptr never catches back up
	 * to rptr and rptr == wptr unambiguously means "empty":
	 */
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}

static const char *iommu_ports[] = {
		"gfx3d_user", "gfx3d_priv",
		"gfx3d1_user", "gfx3d1_priv",
};

static inline bool _rev_match(uint8_t entry, uint8_t id)
{
	return (entry == ANY_ID) || (entry == id);
}

int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
		struct adreno_rev rev)
{
	struct msm_mmu *mmu;
	int i, ret;

	/* identify gpu: */
	for (i = 0; i < ARRAY_SIZE(gpulist); i++) {
		const struct adreno_info *info = &gpulist[i];
		if (_rev_match(info->rev.core, rev.core) &&
				_rev_match(info->rev.major, rev.major) &&
				_rev_match(info->rev.minor, rev.minor) &&
				_rev_match(info->rev.patchid, rev.patchid)) {
			gpu->info = info;
			gpu->revn = info->revn;
			break;
		}
	}

	if (i == ARRAY_SIZE(gpulist)) {
		dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
				rev.core, rev.major, rev.minor, rev.patchid);
		return -ENXIO;
	}

	DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name,
			rev.core, rev.major, rev.minor, rev.patchid);

	gpu->funcs = funcs;
	gpu->gmem = gpu->info->gmem;
	gpu->rev = rev;

	ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				gpu->info->pfpfw, ret);
		return ret;
	}

	ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base,
			gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
			RB_SIZE);
	if (ret)
		return ret;

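	/* msm_gpu_init() leaves gpu->base.mmu NULL when no IOMMU was found;
	 * otherwise attach the gfx3d context-bank ports so the GPU accesses
	 * buffers through its own address space:
	 */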
	mmu = gpu->base.mmu;
	if (mmu) {
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
			MSM_BO_UNCACHED);
	if (IS_ERR(gpu->memptrs_bo)) {
		ret = PTR_ERR(gpu->memptrs_bo);
		gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	gpu->memptrs = msm_gem_vaddr_locked(gpu->memptrs_bo);
	if (!gpu->memptrs) {
		dev_err(drm->dev, "could not vmap memptrs\n");
		return -ENOMEM;
	}

	ret = msm_gem_get_iova_locked(gpu->memptrs_bo, gpu->base.id,
			&gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}

void adreno_gpu_cleanup(struct adreno_gpu *gpu)
{
	if (gpu->memptrs_bo) {
		if (gpu->memptrs_iova)
			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
		drm_gem_object_unreference(gpu->memptrs_bo);
	}
	if (gpu->pm4)
		release_firmware(gpu->pm4);
	if (gpu->pfp)
		release_firmware(gpu->pfp);
	msm_gpu_cleanup(&gpu->base);
}