/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

struct adreno_info {
	struct adreno_rev rev;
	uint32_t revn;
	const char *name;
	const char *pm4fw, *pfpfw;
	uint32_t gmem;
};

#define ANY_ID 0xff

static const struct adreno_info gpulist[] = {
	{
		.rev   = ADRENO_REV(3, 0, 5, ANY_ID),
		.revn  = 305,
		.name  = "A305",
		.pm4fw = "a300_pm4.fw",
		.pfpfw = "a300_pfp.fw",
		.gmem  = SZ_256K,
	}, {
		.rev   = ADRENO_REV(3, 2, ANY_ID, ANY_ID),
		.revn  = 320,
		.name  = "A320",
		.pm4fw = "a300_pm4.fw",
		.pfpfw = "a300_pfp.fw",
		.gmem  = SZ_512K,
	}, {
		.rev   = ADRENO_REV(3, 3, 0, ANY_ID),
		.revn  = 330,
		.name  = "A330",
		.pm4fw = "a330_pm4.fw",
		.pfpfw = "a330_pfp.fw",
		.gmem  = SZ_1M,
	},
};

MODULE_FIRMWARE("a300_pm4.fw");
MODULE_FIRMWARE("a300_pfp.fw");
MODULE_FIRMWARE("a330_pm4.fw");
MODULE_FIRMWARE("a330_pfp.fw");

#define RB_SIZE    SZ_32K
#define RB_BLKSIZE 16

int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

#define rbmemptr(adreno_gpu, member)  \
	((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	DBG("%s", gpu->name);

	ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, &gpu->rb_iova);
	if (ret) {
		gpu->rb_iova = 0;
		dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
		return ret;
	}

	/* Setup REG_CP_RB_CNTL: */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));

	/* Setup ringbuffer address: */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
	gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr));

	/* Setup scratch/timestamp: */
	gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence));

	gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1);

	return 0;
}

static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	return ring->cur - ring->start;
}

uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}
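
/* Recover from a GPU hang: reset the ringbuffer and the fence/rptr/wptr
 * bookkeeping, then power-cycle and re-run hw_init():
 */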
void adreno_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct drm_device *dev = gpu->dev;
	int ret;

	gpu->funcs->pm_suspend(gpu);

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* reset completed fence seqno, just discard anything pending: */
	adreno_gpu->memptrs->fence = gpu->submitted_fence;
	adreno_gpu->memptrs->rptr  = 0;
	adreno_gpu->memptrs->wptr  = 0;

	gpu->funcs->pm_resume(gpu);
	ret = gpu->funcs->hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i, ibs = 0;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
			/* fall-thru */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/* on a320, at least, we seem to need to pad things out to an
	 * even number of qwords to avoid issue w/ CP hanging on wrap-
	 * around:
	 */
	if (ibs % 2)
		OUT_PKT2(ring);

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence);

	if (adreno_is_a3xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);

	return 0;
}

void adreno_flush(struct msm_gpu *gpu)
{
	uint32_t wptr = get_wptr(gpu->rb);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr);
}

void adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
	seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
	seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	printk("rptr: %d\n", adreno_gpu->memptrs->rptr);
	printk("wptr: %d\n", adreno_gpu->memptrs->wptr);
	printk("rb wptr: %d\n", get_wptr(gpu->rb));
}

static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = adreno_gpu->memptrs->rptr;
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}

static const char *iommu_ports[] = {
	"gfx3d_user", "gfx3d_priv",
	"gfx3d1_user", "gfx3d1_priv",
};

static inline bool _rev_match(uint8_t entry, uint8_t id)
{
	return (entry == ANY_ID) || (entry == id);
}

int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
		struct adreno_rev rev)
{
	struct msm_mmu *mmu;
	int i, ret;

	/* identify gpu: */
	for (i = 0; i < ARRAY_SIZE(gpulist); i++) {
		const struct adreno_info *info = &gpulist[i];
		if (_rev_match(info->rev.core, rev.core) &&
				_rev_match(info->rev.major, rev.major) &&
				_rev_match(info->rev.minor, rev.minor) &&
				_rev_match(info->rev.patchid, rev.patchid)) {
			gpu->info = info;
			gpu->revn = info->revn;
			break;
		}
	}

	if (i == ARRAY_SIZE(gpulist)) {
		dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
				rev.core, rev.major, rev.minor, rev.patchid);
		return -ENXIO;
	}

	DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name,
			rev.core, rev.major, rev.minor, rev.patchid);

	gpu->funcs = funcs;
	gpu->gmem = gpu->info->gmem;
	gpu->rev = rev;

	ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				gpu->info->pfpfw, ret);
		return ret;
	}

	ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base,
			gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
			RB_SIZE);
	if (ret)
		return ret;

	mmu = gpu->base.mmu;
	if (mmu) {
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	mutex_lock(&drm->struct_mutex);
	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
			MSM_BO_UNCACHED);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(gpu->memptrs_bo)) {
		ret = PTR_ERR(gpu->memptrs_bo);
		gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	gpu->memptrs = msm_gem_vaddr(gpu->memptrs_bo);
	if (!gpu->memptrs) {
		dev_err(drm->dev, "could not vmap memptrs\n");
		return -ENOMEM;
	}

	ret = msm_gem_get_iova(gpu->memptrs_bo, gpu->base.id,
			&gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}

void adreno_gpu_cleanup(struct adreno_gpu *gpu)
{
	if (gpu->memptrs_bo) {
		if (gpu->memptrs_iova)
			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
		drm_gem_object_unreference(gpu->memptrs_bo);
	}
	if (gpu->pm4)
		release_firmware(gpu->pm4);
	if (gpu->pfp)
		release_firmware(gpu->pfp);
	msm_gpu_cleanup(&gpu->base);
}