1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. 3 */ 4 5 #include <linux/kernel.h> 6 #include <linux/types.h> 7 #include <linux/cpumask.h> 8 #include <linux/qcom_scm.h> 9 #include <linux/pm_opp.h> 10 #include <linux/nvmem-consumer.h> 11 #include <linux/slab.h> 12 #include "msm_gem.h" 13 #include "msm_mmu.h" 14 #include "a5xx_gpu.h" 15 16 extern bool hang_debug; 17 static void a5xx_dump(struct msm_gpu *gpu); 18 19 #define GPU_PAS_ID 13 20 21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 22 { 23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 25 uint32_t wptr; 26 unsigned long flags; 27 28 spin_lock_irqsave(&ring->lock, flags); 29 30 /* Copy the shadow to the actual register */ 31 ring->cur = ring->next; 32 33 /* Make sure to wrap wptr if we need to */ 34 wptr = get_wptr(ring); 35 36 spin_unlock_irqrestore(&ring->lock, flags); 37 38 /* Make sure everything is posted before making a decision */ 39 mb(); 40 41 /* Update HW if this is the current ring and we are not in preempt */ 42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) 43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); 44 } 45 46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit, 47 struct msm_file_private *ctx) 48 { 49 struct msm_drm_private *priv = gpu->dev->dev_private; 50 struct msm_ringbuffer *ring = submit->ring; 51 struct msm_gem_object *obj; 52 uint32_t *ptr, dwords; 53 unsigned int i; 54 55 for (i = 0; i < submit->nr_cmds; i++) { 56 switch (submit->cmd[i].type) { 57 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 58 break; 59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 60 if (priv->lastctx == ctx) 61 break; 62 /* fall-thru */ 63 case MSM_SUBMIT_CMD_BUF: 64 /* copy commands into RB: */ 65 obj = submit->bos[submit->cmd[i].idx].obj; 66 dwords = submit->cmd[i].size; 67 68 ptr = msm_gem_get_vaddr(&obj->base); 69 70 /* _get_vaddr() shouldn't fail at this point, 71 * since we've already mapped it once in 72 * submit_reloc() 73 */ 74 if (WARN_ON(!ptr)) 75 return; 76 77 for (i = 0; i < dwords; i++) { 78 /* normally the OUT_PKTn() would wait 79 * for space for the packet. But since 80 * we just OUT_RING() the whole thing, 81 * need to call adreno_wait_ring() 82 * ourself: 83 */ 84 adreno_wait_ring(ring, 1); 85 OUT_RING(ring, ptr[i]); 86 } 87 88 msm_gem_put_vaddr(&obj->base); 89 90 break; 91 } 92 } 93 94 a5xx_flush(gpu, ring); 95 a5xx_preempt_trigger(gpu); 96 97 /* we might not necessarily have a cmd from userspace to 98 * trigger an event to know that submit has completed, so 99 * do this manually: 100 */ 101 a5xx_idle(gpu, ring); 102 ring->memptrs->fence = submit->seqno; 103 msm_gpu_retire(gpu); 104 } 105 106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 107 struct msm_file_private *ctx) 108 { 109 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 110 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 111 struct msm_drm_private *priv = gpu->dev->dev_private; 112 struct msm_ringbuffer *ring = submit->ring; 113 unsigned int i, ibs = 0; 114 115 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { 116 priv->lastctx = NULL; 117 a5xx_submit_in_rb(gpu, submit, ctx); 118 return; 119 } 120 121 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 122 OUT_RING(ring, 0x02); 123 124 /* Turn off protected mode to write to special registers */ 125 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 126 OUT_RING(ring, 0); 127 128 /* Set the save preemption record for the ring/command */ 129 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 130 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 131 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 132 133 /* Turn back on protected mode */ 134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 135 OUT_RING(ring, 1); 136 137 /* Enable local preemption for finegrain preemption */ 138 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 139 OUT_RING(ring, 0x02); 140 141 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ 142 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 143 OUT_RING(ring, 0x02); 144 145 /* Submit the commands */ 146 for (i = 0; i < submit->nr_cmds; i++) { 147 switch (submit->cmd[i].type) { 148 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 149 break; 150 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 151 if (priv->lastctx == ctx) 152 break; 153 /* fall-thru */ 154 case MSM_SUBMIT_CMD_BUF: 155 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 156 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 157 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 158 OUT_RING(ring, submit->cmd[i].size); 159 ibs++; 160 break; 161 } 162 } 163 164 /* 165 * Write the render mode to NULL (0) to indicate to the CP that the IBs 166 * are done rendering - otherwise a lucky preemption would start 167 * replaying from the last checkpoint 168 */ 169 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); 170 OUT_RING(ring, 0); 171 OUT_RING(ring, 0); 172 OUT_RING(ring, 0); 173 OUT_RING(ring, 0); 174 OUT_RING(ring, 0); 175 176 /* Turn off IB level preemptions */ 177 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 178 OUT_RING(ring, 0x01); 179 180 /* Write the fence to the scratch register */ 181 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); 182 OUT_RING(ring, submit->seqno); 183 184 /* 185 * Execute a CACHE_FLUSH_TS event. This will ensure that the 186 * timestamp is written to the memory and then triggers the interrupt 187 */ 188 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 189 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); 190 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 191 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 192 OUT_RING(ring, submit->seqno); 193 194 /* Yield the floor on command completion */ 195 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 196 /* 197 * If dword[2:1] are non zero, they specify an address for the CP to 198 * write the value of dword[3] to on preemption complete. Write 0 to 199 * skip the write 200 */ 201 OUT_RING(ring, 0x00); 202 OUT_RING(ring, 0x00); 203 /* Data value - not used if the address above is 0 */ 204 OUT_RING(ring, 0x01); 205 /* Set bit 0 to trigger an interrupt on preempt complete */ 206 OUT_RING(ring, 0x01); 207 208 a5xx_flush(gpu, ring); 209 210 /* Check to see if we need to start preemption */ 211 a5xx_preempt_trigger(gpu); 212 } 213 214 static const struct { 215 u32 offset; 216 u32 value; 217 } a5xx_hwcg[] = { 218 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, 219 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, 220 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, 221 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, 222 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, 223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, 224 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, 225 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, 226 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, 227 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, 228 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, 229 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, 230 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, 231 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, 232 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, 233 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, 234 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, 235 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, 236 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, 237 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, 238 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, 239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, 240 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, 241 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, 242 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, 243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, 244 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, 245 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, 246 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, 247 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, 248 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, 249 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, 250 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, 251 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, 252 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, 253 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, 254 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, 255 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, 256 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, 257 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, 258 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, 259 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, 260 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, 261 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, 262 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, 263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, 264 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, 265 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, 266 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, 267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, 268 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, 269 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, 270 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, 271 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, 272 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, 273 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, 274 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, 275 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, 276 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, 277 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, 278 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, 279 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, 280 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, 281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, 282 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, 283 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, 284 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, 285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, 286 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, 287 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, 288 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, 289 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, 290 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, 291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, 292 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, 293 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, 294 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, 295 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, 296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, 297 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, 298 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, 299 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, 300 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, 301 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, 302 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, 303 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, 304 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, 305 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, 306 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, 307 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, 308 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, 309 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} 310 }; 311 312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) 313 { 314 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 315 unsigned int i; 316 317 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++) 318 gpu_write(gpu, a5xx_hwcg[i].offset, 319 state ? a5xx_hwcg[i].value : 0); 320 321 if (adreno_is_a540(adreno_gpu)) { 322 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0); 323 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0); 324 } 325 326 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0); 327 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180); 328 } 329 330 static int a5xx_me_init(struct msm_gpu *gpu) 331 { 332 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 333 struct msm_ringbuffer *ring = gpu->rb[0]; 334 335 OUT_PKT7(ring, CP_ME_INIT, 8); 336 337 OUT_RING(ring, 0x0000002F); 338 339 /* Enable multiple hardware contexts */ 340 OUT_RING(ring, 0x00000003); 341 342 /* Enable error detection */ 343 OUT_RING(ring, 0x20000000); 344 345 /* Don't enable header dump */ 346 OUT_RING(ring, 0x00000000); 347 OUT_RING(ring, 0x00000000); 348 349 /* Specify workarounds for various microcode issues */ 350 if (adreno_is_a530(adreno_gpu)) { 351 /* Workaround for token end syncs 352 * Force a WFI after every direct-render 3D mode draw and every 353 * 2D mode 3 draw 354 */ 355 OUT_RING(ring, 0x0000000B); 356 } else if (adreno_is_a510(adreno_gpu)) { 357 /* Workaround for token and syncs */ 358 OUT_RING(ring, 0x00000001); 359 } else { 360 /* No workarounds enabled */ 361 OUT_RING(ring, 0x00000000); 362 } 363 364 OUT_RING(ring, 0x00000000); 365 OUT_RING(ring, 0x00000000); 366 367 gpu->funcs->flush(gpu, ring); 368 return a5xx_idle(gpu, ring) ? 0 : -EINVAL; 369 } 370 371 static int a5xx_preempt_start(struct msm_gpu *gpu) 372 { 373 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 374 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 375 struct msm_ringbuffer *ring = gpu->rb[0]; 376 377 if (gpu->nr_rings == 1) 378 return 0; 379 380 /* Turn off protected mode to write to special registers */ 381 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 382 OUT_RING(ring, 0); 383 384 /* Set the save preemption record for the ring/command */ 385 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 386 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); 387 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); 388 389 /* Turn back on protected mode */ 390 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 391 OUT_RING(ring, 1); 392 393 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 394 OUT_RING(ring, 0x00); 395 396 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); 397 OUT_RING(ring, 0x01); 398 399 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 400 OUT_RING(ring, 0x01); 401 402 /* Yield the floor on command completion */ 403 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 404 OUT_RING(ring, 0x00); 405 OUT_RING(ring, 0x00); 406 OUT_RING(ring, 0x01); 407 OUT_RING(ring, 0x01); 408 409 gpu->funcs->flush(gpu, ring); 410 411 return a5xx_idle(gpu, ring) ? 0 : -EINVAL; 412 } 413 414 static int a5xx_ucode_init(struct msm_gpu *gpu) 415 { 416 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 417 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 418 int ret; 419 420 if (!a5xx_gpu->pm4_bo) { 421 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu, 422 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova); 423 424 425 if (IS_ERR(a5xx_gpu->pm4_bo)) { 426 ret = PTR_ERR(a5xx_gpu->pm4_bo); 427 a5xx_gpu->pm4_bo = NULL; 428 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n", 429 ret); 430 return ret; 431 } 432 433 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw"); 434 } 435 436 if (!a5xx_gpu->pfp_bo) { 437 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu, 438 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova); 439 440 if (IS_ERR(a5xx_gpu->pfp_bo)) { 441 ret = PTR_ERR(a5xx_gpu->pfp_bo); 442 a5xx_gpu->pfp_bo = NULL; 443 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n", 444 ret); 445 return ret; 446 } 447 448 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw"); 449 } 450 451 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, 452 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova); 453 454 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, 455 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova); 456 457 return 0; 458 } 459 460 #define SCM_GPU_ZAP_SHADER_RESUME 0 461 462 static int a5xx_zap_shader_resume(struct msm_gpu *gpu) 463 { 464 int ret; 465 466 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID); 467 if (ret) 468 DRM_ERROR("%s: zap-shader resume failed: %d\n", 469 gpu->name, ret); 470 471 return ret; 472 } 473 474 static int a5xx_zap_shader_init(struct msm_gpu *gpu) 475 { 476 static bool loaded; 477 int ret; 478 479 /* 480 * If the zap shader is already loaded into memory we just need to kick 481 * the remote processor to reinitialize it 482 */ 483 if (loaded) 484 return a5xx_zap_shader_resume(gpu); 485 486 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID); 487 488 loaded = !ret; 489 return ret; 490 } 491 492 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ 493 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ 494 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ 495 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ 496 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ 497 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ 498 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 499 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ 500 A5XX_RBBM_INT_0_MASK_CP_SW | \ 501 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 502 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 503 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) 504 505 static int a5xx_hw_init(struct msm_gpu *gpu) 506 { 507 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 508 int ret; 509 510 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); 511 512 if (adreno_is_a540(adreno_gpu)) 513 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); 514 515 /* Make all blocks contribute to the GPU BUSY perf counter */ 516 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); 517 518 /* Enable RBBM error reporting bits */ 519 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001); 520 521 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) { 522 /* 523 * Mask out the activity signals from RB1-3 to avoid false 524 * positives 525 */ 526 527 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, 528 0xF0000000); 529 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, 530 0xFFFFFFFF); 531 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, 532 0xFFFFFFFF); 533 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, 534 0xFFFFFFFF); 535 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, 536 0xFFFFFFFF); 537 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, 538 0xFFFFFFFF); 539 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, 540 0xFFFFFFFF); 541 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, 542 0xFFFFFFFF); 543 } 544 545 /* Enable fault detection */ 546 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL, 547 (1 << 30) | 0xFFFF); 548 549 /* Turn on performance counters */ 550 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01); 551 552 /* Select CP0 to always count cycles */ 553 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); 554 555 /* Select RBBM0 to countable 6 to get the busy status for devfreq */ 556 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); 557 558 /* Increase VFD cache access so LRZ and other data gets evicted less */ 559 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); 560 561 /* Disable L2 bypass in the UCHE */ 562 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); 563 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); 564 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); 565 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); 566 567 /* Set the GMEM VA range (0 to gpu->gmem) */ 568 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); 569 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000); 570 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO, 571 0x00100000 + adreno_gpu->gmem - 1); 572 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); 573 574 if (adreno_is_a510(adreno_gpu)) { 575 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20); 576 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20); 577 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); 578 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); 579 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 580 (0x200 << 11 | 0x200 << 22)); 581 } else { 582 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); 583 if (adreno_is_a530(adreno_gpu)) 584 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); 585 if (adreno_is_a540(adreno_gpu)) 586 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400); 587 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); 588 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); 589 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 590 (0x400 << 11 | 0x300 << 22)); 591 } 592 593 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) 594 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); 595 596 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100); 597 598 /* Enable USE_RETENTION_FLOPS */ 599 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000); 600 601 /* Enable ME/PFP split notification */ 602 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); 603 604 /* 605 * In A5x, CCU can send context_done event of a particular context to 606 * UCHE which ultimately reaches CP even when there is valid 607 * transaction of that context inside CCU. This can let CP to program 608 * config registers, which will make the "valid transaction" inside 609 * CCU to be interpreted differently. This can cause gpu fault. This 610 * bug is fixed in latest A510 revision. To enable this bug fix - 611 * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1 612 * (disable). For older A510 version this bit is unused. 613 */ 614 if (adreno_is_a510(adreno_gpu)) 615 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0); 616 617 /* Enable HWCG */ 618 a5xx_set_hwcg(gpu, true); 619 620 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); 621 622 /* Set the highest bank bit */ 623 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7); 624 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1); 625 if (adreno_is_a540(adreno_gpu)) 626 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2); 627 628 /* Protect registers from the CP */ 629 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007); 630 631 /* RBBM */ 632 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4)); 633 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8)); 634 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16)); 635 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32)); 636 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64)); 637 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64)); 638 639 /* Content protect */ 640 gpu_write(gpu, REG_A5XX_CP_PROTECT(6), 641 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 642 16)); 643 gpu_write(gpu, REG_A5XX_CP_PROTECT(7), 644 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2)); 645 646 /* CP */ 647 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64)); 648 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8)); 649 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32)); 650 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1)); 651 652 /* RB */ 653 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1)); 654 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2)); 655 656 /* VPC */ 657 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8)); 658 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4)); 659 660 /* UCHE */ 661 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); 662 663 if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu)) 664 gpu_write(gpu, REG_A5XX_CP_PROTECT(17), 665 ADRENO_PROTECT_RW(0x10000, 0x8000)); 666 667 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0); 668 /* 669 * Disable the trusted memory range - we don't actually supported secure 670 * memory rendering at this point in time and we don't want to block off 671 * part of the virtual memory space. 672 */ 673 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 674 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); 675 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 676 677 /* Put the GPU into 64 bit by default */ 678 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1); 679 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1); 680 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1); 681 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1); 682 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1); 683 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1); 684 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1); 685 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1); 686 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1); 687 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1); 688 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1); 689 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); 690 691 /* 692 * VPC corner case with local memory load kill leads to corrupt 693 * internal state. Normal Disable does not work for all a5x chips. 694 * So do the following setting to disable it. 695 */ 696 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) { 697 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23)); 698 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0); 699 } 700 701 ret = adreno_hw_init(gpu); 702 if (ret) 703 return ret; 704 705 a5xx_preempt_hw_init(gpu); 706 707 if (!adreno_is_a510(adreno_gpu)) 708 a5xx_gpmu_ucode_init(gpu); 709 710 ret = a5xx_ucode_init(gpu); 711 if (ret) 712 return ret; 713 714 /* Disable the interrupts through the initial bringup stage */ 715 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK); 716 717 /* Clear ME_HALT to start the micro engine */ 718 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0); 719 ret = a5xx_me_init(gpu); 720 if (ret) 721 return ret; 722 723 ret = a5xx_power_init(gpu); 724 if (ret) 725 return ret; 726 727 /* 728 * Send a pipeline event stat to get misbehaving counters to start 729 * ticking correctly 730 */ 731 if (adreno_is_a530(adreno_gpu)) { 732 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); 733 OUT_RING(gpu->rb[0], 0x0F); 734 735 gpu->funcs->flush(gpu, gpu->rb[0]); 736 if (!a5xx_idle(gpu, gpu->rb[0])) 737 return -EINVAL; 738 } 739 740 /* 741 * If the chip that we are using does support loading one, then 742 * try to load a zap shader into the secure world. If successful 743 * we can use the CP to switch out of secure mode. If not then we 744 * have no resource but to try to switch ourselves out manually. If we 745 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 746 * be blocked and a permissions violation will soon follow. 747 */ 748 ret = a5xx_zap_shader_init(gpu); 749 if (!ret) { 750 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 751 OUT_RING(gpu->rb[0], 0x00000000); 752 753 gpu->funcs->flush(gpu, gpu->rb[0]); 754 if (!a5xx_idle(gpu, gpu->rb[0])) 755 return -EINVAL; 756 } else { 757 /* Print a warning so if we die, we know why */ 758 dev_warn_once(gpu->dev->dev, 759 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 760 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); 761 } 762 763 /* Last step - yield the ringbuffer */ 764 a5xx_preempt_start(gpu); 765 766 return 0; 767 } 768 769 static void a5xx_recover(struct msm_gpu *gpu) 770 { 771 int i; 772 773 adreno_dump_info(gpu); 774 775 for (i = 0; i < 8; i++) { 776 printk("CP_SCRATCH_REG%d: %u\n", i, 777 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i))); 778 } 779 780 if (hang_debug) 781 a5xx_dump(gpu); 782 783 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1); 784 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD); 785 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0); 786 adreno_recover(gpu); 787 } 788 789 static void a5xx_destroy(struct msm_gpu *gpu) 790 { 791 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 792 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 793 794 DBG("%s", gpu->name); 795 796 a5xx_preempt_fini(gpu); 797 798 if (a5xx_gpu->pm4_bo) { 799 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace); 800 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo); 801 } 802 803 if (a5xx_gpu->pfp_bo) { 804 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace); 805 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo); 806 } 807 808 if (a5xx_gpu->gpmu_bo) { 809 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace); 810 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo); 811 } 812 813 adreno_gpu_cleanup(adreno_gpu); 814 kfree(a5xx_gpu); 815 } 816 817 static inline bool _a5xx_check_idle(struct msm_gpu *gpu) 818 { 819 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY) 820 return false; 821 822 /* 823 * Nearly every abnormality ends up pausing the GPU and triggering a 824 * fault so we can safely just watch for this one interrupt to fire 825 */ 826 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) & 827 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); 828 } 829 830 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 831 { 832 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 833 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 834 835 if (ring != a5xx_gpu->cur_ring) { 836 WARN(1, "Tried to idle a non-current ringbuffer\n"); 837 return false; 838 } 839 840 /* wait for CP to drain ringbuffer: */ 841 if (!adreno_idle(gpu, ring)) 842 return false; 843 844 if (spin_until(_a5xx_check_idle(gpu))) { 845 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 846 gpu->name, __builtin_return_address(0), 847 gpu_read(gpu, REG_A5XX_RBBM_STATUS), 848 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), 849 gpu_read(gpu, REG_A5XX_CP_RB_RPTR), 850 gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); 851 return false; 852 } 853 854 return true; 855 } 856 857 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags) 858 { 859 struct msm_gpu *gpu = arg; 860 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n", 861 iova, flags, 862 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)), 863 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)), 864 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)), 865 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7))); 866 867 return -EFAULT; 868 } 869 870 static void a5xx_cp_err_irq(struct msm_gpu *gpu) 871 { 872 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS); 873 874 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) { 875 u32 val; 876 877 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0); 878 879 /* 880 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so 881 * read it twice 882 */ 883 884 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); 885 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); 886 887 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n", 888 val); 889 } 890 891 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR) 892 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n", 893 gpu_read(gpu, REG_A5XX_CP_HW_FAULT)); 894 895 if (status & A5XX_CP_INT_CP_DMA_ERROR) 896 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n"); 897 898 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 899 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS); 900 901 dev_err_ratelimited(gpu->dev->dev, 902 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", 903 val & (1 << 24) ? "WRITE" : "READ", 904 (val & 0xFFFFF) >> 2, val); 905 } 906 907 if (status & A5XX_CP_INT_CP_AHB_ERROR) { 908 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT); 909 const char *access[16] = { "reserved", "reserved", 910 "timestamp lo", "timestamp hi", "pfp read", "pfp write", 911 "", "", "me read", "me write", "", "", "crashdump read", 912 "crashdump write" }; 913 914 dev_err_ratelimited(gpu->dev->dev, 915 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n", 916 status & 0xFFFFF, access[(status >> 24) & 0xF], 917 (status & (1 << 31)), status); 918 } 919 } 920 921 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status) 922 { 923 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) { 924 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS); 925 926 dev_err_ratelimited(gpu->dev->dev, 927 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n", 928 val & (1 << 28) ? "WRITE" : "READ", 929 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3, 930 (val >> 24) & 0xF); 931 932 /* Clear the error */ 933 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4)); 934 935 /* Clear the interrupt */ 936 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, 937 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); 938 } 939 940 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT) 941 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n"); 942 943 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT) 944 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n", 945 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS)); 946 947 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT) 948 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n", 949 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS)); 950 951 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT) 952 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n", 953 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS)); 954 955 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) 956 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n"); 957 958 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 959 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n"); 960 } 961 962 static void a5xx_uche_err_irq(struct msm_gpu *gpu) 963 { 964 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI); 965 966 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO); 967 968 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n", 969 addr); 970 } 971 972 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu) 973 { 974 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n"); 975 } 976 977 static void a5xx_fault_detect_irq(struct msm_gpu *gpu) 978 { 979 struct drm_device *dev = gpu->dev; 980 struct msm_drm_private *priv = dev->dev_private; 981 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 982 983 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", 984 ring ? ring->id : -1, ring ? ring->seqno : 0, 985 gpu_read(gpu, REG_A5XX_RBBM_STATUS), 986 gpu_read(gpu, REG_A5XX_CP_RB_RPTR), 987 gpu_read(gpu, REG_A5XX_CP_RB_WPTR), 988 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI), 989 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), 990 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI), 991 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); 992 993 /* Turn off the hangcheck timer to keep it from bothering us */ 994 del_timer(&gpu->hangcheck_timer); 995 996 queue_work(priv->wq, &gpu->recover_work); 997 } 998 999 #define RBBM_ERROR_MASK \ 1000 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ 1001 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ 1002 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ 1003 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ 1004 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ 1005 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) 1006 1007 static irqreturn_t a5xx_irq(struct msm_gpu *gpu) 1008 { 1009 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS); 1010 1011 /* 1012 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it 1013 * before the source is cleared the interrupt will storm. 1014 */ 1015 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, 1016 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); 1017 1018 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */ 1019 if (status & RBBM_ERROR_MASK) 1020 a5xx_rbbm_err_irq(gpu, status); 1021 1022 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1023 a5xx_cp_err_irq(gpu); 1024 1025 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT) 1026 a5xx_fault_detect_irq(gpu); 1027 1028 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1029 a5xx_uche_err_irq(gpu); 1030 1031 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) 1032 a5xx_gpmu_err_irq(gpu); 1033 1034 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1035 a5xx_preempt_trigger(gpu); 1036 msm_gpu_retire(gpu); 1037 } 1038 1039 if (status & A5XX_RBBM_INT_0_MASK_CP_SW) 1040 a5xx_preempt_irq(gpu); 1041 1042 return IRQ_HANDLED; 1043 } 1044 1045 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { 1046 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE), 1047 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI), 1048 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR), 1049 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, 1050 REG_A5XX_CP_RB_RPTR_ADDR_HI), 1051 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR), 1052 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR), 1053 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL), 1054 }; 1055 1056 static const u32 a5xx_registers[] = { 1057 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, 1058 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, 1059 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, 1060 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841, 1061 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28, 1062 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 1063 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 1064 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, 1065 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 1066 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 1067 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, 1068 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, 1069 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 1070 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 1071 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 1072 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 1073 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A, 1074 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F, 1075 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 1076 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947, 1077 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 1078 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68, 1079 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 1080 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 1081 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 1082 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D, 1083 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5, 1084 0xAC60, 0xAC60, ~0, 1085 }; 1086 1087 static void a5xx_dump(struct msm_gpu *gpu) 1088 { 1089 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n", 1090 gpu_read(gpu, REG_A5XX_RBBM_STATUS)); 1091 adreno_dump(gpu); 1092 } 1093 1094 static int a5xx_pm_resume(struct msm_gpu *gpu) 1095 { 1096 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1097 int ret; 1098 1099 /* Turn on the core power */ 1100 ret = msm_gpu_pm_resume(gpu); 1101 if (ret) 1102 return ret; 1103 1104 if (adreno_is_a510(adreno_gpu)) { 1105 /* Halt the sp_input_clk at HM level */ 1106 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055); 1107 a5xx_set_hwcg(gpu, true); 1108 /* Turn on sp_input_clk at HM level */ 1109 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0); 1110 return 0; 1111 } 1112 1113 /* Turn the RBCCU domain first to limit the chances of voltage droop */ 1114 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); 1115 1116 /* Wait 3 usecs before polling */ 1117 udelay(3); 1118 1119 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1120 (1 << 20), (1 << 20)); 1121 if (ret) { 1122 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n", 1123 gpu->name, 1124 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS)); 1125 return ret; 1126 } 1127 1128 /* Turn on the SP domain */ 1129 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000); 1130 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS, 1131 (1 << 20), (1 << 20)); 1132 if (ret) 1133 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n", 1134 gpu->name); 1135 1136 return ret; 1137 } 1138 1139 static int a5xx_pm_suspend(struct msm_gpu *gpu) 1140 { 1141 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1142 u32 mask = 0xf; 1143 1144 /* A510 has 3 XIN ports in VBIF */ 1145 if (adreno_is_a510(adreno_gpu)) 1146 mask = 0x7; 1147 1148 /* Clear the VBIF pipe before shutting down */ 1149 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask); 1150 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 1151 mask) == mask); 1152 1153 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); 1154 1155 /* 1156 * Reset the VBIF before power collapse to avoid issue with FIFO 1157 * entries 1158 */ 1159 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000); 1160 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000); 1161 1162 return msm_gpu_pm_suspend(gpu); 1163 } 1164 1165 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1166 { 1167 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO, 1168 REG_A5XX_RBBM_PERFCTR_CP_0_HI); 1169 1170 return 0; 1171 } 1172 1173 struct a5xx_crashdumper { 1174 void *ptr; 1175 struct drm_gem_object *bo; 1176 u64 iova; 1177 }; 1178 1179 struct a5xx_gpu_state { 1180 struct msm_gpu_state base; 1181 u32 *hlsqregs; 1182 }; 1183 1184 static int a5xx_crashdumper_init(struct msm_gpu *gpu, 1185 struct a5xx_crashdumper *dumper) 1186 { 1187 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, 1188 SZ_1M, MSM_BO_UNCACHED, gpu->aspace, 1189 &dumper->bo, &dumper->iova); 1190 1191 if (!IS_ERR(dumper->ptr)) 1192 msm_gem_object_set_name(dumper->bo, "crashdump"); 1193 1194 return PTR_ERR_OR_ZERO(dumper->ptr); 1195 } 1196 1197 static int a5xx_crashdumper_run(struct msm_gpu *gpu, 1198 struct a5xx_crashdumper *dumper) 1199 { 1200 u32 val; 1201 1202 if (IS_ERR_OR_NULL(dumper->ptr)) 1203 return -EINVAL; 1204 1205 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, 1206 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); 1207 1208 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); 1209 1210 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val, 1211 val & 0x04, 100, 10000); 1212 } 1213 1214 /* 1215 * These are a list of the registers that need to be read through the HLSQ 1216 * aperture through the crashdumper. These are not nominally accessible from 1217 * the CPU on a secure platform. 1218 */ 1219 static const struct { 1220 u32 type; 1221 u32 regoffset; 1222 u32 count; 1223 } a5xx_hlsq_aperture_regs[] = { 1224 { 0x35, 0xe00, 0x32 }, /* HSLQ non-context */ 1225 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */ 1226 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */ 1227 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */ 1228 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */ 1229 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */ 1230 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */ 1231 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */ 1232 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */ 1233 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */ 1234 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */ 1235 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */ 1236 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */ 1237 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */ 1238 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */ 1239 }; 1240 1241 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, 1242 struct a5xx_gpu_state *a5xx_state) 1243 { 1244 struct a5xx_crashdumper dumper = { 0 }; 1245 u32 offset, count = 0; 1246 u64 *ptr; 1247 int i; 1248 1249 if (a5xx_crashdumper_init(gpu, &dumper)) 1250 return; 1251 1252 /* The script will be written at offset 0 */ 1253 ptr = dumper.ptr; 1254 1255 /* Start writing the data at offset 256k */ 1256 offset = dumper.iova + (256 * SZ_1K); 1257 1258 /* Count how many additional registers to get from the HLSQ aperture */ 1259 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) 1260 count += a5xx_hlsq_aperture_regs[i].count; 1261 1262 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL); 1263 if (!a5xx_state->hlsqregs) 1264 return; 1265 1266 /* Build the crashdump script */ 1267 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { 1268 u32 type = a5xx_hlsq_aperture_regs[i].type; 1269 u32 c = a5xx_hlsq_aperture_regs[i].count; 1270 1271 /* Write the register to select the desired bank */ 1272 *ptr++ = ((u64) type << 8); 1273 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) | 1274 (1 << 21) | 1; 1275 1276 *ptr++ = offset; 1277 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44) 1278 | c; 1279 1280 offset += c * sizeof(u32); 1281 } 1282 1283 /* Write two zeros to close off the script */ 1284 *ptr++ = 0; 1285 *ptr++ = 0; 1286 1287 if (a5xx_crashdumper_run(gpu, &dumper)) { 1288 kfree(a5xx_state->hlsqregs); 1289 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 1290 return; 1291 } 1292 1293 /* Copy the data from the crashdumper to the state */ 1294 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K), 1295 count * sizeof(u32)); 1296 1297 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 1298 } 1299 1300 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) 1301 { 1302 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state), 1303 GFP_KERNEL); 1304 1305 if (!a5xx_state) 1306 return ERR_PTR(-ENOMEM); 1307 1308 /* Temporarily disable hardware clock gating before reading the hw */ 1309 a5xx_set_hwcg(gpu, false); 1310 1311 /* First get the generic state from the adreno core */ 1312 adreno_gpu_state_get(gpu, &(a5xx_state->base)); 1313 1314 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS); 1315 1316 /* Get the HLSQ regs with the help of the crashdumper */ 1317 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state); 1318 1319 a5xx_set_hwcg(gpu, true); 1320 1321 return &a5xx_state->base; 1322 } 1323 1324 static void a5xx_gpu_state_destroy(struct kref *kref) 1325 { 1326 struct msm_gpu_state *state = container_of(kref, 1327 struct msm_gpu_state, ref); 1328 struct a5xx_gpu_state *a5xx_state = container_of(state, 1329 struct a5xx_gpu_state, base); 1330 1331 kfree(a5xx_state->hlsqregs); 1332 1333 adreno_gpu_state_destroy(state); 1334 kfree(a5xx_state); 1335 } 1336 1337 static int a5xx_gpu_state_put(struct msm_gpu_state *state) 1338 { 1339 if (IS_ERR_OR_NULL(state)) 1340 return 1; 1341 1342 return kref_put(&state->ref, a5xx_gpu_state_destroy); 1343 } 1344 1345 1346 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) 1347 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1348 struct drm_printer *p) 1349 { 1350 int i, j; 1351 u32 pos = 0; 1352 struct a5xx_gpu_state *a5xx_state = container_of(state, 1353 struct a5xx_gpu_state, base); 1354 1355 if (IS_ERR_OR_NULL(state)) 1356 return; 1357 1358 adreno_show(gpu, state, p); 1359 1360 /* Dump the additional a5xx HLSQ registers */ 1361 if (!a5xx_state->hlsqregs) 1362 return; 1363 1364 drm_printf(p, "registers-hlsq:\n"); 1365 1366 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { 1367 u32 o = a5xx_hlsq_aperture_regs[i].regoffset; 1368 u32 c = a5xx_hlsq_aperture_regs[i].count; 1369 1370 for (j = 0; j < c; j++, pos++, o++) { 1371 /* 1372 * To keep the crashdump simple we pull the entire range 1373 * for each register type but not all of the registers 1374 * in the range are valid. Fortunately invalid registers 1375 * stick out like a sore thumb with a value of 1376 * 0xdeadbeef 1377 */ 1378 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef) 1379 continue; 1380 1381 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n", 1382 o << 2, a5xx_state->hlsqregs[pos]); 1383 } 1384 } 1385 } 1386 #endif 1387 1388 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) 1389 { 1390 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1391 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 1392 1393 return a5xx_gpu->cur_ring; 1394 } 1395 1396 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu) 1397 { 1398 u64 busy_cycles, busy_time; 1399 1400 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, 1401 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); 1402 1403 busy_time = busy_cycles - gpu->devfreq.busy_cycles; 1404 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000); 1405 1406 gpu->devfreq.busy_cycles = busy_cycles; 1407 1408 if (WARN_ON(busy_time > ~0LU)) 1409 return ~0LU; 1410 1411 return (unsigned long)busy_time; 1412 } 1413 1414 static const struct adreno_gpu_funcs funcs = { 1415 .base = { 1416 .get_param = adreno_get_param, 1417 .hw_init = a5xx_hw_init, 1418 .pm_suspend = a5xx_pm_suspend, 1419 .pm_resume = a5xx_pm_resume, 1420 .recover = a5xx_recover, 1421 .submit = a5xx_submit, 1422 .flush = a5xx_flush, 1423 .active_ring = a5xx_active_ring, 1424 .irq = a5xx_irq, 1425 .destroy = a5xx_destroy, 1426 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) 1427 .show = a5xx_show, 1428 #endif 1429 #if defined(CONFIG_DEBUG_FS) 1430 .debugfs_init = a5xx_debugfs_init, 1431 #endif 1432 .gpu_busy = a5xx_gpu_busy, 1433 .gpu_state_get = a5xx_gpu_state_get, 1434 .gpu_state_put = a5xx_gpu_state_put, 1435 }, 1436 .get_timestamp = a5xx_get_timestamp, 1437 }; 1438 1439 static void check_speed_bin(struct device *dev) 1440 { 1441 struct nvmem_cell *cell; 1442 u32 bin, val; 1443 1444 cell = nvmem_cell_get(dev, "speed_bin"); 1445 1446 /* If a nvmem cell isn't defined, nothing to do */ 1447 if (IS_ERR(cell)) 1448 return; 1449 1450 bin = *((u32 *) nvmem_cell_read(cell, NULL)); 1451 nvmem_cell_put(cell); 1452 1453 val = (1 << bin); 1454 1455 dev_pm_opp_set_supported_hw(dev, &val, 1); 1456 } 1457 1458 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) 1459 { 1460 struct msm_drm_private *priv = dev->dev_private; 1461 struct platform_device *pdev = priv->gpu_pdev; 1462 struct a5xx_gpu *a5xx_gpu = NULL; 1463 struct adreno_gpu *adreno_gpu; 1464 struct msm_gpu *gpu; 1465 int ret; 1466 1467 if (!pdev) { 1468 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n"); 1469 return ERR_PTR(-ENXIO); 1470 } 1471 1472 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL); 1473 if (!a5xx_gpu) 1474 return ERR_PTR(-ENOMEM); 1475 1476 adreno_gpu = &a5xx_gpu->base; 1477 gpu = &adreno_gpu->base; 1478 1479 adreno_gpu->registers = a5xx_registers; 1480 adreno_gpu->reg_offsets = a5xx_register_offsets; 1481 1482 a5xx_gpu->lm_leakage = 0x4E001A; 1483 1484 check_speed_bin(&pdev->dev); 1485 1486 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); 1487 if (ret) { 1488 a5xx_destroy(&(a5xx_gpu->base.base)); 1489 return ERR_PTR(ret); 1490 } 1491 1492 if (gpu->aspace) 1493 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); 1494 1495 /* Set up the preemption specific bits and pieces for each ringbuffer */ 1496 a5xx_preempt_init(gpu); 1497 1498 return gpu; 1499 } 1500