// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		bool sync)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	/*
	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
	 * the rptr shadow
	 */
	if (a5xx_gpu->has_whereami && sync) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
	}

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
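
/*
 * Copy the commands of a submit directly into the ringbuffer instead of
 * issuing them as indirect buffers. Only used for CONFIG_DRM_MSM_GPU_SUDO
 * submits flagged with in_rb (see a5xx_submit()).
 */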
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i, j;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (j = 0; j < dwords; j++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet. But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[j]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring, true);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	/* A WHERE_AM_I packet is not needed after a YIELD */
	a5xx_flush(gpu, ring, false);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}
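
/*
 * Static hardware clock gating (HWCG) configuration: register/value pairs
 * written by a5xx_set_hwcg(). The listed value enables clock gating for the
 * block; 0 is written instead when gating is being disabled.
 */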
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	if (adreno_is_a540(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
	}

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
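
/*
 * Bring up the CP microengine state with a CP_ME_INIT packet on ring 0. The
 * GPU must go idle again before initialization can continue.
 */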
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else if (adreno_is_a510(adreno_gpu)) {
		/* Workaround for token and syncs */
		OUT_RING(ring, 0x00000001);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a5xx_flush(gpu, ring, true);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
	a5xx_flush(gpu, ring, false);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
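
/*
 * Inspect the PFP microcode header to see whether it understands the
 * CP_WHERE_AM_I opcode. has_whereami gates use of the RPTR shadow and
 * preemption in a5xx_hw_init().
 */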
static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
		struct drm_gem_object *obj)
{
	u32 *buf = msm_gem_get_vaddr_active(obj);

	if (IS_ERR(buf))
		return;

	/*
	 * If the lowest nibble is 0xa that is an indication that this microcode
	 * has been patched. The actual version is in dword [3] but we only care
	 * about the patchlevel which is the lowest nibble of dword [3]
	 */
	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
		a5xx_gpu->has_whereami = true;

	msm_gem_put_vaddr(obj);
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}

		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}
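
/*
 * Interrupt sources written to REG_A5XX_RBBM_INT_0_MASK in a5xx_hw_init().
 * RBBM_AHB_ERROR gets special handling in a5xx_irq() so that it is not
 * cleared before its source, which would cause an interrupt storm.
 */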
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	if (adreno_is_a510(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			  (0x200 << 11 | 0x200 << 22));
	} else {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
		if (adreno_is_a530(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
		if (adreno_is_a540(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			  (0x400 << 11 | 0x300 << 22));
	}

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/*
	 * In A5x, CCU can send a context_done event of a particular context to
	 * UCHE which ultimately reaches CP even when there is a valid
	 * transaction of that context inside CCU. This can let CP program
	 * config registers, which will make the "valid transaction" inside
	 * CCU to be interpreted differently. This can cause a GPU fault. This
	 * bug is fixed in the latest A510 revision. To enable this bug fix,
	 * bit[11] of RB_DBG_ECO_CNTL needs to be set to 0, default is 1
	 * (disable). For older A510 versions this bit is unused.
	 */
	if (adreno_is_a510(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	if (!adreno_is_a510(adreno_gpu))
		a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
	 * can't safely use the RPTR shadow or preemption. In either case, the
	 * RPTR shadow should be disabled in hardware.
	 */
	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Disable preemption if WHERE_AM_I isn't available */
	if (!a5xx_gpu->has_whereami && gpu->nr_rings > 1) {
		a5xx_preempt_fini(gpu);
		gpu->nr_rings = 1;
	} else {
		/* Create a privileged buffer for the RPTR shadow */
		if (!a5xx_gpu->shadow_bo) {
			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
				sizeof(u32) * gpu->nr_rings,
				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
				gpu->aspace, &a5xx_gpu->shadow_bo,
				&a5xx_gpu->shadow_iova);

			if (IS_ERR(a5xx_gpu->shadow))
				return PTR_ERR(a5xx_gpu->shadow);
		}

		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
	}

	a5xx_preempt_hw_init(gpu);

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * If the chip that we are using does support loading one, then
	 * try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	} else {
		return ret;
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->gpmu_bo);
	}

	if (a5xx_gpu->shadow_bo) {
		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
		drm_gem_object_put(a5xx_gpu->shadow_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}
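
/*
 * Decode and log the CP error sources (bad opcode, HW fault, DMA error,
 * register protection and AHB faults). Called from a5xx_irq() when the
 * CP_HW_ERROR bit is set in RBBM_INT_0_STATUS.
 */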
static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */
		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
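
/*
 * Readable register ranges (inclusive start/end pairs, terminated by ~0)
 * hooked up to adreno_gpu->registers in a5xx_gpu_init() for the adreno core
 * to walk when dumping registers and capturing crash state.
 */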
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
		return 0;
	}

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u32 mask = 0xf;

	/* A510 has 3 XIN ports in VBIF */
	if (adreno_is_a510(adreno_gpu))
		mask = 0x7;

	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
				mask) == mask);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}
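
/*
 * Scratch buffer used to drive the CP crash dump engine: the dump script is
 * written at offset 0 of the BO and the captured register values are copied
 * back starting at offset 256K (see a5xx_gpu_state_get_hlsq_regs()).
 */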
struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};

static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}

/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture by the crashdumper. These are not nominally accessible from the
 * CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HSLQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};

static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}

static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
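
/*
 * Called when the last reference to the GPU state is dropped: free the extra
 * HLSQ register copy along with the common adreno state.
 */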
static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	/* Only read the gpu busy if the hardware is already active */
	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
		return 0;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	pm_runtime_put(&gpu->pdev->dev);

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}

static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (a5xx_gpu->has_whereami)
		return a5xx_gpu->shadow[ring->id];

	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
}
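
/* GPU function table registered with the msm/adreno core by a5xx_gpu_init() */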
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a5xx_get_rptr,
	},
	.get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */
	val = 0x80;

	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}