1 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License version 2 and 5 * only version 2 as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, 8 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 * GNU General Public License for more details. 11 * 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/types.h> 16 #include <linux/cpumask.h> 17 #include <linux/qcom_scm.h> 18 #include <linux/dma-mapping.h> 19 #include <linux/of_address.h> 20 #include <linux/soc/qcom/mdt_loader.h> 21 #include <linux/pm_opp.h> 22 #include <linux/nvmem-consumer.h> 23 #include <linux/slab.h> 24 #include "msm_gem.h" 25 #include "msm_mmu.h" 26 #include "a5xx_gpu.h" 27 28 extern bool hang_debug; 29 static void a5xx_dump(struct msm_gpu *gpu); 30 31 #define GPU_PAS_ID 13 32 33 static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) 34 { 35 struct device *dev = &gpu->pdev->dev; 36 const struct firmware *fw; 37 struct device_node *np; 38 struct resource r; 39 phys_addr_t mem_phys; 40 ssize_t mem_size; 41 void *mem_region = NULL; 42 int ret; 43 44 if (!IS_ENABLED(CONFIG_ARCH_QCOM)) 45 return -EINVAL; 46 47 np = of_get_child_by_name(dev->of_node, "zap-shader"); 48 if (!np) 49 return -ENODEV; 50 51 np = of_parse_phandle(np, "memory-region", 0); 52 if (!np) 53 return -EINVAL; 54 55 ret = of_address_to_resource(np, 0, &r); 56 if (ret) 57 return ret; 58 59 mem_phys = r.start; 60 mem_size = resource_size(&r); 61 62 /* Request the MDT file for the firmware */ 63 fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); 64 if (IS_ERR(fw)) { 65 DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); 66 return PTR_ERR(fw); 67 } 68 69 /* Figure out how much memory we need */ 70 mem_size = qcom_mdt_get_size(fw); 71 if (mem_size < 0) { 72 ret = mem_size; 73 goto out; 74 } 75 76 /* Allocate memory for the firmware image */ 77 mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC); 78 if (!mem_region) { 79 ret = -ENOMEM; 80 goto out; 81 } 82 83 /* 84 * Load the rest of the MDT 85 * 86 * Note that we could be dealing with two different paths, since 87 * with upstream linux-firmware it would be in a qcom/ subdir.. 88 * adreno_request_fw() handles this, but qcom_mdt_load() does 89 * not. But since we've already gotten thru adreno_request_fw() 90 * we know which of the two cases it is: 91 */ 92 if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { 93 ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, 94 mem_region, mem_phys, mem_size, NULL); 95 } else { 96 char *newname; 97 98 newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname); 99 100 ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, 101 mem_region, mem_phys, mem_size, NULL); 102 kfree(newname); 103 } 104 if (ret) 105 goto out; 106 107 /* Send the image to the secure world */ 108 ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID); 109 if (ret) 110 DRM_DEV_ERROR(dev, "Unable to authorize the image\n"); 111 112 out: 113 if (mem_region) 114 memunmap(mem_region); 115 116 release_firmware(fw); 117 118 return ret; 119 } 120 121 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 122 { 123 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 124 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 125 uint32_t wptr; 126 unsigned long flags; 127 128 spin_lock_irqsave(&ring->lock, flags); 129 130 /* Copy the shadow to the actual register */ 131 ring->cur = ring->next; 132 133 /* Make sure to wrap wptr if we need to */ 134 wptr = get_wptr(ring); 135 136 spin_unlock_irqrestore(&ring->lock, flags); 137 138 /* Make sure everything is posted before making a decision */ 139 mb(); 140 141 /* Update HW if this is the current ring and we are not in preempt */ 142 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) 143 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); 144 } 145 146 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit, 147 struct msm_file_private *ctx) 148 { 149 struct msm_drm_private *priv = gpu->dev->dev_private; 150 struct msm_ringbuffer *ring = submit->ring; 151 struct msm_gem_object *obj; 152 uint32_t *ptr, dwords; 153 unsigned int i; 154 155 for (i = 0; i < submit->nr_cmds; i++) { 156 switch (submit->cmd[i].type) { 157 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 158 break; 159 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 160 if (priv->lastctx == ctx) 161 break; 162 case MSM_SUBMIT_CMD_BUF: 163 /* copy commands into RB: */ 164 obj = submit->bos[submit->cmd[i].idx].obj; 165 dwords = submit->cmd[i].size; 166 167 ptr = msm_gem_get_vaddr(&obj->base); 168 169 /* _get_vaddr() shouldn't fail at this point, 170 * since we've already mapped it once in 171 * submit_reloc() 172 */ 173 if (WARN_ON(!ptr)) 174 return; 175 176 for (i = 0; i < dwords; i++) { 177 /* normally the OUT_PKTn() would wait 178 * for space for the packet. But since 179 * we just OUT_RING() the whole thing, 180 * need to call adreno_wait_ring() 181 * ourself: 182 */ 183 adreno_wait_ring(ring, 1); 184 OUT_RING(ring, ptr[i]); 185 } 186 187 msm_gem_put_vaddr(&obj->base); 188 189 break; 190 } 191 } 192 193 a5xx_flush(gpu, ring); 194 a5xx_preempt_trigger(gpu); 195 196 /* we might not necessarily have a cmd from userspace to 197 * trigger an event to know that submit has completed, so 198 * do this manually: 199 */ 200 a5xx_idle(gpu, ring); 201 ring->memptrs->fence = submit->seqno; 202 msm_gpu_retire(gpu); 203 } 204 205 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 206 struct msm_file_private *ctx) 207 { 208 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 209 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 210 struct msm_drm_private *priv = gpu->dev->dev_private; 211 struct msm_ringbuffer *ring = submit->ring; 212 unsigned int i, ibs = 0; 213 214 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { 215 priv->lastctx = NULL; 216 a5xx_submit_in_rb(gpu, submit, ctx); 217 return; 218 } 219 220 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 221 OUT_RING(ring, 0x02); 222 223 /* Turn off protected mode to write to special registers */ 224 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 225 OUT_RING(ring, 0); 226 227 /* Set the save preemption record for the ring/command */ 228 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 229 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 230 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); 231 232 /* Turn back on protected mode */ 233 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 234 OUT_RING(ring, 1); 235 236 /* Enable local preemption for finegrain preemption */ 237 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 238 OUT_RING(ring, 0x02); 239 240 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ 241 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 242 OUT_RING(ring, 0x02); 243 244 /* Submit the commands */ 245 for (i = 0; i < submit->nr_cmds; i++) { 246 switch (submit->cmd[i].type) { 247 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 248 break; 249 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 250 if (priv->lastctx == ctx) 251 break; 252 case MSM_SUBMIT_CMD_BUF: 253 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 254 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 255 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 256 OUT_RING(ring, submit->cmd[i].size); 257 ibs++; 258 break; 259 } 260 } 261 262 /* 263 * Write the render mode to NULL (0) to indicate to the CP that the IBs 264 * are done rendering - otherwise a lucky preemption would start 265 * replaying from the last checkpoint 266 */ 267 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); 268 OUT_RING(ring, 0); 269 OUT_RING(ring, 0); 270 OUT_RING(ring, 0); 271 OUT_RING(ring, 0); 272 OUT_RING(ring, 0); 273 274 /* Turn off IB level preemptions */ 275 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 276 OUT_RING(ring, 0x01); 277 278 /* Write the fence to the scratch register */ 279 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); 280 OUT_RING(ring, submit->seqno); 281 282 /* 283 * Execute a CACHE_FLUSH_TS event. This will ensure that the 284 * timestamp is written to the memory and then triggers the interrupt 285 */ 286 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 287 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); 288 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); 289 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); 290 OUT_RING(ring, submit->seqno); 291 292 /* Yield the floor on command completion */ 293 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 294 /* 295 * If dword[2:1] are non zero, they specify an address for the CP to 296 * write the value of dword[3] to on preemption complete. Write 0 to 297 * skip the write 298 */ 299 OUT_RING(ring, 0x00); 300 OUT_RING(ring, 0x00); 301 /* Data value - not used if the address above is 0 */ 302 OUT_RING(ring, 0x01); 303 /* Set bit 0 to trigger an interrupt on preempt complete */ 304 OUT_RING(ring, 0x01); 305 306 a5xx_flush(gpu, ring); 307 308 /* Check to see if we need to start preemption */ 309 a5xx_preempt_trigger(gpu); 310 } 311 312 static const struct { 313 u32 offset; 314 u32 value; 315 } a5xx_hwcg[] = { 316 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, 317 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, 318 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, 319 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, 320 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, 321 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, 322 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, 323 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, 324 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, 325 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, 326 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, 327 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, 328 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, 329 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, 330 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, 331 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, 332 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, 333 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, 334 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, 335 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, 336 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, 337 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, 338 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, 339 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, 340 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, 341 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, 342 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, 343 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, 344 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, 345 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, 346 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, 347 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, 348 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, 349 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, 350 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, 351 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, 352 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, 353 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, 354 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, 355 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, 356 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, 357 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, 358 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, 359 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, 360 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, 361 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, 362 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, 363 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, 364 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, 365 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, 366 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, 367 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, 368 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, 369 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, 370 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, 371 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, 372 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, 373 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, 374 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, 375 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, 376 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, 377 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, 378 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, 379 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, 380 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, 381 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, 382 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, 383 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, 384 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, 385 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, 386 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, 387 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555}, 388 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, 389 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, 390 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, 391 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, 392 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, 393 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, 394 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, 395 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, 396 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, 397 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, 398 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, 399 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, 400 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, 401 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, 402 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, 403 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, 404 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, 405 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, 406 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, 407 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} 408 }; 409 410 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) 411 { 412 unsigned int i; 413 414 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++) 415 gpu_write(gpu, a5xx_hwcg[i].offset, 416 state ? a5xx_hwcg[i].value : 0); 417 418 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0); 419 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180); 420 } 421 422 static int a5xx_me_init(struct msm_gpu *gpu) 423 { 424 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 425 struct msm_ringbuffer *ring = gpu->rb[0]; 426 427 OUT_PKT7(ring, CP_ME_INIT, 8); 428 429 OUT_RING(ring, 0x0000002F); 430 431 /* Enable multiple hardware contexts */ 432 OUT_RING(ring, 0x00000003); 433 434 /* Enable error detection */ 435 OUT_RING(ring, 0x20000000); 436 437 /* Don't enable header dump */ 438 OUT_RING(ring, 0x00000000); 439 OUT_RING(ring, 0x00000000); 440 441 /* Specify workarounds for various microcode issues */ 442 if (adreno_is_a530(adreno_gpu)) { 443 /* Workaround for token end syncs 444 * Force a WFI after every direct-render 3D mode draw and every 445 * 2D mode 3 draw 446 */ 447 OUT_RING(ring, 0x0000000B); 448 } else { 449 /* No workarounds enabled */ 450 OUT_RING(ring, 0x00000000); 451 } 452 453 OUT_RING(ring, 0x00000000); 454 OUT_RING(ring, 0x00000000); 455 456 gpu->funcs->flush(gpu, ring); 457 return a5xx_idle(gpu, ring) ? 0 : -EINVAL; 458 } 459 460 static int a5xx_preempt_start(struct msm_gpu *gpu) 461 { 462 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 463 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 464 struct msm_ringbuffer *ring = gpu->rb[0]; 465 466 if (gpu->nr_rings == 1) 467 return 0; 468 469 /* Turn off protected mode to write to special registers */ 470 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 471 OUT_RING(ring, 0); 472 473 /* Set the save preemption record for the ring/command */ 474 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); 475 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); 476 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); 477 478 /* Turn back on protected mode */ 479 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); 480 OUT_RING(ring, 1); 481 482 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); 483 OUT_RING(ring, 0x00); 484 485 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); 486 OUT_RING(ring, 0x01); 487 488 OUT_PKT7(ring, CP_YIELD_ENABLE, 1); 489 OUT_RING(ring, 0x01); 490 491 /* Yield the floor on command completion */ 492 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); 493 OUT_RING(ring, 0x00); 494 OUT_RING(ring, 0x00); 495 OUT_RING(ring, 0x01); 496 OUT_RING(ring, 0x01); 497 498 gpu->funcs->flush(gpu, ring); 499 500 return a5xx_idle(gpu, ring) ? 0 : -EINVAL; 501 } 502 503 static int a5xx_ucode_init(struct msm_gpu *gpu) 504 { 505 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 506 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 507 int ret; 508 509 if (!a5xx_gpu->pm4_bo) { 510 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu, 511 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova); 512 513 514 if (IS_ERR(a5xx_gpu->pm4_bo)) { 515 ret = PTR_ERR(a5xx_gpu->pm4_bo); 516 a5xx_gpu->pm4_bo = NULL; 517 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n", 518 ret); 519 return ret; 520 } 521 522 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw"); 523 } 524 525 if (!a5xx_gpu->pfp_bo) { 526 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu, 527 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova); 528 529 if (IS_ERR(a5xx_gpu->pfp_bo)) { 530 ret = PTR_ERR(a5xx_gpu->pfp_bo); 531 a5xx_gpu->pfp_bo = NULL; 532 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n", 533 ret); 534 return ret; 535 } 536 537 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw"); 538 } 539 540 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, 541 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova); 542 543 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, 544 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova); 545 546 return 0; 547 } 548 549 #define SCM_GPU_ZAP_SHADER_RESUME 0 550 551 static int a5xx_zap_shader_resume(struct msm_gpu *gpu) 552 { 553 int ret; 554 555 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID); 556 if (ret) 557 DRM_ERROR("%s: zap-shader resume failed: %d\n", 558 gpu->name, ret); 559 560 return ret; 561 } 562 563 static int a5xx_zap_shader_init(struct msm_gpu *gpu) 564 { 565 static bool loaded; 566 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 567 struct platform_device *pdev = gpu->pdev; 568 int ret; 569 570 /* 571 * If the zap shader is already loaded into memory we just need to kick 572 * the remote processor to reinitialize it 573 */ 574 if (loaded) 575 return a5xx_zap_shader_resume(gpu); 576 577 /* We need SCM to be able to load the firmware */ 578 if (!qcom_scm_is_available()) { 579 DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n"); 580 return -EPROBE_DEFER; 581 } 582 583 /* Each GPU has a target specific zap shader firmware name to use */ 584 if (!adreno_gpu->info->zapfw) { 585 DRM_DEV_ERROR(&pdev->dev, 586 "Zap shader firmware file not specified for this target\n"); 587 return -ENODEV; 588 } 589 590 ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw); 591 592 loaded = !ret; 593 594 return ret; 595 } 596 597 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ 598 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ 599 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ 600 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ 601 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ 602 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ 603 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ 604 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ 605 A5XX_RBBM_INT_0_MASK_CP_SW | \ 606 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ 607 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ 608 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) 609 610 static int a5xx_hw_init(struct msm_gpu *gpu) 611 { 612 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 613 int ret; 614 615 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); 616 617 /* Make all blocks contribute to the GPU BUSY perf counter */ 618 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); 619 620 /* Enable RBBM error reporting bits */ 621 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001); 622 623 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) { 624 /* 625 * Mask out the activity signals from RB1-3 to avoid false 626 * positives 627 */ 628 629 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, 630 0xF0000000); 631 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, 632 0xFFFFFFFF); 633 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, 634 0xFFFFFFFF); 635 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, 636 0xFFFFFFFF); 637 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, 638 0xFFFFFFFF); 639 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, 640 0xFFFFFFFF); 641 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, 642 0xFFFFFFFF); 643 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, 644 0xFFFFFFFF); 645 } 646 647 /* Enable fault detection */ 648 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL, 649 (1 << 30) | 0xFFFF); 650 651 /* Turn on performance counters */ 652 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01); 653 654 /* Select CP0 to always count cycles */ 655 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); 656 657 /* Select RBBM0 to countable 6 to get the busy status for devfreq */ 658 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); 659 660 /* Increase VFD cache access so LRZ and other data gets evicted less */ 661 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); 662 663 /* Disable L2 bypass in the UCHE */ 664 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); 665 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); 666 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); 667 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); 668 669 /* Set the GMEM VA range (0 to gpu->gmem) */ 670 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); 671 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000); 672 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO, 673 0x00100000 + adreno_gpu->gmem - 1); 674 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000); 675 676 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40); 677 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40); 678 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); 679 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); 680 681 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22)); 682 683 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) 684 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); 685 686 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100); 687 688 /* Enable USE_RETENTION_FLOPS */ 689 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000); 690 691 /* Enable ME/PFP split notification */ 692 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); 693 694 /* Enable HWCG */ 695 a5xx_set_hwcg(gpu, true); 696 697 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); 698 699 /* Set the highest bank bit */ 700 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7); 701 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1); 702 703 /* Protect registers from the CP */ 704 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007); 705 706 /* RBBM */ 707 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4)); 708 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8)); 709 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16)); 710 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32)); 711 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64)); 712 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64)); 713 714 /* Content protect */ 715 gpu_write(gpu, REG_A5XX_CP_PROTECT(6), 716 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 717 16)); 718 gpu_write(gpu, REG_A5XX_CP_PROTECT(7), 719 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2)); 720 721 /* CP */ 722 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64)); 723 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8)); 724 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32)); 725 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1)); 726 727 /* RB */ 728 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1)); 729 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2)); 730 731 /* VPC */ 732 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8)); 733 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4)); 734 735 /* UCHE */ 736 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16)); 737 738 if (adreno_is_a530(adreno_gpu)) 739 gpu_write(gpu, REG_A5XX_CP_PROTECT(17), 740 ADRENO_PROTECT_RW(0x10000, 0x8000)); 741 742 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0); 743 /* 744 * Disable the trusted memory range - we don't actually supported secure 745 * memory rendering at this point in time and we don't want to block off 746 * part of the virtual memory space. 747 */ 748 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 749 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); 750 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); 751 752 ret = adreno_hw_init(gpu); 753 if (ret) 754 return ret; 755 756 a5xx_preempt_hw_init(gpu); 757 758 a5xx_gpmu_ucode_init(gpu); 759 760 ret = a5xx_ucode_init(gpu); 761 if (ret) 762 return ret; 763 764 /* Disable the interrupts through the initial bringup stage */ 765 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK); 766 767 /* Clear ME_HALT to start the micro engine */ 768 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0); 769 ret = a5xx_me_init(gpu); 770 if (ret) 771 return ret; 772 773 ret = a5xx_power_init(gpu); 774 if (ret) 775 return ret; 776 777 /* 778 * Send a pipeline event stat to get misbehaving counters to start 779 * ticking correctly 780 */ 781 if (adreno_is_a530(adreno_gpu)) { 782 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); 783 OUT_RING(gpu->rb[0], 0x0F); 784 785 gpu->funcs->flush(gpu, gpu->rb[0]); 786 if (!a5xx_idle(gpu, gpu->rb[0])) 787 return -EINVAL; 788 } 789 790 /* 791 * Try to load a zap shader into the secure world. If successful 792 * we can use the CP to switch out of secure mode. If not then we 793 * have no resource but to try to switch ourselves out manually. If we 794 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will 795 * be blocked and a permissions violation will soon follow. 796 */ 797 ret = a5xx_zap_shader_init(gpu); 798 if (!ret) { 799 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); 800 OUT_RING(gpu->rb[0], 0x00000000); 801 802 gpu->funcs->flush(gpu, gpu->rb[0]); 803 if (!a5xx_idle(gpu, gpu->rb[0])) 804 return -EINVAL; 805 } else { 806 /* Print a warning so if we die, we know why */ 807 dev_warn_once(gpu->dev->dev, 808 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); 809 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); 810 } 811 812 /* Last step - yield the ringbuffer */ 813 a5xx_preempt_start(gpu); 814 815 return 0; 816 } 817 818 static void a5xx_recover(struct msm_gpu *gpu) 819 { 820 int i; 821 822 adreno_dump_info(gpu); 823 824 for (i = 0; i < 8; i++) { 825 printk("CP_SCRATCH_REG%d: %u\n", i, 826 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i))); 827 } 828 829 if (hang_debug) 830 a5xx_dump(gpu); 831 832 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1); 833 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD); 834 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0); 835 adreno_recover(gpu); 836 } 837 838 static void a5xx_destroy(struct msm_gpu *gpu) 839 { 840 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 841 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 842 843 DBG("%s", gpu->name); 844 845 a5xx_preempt_fini(gpu); 846 847 if (a5xx_gpu->pm4_bo) { 848 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace); 849 drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo); 850 } 851 852 if (a5xx_gpu->pfp_bo) { 853 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace); 854 drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo); 855 } 856 857 if (a5xx_gpu->gpmu_bo) { 858 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace); 859 drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo); 860 } 861 862 adreno_gpu_cleanup(adreno_gpu); 863 kfree(a5xx_gpu); 864 } 865 866 static inline bool _a5xx_check_idle(struct msm_gpu *gpu) 867 { 868 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY) 869 return false; 870 871 /* 872 * Nearly every abnormality ends up pausing the GPU and triggering a 873 * fault so we can safely just watch for this one interrupt to fire 874 */ 875 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) & 876 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); 877 } 878 879 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 880 { 881 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 882 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 883 884 if (ring != a5xx_gpu->cur_ring) { 885 WARN(1, "Tried to idle a non-current ringbuffer\n"); 886 return false; 887 } 888 889 /* wait for CP to drain ringbuffer: */ 890 if (!adreno_idle(gpu, ring)) 891 return false; 892 893 if (spin_until(_a5xx_check_idle(gpu))) { 894 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", 895 gpu->name, __builtin_return_address(0), 896 gpu_read(gpu, REG_A5XX_RBBM_STATUS), 897 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), 898 gpu_read(gpu, REG_A5XX_CP_RB_RPTR), 899 gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); 900 return false; 901 } 902 903 return true; 904 } 905 906 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags) 907 { 908 struct msm_gpu *gpu = arg; 909 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n", 910 iova, flags, 911 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)), 912 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)), 913 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)), 914 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7))); 915 916 return -EFAULT; 917 } 918 919 static void a5xx_cp_err_irq(struct msm_gpu *gpu) 920 { 921 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS); 922 923 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) { 924 u32 val; 925 926 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0); 927 928 /* 929 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so 930 * read it twice 931 */ 932 933 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); 934 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA); 935 936 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n", 937 val); 938 } 939 940 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR) 941 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n", 942 gpu_read(gpu, REG_A5XX_CP_HW_FAULT)); 943 944 if (status & A5XX_CP_INT_CP_DMA_ERROR) 945 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n"); 946 947 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { 948 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS); 949 950 dev_err_ratelimited(gpu->dev->dev, 951 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", 952 val & (1 << 24) ? "WRITE" : "READ", 953 (val & 0xFFFFF) >> 2, val); 954 } 955 956 if (status & A5XX_CP_INT_CP_AHB_ERROR) { 957 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT); 958 const char *access[16] = { "reserved", "reserved", 959 "timestamp lo", "timestamp hi", "pfp read", "pfp write", 960 "", "", "me read", "me write", "", "", "crashdump read", 961 "crashdump write" }; 962 963 dev_err_ratelimited(gpu->dev->dev, 964 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n", 965 status & 0xFFFFF, access[(status >> 24) & 0xF], 966 (status & (1 << 31)), status); 967 } 968 } 969 970 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status) 971 { 972 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) { 973 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS); 974 975 dev_err_ratelimited(gpu->dev->dev, 976 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n", 977 val & (1 << 28) ? "WRITE" : "READ", 978 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3, 979 (val >> 24) & 0xF); 980 981 /* Clear the error */ 982 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4)); 983 984 /* Clear the interrupt */ 985 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, 986 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); 987 } 988 989 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT) 990 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n"); 991 992 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT) 993 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n", 994 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS)); 995 996 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT) 997 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n", 998 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS)); 999 1000 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT) 1001 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n", 1002 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS)); 1003 1004 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) 1005 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n"); 1006 1007 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) 1008 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n"); 1009 } 1010 1011 static void a5xx_uche_err_irq(struct msm_gpu *gpu) 1012 { 1013 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI); 1014 1015 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO); 1016 1017 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n", 1018 addr); 1019 } 1020 1021 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu) 1022 { 1023 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n"); 1024 } 1025 1026 static void a5xx_fault_detect_irq(struct msm_gpu *gpu) 1027 { 1028 struct drm_device *dev = gpu->dev; 1029 struct msm_drm_private *priv = dev->dev_private; 1030 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); 1031 1032 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", 1033 ring ? ring->id : -1, ring ? ring->seqno : 0, 1034 gpu_read(gpu, REG_A5XX_RBBM_STATUS), 1035 gpu_read(gpu, REG_A5XX_CP_RB_RPTR), 1036 gpu_read(gpu, REG_A5XX_CP_RB_WPTR), 1037 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI), 1038 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), 1039 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI), 1040 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); 1041 1042 /* Turn off the hangcheck timer to keep it from bothering us */ 1043 del_timer(&gpu->hangcheck_timer); 1044 1045 queue_work(priv->wq, &gpu->recover_work); 1046 } 1047 1048 #define RBBM_ERROR_MASK \ 1049 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \ 1050 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \ 1051 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \ 1052 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \ 1053 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \ 1054 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW) 1055 1056 static irqreturn_t a5xx_irq(struct msm_gpu *gpu) 1057 { 1058 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS); 1059 1060 /* 1061 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it 1062 * before the source is cleared the interrupt will storm. 1063 */ 1064 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, 1065 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR); 1066 1067 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */ 1068 if (status & RBBM_ERROR_MASK) 1069 a5xx_rbbm_err_irq(gpu, status); 1070 1071 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR) 1072 a5xx_cp_err_irq(gpu); 1073 1074 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT) 1075 a5xx_fault_detect_irq(gpu); 1076 1077 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) 1078 a5xx_uche_err_irq(gpu); 1079 1080 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) 1081 a5xx_gpmu_err_irq(gpu); 1082 1083 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { 1084 a5xx_preempt_trigger(gpu); 1085 msm_gpu_retire(gpu); 1086 } 1087 1088 if (status & A5XX_RBBM_INT_0_MASK_CP_SW) 1089 a5xx_preempt_irq(gpu); 1090 1091 return IRQ_HANDLED; 1092 } 1093 1094 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { 1095 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE), 1096 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI), 1097 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR), 1098 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, 1099 REG_A5XX_CP_RB_RPTR_ADDR_HI), 1100 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR), 1101 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR), 1102 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL), 1103 }; 1104 1105 static const u32 a5xx_registers[] = { 1106 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, 1107 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, 1108 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, 1109 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841, 1110 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28, 1111 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 1112 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 1113 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, 1114 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 1115 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 1116 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, 1117 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, 1118 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 1119 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 1120 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 1121 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 1122 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A, 1123 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F, 1124 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 1125 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947, 1126 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 1127 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68, 1128 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 1129 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 1130 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 1131 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D, 1132 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5, 1133 0xAC60, 0xAC60, ~0, 1134 }; 1135 1136 static void a5xx_dump(struct msm_gpu *gpu) 1137 { 1138 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n", 1139 gpu_read(gpu, REG_A5XX_RBBM_STATUS)); 1140 adreno_dump(gpu); 1141 } 1142 1143 static int a5xx_pm_resume(struct msm_gpu *gpu) 1144 { 1145 int ret; 1146 1147 /* Turn on the core power */ 1148 ret = msm_gpu_pm_resume(gpu); 1149 if (ret) 1150 return ret; 1151 1152 /* Turn the RBCCU domain first to limit the chances of voltage droop */ 1153 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); 1154 1155 /* Wait 3 usecs before polling */ 1156 udelay(3); 1157 1158 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1159 (1 << 20), (1 << 20)); 1160 if (ret) { 1161 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n", 1162 gpu->name, 1163 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS)); 1164 return ret; 1165 } 1166 1167 /* Turn on the SP domain */ 1168 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000); 1169 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS, 1170 (1 << 20), (1 << 20)); 1171 if (ret) 1172 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n", 1173 gpu->name); 1174 1175 return ret; 1176 } 1177 1178 static int a5xx_pm_suspend(struct msm_gpu *gpu) 1179 { 1180 /* Clear the VBIF pipe before shutting down */ 1181 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF); 1182 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF); 1183 1184 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0); 1185 1186 /* 1187 * Reset the VBIF before power collapse to avoid issue with FIFO 1188 * entries 1189 */ 1190 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000); 1191 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000); 1192 1193 return msm_gpu_pm_suspend(gpu); 1194 } 1195 1196 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 1197 { 1198 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO, 1199 REG_A5XX_RBBM_PERFCTR_CP_0_HI); 1200 1201 return 0; 1202 } 1203 1204 struct a5xx_crashdumper { 1205 void *ptr; 1206 struct drm_gem_object *bo; 1207 u64 iova; 1208 }; 1209 1210 struct a5xx_gpu_state { 1211 struct msm_gpu_state base; 1212 u32 *hlsqregs; 1213 }; 1214 1215 static int a5xx_crashdumper_init(struct msm_gpu *gpu, 1216 struct a5xx_crashdumper *dumper) 1217 { 1218 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, 1219 SZ_1M, MSM_BO_UNCACHED, gpu->aspace, 1220 &dumper->bo, &dumper->iova); 1221 1222 if (!IS_ERR(dumper->ptr)) 1223 msm_gem_object_set_name(dumper->bo, "crashdump"); 1224 1225 return PTR_ERR_OR_ZERO(dumper->ptr); 1226 } 1227 1228 static int a5xx_crashdumper_run(struct msm_gpu *gpu, 1229 struct a5xx_crashdumper *dumper) 1230 { 1231 u32 val; 1232 1233 if (IS_ERR_OR_NULL(dumper->ptr)) 1234 return -EINVAL; 1235 1236 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, 1237 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); 1238 1239 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); 1240 1241 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val, 1242 val & 0x04, 100, 10000); 1243 } 1244 1245 /* 1246 * These are a list of the registers that need to be read through the HLSQ 1247 * aperture through the crashdumper. These are not nominally accessible from 1248 * the CPU on a secure platform. 1249 */ 1250 static const struct { 1251 u32 type; 1252 u32 regoffset; 1253 u32 count; 1254 } a5xx_hlsq_aperture_regs[] = { 1255 { 0x35, 0xe00, 0x32 }, /* HSLQ non-context */ 1256 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */ 1257 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */ 1258 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */ 1259 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */ 1260 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */ 1261 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */ 1262 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */ 1263 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */ 1264 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */ 1265 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */ 1266 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */ 1267 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */ 1268 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */ 1269 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */ 1270 }; 1271 1272 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, 1273 struct a5xx_gpu_state *a5xx_state) 1274 { 1275 struct a5xx_crashdumper dumper = { 0 }; 1276 u32 offset, count = 0; 1277 u64 *ptr; 1278 int i; 1279 1280 if (a5xx_crashdumper_init(gpu, &dumper)) 1281 return; 1282 1283 /* The script will be written at offset 0 */ 1284 ptr = dumper.ptr; 1285 1286 /* Start writing the data at offset 256k */ 1287 offset = dumper.iova + (256 * SZ_1K); 1288 1289 /* Count how many additional registers to get from the HLSQ aperture */ 1290 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) 1291 count += a5xx_hlsq_aperture_regs[i].count; 1292 1293 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL); 1294 if (!a5xx_state->hlsqregs) 1295 return; 1296 1297 /* Build the crashdump script */ 1298 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { 1299 u32 type = a5xx_hlsq_aperture_regs[i].type; 1300 u32 c = a5xx_hlsq_aperture_regs[i].count; 1301 1302 /* Write the register to select the desired bank */ 1303 *ptr++ = ((u64) type << 8); 1304 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) | 1305 (1 << 21) | 1; 1306 1307 *ptr++ = offset; 1308 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44) 1309 | c; 1310 1311 offset += c * sizeof(u32); 1312 } 1313 1314 /* Write two zeros to close off the script */ 1315 *ptr++ = 0; 1316 *ptr++ = 0; 1317 1318 if (a5xx_crashdumper_run(gpu, &dumper)) { 1319 kfree(a5xx_state->hlsqregs); 1320 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 1321 return; 1322 } 1323 1324 /* Copy the data from the crashdumper to the state */ 1325 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K), 1326 count * sizeof(u32)); 1327 1328 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 1329 } 1330 1331 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) 1332 { 1333 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state), 1334 GFP_KERNEL); 1335 1336 if (!a5xx_state) 1337 return ERR_PTR(-ENOMEM); 1338 1339 /* Temporarily disable hardware clock gating before reading the hw */ 1340 a5xx_set_hwcg(gpu, false); 1341 1342 /* First get the generic state from the adreno core */ 1343 adreno_gpu_state_get(gpu, &(a5xx_state->base)); 1344 1345 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS); 1346 1347 /* Get the HLSQ regs with the help of the crashdumper */ 1348 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state); 1349 1350 a5xx_set_hwcg(gpu, true); 1351 1352 return &a5xx_state->base; 1353 } 1354 1355 static void a5xx_gpu_state_destroy(struct kref *kref) 1356 { 1357 struct msm_gpu_state *state = container_of(kref, 1358 struct msm_gpu_state, ref); 1359 struct a5xx_gpu_state *a5xx_state = container_of(state, 1360 struct a5xx_gpu_state, base); 1361 1362 kfree(a5xx_state->hlsqregs); 1363 1364 adreno_gpu_state_destroy(state); 1365 kfree(a5xx_state); 1366 } 1367 1368 int a5xx_gpu_state_put(struct msm_gpu_state *state) 1369 { 1370 if (IS_ERR_OR_NULL(state)) 1371 return 1; 1372 1373 return kref_put(&state->ref, a5xx_gpu_state_destroy); 1374 } 1375 1376 1377 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) 1378 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1379 struct drm_printer *p) 1380 { 1381 int i, j; 1382 u32 pos = 0; 1383 struct a5xx_gpu_state *a5xx_state = container_of(state, 1384 struct a5xx_gpu_state, base); 1385 1386 if (IS_ERR_OR_NULL(state)) 1387 return; 1388 1389 adreno_show(gpu, state, p); 1390 1391 /* Dump the additional a5xx HLSQ registers */ 1392 if (!a5xx_state->hlsqregs) 1393 return; 1394 1395 drm_printf(p, "registers-hlsq:\n"); 1396 1397 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { 1398 u32 o = a5xx_hlsq_aperture_regs[i].regoffset; 1399 u32 c = a5xx_hlsq_aperture_regs[i].count; 1400 1401 for (j = 0; j < c; j++, pos++, o++) { 1402 /* 1403 * To keep the crashdump simple we pull the entire range 1404 * for each register type but not all of the registers 1405 * in the range are valid. Fortunately invalid registers 1406 * stick out like a sore thumb with a value of 1407 * 0xdeadbeef 1408 */ 1409 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef) 1410 continue; 1411 1412 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n", 1413 o << 2, a5xx_state->hlsqregs[pos]); 1414 } 1415 } 1416 } 1417 #endif 1418 1419 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) 1420 { 1421 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1422 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); 1423 1424 return a5xx_gpu->cur_ring; 1425 } 1426 1427 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu) 1428 { 1429 u64 busy_cycles, busy_time; 1430 1431 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, 1432 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); 1433 1434 busy_time = busy_cycles - gpu->devfreq.busy_cycles; 1435 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000); 1436 1437 gpu->devfreq.busy_cycles = busy_cycles; 1438 1439 if (WARN_ON(busy_time > ~0LU)) 1440 return ~0LU; 1441 1442 return (unsigned long)busy_time; 1443 } 1444 1445 static const struct adreno_gpu_funcs funcs = { 1446 .base = { 1447 .get_param = adreno_get_param, 1448 .hw_init = a5xx_hw_init, 1449 .pm_suspend = a5xx_pm_suspend, 1450 .pm_resume = a5xx_pm_resume, 1451 .recover = a5xx_recover, 1452 .submit = a5xx_submit, 1453 .flush = a5xx_flush, 1454 .active_ring = a5xx_active_ring, 1455 .irq = a5xx_irq, 1456 .destroy = a5xx_destroy, 1457 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) 1458 .show = a5xx_show, 1459 #endif 1460 #if defined(CONFIG_DEBUG_FS) 1461 .debugfs_init = a5xx_debugfs_init, 1462 #endif 1463 .gpu_busy = a5xx_gpu_busy, 1464 .gpu_state_get = a5xx_gpu_state_get, 1465 .gpu_state_put = a5xx_gpu_state_put, 1466 }, 1467 .get_timestamp = a5xx_get_timestamp, 1468 }; 1469 1470 static void check_speed_bin(struct device *dev) 1471 { 1472 struct nvmem_cell *cell; 1473 u32 bin, val; 1474 1475 cell = nvmem_cell_get(dev, "speed_bin"); 1476 1477 /* If a nvmem cell isn't defined, nothing to do */ 1478 if (IS_ERR(cell)) 1479 return; 1480 1481 bin = *((u32 *) nvmem_cell_read(cell, NULL)); 1482 nvmem_cell_put(cell); 1483 1484 val = (1 << bin); 1485 1486 dev_pm_opp_set_supported_hw(dev, &val, 1); 1487 } 1488 1489 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) 1490 { 1491 struct msm_drm_private *priv = dev->dev_private; 1492 struct platform_device *pdev = priv->gpu_pdev; 1493 struct a5xx_gpu *a5xx_gpu = NULL; 1494 struct adreno_gpu *adreno_gpu; 1495 struct msm_gpu *gpu; 1496 int ret; 1497 1498 if (!pdev) { 1499 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n"); 1500 return ERR_PTR(-ENXIO); 1501 } 1502 1503 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL); 1504 if (!a5xx_gpu) 1505 return ERR_PTR(-ENOMEM); 1506 1507 adreno_gpu = &a5xx_gpu->base; 1508 gpu = &adreno_gpu->base; 1509 1510 adreno_gpu->registers = a5xx_registers; 1511 adreno_gpu->reg_offsets = a5xx_register_offsets; 1512 1513 a5xx_gpu->lm_leakage = 0x4E001A; 1514 1515 check_speed_bin(&pdev->dev); 1516 1517 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); 1518 if (ret) { 1519 a5xx_destroy(&(a5xx_gpu->base.base)); 1520 return ERR_PTR(ret); 1521 } 1522 1523 if (gpu->aspace) 1524 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); 1525 1526 /* Set up the preemption specific bits and pieces for each ringbuffer */ 1527 a5xx_preempt_init(gpu); 1528 1529 return gpu; 1530 } 1531