/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	np = of_parse_phandle(np, "memory-region", 0);
	if (!np)
		return -EINVAL;

	ret = of_address_to_resource(np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not.  But since we've already gotten thru adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	} else {
		char newname[strlen("qcom/") + strlen(fwname) + 1];

		sprintf(newname, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to memory and then trigger the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
		const struct firmware *fw, u64 *iova)
{
	struct drm_gem_object *bo;
	void *ptr;

	ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
		MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);

	if (IS_ERR(ptr))
		return ERR_CAST(ptr);

	memcpy(ptr, &fw->data[4], fw->size - 4);

	msm_gem_put_vaddr(bo);
	return bo;
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
			&a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
			&a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
	0xB9A0, 0xB9BF, ~0
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

#ifdef CONFIG_DEBUG_FS
static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	seq_printf(m, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));

	/*
	 * Temporarily disable hardware clock gating before going into
	 * adreno_show to avoid issues while reading the registers
	 */
	a5xx_set_hwcg(gpu, false);
	adreno_show(gpu, m);
	a5xx_set_hwcg(gpu, true);
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a5xx_show,
#endif
		.gpu_busy = a5xx_gpu_busy,
	},
	.get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 bin, val;

	cell = nvmem_cell_get(dev, "speed_bin");

	/* If an nvmem cell isn't defined, nothing to do */
	if (IS_ERR(cell))
		return;

	bin = *((u32 *) nvmem_cell_read(cell, NULL));
	nvmem_cell_put(cell);

	val = (1 << bin);

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}