/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	np = of_parse_phandle(np, "memory-region", 0);
	if (!np)
		return -EINVAL;

	ret = of_address_to_resource(np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	/* Map the reserved memory region for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten through adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	} else {
		char newname[strlen("qcom/") + strlen(fwname) + 1];

		sprintf(newname, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for fine-grained preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
			/* fall through */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
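	/* All five payload dwords are zero (render mode NULL) */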
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This ensures that the timestamp
	 * is written to memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non-zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/*
		 * Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
		const struct firmware *fw, u64 *iova)
{
	struct drm_gem_object *bo;
	void *ptr;

	ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
		MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);

	if (IS_ERR(ptr))
		return ERR_CAST(ptr);

	memcpy(ptr, &fw->data[4], fw->size - 4);

	msm_gem_put_vaddr(bo);
	return bo;
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
			&a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
			&a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
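	 * Both paths are handled just below: a CP_SET_SECURE_MODE packet when
	 * the zap shader is in place, and a direct SECVID_TRUST_CNTL write
	 * when it is not.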
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */
		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
	0xB9A0, 0xB9BF, ~0
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

#ifdef CONFIG_DEBUG_FS
static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	seq_printf(m, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));

	/*
	 * Temporarily disable hardware clock gating before going into
	 * adreno_show to avoid issues while reading the registers
	 */
	a5xx_set_hwcg(gpu, false);
	adreno_show(gpu, m);
	a5xx_set_hwcg(gpu, true);
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a5xx_show,
#endif
	},
	.get_timestamp = a5xx_get_timestamp,
};

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

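	/*
	 * Hook up the a5xx register dump list and the table that maps the
	 * generic adreno register enums to their a5xx offsets.
	 */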
	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}