1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved. 3 */ 4 #include "a4xx_gpu.h" 5 6 #define A4XX_INT0_MASK \ 7 (A4XX_INT0_RBBM_AHB_ERROR | \ 8 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \ 9 A4XX_INT0_CP_T0_PACKET_IN_IB | \ 10 A4XX_INT0_CP_OPCODE_ERROR | \ 11 A4XX_INT0_CP_RESERVED_BIT_ERROR | \ 12 A4XX_INT0_CP_HW_FAULT | \ 13 A4XX_INT0_CP_IB1_INT | \ 14 A4XX_INT0_CP_IB2_INT | \ 15 A4XX_INT0_CP_RB_INT | \ 16 A4XX_INT0_CP_REG_PROTECT_FAULT | \ 17 A4XX_INT0_CP_AHB_ERROR_HALT | \ 18 A4XX_INT0_CACHE_FLUSH_TS | \ 19 A4XX_INT0_UCHE_OOB_ACCESS) 20 21 extern bool hang_debug; 22 static void a4xx_dump(struct msm_gpu *gpu); 23 static bool a4xx_idle(struct msm_gpu *gpu); 24 25 static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) 26 { 27 struct msm_drm_private *priv = gpu->dev->dev_private; 28 struct msm_ringbuffer *ring = submit->ring; 29 unsigned int i; 30 31 for (i = 0; i < submit->nr_cmds; i++) { 32 switch (submit->cmd[i].type) { 33 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 34 /* ignore IB-targets */ 35 break; 36 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: 37 /* ignore if there has not been a ctx switch: */ 38 if (priv->lastctx == submit->queue->ctx) 39 break; 40 fallthrough; 41 case MSM_SUBMIT_CMD_BUF: 42 OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2); 43 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 44 OUT_RING(ring, submit->cmd[i].size); 45 OUT_PKT2(ring); 46 break; 47 } 48 } 49 50 OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); 51 OUT_RING(ring, submit->seqno); 52 53 /* Flush HLSQ lazy updates to make sure there is nothing 54 * pending for indirect loads after the timestamp has 55 * passed: 56 */ 57 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 58 OUT_RING(ring, HLSQ_FLUSH); 59 60 /* wait for idle before cache flush/interrupt */ 61 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); 62 OUT_RING(ring, 0x00000000); 63 64 /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ 65 OUT_PKT3(ring, CP_EVENT_WRITE, 3); 66 OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); 67 OUT_RING(ring, rbmemptr(ring, fence)); 68 OUT_RING(ring, submit->seqno); 69 70 adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); 71 } 72 73 /* 74 * a4xx_enable_hwcg() - Program the clock control registers 75 * @device: The adreno device pointer 76 */ 77 static void a4xx_enable_hwcg(struct msm_gpu *gpu) 78 { 79 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 80 unsigned int i; 81 for (i = 0; i < 4; i++) 82 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202); 83 for (i = 0; i < 4; i++) 84 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222); 85 for (i = 0; i < 4; i++) 86 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7); 87 for (i = 0; i < 4; i++) 88 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111); 89 for (i = 0; i < 4; i++) 90 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222); 91 for (i = 0; i < 4; i++) 92 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222); 93 for (i = 0; i < 4; i++) 94 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104); 95 for (i = 0; i < 4; i++) 96 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081); 97 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222); 98 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222); 99 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000); 100 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000); 101 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444); 102 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112); 103 for (i = 0; i < 4; i++) 104 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222); 105 106 /* Disable L1 clocking in A420 due to CCU issues with it */ 107 for (i = 0; i < 4; i++) { 108 if (adreno_is_a420(adreno_gpu)) { 109 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i), 110 0x00002020); 111 } else { 112 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i), 113 0x00022020); 114 } 115 } 116 117 /* No CCU for A405 */ 118 if (!adreno_is_a405(adreno_gpu)) { 119 for (i = 0; i < 4; i++) { 120 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), 121 0x00000922); 122 } 123 124 for (i = 0; i < 4; i++) { 125 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), 126 0x00000000); 127 } 128 129 for (i = 0; i < 4; i++) { 130 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), 131 0x00000001); 132 } 133 } 134 135 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); 136 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104); 137 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222); 138 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022); 139 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F); 140 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022); 141 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222); 142 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104); 143 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); 144 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); 145 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); 146 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); 147 /* Early A430's have a timing issue with SP/TP power collapse; 148 disabling HW clock gating prevents it. */ 149 if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2) 150 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0); 151 else 152 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); 153 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0); 154 } 155 156 157 static bool a4xx_me_init(struct msm_gpu *gpu) 158 { 159 struct msm_ringbuffer *ring = gpu->rb[0]; 160 161 OUT_PKT3(ring, CP_ME_INIT, 17); 162 OUT_RING(ring, 0x000003f7); 163 OUT_RING(ring, 0x00000000); 164 OUT_RING(ring, 0x00000000); 165 OUT_RING(ring, 0x00000000); 166 OUT_RING(ring, 0x00000080); 167 OUT_RING(ring, 0x00000100); 168 OUT_RING(ring, 0x00000180); 169 OUT_RING(ring, 0x00006600); 170 OUT_RING(ring, 0x00000150); 171 OUT_RING(ring, 0x0000014e); 172 OUT_RING(ring, 0x00000154); 173 OUT_RING(ring, 0x00000001); 174 OUT_RING(ring, 0x00000000); 175 OUT_RING(ring, 0x00000000); 176 OUT_RING(ring, 0x00000000); 177 OUT_RING(ring, 0x00000000); 178 OUT_RING(ring, 0x00000000); 179 180 adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); 181 return a4xx_idle(gpu); 182 } 183 184 static int a4xx_hw_init(struct msm_gpu *gpu) 185 { 186 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 187 struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu); 188 uint32_t *ptr, len; 189 int i, ret; 190 191 if (adreno_is_a405(adreno_gpu)) { 192 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); 193 } else if (adreno_is_a420(adreno_gpu)) { 194 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); 195 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); 196 gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); 197 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); 198 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); 199 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); 200 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); 201 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); 202 } else if (adreno_is_a430(adreno_gpu)) { 203 gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); 204 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); 205 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); 206 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); 207 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); 208 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); 209 } else { 210 BUG(); 211 } 212 213 /* Make all blocks contribute to the GPU BUSY perf counter */ 214 gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff); 215 216 /* Tune the hystersis counters for SP and CP idle detection */ 217 gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); 218 gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); 219 220 if (adreno_is_a430(adreno_gpu)) { 221 gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); 222 } 223 224 /* Enable the RBBM error reporting bits */ 225 gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001); 226 227 /* Enable AHB error reporting*/ 228 gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff); 229 230 /* Enable power counters*/ 231 gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030); 232 233 /* 234 * Turn on hang detection - this spews a lot of useful information 235 * into the RBBM registers on a hang: 236 */ 237 gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL, 238 (1 << 30) | 0xFFFF); 239 240 gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, 241 (unsigned int)(a4xx_gpu->ocmem.base >> 14)); 242 243 /* Turn on performance counters: */ 244 gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); 245 246 /* use the first CP counter for timestamp queries.. userspace may set 247 * this as well but it selects the same counter/countable: 248 */ 249 gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT); 250 251 if (adreno_is_a430(adreno_gpu)) 252 gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); 253 254 /* Disable L2 bypass to avoid UCHE out of bounds errors */ 255 gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); 256 gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); 257 258 gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | 259 (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); 260 261 /* On A430 enable SP regfile sleep for power savings */ 262 /* TODO downstream does this for !420, so maybe applies for 405 too? */ 263 if (!adreno_is_a420(adreno_gpu)) { 264 gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, 265 0x00000441); 266 gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, 267 0x00000441); 268 } 269 270 a4xx_enable_hwcg(gpu); 271 272 /* 273 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2 274 * due to timing issue with HLSQ_TP_CLK_EN 275 */ 276 if (adreno_is_a420(adreno_gpu)) { 277 unsigned int val; 278 val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ); 279 val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK; 280 val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT; 281 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); 282 } 283 284 /* setup access protection: */ 285 gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007); 286 287 /* RBBM registers */ 288 gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010); 289 gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020); 290 gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040); 291 gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080); 292 gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100); 293 gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200); 294 295 /* CP registers */ 296 gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800); 297 gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600); 298 299 300 /* RB registers */ 301 gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300); 302 303 /* HLSQ registers */ 304 gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800); 305 306 /* VPC registers */ 307 gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980); 308 309 /* SMMU registers */ 310 gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000); 311 312 gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK); 313 314 ret = adreno_hw_init(gpu); 315 if (ret) 316 return ret; 317 318 /* 319 * Use the default ringbuffer size and block size but disable the RPTR 320 * shadow 321 */ 322 gpu_write(gpu, REG_A4XX_CP_RB_CNTL, 323 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); 324 325 /* Set the ringbuffer address */ 326 gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); 327 328 /* Load PM4: */ 329 ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data); 330 len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4; 331 DBG("loading PM4 ucode version: %u", ptr[0]); 332 gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0); 333 for (i = 1; i < len; i++) 334 gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]); 335 336 /* Load PFP: */ 337 ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data); 338 len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4; 339 DBG("loading PFP ucode version: %u", ptr[0]); 340 341 gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0); 342 for (i = 1; i < len; i++) 343 gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]); 344 345 /* clear ME_HALT to start micro engine */ 346 gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0); 347 348 return a4xx_me_init(gpu) ? 0 : -EINVAL; 349 } 350 351 static void a4xx_recover(struct msm_gpu *gpu) 352 { 353 int i; 354 355 adreno_dump_info(gpu); 356 357 for (i = 0; i < 8; i++) { 358 printk("CP_SCRATCH_REG%d: %u\n", i, 359 gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i)); 360 } 361 362 /* dump registers before resetting gpu, if enabled: */ 363 if (hang_debug) 364 a4xx_dump(gpu); 365 366 gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1); 367 gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD); 368 gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0); 369 adreno_recover(gpu); 370 } 371 372 static void a4xx_destroy(struct msm_gpu *gpu) 373 { 374 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 375 struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu); 376 377 DBG("%s", gpu->name); 378 379 adreno_gpu_cleanup(adreno_gpu); 380 381 adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); 382 383 kfree(a4xx_gpu); 384 } 385 386 static bool a4xx_idle(struct msm_gpu *gpu) 387 { 388 /* wait for ringbuffer to drain: */ 389 if (!adreno_idle(gpu, gpu->rb[0])) 390 return false; 391 392 /* then wait for GPU to finish: */ 393 if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) & 394 A4XX_RBBM_STATUS_GPU_BUSY))) { 395 DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name); 396 /* TODO maybe we need to reset GPU here to recover from hang? */ 397 return false; 398 } 399 400 return true; 401 } 402 403 static irqreturn_t a4xx_irq(struct msm_gpu *gpu) 404 { 405 uint32_t status; 406 407 status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS); 408 DBG("%s: Int status %08x", gpu->name, status); 409 410 if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) { 411 uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS); 412 printk("CP | Protected mode error| %s | addr=%x\n", 413 reg & (1 << 24) ? "WRITE" : "READ", 414 (reg & 0xFFFFF) >> 2); 415 } 416 417 gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status); 418 419 msm_gpu_retire(gpu); 420 421 return IRQ_HANDLED; 422 } 423 424 static const unsigned int a4xx_registers[] = { 425 /* RBBM */ 426 0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026, 427 0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066, 428 0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF, 429 /* CP */ 430 0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B, 431 0x0578, 0x058F, 432 /* VSC */ 433 0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51, 434 /* GRAS */ 435 0x0C80, 0x0C81, 0x0C88, 0x0C8F, 436 /* RB */ 437 0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2, 438 /* PC */ 439 0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23, 440 /* VFD */ 441 0x0E40, 0x0E4A, 442 /* VPC */ 443 0x0E60, 0x0E61, 0x0E63, 0x0E68, 444 /* UCHE */ 445 0x0E80, 0x0E84, 0x0E88, 0x0E95, 446 /* VMIDMT */ 447 0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A, 448 0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024, 449 0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104, 450 0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300, 451 0x1380, 0x1380, 452 /* GRAS CTX 0 */ 453 0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E, 454 /* PC CTX 0 */ 455 0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7, 456 /* VFD CTX 0 */ 457 0x2200, 0x2204, 0x2208, 0x22A9, 458 /* GRAS CTX 1 */ 459 0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E, 460 /* PC CTX 1 */ 461 0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7, 462 /* VFD CTX 1 */ 463 0x2600, 0x2604, 0x2608, 0x26A9, 464 /* XPU */ 465 0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20, 466 0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40, 467 0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95, 468 /* VBIF */ 469 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022, 470 0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 471 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 472 0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 473 0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 474 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 475 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 476 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 477 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C, 478 0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416, 479 0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436, 480 0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480, 481 0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004, 482 0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016, 483 0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200, 484 0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802, 485 0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816, 486 0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF, 487 0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925, 488 0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E, 489 0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, 490 0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, 491 0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, 492 0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, 493 0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, 494 0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, 495 0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, 496 0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, 497 0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, 498 0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, 499 0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F, 500 0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C, 501 0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9, 502 0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE, 503 ~0 /* sentinel */ 504 }; 505 506 static const unsigned int a405_registers[] = { 507 /* RBBM */ 508 0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026, 509 0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066, 510 0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF, 511 /* CP */ 512 0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B, 513 0x0578, 0x058F, 514 /* VSC */ 515 0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51, 516 /* GRAS */ 517 0x0C80, 0x0C81, 0x0C88, 0x0C8F, 518 /* RB */ 519 0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2, 520 /* PC */ 521 0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23, 522 /* VFD */ 523 0x0E40, 0x0E4A, 524 /* VPC */ 525 0x0E60, 0x0E61, 0x0E63, 0x0E68, 526 /* UCHE */ 527 0x0E80, 0x0E84, 0x0E88, 0x0E95, 528 /* GRAS CTX 0 */ 529 0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E, 530 /* PC CTX 0 */ 531 0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7, 532 /* VFD CTX 0 */ 533 0x2200, 0x2204, 0x2208, 0x22A9, 534 /* GRAS CTX 1 */ 535 0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E, 536 /* PC CTX 1 */ 537 0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7, 538 /* VFD CTX 1 */ 539 0x2600, 0x2604, 0x2608, 0x26A9, 540 /* VBIF version 0x20050000*/ 541 0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036, 542 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049, 543 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D, 544 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098, 545 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0, 546 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108, 547 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125, 548 0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410, 549 ~0 /* sentinel */ 550 }; 551 552 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu) 553 { 554 struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL); 555 556 if (!state) 557 return ERR_PTR(-ENOMEM); 558 559 adreno_gpu_state_get(gpu, state); 560 561 state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS); 562 563 return state; 564 } 565 566 static void a4xx_dump(struct msm_gpu *gpu) 567 { 568 printk("status: %08x\n", 569 gpu_read(gpu, REG_A4XX_RBBM_STATUS)); 570 adreno_dump(gpu); 571 } 572 573 static int a4xx_pm_resume(struct msm_gpu *gpu) { 574 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 575 int ret; 576 577 ret = msm_gpu_pm_resume(gpu); 578 if (ret) 579 return ret; 580 581 if (adreno_is_a430(adreno_gpu)) { 582 unsigned int reg; 583 /* Set the default register values; set SW_COLLAPSE to 0 */ 584 gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000); 585 do { 586 udelay(5); 587 reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS); 588 } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON)); 589 } 590 return 0; 591 } 592 593 static int a4xx_pm_suspend(struct msm_gpu *gpu) { 594 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 595 int ret; 596 597 ret = msm_gpu_pm_suspend(gpu); 598 if (ret) 599 return ret; 600 601 if (adreno_is_a430(adreno_gpu)) { 602 /* Set the default register values; set SW_COLLAPSE to 1 */ 603 gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001); 604 } 605 return 0; 606 } 607 608 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 609 { 610 *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO, 611 REG_A4XX_RBBM_PERFCTR_CP_0_HI); 612 613 return 0; 614 } 615 616 static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 617 { 618 ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR); 619 return ring->memptrs->rptr; 620 } 621 622 static const struct adreno_gpu_funcs funcs = { 623 .base = { 624 .get_param = adreno_get_param, 625 .hw_init = a4xx_hw_init, 626 .pm_suspend = a4xx_pm_suspend, 627 .pm_resume = a4xx_pm_resume, 628 .recover = a4xx_recover, 629 .submit = a4xx_submit, 630 .active_ring = adreno_active_ring, 631 .irq = a4xx_irq, 632 .destroy = a4xx_destroy, 633 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) 634 .show = adreno_show, 635 #endif 636 .gpu_state_get = a4xx_gpu_state_get, 637 .gpu_state_put = adreno_gpu_state_put, 638 .create_address_space = adreno_iommu_create_address_space, 639 .get_rptr = a4xx_get_rptr, 640 }, 641 .get_timestamp = a4xx_get_timestamp, 642 }; 643 644 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) 645 { 646 struct a4xx_gpu *a4xx_gpu = NULL; 647 struct adreno_gpu *adreno_gpu; 648 struct msm_gpu *gpu; 649 struct msm_drm_private *priv = dev->dev_private; 650 struct platform_device *pdev = priv->gpu_pdev; 651 struct icc_path *ocmem_icc_path; 652 struct icc_path *icc_path; 653 int ret; 654 655 if (!pdev) { 656 DRM_DEV_ERROR(dev->dev, "no a4xx device\n"); 657 ret = -ENXIO; 658 goto fail; 659 } 660 661 a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL); 662 if (!a4xx_gpu) { 663 ret = -ENOMEM; 664 goto fail; 665 } 666 667 adreno_gpu = &a4xx_gpu->base; 668 gpu = &adreno_gpu->base; 669 670 gpu->perfcntrs = NULL; 671 gpu->num_perfcntrs = 0; 672 673 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); 674 if (ret) 675 goto fail; 676 677 adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers : 678 a4xx_registers; 679 680 /* if needed, allocate gmem: */ 681 ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, 682 &a4xx_gpu->ocmem); 683 if (ret) 684 goto fail; 685 686 if (!gpu->aspace) { 687 /* TODO we think it is possible to configure the GPU to 688 * restrict access to VRAM carveout. But the required 689 * registers are unknown. For now just bail out and 690 * limp along with just modesetting. If it turns out 691 * to not be possible to restrict access, then we must 692 * implement a cmdstream validator. 693 */ 694 DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); 695 ret = -ENXIO; 696 goto fail; 697 } 698 699 icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); 700 ret = IS_ERR(icc_path); 701 if (ret) 702 goto fail; 703 704 ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem"); 705 ret = IS_ERR(ocmem_icc_path); 706 if (ret) { 707 /* allow -ENODATA, ocmem icc is optional */ 708 if (ret != -ENODATA) 709 goto fail; 710 ocmem_icc_path = NULL; 711 } 712 713 /* 714 * Set the ICC path to maximum speed for now by multiplying the fastest 715 * frequency by the bus width (8). We'll want to scale this later on to 716 * improve battery life. 717 */ 718 icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); 719 icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); 720 721 return gpu; 722 723 fail: 724 if (a4xx_gpu) 725 a4xx_destroy(&a4xx_gpu->base.base); 726 727 return ERR_PTR(ret); 728 } 729