// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This is to allow AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class. Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from the downstream android driver)
		 * includes some bits outside of the known bitfields. But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang?
		 */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status: %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
};

static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout. But the required
		 * registers are unknown. For now just bail out and
		 * limp along with just modesetting. If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}