1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ 3 4 #include <linux/ascii85.h> 5 #include "msm_gem.h" 6 #include "a6xx_gpu.h" 7 #include "a6xx_gmu.h" 8 #include "a6xx_gpu_state.h" 9 #include "a6xx_gmu.xml.h" 10 11 struct a6xx_gpu_state_obj { 12 const void *handle; 13 u32 *data; 14 }; 15 16 struct a6xx_gpu_state { 17 struct msm_gpu_state base; 18 19 struct a6xx_gpu_state_obj *gmu_registers; 20 int nr_gmu_registers; 21 22 struct a6xx_gpu_state_obj *registers; 23 int nr_registers; 24 25 struct a6xx_gpu_state_obj *shaders; 26 int nr_shaders; 27 28 struct a6xx_gpu_state_obj *clusters; 29 int nr_clusters; 30 31 struct a6xx_gpu_state_obj *dbgahb_clusters; 32 int nr_dbgahb_clusters; 33 34 struct a6xx_gpu_state_obj *indexed_regs; 35 int nr_indexed_regs; 36 37 struct a6xx_gpu_state_obj *debugbus; 38 int nr_debugbus; 39 40 struct a6xx_gpu_state_obj *vbif_debugbus; 41 42 struct a6xx_gpu_state_obj *cx_debugbus; 43 int nr_cx_debugbus; 44 45 struct list_head objs; 46 }; 47 48 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) 49 { 50 in[0] = val; 51 in[1] = (((u64) reg) << 44 | (1 << 21) | 1); 52 53 return 2; 54 } 55 56 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) 57 { 58 in[0] = target; 59 in[1] = (((u64) reg) << 44 | dwords); 60 61 return 2; 62 } 63 64 static inline int CRASHDUMP_FINI(u64 *in) 65 { 66 in[0] = 0; 67 in[1] = 0; 68 69 return 2; 70 } 71 72 struct a6xx_crashdumper { 73 void *ptr; 74 struct drm_gem_object *bo; 75 u64 iova; 76 }; 77 78 struct a6xx_state_memobj { 79 struct list_head node; 80 unsigned long long data[]; 81 }; 82 83 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) 84 { 85 struct a6xx_state_memobj *obj = 86 kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); 87 88 if (!obj) 89 return NULL; 90 91 list_add_tail(&obj->node, &a6xx_state->objs); 92 return &obj->data; 93 } 94 95 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, 96 size_t size) 97 { 98 void *dst = state_kcalloc(a6xx_state, 1, size); 99 100 if (dst) 101 memcpy(dst, src, size); 102 return dst; 103 } 104 105 /* 106 * Allocate 1MB for the crashdumper scratch region - 8k for the script and 107 * the rest for the data 108 */ 109 #define A6XX_CD_DATA_OFFSET 8192 110 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192) 111 112 static int a6xx_crashdumper_init(struct msm_gpu *gpu, 113 struct a6xx_crashdumper *dumper) 114 { 115 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, 116 SZ_1M, MSM_BO_UNCACHED, gpu->aspace, 117 &dumper->bo, &dumper->iova); 118 119 if (!IS_ERR(dumper->ptr)) 120 msm_gem_object_set_name(dumper->bo, "crashdump"); 121 122 return PTR_ERR_OR_ZERO(dumper->ptr); 123 } 124 125 static int a6xx_crashdumper_run(struct msm_gpu *gpu, 126 struct a6xx_crashdumper *dumper) 127 { 128 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 129 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 130 u32 val; 131 int ret; 132 133 if (IS_ERR_OR_NULL(dumper->ptr)) 134 return -EINVAL; 135 136 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) 137 return -EINVAL; 138 139 /* Make sure all pending memory writes are posted */ 140 wmb(); 141 142 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, 143 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); 144 145 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); 146 147 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, 148 val & 0x02, 100, 10000); 149 150 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); 151 152 return ret; 153 } 154 155 /* read a value from the GX debug bus */ 156 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, 157 u32 *data) 158 { 159 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 160 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 161 162 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); 163 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); 164 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); 165 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); 166 167 /* Wait 1 us to make sure the data is flowing */ 168 udelay(1); 169 170 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); 171 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); 172 173 return 2; 174 } 175 176 #define cxdbg_write(ptr, offset, val) \ 177 msm_writel((val), (ptr) + ((offset) << 2)) 178 179 #define cxdbg_read(ptr, offset) \ 180 msm_readl((ptr) + ((offset) << 2)) 181 182 /* read a value from the CX debug bus */ 183 static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, 184 u32 *data) 185 { 186 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 187 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 188 189 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); 190 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); 191 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); 192 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); 193 194 /* Wait 1 us to make sure the data is flowing */ 195 udelay(1); 196 197 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); 198 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); 199 200 return 2; 201 } 202 203 /* Read a chunk of data from the VBIF debug bus */ 204 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, 205 u32 reg, int count, u32 *data) 206 { 207 int i; 208 209 gpu_write(gpu, ctrl0, reg); 210 211 for (i = 0; i < count; i++) { 212 gpu_write(gpu, ctrl1, i); 213 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); 214 } 215 216 return count; 217 } 218 219 #define AXI_ARB_BLOCKS 2 220 #define XIN_AXI_BLOCKS 5 221 #define XIN_CORE_BLOCKS 4 222 223 #define VBIF_DEBUGBUS_BLOCK_SIZE \ 224 ((16 * AXI_ARB_BLOCKS) + \ 225 (18 * XIN_AXI_BLOCKS) + \ 226 (12 * XIN_CORE_BLOCKS)) 227 228 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 229 struct a6xx_gpu_state *a6xx_state, 230 struct a6xx_gpu_state_obj *obj) 231 { 232 u32 clk, *ptr; 233 int i; 234 235 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 236 sizeof(u32)); 237 if (!obj->data) 238 return; 239 240 obj->handle = NULL; 241 242 /* Get the current clock setting */ 243 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 244 245 /* Force on the bus so we can read it */ 246 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 247 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 248 249 /* We will read from BUS2 first, so disable BUS1 */ 250 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 251 252 /* Enable the VBIF bus for reading */ 253 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 254 255 ptr = obj->data; 256 257 for (i = 0; i < AXI_ARB_BLOCKS; i++) 258 ptr += vbif_debugbus_read(gpu, 259 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 260 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 261 1 << (i + 16), 16, ptr); 262 263 for (i = 0; i < XIN_AXI_BLOCKS; i++) 264 ptr += vbif_debugbus_read(gpu, 265 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 266 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 267 1 << i, 18, ptr); 268 269 /* Stop BUS2 so we can turn on BUS1 */ 270 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 271 272 for (i = 0; i < XIN_CORE_BLOCKS; i++) 273 ptr += vbif_debugbus_read(gpu, 274 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 275 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 276 1 << i, 12, ptr); 277 278 /* Restore the VBIF clock setting */ 279 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 280 } 281 282 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 283 struct a6xx_gpu_state *a6xx_state, 284 const struct a6xx_debugbus_block *block, 285 struct a6xx_gpu_state_obj *obj) 286 { 287 int i; 288 u32 *ptr; 289 290 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 291 if (!obj->data) 292 return; 293 294 obj->handle = block; 295 296 for (ptr = obj->data, i = 0; i < block->count; i++) 297 ptr += debugbus_read(gpu, block->id, i, ptr); 298 } 299 300 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, 301 struct a6xx_gpu_state *a6xx_state, 302 const struct a6xx_debugbus_block *block, 303 struct a6xx_gpu_state_obj *obj) 304 { 305 int i; 306 u32 *ptr; 307 308 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 309 if (!obj->data) 310 return; 311 312 obj->handle = block; 313 314 for (ptr = obj->data, i = 0; i < block->count; i++) 315 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); 316 } 317 318 static void a6xx_get_debugbus(struct msm_gpu *gpu, 319 struct a6xx_gpu_state *a6xx_state) 320 { 321 struct resource *res; 322 void __iomem *cxdbg = NULL; 323 int nr_debugbus_blocks; 324 325 /* Set up the GX debug bus */ 326 327 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 328 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 329 330 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 331 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 332 333 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 334 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 335 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 336 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 337 338 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 339 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 340 341 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 342 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 343 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 344 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 345 346 /* Set up the CX debug bus - it lives elsewhere in the system so do a 347 * temporary ioremap for the registers 348 */ 349 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 350 "cx_dbgc"); 351 352 if (res) 353 cxdbg = ioremap(res->start, resource_size(res)); 354 355 if (cxdbg) { 356 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 357 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 358 359 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 360 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 361 362 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 363 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 364 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 365 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 366 367 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 368 0x76543210); 369 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 370 0xFEDCBA98); 371 372 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 373 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 374 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 375 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 376 } 377 378 nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + 379 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); 380 381 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, 382 sizeof(*a6xx_state->debugbus)); 383 384 if (a6xx_state->debugbus) { 385 int i; 386 387 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 388 a6xx_get_debugbus_block(gpu, 389 a6xx_state, 390 &a6xx_debugbus_blocks[i], 391 &a6xx_state->debugbus[i]); 392 393 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 394 395 /* 396 * GBIF has same debugbus as of other GPU blocks, fall back to 397 * default path if GPU uses GBIF, also GBIF uses exactly same 398 * ID as of VBIF. 399 */ 400 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 401 a6xx_get_debugbus_block(gpu, a6xx_state, 402 &a6xx_gbif_debugbus_block, 403 &a6xx_state->debugbus[i]); 404 405 a6xx_state->nr_debugbus += 1; 406 } 407 } 408 409 /* Dump the VBIF debugbus on applicable targets */ 410 if (!a6xx_has_gbif(to_adreno_gpu(gpu))) { 411 a6xx_state->vbif_debugbus = 412 state_kcalloc(a6xx_state, 1, 413 sizeof(*a6xx_state->vbif_debugbus)); 414 415 if (a6xx_state->vbif_debugbus) 416 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 417 a6xx_state->vbif_debugbus); 418 } 419 420 if (cxdbg) { 421 a6xx_state->cx_debugbus = 422 state_kcalloc(a6xx_state, 423 ARRAY_SIZE(a6xx_cx_debugbus_blocks), 424 sizeof(*a6xx_state->cx_debugbus)); 425 426 if (a6xx_state->cx_debugbus) { 427 int i; 428 429 for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) 430 a6xx_get_cx_debugbus_block(cxdbg, 431 a6xx_state, 432 &a6xx_cx_debugbus_blocks[i], 433 &a6xx_state->cx_debugbus[i]); 434 435 a6xx_state->nr_cx_debugbus = 436 ARRAY_SIZE(a6xx_cx_debugbus_blocks); 437 } 438 439 iounmap(cxdbg); 440 } 441 } 442 443 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 444 445 /* Read a data cluster from behind the AHB aperture */ 446 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 447 struct a6xx_gpu_state *a6xx_state, 448 const struct a6xx_dbgahb_cluster *dbgahb, 449 struct a6xx_gpu_state_obj *obj, 450 struct a6xx_crashdumper *dumper) 451 { 452 u64 *in = dumper->ptr; 453 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 454 size_t datasize; 455 int i, regcount = 0; 456 457 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 458 int j; 459 460 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 461 (dbgahb->statetype + i * 2) << 8); 462 463 for (j = 0; j < dbgahb->count; j += 2) { 464 int count = RANGE(dbgahb->registers, j); 465 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 466 dbgahb->registers[j] - (dbgahb->base >> 2); 467 468 in += CRASHDUMP_READ(in, offset, count, out); 469 470 out += count * sizeof(u32); 471 472 if (i == 0) 473 regcount += count; 474 } 475 } 476 477 CRASHDUMP_FINI(in); 478 479 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 480 481 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 482 return; 483 484 if (a6xx_crashdumper_run(gpu, dumper)) 485 return; 486 487 obj->handle = dbgahb; 488 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 489 datasize); 490 } 491 492 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 493 struct a6xx_gpu_state *a6xx_state, 494 struct a6xx_crashdumper *dumper) 495 { 496 int i; 497 498 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 499 ARRAY_SIZE(a6xx_dbgahb_clusters), 500 sizeof(*a6xx_state->dbgahb_clusters)); 501 502 if (!a6xx_state->dbgahb_clusters) 503 return; 504 505 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 506 507 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 508 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 509 &a6xx_dbgahb_clusters[i], 510 &a6xx_state->dbgahb_clusters[i], dumper); 511 } 512 513 /* Read a data cluster from the CP aperture with the crashdumper */ 514 static void a6xx_get_cluster(struct msm_gpu *gpu, 515 struct a6xx_gpu_state *a6xx_state, 516 const struct a6xx_cluster *cluster, 517 struct a6xx_gpu_state_obj *obj, 518 struct a6xx_crashdumper *dumper) 519 { 520 u64 *in = dumper->ptr; 521 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 522 size_t datasize; 523 int i, regcount = 0; 524 525 /* Some clusters need a selector register to be programmed too */ 526 if (cluster->sel_reg) 527 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 528 529 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 530 int j; 531 532 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 533 (cluster->id << 8) | (i << 4) | i); 534 535 for (j = 0; j < cluster->count; j += 2) { 536 int count = RANGE(cluster->registers, j); 537 538 in += CRASHDUMP_READ(in, cluster->registers[j], 539 count, out); 540 541 out += count * sizeof(u32); 542 543 if (i == 0) 544 regcount += count; 545 } 546 } 547 548 CRASHDUMP_FINI(in); 549 550 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 551 552 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 553 return; 554 555 if (a6xx_crashdumper_run(gpu, dumper)) 556 return; 557 558 obj->handle = cluster; 559 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 560 datasize); 561 } 562 563 static void a6xx_get_clusters(struct msm_gpu *gpu, 564 struct a6xx_gpu_state *a6xx_state, 565 struct a6xx_crashdumper *dumper) 566 { 567 int i; 568 569 a6xx_state->clusters = state_kcalloc(a6xx_state, 570 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 571 572 if (!a6xx_state->clusters) 573 return; 574 575 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 576 577 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 578 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 579 &a6xx_state->clusters[i], dumper); 580 } 581 582 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 583 static void a6xx_get_shader_block(struct msm_gpu *gpu, 584 struct a6xx_gpu_state *a6xx_state, 585 const struct a6xx_shader_block *block, 586 struct a6xx_gpu_state_obj *obj, 587 struct a6xx_crashdumper *dumper) 588 { 589 u64 *in = dumper->ptr; 590 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 591 int i; 592 593 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 594 return; 595 596 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 597 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 598 (block->type << 8) | i); 599 600 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 601 block->size, dumper->iova + A6XX_CD_DATA_OFFSET); 602 } 603 604 CRASHDUMP_FINI(in); 605 606 if (a6xx_crashdumper_run(gpu, dumper)) 607 return; 608 609 obj->handle = block; 610 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 611 datasize); 612 } 613 614 static void a6xx_get_shaders(struct msm_gpu *gpu, 615 struct a6xx_gpu_state *a6xx_state, 616 struct a6xx_crashdumper *dumper) 617 { 618 int i; 619 620 a6xx_state->shaders = state_kcalloc(a6xx_state, 621 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 622 623 if (!a6xx_state->shaders) 624 return; 625 626 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 627 628 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 629 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 630 &a6xx_state->shaders[i], dumper); 631 } 632 633 /* Read registers from behind the HLSQ aperture with the crashdumper */ 634 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 635 struct a6xx_gpu_state *a6xx_state, 636 const struct a6xx_registers *regs, 637 struct a6xx_gpu_state_obj *obj, 638 struct a6xx_crashdumper *dumper) 639 640 { 641 u64 *in = dumper->ptr; 642 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 643 int i, regcount = 0; 644 645 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 646 647 for (i = 0; i < regs->count; i += 2) { 648 u32 count = RANGE(regs->registers, i); 649 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 650 regs->registers[i] - (regs->val0 >> 2); 651 652 in += CRASHDUMP_READ(in, offset, count, out); 653 654 out += count * sizeof(u32); 655 regcount += count; 656 } 657 658 CRASHDUMP_FINI(in); 659 660 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 661 return; 662 663 if (a6xx_crashdumper_run(gpu, dumper)) 664 return; 665 666 obj->handle = regs; 667 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 668 regcount * sizeof(u32)); 669 } 670 671 /* Read a block of registers using the crashdumper */ 672 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 673 struct a6xx_gpu_state *a6xx_state, 674 const struct a6xx_registers *regs, 675 struct a6xx_gpu_state_obj *obj, 676 struct a6xx_crashdumper *dumper) 677 678 { 679 u64 *in = dumper->ptr; 680 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 681 int i, regcount = 0; 682 683 /* Some blocks might need to program a selector register first */ 684 if (regs->val0) 685 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 686 687 for (i = 0; i < regs->count; i += 2) { 688 u32 count = RANGE(regs->registers, i); 689 690 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 691 692 out += count * sizeof(u32); 693 regcount += count; 694 } 695 696 CRASHDUMP_FINI(in); 697 698 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 699 return; 700 701 if (a6xx_crashdumper_run(gpu, dumper)) 702 return; 703 704 obj->handle = regs; 705 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 706 regcount * sizeof(u32)); 707 } 708 709 /* Read a block of registers via AHB */ 710 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 711 struct a6xx_gpu_state *a6xx_state, 712 const struct a6xx_registers *regs, 713 struct a6xx_gpu_state_obj *obj) 714 { 715 int i, regcount = 0, index = 0; 716 717 for (i = 0; i < regs->count; i += 2) 718 regcount += RANGE(regs->registers, i); 719 720 obj->handle = (const void *) regs; 721 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 722 if (!obj->data) 723 return; 724 725 for (i = 0; i < regs->count; i += 2) { 726 u32 count = RANGE(regs->registers, i); 727 int j; 728 729 for (j = 0; j < count; j++) 730 obj->data[index++] = gpu_read(gpu, 731 regs->registers[i] + j); 732 } 733 } 734 735 /* Read a block of GMU registers */ 736 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 737 struct a6xx_gpu_state *a6xx_state, 738 const struct a6xx_registers *regs, 739 struct a6xx_gpu_state_obj *obj, 740 bool rscc) 741 { 742 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 743 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 744 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 745 int i, regcount = 0, index = 0; 746 747 for (i = 0; i < regs->count; i += 2) 748 regcount += RANGE(regs->registers, i); 749 750 obj->handle = (const void *) regs; 751 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 752 if (!obj->data) 753 return; 754 755 for (i = 0; i < regs->count; i += 2) { 756 u32 count = RANGE(regs->registers, i); 757 int j; 758 759 for (j = 0; j < count; j++) { 760 u32 offset = regs->registers[i] + j; 761 u32 val; 762 763 if (rscc) 764 val = gmu_read_rscc(gmu, offset); 765 else 766 val = gmu_read(gmu, offset); 767 768 obj->data[index++] = val; 769 } 770 } 771 } 772 773 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 774 struct a6xx_gpu_state *a6xx_state) 775 { 776 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 777 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 778 779 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 780 2, sizeof(*a6xx_state->gmu_registers)); 781 782 if (!a6xx_state->gmu_registers) 783 return; 784 785 a6xx_state->nr_gmu_registers = 2; 786 787 /* Get the CX GMU registers from AHB */ 788 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 789 &a6xx_state->gmu_registers[0], false); 790 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 791 &a6xx_state->gmu_registers[1], true); 792 793 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 794 return; 795 796 /* Set the fence to ALLOW mode so we can access the registers */ 797 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 798 799 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], 800 &a6xx_state->gmu_registers[2], false); 801 } 802 803 #define A6XX_GBIF_REGLIST_SIZE 1 804 static void a6xx_get_registers(struct msm_gpu *gpu, 805 struct a6xx_gpu_state *a6xx_state, 806 struct a6xx_crashdumper *dumper) 807 { 808 int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + 809 ARRAY_SIZE(a6xx_reglist) + 810 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 811 int index = 0; 812 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 813 814 a6xx_state->registers = state_kcalloc(a6xx_state, 815 count, sizeof(*a6xx_state->registers)); 816 817 if (!a6xx_state->registers) 818 return; 819 820 a6xx_state->nr_registers = count; 821 822 for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) 823 a6xx_get_ahb_gpu_registers(gpu, 824 a6xx_state, &a6xx_ahb_reglist[i], 825 &a6xx_state->registers[index++]); 826 827 if (a6xx_has_gbif(adreno_gpu)) 828 a6xx_get_ahb_gpu_registers(gpu, 829 a6xx_state, &a6xx_gbif_reglist, 830 &a6xx_state->registers[index++]); 831 else 832 a6xx_get_ahb_gpu_registers(gpu, 833 a6xx_state, &a6xx_vbif_reglist, 834 &a6xx_state->registers[index++]); 835 836 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 837 a6xx_get_crashdumper_registers(gpu, 838 a6xx_state, &a6xx_reglist[i], 839 &a6xx_state->registers[index++], 840 dumper); 841 842 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 843 a6xx_get_crashdumper_hlsq_registers(gpu, 844 a6xx_state, &a6xx_hlsq_reglist[i], 845 &a6xx_state->registers[index++], 846 dumper); 847 } 848 849 /* Read a block of data from an indexed register pair */ 850 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 851 struct a6xx_gpu_state *a6xx_state, 852 const struct a6xx_indexed_registers *indexed, 853 struct a6xx_gpu_state_obj *obj) 854 { 855 int i; 856 857 obj->handle = (const void *) indexed; 858 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32)); 859 if (!obj->data) 860 return; 861 862 /* All the indexed banks start at address 0 */ 863 gpu_write(gpu, indexed->addr, 0); 864 865 /* Read the data - each read increments the internal address by 1 */ 866 for (i = 0; i < indexed->count; i++) 867 obj->data[i] = gpu_read(gpu, indexed->data); 868 } 869 870 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 871 struct a6xx_gpu_state *a6xx_state) 872 { 873 u32 mempool_size; 874 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 875 int i; 876 877 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 878 sizeof(*a6xx_state->indexed_regs)); 879 if (!a6xx_state->indexed_regs) 880 return; 881 882 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 883 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 884 &a6xx_state->indexed_regs[i]); 885 886 /* Set the CP mempool size to 0 to stabilize it while dumping */ 887 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 888 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); 889 890 /* Get the contents of the CP mempool */ 891 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 892 &a6xx_state->indexed_regs[i]); 893 894 /* 895 * Offset 0x2000 in the mempool is the size - copy the saved size over 896 * so the data is consistent 897 */ 898 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 899 900 /* Restore the size in the hardware */ 901 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 902 903 a6xx_state->nr_indexed_regs = count; 904 } 905 906 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 907 { 908 struct a6xx_crashdumper dumper = { 0 }; 909 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 910 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 911 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 912 GFP_KERNEL); 913 914 if (!a6xx_state) 915 return ERR_PTR(-ENOMEM); 916 917 INIT_LIST_HEAD(&a6xx_state->objs); 918 919 /* Get the generic state from the adreno core */ 920 adreno_gpu_state_get(gpu, &a6xx_state->base); 921 922 a6xx_get_gmu_registers(gpu, a6xx_state); 923 924 /* If GX isn't on the rest of the data isn't going to be accessible */ 925 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 926 return &a6xx_state->base; 927 928 /* Get the banks of indexed registers */ 929 a6xx_get_indexed_registers(gpu, a6xx_state); 930 931 /* Try to initialize the crashdumper */ 932 if (!a6xx_crashdumper_init(gpu, &dumper)) { 933 a6xx_get_registers(gpu, a6xx_state, &dumper); 934 a6xx_get_shaders(gpu, a6xx_state, &dumper); 935 a6xx_get_clusters(gpu, a6xx_state, &dumper); 936 a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); 937 938 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 939 } 940 941 if (snapshot_debugbus) 942 a6xx_get_debugbus(gpu, a6xx_state); 943 944 return &a6xx_state->base; 945 } 946 947 static void a6xx_gpu_state_destroy(struct kref *kref) 948 { 949 struct a6xx_state_memobj *obj, *tmp; 950 struct msm_gpu_state *state = container_of(kref, 951 struct msm_gpu_state, ref); 952 struct a6xx_gpu_state *a6xx_state = container_of(state, 953 struct a6xx_gpu_state, base); 954 955 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) 956 kfree(obj); 957 958 adreno_gpu_state_destroy(state); 959 kfree(a6xx_state); 960 } 961 962 int a6xx_gpu_state_put(struct msm_gpu_state *state) 963 { 964 if (IS_ERR_OR_NULL(state)) 965 return 1; 966 967 return kref_put(&state->ref, a6xx_gpu_state_destroy); 968 } 969 970 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 971 struct drm_printer *p) 972 { 973 int i, index = 0; 974 975 if (!data) 976 return; 977 978 for (i = 0; i < count; i += 2) { 979 u32 count = RANGE(registers, i); 980 u32 offset = registers[i]; 981 int j; 982 983 for (j = 0; j < count; index++, offset++, j++) { 984 if (data[index] == 0xdeafbead) 985 continue; 986 987 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 988 offset << 2, data[index]); 989 } 990 } 991 } 992 993 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 994 { 995 char out[ASCII85_BUFSZ]; 996 long i, l, datalen = 0; 997 998 for (i = 0; i < len >> 2; i++) { 999 if (data[i]) 1000 datalen = (i + 1) << 2; 1001 } 1002 1003 if (datalen == 0) 1004 return; 1005 1006 drm_puts(p, " data: !!ascii85 |\n"); 1007 drm_puts(p, " "); 1008 1009 1010 l = ascii85_encode_len(datalen); 1011 1012 for (i = 0; i < l; i++) 1013 drm_puts(p, ascii85_encode(data[i], out)); 1014 1015 drm_puts(p, "\n"); 1016 } 1017 1018 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1019 { 1020 drm_puts(p, fmt); 1021 drm_puts(p, name); 1022 drm_puts(p, "\n"); 1023 } 1024 1025 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1026 struct drm_printer *p) 1027 { 1028 const struct a6xx_shader_block *block = obj->handle; 1029 int i; 1030 1031 if (!obj->handle) 1032 return; 1033 1034 print_name(p, " - type: ", block->name); 1035 1036 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1037 drm_printf(p, " - bank: %d\n", i); 1038 drm_printf(p, " size: %d\n", block->size); 1039 1040 if (!obj->data) 1041 continue; 1042 1043 print_ascii85(p, block->size << 2, 1044 obj->data + (block->size * i)); 1045 } 1046 } 1047 1048 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1049 struct drm_printer *p) 1050 { 1051 int ctx, index = 0; 1052 1053 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1054 int j; 1055 1056 drm_printf(p, " - context: %d\n", ctx); 1057 1058 for (j = 0; j < size; j += 2) { 1059 u32 count = RANGE(registers, j); 1060 u32 offset = registers[j]; 1061 int k; 1062 1063 for (k = 0; k < count; index++, offset++, k++) { 1064 if (data[index] == 0xdeafbead) 1065 continue; 1066 1067 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1068 offset << 2, data[index]); 1069 } 1070 } 1071 } 1072 } 1073 1074 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1075 struct drm_printer *p) 1076 { 1077 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1078 1079 if (dbgahb) { 1080 print_name(p, " - cluster-name: ", dbgahb->name); 1081 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1082 obj->data, p); 1083 } 1084 } 1085 1086 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1087 struct drm_printer *p) 1088 { 1089 const struct a6xx_cluster *cluster = obj->handle; 1090 1091 if (cluster) { 1092 print_name(p, " - cluster-name: ", cluster->name); 1093 a6xx_show_cluster_data(cluster->registers, cluster->count, 1094 obj->data, p); 1095 } 1096 } 1097 1098 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1099 struct drm_printer *p) 1100 { 1101 const struct a6xx_indexed_registers *indexed = obj->handle; 1102 1103 if (!indexed) 1104 return; 1105 1106 print_name(p, " - regs-name: ", indexed->name); 1107 drm_printf(p, " dwords: %d\n", indexed->count); 1108 1109 print_ascii85(p, indexed->count << 2, obj->data); 1110 } 1111 1112 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1113 u32 *data, struct drm_printer *p) 1114 { 1115 if (block) { 1116 print_name(p, " - debugbus-block: ", block->name); 1117 1118 /* 1119 * count for regular debugbus data is in quadwords, 1120 * but print the size in dwords for consistency 1121 */ 1122 drm_printf(p, " count: %d\n", block->count << 1); 1123 1124 print_ascii85(p, block->count << 3, data); 1125 } 1126 } 1127 1128 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1129 struct drm_printer *p) 1130 { 1131 int i; 1132 1133 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1134 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1135 1136 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1137 } 1138 1139 if (a6xx_state->vbif_debugbus) { 1140 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1141 1142 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1143 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1144 1145 /* vbif debugbus data is in dwords. Confusing, huh? */ 1146 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1147 } 1148 1149 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1150 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1151 1152 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1153 } 1154 } 1155 1156 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1157 struct drm_printer *p) 1158 { 1159 struct a6xx_gpu_state *a6xx_state = container_of(state, 1160 struct a6xx_gpu_state, base); 1161 int i; 1162 1163 if (IS_ERR_OR_NULL(state)) 1164 return; 1165 1166 adreno_show(gpu, state, p); 1167 1168 drm_puts(p, "registers:\n"); 1169 for (i = 0; i < a6xx_state->nr_registers; i++) { 1170 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 1171 const struct a6xx_registers *regs = obj->handle; 1172 1173 if (!obj->handle) 1174 continue; 1175 1176 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1177 } 1178 1179 drm_puts(p, "registers-gmu:\n"); 1180 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 1181 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 1182 const struct a6xx_registers *regs = obj->handle; 1183 1184 if (!obj->handle) 1185 continue; 1186 1187 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1188 } 1189 1190 drm_puts(p, "indexed-registers:\n"); 1191 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 1192 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 1193 1194 drm_puts(p, "shader-blocks:\n"); 1195 for (i = 0; i < a6xx_state->nr_shaders; i++) 1196 a6xx_show_shader(&a6xx_state->shaders[i], p); 1197 1198 drm_puts(p, "clusters:\n"); 1199 for (i = 0; i < a6xx_state->nr_clusters; i++) 1200 a6xx_show_cluster(&a6xx_state->clusters[i], p); 1201 1202 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) 1203 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 1204 1205 drm_puts(p, "debugbus:\n"); 1206 a6xx_show_debugbus(a6xx_state, p); 1207 } 1208