// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

/*
 * One captured item.  @handle points at the static descriptor the data was
 * read for (register list, cluster, shader block, ...) and is left NULL when
 * nothing was captured; @data holds the captured dwords.
 */
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

/*
 * a6xx specific GPU state.  Each "objects + nr_*" pair is an array allocated
 * with state_kcalloc() together with its element count.
 */
struct a6xx_gpu_state {
	/* Generic adreno state; must stay embedded so container_of() works */
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single object (no count), only allocated on non-GBIF targets */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	/* CPU side copies of the GMU buffers */
	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* List of a6xx_state_memobj allocations owned by this state */
	struct list_head objs;

	bool gpu_initialized;
};

/*
 * Emit a two-qword crashdumper script entry that writes @val to @reg.
 * Returns the number of u64 slots consumed so callers can do "in += ...".
 * NOTE(review): bit 21 presumably flags the entry as a write - confirm
 * against the hardware documentation.
 */
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

/*
 * Emit a two-qword crashdumper script entry that reads @dwords dwords
 * starting at @reg into the GPU address @target.  Returns the number of u64
 * slots consumed.
 */
static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

/* Terminate a crashdumper script with an all-zero entry */
static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

/* Scratch buffer shared by the script and its output data */
struct a6xx_crashdumper {
	void *ptr;			/* kernel mapping of the buffer */
	struct drm_gem_object *bo;
	u64 iova;			/* GPU address of the buffer */
};

/* Header placed in front of every state_kcalloc() allocation */
struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

/*
 * Allocate a zeroed array of @nr objects of @objsize bytes and link it onto
 * a6xx_state->objs so it is released together with the state.  Returns a
 * pointer to the payload or NULL on allocation failure.
 */
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

/*
 * Copy @size bytes from @src into a new state_kcalloc() allocation.
 * Returns the copy or NULL on allocation failure.
 */
static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

/*
 * Allocate and map the crashdumper scratch buffer.  Returns 0 on success or
 * the negative error code encoded in the pointer returned by
 * msm_gem_kernel_new().
 */
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

/*
 * Run the script currently stored at the start of the scratch buffer.
 * Returns -EINVAL if the buffer is not usable or SPTPRAC is off, otherwise
 * the result of polling the dump status register.
 */
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	/* Wait for bit 1 of the status register to be set */
	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	/* Always clear the control register again, even after a timeout */
	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/*
 * read a value from the GX debug bus
 *
 * Stores two dwords in @data and returns 2 so the caller can advance its
 * output pointer.
 */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	/* The same selector value is programmed into all four SEL registers */
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	/* TRACE_BUF2 is stored first, TRACE_BUF1 second */
	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Accessors for the ioremapped CX debug block; @offset is in dwords */
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/*
 * read a value from the CX debug bus
 *
 * Same sequence as debugbus_read() but through the @cxdbg mapping.  Stores
 * two dwords in @data and returns 2.
 */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/*
 * Read a chunk of data from the VBIF debug bus
 *
 * Writes @reg to @ctrl0, then for each index below @count writes the index
 * to @ctrl1 and reads one dword from the test bus output.  Returns @count.
 */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/* Total dwords in a VBIF debugbus dump: 16, 18 and 12 dwords per block type */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

235 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 236 struct a6xx_gpu_state *a6xx_state, 237 struct a6xx_gpu_state_obj *obj) 238 { 239 u32 clk, *ptr; 240 int i; 241 242 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 243 sizeof(u32)); 244 if (!obj->data) 245 return; 246 247 obj->handle = NULL; 248 249 /* Get the current clock setting */ 250 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 251 252 /* Force on the bus so we can read it */ 253 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 254 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 255 256 /* We will read from BUS2 first, so disable BUS1 */ 257 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 258 259 /* Enable the VBIF bus for reading */ 260 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 261 262 ptr = obj->data; 263 264 for (i = 0; i < AXI_ARB_BLOCKS; i++) 265 ptr += vbif_debugbus_read(gpu, 266 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 267 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 268 1 << (i + 16), 16, ptr); 269 270 for (i = 0; i < XIN_AXI_BLOCKS; i++) 271 ptr += vbif_debugbus_read(gpu, 272 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 273 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 274 1 << i, 18, ptr); 275 276 /* Stop BUS2 so we can turn on BUS1 */ 277 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 278 279 for (i = 0; i < XIN_CORE_BLOCKS; i++) 280 ptr += vbif_debugbus_read(gpu, 281 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 282 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 283 1 << i, 12, ptr); 284 285 /* Restore the VBIF clock setting */ 286 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 287 } 288 289 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 290 struct a6xx_gpu_state *a6xx_state, 291 const struct a6xx_debugbus_block *block, 292 struct a6xx_gpu_state_obj *obj) 293 { 294 int i; 295 u32 *ptr; 296 297 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 298 if (!obj->data) 299 return; 300 301 obj->handle = block; 302 303 for (ptr = obj->data, i = 0; i < block->count; i++) 304 ptr += debugbus_read(gpu, block->id, i, ptr); 305 } 306 307 static 
void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, 308 struct a6xx_gpu_state *a6xx_state, 309 const struct a6xx_debugbus_block *block, 310 struct a6xx_gpu_state_obj *obj) 311 { 312 int i; 313 u32 *ptr; 314 315 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 316 if (!obj->data) 317 return; 318 319 obj->handle = block; 320 321 for (ptr = obj->data, i = 0; i < block->count; i++) 322 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); 323 } 324 325 static void a6xx_get_debugbus(struct msm_gpu *gpu, 326 struct a6xx_gpu_state *a6xx_state) 327 { 328 struct resource *res; 329 void __iomem *cxdbg = NULL; 330 int nr_debugbus_blocks; 331 332 /* Set up the GX debug bus */ 333 334 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 335 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 336 337 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 338 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 339 340 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 341 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 342 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 343 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 344 345 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 346 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 347 348 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 349 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 350 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 351 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 352 353 /* Set up the CX debug bus - it lives elsewhere in the system so do a 354 * temporary ioremap for the registers 355 */ 356 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 357 "cx_dbgc"); 358 359 if (res) 360 cxdbg = ioremap(res->start, resource_size(res)); 361 362 if (cxdbg) { 363 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 364 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 365 366 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 367 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 368 369 
cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 370 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 371 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 372 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 373 374 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 375 0x76543210); 376 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 377 0xFEDCBA98); 378 379 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 380 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 381 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 382 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 383 } 384 385 nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + 386 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); 387 388 if (adreno_is_a650_family(to_adreno_gpu(gpu))) 389 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks); 390 391 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, 392 sizeof(*a6xx_state->debugbus)); 393 394 if (a6xx_state->debugbus) { 395 int i; 396 397 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 398 a6xx_get_debugbus_block(gpu, 399 a6xx_state, 400 &a6xx_debugbus_blocks[i], 401 &a6xx_state->debugbus[i]); 402 403 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 404 405 /* 406 * GBIF has same debugbus as of other GPU blocks, fall back to 407 * default path if GPU uses GBIF, also GBIF uses exactly same 408 * ID as of VBIF. 
409 */ 410 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 411 a6xx_get_debugbus_block(gpu, a6xx_state, 412 &a6xx_gbif_debugbus_block, 413 &a6xx_state->debugbus[i]); 414 415 a6xx_state->nr_debugbus += 1; 416 } 417 418 419 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 420 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++) 421 a6xx_get_debugbus_block(gpu, 422 a6xx_state, 423 &a650_debugbus_blocks[i], 424 &a6xx_state->debugbus[i]); 425 } 426 } 427 428 /* Dump the VBIF debugbus on applicable targets */ 429 if (!a6xx_has_gbif(to_adreno_gpu(gpu))) { 430 a6xx_state->vbif_debugbus = 431 state_kcalloc(a6xx_state, 1, 432 sizeof(*a6xx_state->vbif_debugbus)); 433 434 if (a6xx_state->vbif_debugbus) 435 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 436 a6xx_state->vbif_debugbus); 437 } 438 439 if (cxdbg) { 440 a6xx_state->cx_debugbus = 441 state_kcalloc(a6xx_state, 442 ARRAY_SIZE(a6xx_cx_debugbus_blocks), 443 sizeof(*a6xx_state->cx_debugbus)); 444 445 if (a6xx_state->cx_debugbus) { 446 int i; 447 448 for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) 449 a6xx_get_cx_debugbus_block(cxdbg, 450 a6xx_state, 451 &a6xx_cx_debugbus_blocks[i], 452 &a6xx_state->cx_debugbus[i]); 453 454 a6xx_state->nr_cx_debugbus = 455 ARRAY_SIZE(a6xx_cx_debugbus_blocks); 456 } 457 458 iounmap(cxdbg); 459 } 460 } 461 462 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 463 464 /* Read a data cluster from behind the AHB aperture */ 465 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 466 struct a6xx_gpu_state *a6xx_state, 467 const struct a6xx_dbgahb_cluster *dbgahb, 468 struct a6xx_gpu_state_obj *obj, 469 struct a6xx_crashdumper *dumper) 470 { 471 u64 *in = dumper->ptr; 472 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 473 size_t datasize; 474 int i, regcount = 0; 475 476 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 477 int j; 478 479 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 480 (dbgahb->statetype + i * 2) << 8); 481 482 for (j = 0; j < dbgahb->count; j += 2) { 483 int 
count = RANGE(dbgahb->registers, j); 484 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 485 dbgahb->registers[j] - (dbgahb->base >> 2); 486 487 in += CRASHDUMP_READ(in, offset, count, out); 488 489 out += count * sizeof(u32); 490 491 if (i == 0) 492 regcount += count; 493 } 494 } 495 496 CRASHDUMP_FINI(in); 497 498 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 499 500 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 501 return; 502 503 if (a6xx_crashdumper_run(gpu, dumper)) 504 return; 505 506 obj->handle = dbgahb; 507 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 508 datasize); 509 } 510 511 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 512 struct a6xx_gpu_state *a6xx_state, 513 struct a6xx_crashdumper *dumper) 514 { 515 int i; 516 517 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 518 ARRAY_SIZE(a6xx_dbgahb_clusters), 519 sizeof(*a6xx_state->dbgahb_clusters)); 520 521 if (!a6xx_state->dbgahb_clusters) 522 return; 523 524 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 525 526 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 527 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 528 &a6xx_dbgahb_clusters[i], 529 &a6xx_state->dbgahb_clusters[i], dumper); 530 } 531 532 /* Read a data cluster from the CP aperture with the crashdumper */ 533 static void a6xx_get_cluster(struct msm_gpu *gpu, 534 struct a6xx_gpu_state *a6xx_state, 535 const struct a6xx_cluster *cluster, 536 struct a6xx_gpu_state_obj *obj, 537 struct a6xx_crashdumper *dumper) 538 { 539 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 540 u64 *in = dumper->ptr; 541 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 542 size_t datasize; 543 int i, regcount = 0; 544 u32 id = cluster->id; 545 546 /* Skip registers that are not present on older generation */ 547 if (!adreno_is_a660_family(adreno_gpu) && 548 cluster->registers == a660_fe_cluster) 549 return; 550 551 if (adreno_is_a650_family(adreno_gpu) && 552 cluster->registers == 
a6xx_ps_cluster) 553 id = CLUSTER_VPC_PS; 554 555 /* Some clusters need a selector register to be programmed too */ 556 if (cluster->sel_reg) 557 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 558 559 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 560 int j; 561 562 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 563 (id << 8) | (i << 4) | i); 564 565 for (j = 0; j < cluster->count; j += 2) { 566 int count = RANGE(cluster->registers, j); 567 568 in += CRASHDUMP_READ(in, cluster->registers[j], 569 count, out); 570 571 out += count * sizeof(u32); 572 573 if (i == 0) 574 regcount += count; 575 } 576 } 577 578 CRASHDUMP_FINI(in); 579 580 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 581 582 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 583 return; 584 585 if (a6xx_crashdumper_run(gpu, dumper)) 586 return; 587 588 obj->handle = cluster; 589 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 590 datasize); 591 } 592 593 static void a6xx_get_clusters(struct msm_gpu *gpu, 594 struct a6xx_gpu_state *a6xx_state, 595 struct a6xx_crashdumper *dumper) 596 { 597 int i; 598 599 a6xx_state->clusters = state_kcalloc(a6xx_state, 600 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 601 602 if (!a6xx_state->clusters) 603 return; 604 605 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 606 607 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 608 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 609 &a6xx_state->clusters[i], dumper); 610 } 611 612 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 613 static void a6xx_get_shader_block(struct msm_gpu *gpu, 614 struct a6xx_gpu_state *a6xx_state, 615 const struct a6xx_shader_block *block, 616 struct a6xx_gpu_state_obj *obj, 617 struct a6xx_crashdumper *dumper) 618 { 619 u64 *in = dumper->ptr; 620 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 621 int i; 622 623 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 624 return; 625 626 for (i 
= 0; i < A6XX_NUM_SHADER_BANKS; i++) { 627 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 628 (block->type << 8) | i); 629 630 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 631 block->size, dumper->iova + A6XX_CD_DATA_OFFSET); 632 } 633 634 CRASHDUMP_FINI(in); 635 636 if (a6xx_crashdumper_run(gpu, dumper)) 637 return; 638 639 obj->handle = block; 640 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 641 datasize); 642 } 643 644 static void a6xx_get_shaders(struct msm_gpu *gpu, 645 struct a6xx_gpu_state *a6xx_state, 646 struct a6xx_crashdumper *dumper) 647 { 648 int i; 649 650 a6xx_state->shaders = state_kcalloc(a6xx_state, 651 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 652 653 if (!a6xx_state->shaders) 654 return; 655 656 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 657 658 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 659 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 660 &a6xx_state->shaders[i], dumper); 661 } 662 663 /* Read registers from behind the HLSQ aperture with the crashdumper */ 664 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 665 struct a6xx_gpu_state *a6xx_state, 666 const struct a6xx_registers *regs, 667 struct a6xx_gpu_state_obj *obj, 668 struct a6xx_crashdumper *dumper) 669 670 { 671 u64 *in = dumper->ptr; 672 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 673 int i, regcount = 0; 674 675 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 676 677 for (i = 0; i < regs->count; i += 2) { 678 u32 count = RANGE(regs->registers, i); 679 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 680 regs->registers[i] - (regs->val0 >> 2); 681 682 in += CRASHDUMP_READ(in, offset, count, out); 683 684 out += count * sizeof(u32); 685 regcount += count; 686 } 687 688 CRASHDUMP_FINI(in); 689 690 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 691 return; 692 693 if (a6xx_crashdumper_run(gpu, dumper)) 694 return; 695 696 
obj->handle = regs; 697 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 698 regcount * sizeof(u32)); 699 } 700 701 /* Read a block of registers using the crashdumper */ 702 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 703 struct a6xx_gpu_state *a6xx_state, 704 const struct a6xx_registers *regs, 705 struct a6xx_gpu_state_obj *obj, 706 struct a6xx_crashdumper *dumper) 707 708 { 709 u64 *in = dumper->ptr; 710 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 711 int i, regcount = 0; 712 713 /* Skip unsupported registers on older generations */ 714 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 715 (regs->registers == a660_registers)) 716 return; 717 718 /* Some blocks might need to program a selector register first */ 719 if (regs->val0) 720 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 721 722 for (i = 0; i < regs->count; i += 2) { 723 u32 count = RANGE(regs->registers, i); 724 725 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 726 727 out += count * sizeof(u32); 728 regcount += count; 729 } 730 731 CRASHDUMP_FINI(in); 732 733 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 734 return; 735 736 if (a6xx_crashdumper_run(gpu, dumper)) 737 return; 738 739 obj->handle = regs; 740 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 741 regcount * sizeof(u32)); 742 } 743 744 /* Read a block of registers via AHB */ 745 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 746 struct a6xx_gpu_state *a6xx_state, 747 const struct a6xx_registers *regs, 748 struct a6xx_gpu_state_obj *obj) 749 { 750 int i, regcount = 0, index = 0; 751 752 /* Skip unsupported registers on older generations */ 753 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) && 754 (regs->registers == a660_registers)) 755 return; 756 757 for (i = 0; i < regs->count; i += 2) 758 regcount += RANGE(regs->registers, i); 759 760 obj->handle = (const void *) regs; 761 obj->data = state_kcalloc(a6xx_state, regcount, 
sizeof(u32)); 762 if (!obj->data) 763 return; 764 765 for (i = 0; i < regs->count; i += 2) { 766 u32 count = RANGE(regs->registers, i); 767 int j; 768 769 for (j = 0; j < count; j++) 770 obj->data[index++] = gpu_read(gpu, 771 regs->registers[i] + j); 772 } 773 } 774 775 /* Read a block of GMU registers */ 776 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 777 struct a6xx_gpu_state *a6xx_state, 778 const struct a6xx_registers *regs, 779 struct a6xx_gpu_state_obj *obj, 780 bool rscc) 781 { 782 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 783 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 784 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 785 int i, regcount = 0, index = 0; 786 787 for (i = 0; i < regs->count; i += 2) 788 regcount += RANGE(regs->registers, i); 789 790 obj->handle = (const void *) regs; 791 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 792 if (!obj->data) 793 return; 794 795 for (i = 0; i < regs->count; i += 2) { 796 u32 count = RANGE(regs->registers, i); 797 int j; 798 799 for (j = 0; j < count; j++) { 800 u32 offset = regs->registers[i] + j; 801 u32 val; 802 803 if (rscc) 804 val = gmu_read_rscc(gmu, offset); 805 else 806 val = gmu_read(gmu, offset); 807 808 obj->data[index++] = val; 809 } 810 } 811 } 812 813 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 814 struct a6xx_gpu_state *a6xx_state) 815 { 816 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 817 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 818 819 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 820 3, sizeof(*a6xx_state->gmu_registers)); 821 822 if (!a6xx_state->gmu_registers) 823 return; 824 825 a6xx_state->nr_gmu_registers = 3; 826 827 /* Get the CX GMU registers from AHB */ 828 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 829 &a6xx_state->gmu_registers[0], false); 830 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 831 &a6xx_state->gmu_registers[1], true); 832 833 if 
(!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 834 return; 835 836 /* Set the fence to ALLOW mode so we can access the registers */ 837 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 838 839 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2], 840 &a6xx_state->gmu_registers[2], false); 841 } 842 843 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo( 844 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo) 845 { 846 struct msm_gpu_state_bo *snapshot; 847 848 if (!bo->size) 849 return NULL; 850 851 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot)); 852 if (!snapshot) 853 return NULL; 854 855 snapshot->iova = bo->iova; 856 snapshot->size = bo->size; 857 snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL); 858 if (!snapshot->data) 859 return NULL; 860 861 memcpy(snapshot->data, bo->virt, bo->size); 862 863 return snapshot; 864 } 865 866 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu, 867 struct a6xx_gpu_state *a6xx_state) 868 { 869 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 870 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 871 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 872 unsigned i, j; 873 874 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history)); 875 876 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { 877 struct a6xx_hfi_queue *queue = &gmu->queues[i]; 878 for (j = 0; j < HFI_HISTORY_SZ; j++) { 879 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ; 880 a6xx_state->hfi_queue_history[i][j] = queue->history[idx]; 881 } 882 } 883 } 884 885 #define A6XX_GBIF_REGLIST_SIZE 1 886 static void a6xx_get_registers(struct msm_gpu *gpu, 887 struct a6xx_gpu_state *a6xx_state, 888 struct a6xx_crashdumper *dumper) 889 { 890 int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + 891 ARRAY_SIZE(a6xx_reglist) + 892 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 893 int index = 0; 894 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 895 896 a6xx_state->registers = state_kcalloc(a6xx_state, 897 
count, sizeof(*a6xx_state->registers)); 898 899 if (!a6xx_state->registers) 900 return; 901 902 a6xx_state->nr_registers = count; 903 904 for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) 905 a6xx_get_ahb_gpu_registers(gpu, 906 a6xx_state, &a6xx_ahb_reglist[i], 907 &a6xx_state->registers[index++]); 908 909 if (a6xx_has_gbif(adreno_gpu)) 910 a6xx_get_ahb_gpu_registers(gpu, 911 a6xx_state, &a6xx_gbif_reglist, 912 &a6xx_state->registers[index++]); 913 else 914 a6xx_get_ahb_gpu_registers(gpu, 915 a6xx_state, &a6xx_vbif_reglist, 916 &a6xx_state->registers[index++]); 917 if (!dumper) { 918 /* 919 * We can't use the crashdumper when the SMMU is stalled, 920 * because the GPU has no memory access until we resume 921 * translation (but we don't want to do that until after 922 * we have captured as much useful GPU state as possible). 923 * So instead collect registers via the CPU: 924 */ 925 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 926 a6xx_get_ahb_gpu_registers(gpu, 927 a6xx_state, &a6xx_reglist[i], 928 &a6xx_state->registers[index++]); 929 return; 930 } 931 932 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 933 a6xx_get_crashdumper_registers(gpu, 934 a6xx_state, &a6xx_reglist[i], 935 &a6xx_state->registers[index++], 936 dumper); 937 938 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 939 a6xx_get_crashdumper_hlsq_registers(gpu, 940 a6xx_state, &a6xx_hlsq_reglist[i], 941 &a6xx_state->registers[index++], 942 dumper); 943 } 944 945 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu) 946 { 947 /* The value at [16:31] is in 4dword units. 
Convert it to dwords */ 948 return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14; 949 } 950 951 /* Read a block of data from an indexed register pair */ 952 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 953 struct a6xx_gpu_state *a6xx_state, 954 struct a6xx_indexed_registers *indexed, 955 struct a6xx_gpu_state_obj *obj) 956 { 957 int i; 958 959 obj->handle = (const void *) indexed; 960 if (indexed->count_fn) 961 indexed->count = indexed->count_fn(gpu); 962 963 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32)); 964 if (!obj->data) 965 return; 966 967 /* All the indexed banks start at address 0 */ 968 gpu_write(gpu, indexed->addr, 0); 969 970 /* Read the data - each read increments the internal address by 1 */ 971 for (i = 0; i < indexed->count; i++) 972 obj->data[i] = gpu_read(gpu, indexed->data); 973 } 974 975 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 976 struct a6xx_gpu_state *a6xx_state) 977 { 978 u32 mempool_size; 979 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 980 int i; 981 982 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 983 sizeof(*a6xx_state->indexed_regs)); 984 if (!a6xx_state->indexed_regs) 985 return; 986 987 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 988 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 989 &a6xx_state->indexed_regs[i]); 990 991 if (adreno_is_a650_family(to_adreno_gpu(gpu))) { 992 u32 val; 993 994 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG); 995 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4); 996 997 /* Get the contents of the CP mempool */ 998 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 999 &a6xx_state->indexed_regs[i]); 1000 1001 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val); 1002 a6xx_state->nr_indexed_regs = count; 1003 return; 1004 } 1005 1006 /* Set the CP mempool size to 0 to stabilize it while dumping */ 1007 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 1008 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 
0); 1009 1010 /* Get the contents of the CP mempool */ 1011 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 1012 &a6xx_state->indexed_regs[i]); 1013 1014 /* 1015 * Offset 0x2000 in the mempool is the size - copy the saved size over 1016 * so the data is consistent 1017 */ 1018 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 1019 1020 /* Restore the size in the hardware */ 1021 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 1022 1023 a6xx_state->nr_indexed_regs = count; 1024 } 1025 1026 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 1027 { 1028 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; 1029 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 1030 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 1031 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 1032 GFP_KERNEL); 1033 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & 1034 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); 1035 1036 if (!a6xx_state) 1037 return ERR_PTR(-ENOMEM); 1038 1039 INIT_LIST_HEAD(&a6xx_state->objs); 1040 1041 /* Get the generic state from the adreno core */ 1042 adreno_gpu_state_get(gpu, &a6xx_state->base); 1043 1044 a6xx_get_gmu_registers(gpu, a6xx_state); 1045 1046 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log); 1047 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi); 1048 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug); 1049 1050 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state); 1051 1052 /* If GX isn't on the rest of the data isn't going to be accessible */ 1053 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 1054 return &a6xx_state->base; 1055 1056 /* Get the banks of indexed registers */ 1057 a6xx_get_indexed_registers(gpu, a6xx_state); 1058 1059 /* 1060 * Try to initialize the crashdumper, if we are not dumping state 1061 * with the SMMU stalled. 
The crashdumper needs memory access to 1062 * write out GPU state, so we need to skip this when the SMMU is 1063 * stalled in response to an iova fault 1064 */ 1065 if (!stalled && !gpu->needs_hw_init && 1066 !a6xx_crashdumper_init(gpu, &_dumper)) { 1067 dumper = &_dumper; 1068 } 1069 1070 a6xx_get_registers(gpu, a6xx_state, dumper); 1071 1072 if (dumper) { 1073 a6xx_get_shaders(gpu, a6xx_state, dumper); 1074 a6xx_get_clusters(gpu, a6xx_state, dumper); 1075 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); 1076 1077 msm_gem_kernel_put(dumper->bo, gpu->aspace); 1078 } 1079 1080 if (snapshot_debugbus) 1081 a6xx_get_debugbus(gpu, a6xx_state); 1082 1083 a6xx_state->gpu_initialized = !gpu->needs_hw_init; 1084 1085 return &a6xx_state->base; 1086 } 1087 1088 static void a6xx_gpu_state_destroy(struct kref *kref) 1089 { 1090 struct a6xx_state_memobj *obj, *tmp; 1091 struct msm_gpu_state *state = container_of(kref, 1092 struct msm_gpu_state, ref); 1093 struct a6xx_gpu_state *a6xx_state = container_of(state, 1094 struct a6xx_gpu_state, base); 1095 1096 if (a6xx_state->gmu_log) 1097 kvfree(a6xx_state->gmu_log->data); 1098 1099 if (a6xx_state->gmu_hfi) 1100 kvfree(a6xx_state->gmu_hfi->data); 1101 1102 if (a6xx_state->gmu_debug) 1103 kvfree(a6xx_state->gmu_debug->data); 1104 1105 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) { 1106 list_del(&obj->node); 1107 kvfree(obj); 1108 } 1109 1110 adreno_gpu_state_destroy(state); 1111 kfree(a6xx_state); 1112 } 1113 1114 int a6xx_gpu_state_put(struct msm_gpu_state *state) 1115 { 1116 if (IS_ERR_OR_NULL(state)) 1117 return 1; 1118 1119 return kref_put(&state->ref, a6xx_gpu_state_destroy); 1120 } 1121 1122 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 1123 struct drm_printer *p) 1124 { 1125 int i, index = 0; 1126 1127 if (!data) 1128 return; 1129 1130 for (i = 0; i < count; i += 2) { 1131 u32 count = RANGE(registers, i); 1132 u32 offset = registers[i]; 1133 int j; 1134 1135 for (j = 0; j < 
count; index++, offset++, j++) { 1136 if (data[index] == 0xdeafbead) 1137 continue; 1138 1139 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1140 offset << 2, data[index]); 1141 } 1142 } 1143 } 1144 1145 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 1146 { 1147 char out[ASCII85_BUFSZ]; 1148 long i, l, datalen = 0; 1149 1150 for (i = 0; i < len >> 2; i++) { 1151 if (data[i]) 1152 datalen = (i + 1) << 2; 1153 } 1154 1155 if (datalen == 0) 1156 return; 1157 1158 drm_puts(p, " data: !!ascii85 |\n"); 1159 drm_puts(p, " "); 1160 1161 1162 l = ascii85_encode_len(datalen); 1163 1164 for (i = 0; i < l; i++) 1165 drm_puts(p, ascii85_encode(data[i], out)); 1166 1167 drm_puts(p, "\n"); 1168 } 1169 1170 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1171 { 1172 drm_puts(p, fmt); 1173 drm_puts(p, name); 1174 drm_puts(p, "\n"); 1175 } 1176 1177 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1178 struct drm_printer *p) 1179 { 1180 const struct a6xx_shader_block *block = obj->handle; 1181 int i; 1182 1183 if (!obj->handle) 1184 return; 1185 1186 print_name(p, " - type: ", block->name); 1187 1188 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1189 drm_printf(p, " - bank: %d\n", i); 1190 drm_printf(p, " size: %d\n", block->size); 1191 1192 if (!obj->data) 1193 continue; 1194 1195 print_ascii85(p, block->size << 2, 1196 obj->data + (block->size * i)); 1197 } 1198 } 1199 1200 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1201 struct drm_printer *p) 1202 { 1203 int ctx, index = 0; 1204 1205 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1206 int j; 1207 1208 drm_printf(p, " - context: %d\n", ctx); 1209 1210 for (j = 0; j < size; j += 2) { 1211 u32 count = RANGE(registers, j); 1212 u32 offset = registers[j]; 1213 int k; 1214 1215 for (k = 0; k < count; index++, offset++, k++) { 1216 if (data[index] == 0xdeafbead) 1217 continue; 1218 1219 drm_printf(p, " - { offset: 0x%06x, 
value: 0x%08x }\n", 1220 offset << 2, data[index]); 1221 } 1222 } 1223 } 1224 } 1225 1226 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1227 struct drm_printer *p) 1228 { 1229 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1230 1231 if (dbgahb) { 1232 print_name(p, " - cluster-name: ", dbgahb->name); 1233 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1234 obj->data, p); 1235 } 1236 } 1237 1238 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1239 struct drm_printer *p) 1240 { 1241 const struct a6xx_cluster *cluster = obj->handle; 1242 1243 if (cluster) { 1244 print_name(p, " - cluster-name: ", cluster->name); 1245 a6xx_show_cluster_data(cluster->registers, cluster->count, 1246 obj->data, p); 1247 } 1248 } 1249 1250 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1251 struct drm_printer *p) 1252 { 1253 const struct a6xx_indexed_registers *indexed = obj->handle; 1254 1255 if (!indexed) 1256 return; 1257 1258 print_name(p, " - regs-name: ", indexed->name); 1259 drm_printf(p, " dwords: %d\n", indexed->count); 1260 1261 print_ascii85(p, indexed->count << 2, obj->data); 1262 } 1263 1264 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1265 u32 *data, struct drm_printer *p) 1266 { 1267 if (block) { 1268 print_name(p, " - debugbus-block: ", block->name); 1269 1270 /* 1271 * count for regular debugbus data is in quadwords, 1272 * but print the size in dwords for consistency 1273 */ 1274 drm_printf(p, " count: %d\n", block->count << 1); 1275 1276 print_ascii85(p, block->count << 3, data); 1277 } 1278 } 1279 1280 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1281 struct drm_printer *p) 1282 { 1283 int i; 1284 1285 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1286 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1287 1288 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1289 } 1290 1291 if (a6xx_state->vbif_debugbus) { 1292 struct 
a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1293 1294 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1295 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1296 1297 /* vbif debugbus data is in dwords. Confusing, huh? */ 1298 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1299 } 1300 1301 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1302 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1303 1304 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1305 } 1306 } 1307 1308 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1309 struct drm_printer *p) 1310 { 1311 struct a6xx_gpu_state *a6xx_state = container_of(state, 1312 struct a6xx_gpu_state, base); 1313 int i; 1314 1315 if (IS_ERR_OR_NULL(state)) 1316 return; 1317 1318 drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized); 1319 1320 adreno_show(gpu, state, p); 1321 1322 drm_puts(p, "gmu-log:\n"); 1323 if (a6xx_state->gmu_log) { 1324 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log; 1325 1326 drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova); 1327 drm_printf(p, " size: %zu\n", gmu_log->size); 1328 adreno_show_object(p, &gmu_log->data, gmu_log->size, 1329 &gmu_log->encoded); 1330 } 1331 1332 drm_puts(p, "gmu-hfi:\n"); 1333 if (a6xx_state->gmu_hfi) { 1334 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi; 1335 unsigned i, j; 1336 1337 drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova); 1338 drm_printf(p, " size: %zu\n", gmu_hfi->size); 1339 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) { 1340 drm_printf(p, " queue-history[%u]:", i); 1341 for (j = 0; j < HFI_HISTORY_SZ; j++) { 1342 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]); 1343 } 1344 drm_printf(p, "\n"); 1345 } 1346 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size, 1347 &gmu_hfi->encoded); 1348 } 1349 1350 drm_puts(p, "gmu-debug:\n"); 1351 if (a6xx_state->gmu_debug) { 1352 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug; 1353 
1354 drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova); 1355 drm_printf(p, " size: %zu\n", gmu_debug->size); 1356 adreno_show_object(p, &gmu_debug->data, gmu_debug->size, 1357 &gmu_debug->encoded); 1358 } 1359 1360 drm_puts(p, "registers:\n"); 1361 for (i = 0; i < a6xx_state->nr_registers; i++) { 1362 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 1363 const struct a6xx_registers *regs = obj->handle; 1364 1365 if (!obj->handle) 1366 continue; 1367 1368 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1369 } 1370 1371 drm_puts(p, "registers-gmu:\n"); 1372 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 1373 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 1374 const struct a6xx_registers *regs = obj->handle; 1375 1376 if (!obj->handle) 1377 continue; 1378 1379 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1380 } 1381 1382 drm_puts(p, "indexed-registers:\n"); 1383 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 1384 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 1385 1386 drm_puts(p, "shader-blocks:\n"); 1387 for (i = 0; i < a6xx_state->nr_shaders; i++) 1388 a6xx_show_shader(&a6xx_state->shaders[i], p); 1389 1390 drm_puts(p, "clusters:\n"); 1391 for (i = 0; i < a6xx_state->nr_clusters; i++) 1392 a6xx_show_cluster(&a6xx_state->clusters[i], p); 1393 1394 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) 1395 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 1396 1397 drm_puts(p, "debugbus:\n"); 1398 a6xx_show_debugbus(a6xx_state, p); 1399 } 1400