1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ 3 4 #include <linux/ascii85.h> 5 #include "msm_gem.h" 6 #include "a6xx_gpu.h" 7 #include "a6xx_gmu.h" 8 #include "a6xx_gpu_state.h" 9 #include "a6xx_gmu.xml.h" 10 11 struct a6xx_gpu_state_obj { 12 const void *handle; 13 u32 *data; 14 }; 15 16 struct a6xx_gpu_state { 17 struct msm_gpu_state base; 18 19 struct a6xx_gpu_state_obj *gmu_registers; 20 int nr_gmu_registers; 21 22 struct a6xx_gpu_state_obj *registers; 23 int nr_registers; 24 25 struct a6xx_gpu_state_obj *shaders; 26 int nr_shaders; 27 28 struct a6xx_gpu_state_obj *clusters; 29 int nr_clusters; 30 31 struct a6xx_gpu_state_obj *dbgahb_clusters; 32 int nr_dbgahb_clusters; 33 34 struct a6xx_gpu_state_obj *indexed_regs; 35 int nr_indexed_regs; 36 37 struct a6xx_gpu_state_obj *debugbus; 38 int nr_debugbus; 39 40 struct a6xx_gpu_state_obj *vbif_debugbus; 41 42 struct a6xx_gpu_state_obj *cx_debugbus; 43 int nr_cx_debugbus; 44 45 struct list_head objs; 46 }; 47 48 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) 49 { 50 in[0] = val; 51 in[1] = (((u64) reg) << 44 | (1 << 21) | 1); 52 53 return 2; 54 } 55 56 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) 57 { 58 in[0] = target; 59 in[1] = (((u64) reg) << 44 | dwords); 60 61 return 2; 62 } 63 64 static inline int CRASHDUMP_FINI(u64 *in) 65 { 66 in[0] = 0; 67 in[1] = 0; 68 69 return 2; 70 } 71 72 struct a6xx_crashdumper { 73 void *ptr; 74 struct drm_gem_object *bo; 75 u64 iova; 76 }; 77 78 struct a6xx_state_memobj { 79 struct list_head node; 80 unsigned long long data[]; 81 }; 82 83 void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize) 84 { 85 struct a6xx_state_memobj *obj = 86 kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL); 87 88 if (!obj) 89 return NULL; 90 91 list_add_tail(&obj->node, &a6xx_state->objs); 92 return &obj->data; 93 } 94 95 void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, 96 size_t size) 97 { 98 void *dst = state_kcalloc(a6xx_state, 1, size); 99 100 if (dst) 101 memcpy(dst, src, size); 102 return dst; 103 } 104 105 /* 106 * Allocate 1MB for the crashdumper scratch region - 8k for the script and 107 * the rest for the data 108 */ 109 #define A6XX_CD_DATA_OFFSET 8192 110 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192) 111 112 static int a6xx_crashdumper_init(struct msm_gpu *gpu, 113 struct a6xx_crashdumper *dumper) 114 { 115 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, 116 SZ_1M, MSM_BO_UNCACHED, gpu->aspace, 117 &dumper->bo, &dumper->iova); 118 119 if (!IS_ERR(dumper->ptr)) 120 msm_gem_object_set_name(dumper->bo, "crashdump"); 121 122 return PTR_ERR_OR_ZERO(dumper->ptr); 123 } 124 125 static int a6xx_crashdumper_run(struct msm_gpu *gpu, 126 struct a6xx_crashdumper *dumper) 127 { 128 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 129 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 130 u32 val; 131 int ret; 132 133 if (IS_ERR_OR_NULL(dumper->ptr)) 134 return -EINVAL; 135 136 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) 137 return -EINVAL; 138 139 /* Make sure all pending memory writes are posted */ 140 wmb(); 141 142 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, 143 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); 144 145 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); 146 147 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, 148 val & 0x02, 100, 10000); 149 150 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); 151 152 return ret; 153 } 154 155 /* read a value from the GX debug bus */ 156 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, 157 u32 *data) 158 { 159 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | 160 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); 161 162 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); 163 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); 164 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); 165 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); 166 167 /* Wait 1 us to make sure the data is flowing */ 168 udelay(1); 169 170 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); 171 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); 172 173 return 2; 174 } 175 176 #define cxdbg_write(ptr, offset, val) \ 177 msm_writel((val), (ptr) + ((offset) << 2)) 178 179 #define cxdbg_read(ptr, offset) \ 180 msm_readl((ptr) + ((offset) << 2)) 181 182 /* read a value from the CX debug bus */ 183 static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, 184 u32 *data) 185 { 186 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | 187 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); 188 189 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); 190 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); 191 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); 192 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); 193 194 /* Wait 1 us to make sure the data is flowing */ 195 udelay(1); 196 197 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); 198 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); 199 200 return 2; 201 } 202 203 /* Read a chunk of data from the VBIF debug bus */ 204 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, 205 u32 reg, int count, u32 *data) 206 { 207 int i; 208 209 gpu_write(gpu, ctrl0, reg); 210 211 for (i = 0; i < count; i++) { 212 gpu_write(gpu, ctrl1, i); 213 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); 214 } 215 216 return count; 217 } 218 219 #define AXI_ARB_BLOCKS 2 220 #define XIN_AXI_BLOCKS 5 221 #define XIN_CORE_BLOCKS 4 222 223 #define VBIF_DEBUGBUS_BLOCK_SIZE \ 224 ((16 * AXI_ARB_BLOCKS) + \ 225 (18 * XIN_AXI_BLOCKS) + \ 226 (12 * XIN_CORE_BLOCKS)) 227 228 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, 229 struct a6xx_gpu_state *a6xx_state, 230 struct a6xx_gpu_state_obj *obj) 231 { 232 u32 clk, *ptr; 233 int i; 234 235 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE, 236 sizeof(u32)); 237 if (!obj->data) 238 return; 239 240 obj->handle = NULL; 241 242 /* Get the current clock setting */ 243 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); 244 245 /* Force on the bus so we can read it */ 246 gpu_write(gpu, REG_A6XX_VBIF_CLKON, 247 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); 248 249 /* We will read from BUS2 first, so disable BUS1 */ 250 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); 251 252 /* Enable the VBIF bus for reading */ 253 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); 254 255 ptr = obj->data; 256 257 for (i = 0; i < AXI_ARB_BLOCKS; i++) 258 ptr += vbif_debugbus_read(gpu, 259 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 260 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 261 1 << (i + 16), 16, ptr); 262 263 for (i = 0; i < XIN_AXI_BLOCKS; i++) 264 ptr += vbif_debugbus_read(gpu, 265 REG_A6XX_VBIF_TEST_BUS2_CTRL0, 266 REG_A6XX_VBIF_TEST_BUS2_CTRL1, 267 1 << i, 18, ptr); 268 269 /* Stop BUS2 so we can turn on BUS1 */ 270 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); 271 272 for (i = 0; i < XIN_CORE_BLOCKS; i++) 273 ptr += vbif_debugbus_read(gpu, 274 REG_A6XX_VBIF_TEST_BUS1_CTRL0, 275 REG_A6XX_VBIF_TEST_BUS1_CTRL1, 276 1 << i, 12, ptr); 277 278 /* Restore the VBIF clock setting */ 279 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); 280 } 281 282 static void a6xx_get_debugbus_block(struct msm_gpu *gpu, 283 struct a6xx_gpu_state *a6xx_state, 284 const struct a6xx_debugbus_block *block, 285 struct a6xx_gpu_state_obj *obj) 286 { 287 int i; 288 u32 *ptr; 289 290 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 291 if (!obj->data) 292 return; 293 294 obj->handle = block; 295 296 for (ptr = obj->data, i = 0; i < block->count; i++) 297 ptr += debugbus_read(gpu, block->id, i, ptr); 298 } 299 300 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, 301 struct a6xx_gpu_state *a6xx_state, 302 const struct a6xx_debugbus_block *block, 303 struct a6xx_gpu_state_obj *obj) 304 { 305 int i; 306 u32 *ptr; 307 308 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64)); 309 if (!obj->data) 310 return; 311 312 obj->handle = block; 313 314 for (ptr = obj->data, i = 0; i < block->count; i++) 315 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); 316 } 317 318 static void a6xx_get_debugbus(struct msm_gpu *gpu, 319 struct a6xx_gpu_state *a6xx_state) 320 { 321 struct resource *res; 322 void __iomem *cxdbg = NULL; 323 int nr_debugbus_blocks; 324 325 /* Set up the GX debug bus */ 326 327 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, 328 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 329 330 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, 331 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 332 333 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); 334 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); 335 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); 336 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); 337 338 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); 339 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); 340 341 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); 342 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); 343 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); 344 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); 345 346 /* Set up the CX debug bus - it lives elsewhere in the system so do a 347 * temporary ioremap for the registers 348 */ 349 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, 350 "cx_dbgc"); 351 352 if (res) 353 cxdbg = ioremap(res->start, resource_size(res)); 354 355 if (cxdbg) { 356 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, 357 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); 358 359 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, 360 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); 361 362 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); 363 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); 364 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); 365 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); 366 367 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, 368 0x76543210); 369 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, 370 0xFEDCBA98); 371 372 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); 373 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); 374 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); 375 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); 376 } 377 378 nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) + 379 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0); 380 381 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks, 382 sizeof(*a6xx_state->debugbus)); 383 384 if (a6xx_state->debugbus) { 385 int i; 386 387 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) 388 a6xx_get_debugbus_block(gpu, 389 a6xx_state, 390 &a6xx_debugbus_blocks[i], 391 &a6xx_state->debugbus[i]); 392 393 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); 394 395 /* 396 * GBIF has same debugbus as of other GPU blocks, fall back to 397 * default path if GPU uses GBIF, also GBIF uses exactly same 398 * ID as of VBIF. 399 */ 400 if (a6xx_has_gbif(to_adreno_gpu(gpu))) { 401 a6xx_get_debugbus_block(gpu, a6xx_state, 402 &a6xx_gbif_debugbus_block, 403 &a6xx_state->debugbus[i]); 404 405 a6xx_state->nr_debugbus += 1; 406 } 407 } 408 409 /* Dump the VBIF debugbus on applicable targets */ 410 if (!a6xx_has_gbif(to_adreno_gpu(gpu))) { 411 a6xx_state->vbif_debugbus = 412 state_kcalloc(a6xx_state, 1, 413 sizeof(*a6xx_state->vbif_debugbus)); 414 415 if (a6xx_state->vbif_debugbus) 416 a6xx_get_vbif_debugbus_block(gpu, a6xx_state, 417 a6xx_state->vbif_debugbus); 418 } 419 420 if (cxdbg) { 421 a6xx_state->cx_debugbus = 422 state_kcalloc(a6xx_state, 423 ARRAY_SIZE(a6xx_cx_debugbus_blocks), 424 sizeof(*a6xx_state->cx_debugbus)); 425 426 if (a6xx_state->cx_debugbus) { 427 int i; 428 429 for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) 430 a6xx_get_cx_debugbus_block(cxdbg, 431 a6xx_state, 432 &a6xx_cx_debugbus_blocks[i], 433 &a6xx_state->cx_debugbus[i]); 434 435 a6xx_state->nr_cx_debugbus = 436 ARRAY_SIZE(a6xx_cx_debugbus_blocks); 437 } 438 439 iounmap(cxdbg); 440 } 441 } 442 443 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) 444 445 /* Read a data cluster from behind the AHB aperture */ 446 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, 447 struct a6xx_gpu_state *a6xx_state, 448 const struct a6xx_dbgahb_cluster *dbgahb, 449 struct a6xx_gpu_state_obj *obj, 450 struct a6xx_crashdumper *dumper) 451 { 452 u64 *in = dumper->ptr; 453 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 454 size_t datasize; 455 int i, regcount = 0; 456 457 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 458 int j; 459 460 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 461 (dbgahb->statetype + i * 2) << 8); 462 463 for (j = 0; j < dbgahb->count; j += 2) { 464 int count = RANGE(dbgahb->registers, j); 465 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 466 dbgahb->registers[j] - (dbgahb->base >> 2); 467 468 in += CRASHDUMP_READ(in, offset, count, out); 469 470 out += count * sizeof(u32); 471 472 if (i == 0) 473 regcount += count; 474 } 475 } 476 477 CRASHDUMP_FINI(in); 478 479 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 480 481 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 482 return; 483 484 if (a6xx_crashdumper_run(gpu, dumper)) 485 return; 486 487 obj->handle = dbgahb; 488 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 489 datasize); 490 } 491 492 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, 493 struct a6xx_gpu_state *a6xx_state, 494 struct a6xx_crashdumper *dumper) 495 { 496 int i; 497 498 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state, 499 ARRAY_SIZE(a6xx_dbgahb_clusters), 500 sizeof(*a6xx_state->dbgahb_clusters)); 501 502 if (!a6xx_state->dbgahb_clusters) 503 return; 504 505 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); 506 507 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) 508 a6xx_get_dbgahb_cluster(gpu, a6xx_state, 509 &a6xx_dbgahb_clusters[i], 510 &a6xx_state->dbgahb_clusters[i], dumper); 511 } 512 513 /* Read a data cluster from the CP aperture with the crashdumper */ 514 static void a6xx_get_cluster(struct msm_gpu *gpu, 515 struct a6xx_gpu_state *a6xx_state, 516 const struct a6xx_cluster *cluster, 517 struct a6xx_gpu_state_obj *obj, 518 struct a6xx_crashdumper *dumper) 519 { 520 u64 *in = dumper->ptr; 521 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 522 size_t datasize; 523 int i, regcount = 0; 524 525 /* Some clusters need a selector register to be programmed too */ 526 if (cluster->sel_reg) 527 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); 528 529 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { 530 int j; 531 532 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, 533 (cluster->id << 8) | (i << 4) | i); 534 535 for (j = 0; j < cluster->count; j += 2) { 536 int count = RANGE(cluster->registers, j); 537 538 in += CRASHDUMP_READ(in, cluster->registers[j], 539 count, out); 540 541 out += count * sizeof(u32); 542 543 if (i == 0) 544 regcount += count; 545 } 546 } 547 548 CRASHDUMP_FINI(in); 549 550 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); 551 552 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 553 return; 554 555 if (a6xx_crashdumper_run(gpu, dumper)) 556 return; 557 558 obj->handle = cluster; 559 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 560 datasize); 561 } 562 563 static void a6xx_get_clusters(struct msm_gpu *gpu, 564 struct a6xx_gpu_state *a6xx_state, 565 struct a6xx_crashdumper *dumper) 566 { 567 int i; 568 569 a6xx_state->clusters = state_kcalloc(a6xx_state, 570 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters)); 571 572 if (!a6xx_state->clusters) 573 return; 574 575 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); 576 577 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) 578 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i], 579 &a6xx_state->clusters[i], dumper); 580 } 581 582 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */ 583 static void a6xx_get_shader_block(struct msm_gpu *gpu, 584 struct a6xx_gpu_state *a6xx_state, 585 const struct a6xx_shader_block *block, 586 struct a6xx_gpu_state_obj *obj, 587 struct a6xx_crashdumper *dumper) 588 { 589 u64 *in = dumper->ptr; 590 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); 591 int i; 592 593 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) 594 return; 595 596 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 597 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, 598 (block->type << 8) | i); 599 600 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, 601 block->size, dumper->iova + A6XX_CD_DATA_OFFSET); 602 } 603 604 CRASHDUMP_FINI(in); 605 606 if (a6xx_crashdumper_run(gpu, dumper)) 607 return; 608 609 obj->handle = block; 610 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 611 datasize); 612 } 613 614 static void a6xx_get_shaders(struct msm_gpu *gpu, 615 struct a6xx_gpu_state *a6xx_state, 616 struct a6xx_crashdumper *dumper) 617 { 618 int i; 619 620 a6xx_state->shaders = state_kcalloc(a6xx_state, 621 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders)); 622 623 if (!a6xx_state->shaders) 624 return; 625 626 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); 627 628 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) 629 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i], 630 &a6xx_state->shaders[i], dumper); 631 } 632 633 /* Read registers from behind the HLSQ aperture with the crashdumper */ 634 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, 635 struct a6xx_gpu_state *a6xx_state, 636 const struct a6xx_registers *regs, 637 struct a6xx_gpu_state_obj *obj, 638 struct a6xx_crashdumper *dumper) 639 640 { 641 u64 *in = dumper->ptr; 642 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 643 int i, regcount = 0; 644 645 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); 646 647 for (i = 0; i < regs->count; i += 2) { 648 u32 count = RANGE(regs->registers, i); 649 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + 650 regs->registers[i] - (regs->val0 >> 2); 651 652 in += CRASHDUMP_READ(in, offset, count, out); 653 654 out += count * sizeof(u32); 655 regcount += count; 656 } 657 658 CRASHDUMP_FINI(in); 659 660 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 661 return; 662 663 if (a6xx_crashdumper_run(gpu, dumper)) 664 return; 665 666 obj->handle = regs; 667 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 668 regcount * sizeof(u32)); 669 } 670 671 /* Read a block of registers using the crashdumper */ 672 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, 673 struct a6xx_gpu_state *a6xx_state, 674 const struct a6xx_registers *regs, 675 struct a6xx_gpu_state_obj *obj, 676 struct a6xx_crashdumper *dumper) 677 678 { 679 u64 *in = dumper->ptr; 680 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; 681 int i, regcount = 0; 682 683 /* Some blocks might need to program a selector register first */ 684 if (regs->val0) 685 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); 686 687 for (i = 0; i < regs->count; i += 2) { 688 u32 count = RANGE(regs->registers, i); 689 690 in += CRASHDUMP_READ(in, regs->registers[i], count, out); 691 692 out += count * sizeof(u32); 693 regcount += count; 694 } 695 696 CRASHDUMP_FINI(in); 697 698 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) 699 return; 700 701 if (a6xx_crashdumper_run(gpu, dumper)) 702 return; 703 704 obj->handle = regs; 705 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET, 706 regcount * sizeof(u32)); 707 } 708 709 /* Read a block of registers via AHB */ 710 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, 711 struct a6xx_gpu_state *a6xx_state, 712 const struct a6xx_registers *regs, 713 struct a6xx_gpu_state_obj *obj) 714 { 715 int i, regcount = 0, index = 0; 716 717 for (i = 0; i < regs->count; i += 2) 718 regcount += RANGE(regs->registers, i); 719 720 obj->handle = (const void *) regs; 721 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 722 if (!obj->data) 723 return; 724 725 for (i = 0; i < regs->count; i += 2) { 726 u32 count = RANGE(regs->registers, i); 727 int j; 728 729 for (j = 0; j < count; j++) 730 obj->data[index++] = gpu_read(gpu, 731 regs->registers[i] + j); 732 } 733 } 734 735 /* Read a block of GMU registers */ 736 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, 737 struct a6xx_gpu_state *a6xx_state, 738 const struct a6xx_registers *regs, 739 struct a6xx_gpu_state_obj *obj) 740 { 741 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 742 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 743 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 744 int i, regcount = 0, index = 0; 745 746 for (i = 0; i < regs->count; i += 2) 747 regcount += RANGE(regs->registers, i); 748 749 obj->handle = (const void *) regs; 750 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32)); 751 if (!obj->data) 752 return; 753 754 for (i = 0; i < regs->count; i += 2) { 755 u32 count = RANGE(regs->registers, i); 756 int j; 757 758 for (j = 0; j < count; j++) 759 obj->data[index++] = gmu_read(gmu, 760 regs->registers[i] + j); 761 } 762 } 763 764 static void a6xx_get_gmu_registers(struct msm_gpu *gpu, 765 struct a6xx_gpu_state *a6xx_state) 766 { 767 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 768 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 769 770 a6xx_state->gmu_registers = state_kcalloc(a6xx_state, 771 2, sizeof(*a6xx_state->gmu_registers)); 772 773 if (!a6xx_state->gmu_registers) 774 return; 775 776 a6xx_state->nr_gmu_registers = 2; 777 778 /* Get the CX GMU registers from AHB */ 779 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], 780 &a6xx_state->gmu_registers[0]); 781 782 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 783 return; 784 785 /* Set the fence to ALLOW mode so we can access the registers */ 786 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); 787 788 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1], 789 &a6xx_state->gmu_registers[1]); 790 } 791 792 #define A6XX_GBIF_REGLIST_SIZE 1 793 static void a6xx_get_registers(struct msm_gpu *gpu, 794 struct a6xx_gpu_state *a6xx_state, 795 struct a6xx_crashdumper *dumper) 796 { 797 int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + 798 ARRAY_SIZE(a6xx_reglist) + 799 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE; 800 int index = 0; 801 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 802 803 a6xx_state->registers = state_kcalloc(a6xx_state, 804 count, sizeof(*a6xx_state->registers)); 805 806 if (!a6xx_state->registers) 807 return; 808 809 a6xx_state->nr_registers = count; 810 811 for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) 812 a6xx_get_ahb_gpu_registers(gpu, 813 a6xx_state, &a6xx_ahb_reglist[i], 814 &a6xx_state->registers[index++]); 815 816 if (a6xx_has_gbif(adreno_gpu)) 817 a6xx_get_ahb_gpu_registers(gpu, 818 a6xx_state, &a6xx_gbif_reglist, 819 &a6xx_state->registers[index++]); 820 else 821 a6xx_get_ahb_gpu_registers(gpu, 822 a6xx_state, &a6xx_vbif_reglist, 823 &a6xx_state->registers[index++]); 824 825 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) 826 a6xx_get_crashdumper_registers(gpu, 827 a6xx_state, &a6xx_reglist[i], 828 &a6xx_state->registers[index++], 829 dumper); 830 831 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) 832 a6xx_get_crashdumper_hlsq_registers(gpu, 833 a6xx_state, &a6xx_hlsq_reglist[i], 834 &a6xx_state->registers[index++], 835 dumper); 836 } 837 838 /* Read a block of data from an indexed register pair */ 839 static void a6xx_get_indexed_regs(struct msm_gpu *gpu, 840 struct a6xx_gpu_state *a6xx_state, 841 const struct a6xx_indexed_registers *indexed, 842 struct a6xx_gpu_state_obj *obj) 843 { 844 int i; 845 846 obj->handle = (const void *) indexed; 847 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32)); 848 if (!obj->data) 849 return; 850 851 /* All the indexed banks start at address 0 */ 852 gpu_write(gpu, indexed->addr, 0); 853 854 /* Read the data - each read increments the internal address by 1 */ 855 for (i = 0; i < indexed->count; i++) 856 obj->data[i] = gpu_read(gpu, indexed->data); 857 } 858 859 static void a6xx_get_indexed_registers(struct msm_gpu *gpu, 860 struct a6xx_gpu_state *a6xx_state) 861 { 862 u32 mempool_size; 863 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; 864 int i; 865 866 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count, 867 sizeof(a6xx_state->indexed_regs)); 868 if (!a6xx_state->indexed_regs) 869 return; 870 871 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) 872 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i], 873 &a6xx_state->indexed_regs[i]); 874 875 /* Set the CP mempool size to 0 to stabilize it while dumping */ 876 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); 877 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); 878 879 /* Get the contents of the CP mempool */ 880 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed, 881 &a6xx_state->indexed_regs[i]); 882 883 /* 884 * Offset 0x2000 in the mempool is the size - copy the saved size over 885 * so the data is consistent 886 */ 887 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; 888 889 /* Restore the size in the hardware */ 890 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); 891 892 a6xx_state->nr_indexed_regs = count; 893 } 894 895 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) 896 { 897 struct a6xx_crashdumper dumper = { 0 }; 898 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 899 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 900 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), 901 GFP_KERNEL); 902 903 if (!a6xx_state) 904 return ERR_PTR(-ENOMEM); 905 906 INIT_LIST_HEAD(&a6xx_state->objs); 907 908 /* Get the generic state from the adreno core */ 909 adreno_gpu_state_get(gpu, &a6xx_state->base); 910 911 a6xx_get_gmu_registers(gpu, a6xx_state); 912 913 /* If GX isn't on the rest of the data isn't going to be accessible */ 914 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) 915 return &a6xx_state->base; 916 917 /* Get the banks of indexed registers */ 918 a6xx_get_indexed_registers(gpu, a6xx_state); 919 920 /* Try to initialize the crashdumper */ 921 if (!a6xx_crashdumper_init(gpu, &dumper)) { 922 a6xx_get_registers(gpu, a6xx_state, &dumper); 923 a6xx_get_shaders(gpu, a6xx_state, &dumper); 924 a6xx_get_clusters(gpu, a6xx_state, &dumper); 925 a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); 926 927 msm_gem_kernel_put(dumper.bo, gpu->aspace, true); 928 } 929 930 a6xx_get_debugbus(gpu, a6xx_state); 931 932 return &a6xx_state->base; 933 } 934 935 void a6xx_gpu_state_destroy(struct kref *kref) 936 { 937 struct a6xx_state_memobj *obj, *tmp; 938 struct msm_gpu_state *state = container_of(kref, 939 struct msm_gpu_state, ref); 940 struct a6xx_gpu_state *a6xx_state = container_of(state, 941 struct a6xx_gpu_state, base); 942 943 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) 944 kfree(obj); 945 946 adreno_gpu_state_destroy(state); 947 kfree(a6xx_state); 948 } 949 950 int a6xx_gpu_state_put(struct msm_gpu_state *state) 951 { 952 if (IS_ERR_OR_NULL(state)) 953 return 1; 954 955 return kref_put(&state->ref, a6xx_gpu_state_destroy); 956 } 957 958 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, 959 struct drm_printer *p) 960 { 961 int i, index = 0; 962 963 if (!data) 964 return; 965 966 for (i = 0; i < count; i += 2) { 967 u32 count = RANGE(registers, i); 968 u32 offset = registers[i]; 969 int j; 970 971 for (j = 0; j < count; index++, offset++, j++) { 972 if (data[index] == 0xdeafbead) 973 continue; 974 975 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 976 offset << 2, data[index]); 977 } 978 } 979 } 980 981 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) 982 { 983 char out[ASCII85_BUFSZ]; 984 long i, l, datalen = 0; 985 986 for (i = 0; i < len >> 2; i++) { 987 if (data[i]) 988 datalen = (i + 1) << 2; 989 } 990 991 if (datalen == 0) 992 return; 993 994 drm_puts(p, " data: !!ascii85 |\n"); 995 drm_puts(p, " "); 996 997 998 l = ascii85_encode_len(datalen); 999 1000 for (i = 0; i < l; i++) 1001 drm_puts(p, ascii85_encode(data[i], out)); 1002 1003 drm_puts(p, "\n"); 1004 } 1005 1006 static void print_name(struct drm_printer *p, const char *fmt, const char *name) 1007 { 1008 drm_puts(p, fmt); 1009 drm_puts(p, name); 1010 drm_puts(p, "\n"); 1011 } 1012 1013 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, 1014 struct drm_printer *p) 1015 { 1016 const struct a6xx_shader_block *block = obj->handle; 1017 int i; 1018 1019 if (!obj->handle) 1020 return; 1021 1022 print_name(p, " - type: ", block->name); 1023 1024 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { 1025 drm_printf(p, " - bank: %d\n", i); 1026 drm_printf(p, " size: %d\n", block->size); 1027 1028 if (!obj->data) 1029 continue; 1030 1031 print_ascii85(p, block->size << 2, 1032 obj->data + (block->size * i)); 1033 } 1034 } 1035 1036 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, 1037 struct drm_printer *p) 1038 { 1039 int ctx, index = 0; 1040 1041 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { 1042 int j; 1043 1044 drm_printf(p, " - context: %d\n", ctx); 1045 1046 for (j = 0; j < size; j += 2) { 1047 u32 count = RANGE(registers, j); 1048 u32 offset = registers[j]; 1049 int k; 1050 1051 for (k = 0; k < count; index++, offset++, k++) { 1052 if (data[index] == 0xdeafbead) 1053 continue; 1054 1055 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", 1056 offset << 2, data[index]); 1057 } 1058 } 1059 } 1060 } 1061 1062 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, 1063 struct drm_printer *p) 1064 { 1065 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; 1066 1067 if (dbgahb) { 1068 print_name(p, " - cluster-name: ", dbgahb->name); 1069 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, 1070 obj->data, p); 1071 } 1072 } 1073 1074 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, 1075 struct drm_printer *p) 1076 { 1077 const struct a6xx_cluster *cluster = obj->handle; 1078 1079 if (cluster) { 1080 print_name(p, " - cluster-name: ", cluster->name); 1081 a6xx_show_cluster_data(cluster->registers, cluster->count, 1082 obj->data, p); 1083 } 1084 } 1085 1086 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, 1087 struct drm_printer *p) 1088 { 1089 const struct a6xx_indexed_registers *indexed = obj->handle; 1090 1091 if (!indexed) 1092 return; 1093 1094 print_name(p, " - regs-name: ", indexed->name); 1095 drm_printf(p, " dwords: %d\n", indexed->count); 1096 1097 print_ascii85(p, indexed->count << 2, obj->data); 1098 } 1099 1100 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, 1101 u32 *data, struct drm_printer *p) 1102 { 1103 if (block) { 1104 print_name(p, " - debugbus-block: ", block->name); 1105 1106 /* 1107 * count for regular debugbus data is in quadwords, 1108 * but print the size in dwords for consistency 1109 */ 1110 drm_printf(p, " count: %d\n", block->count << 1); 1111 1112 print_ascii85(p, block->count << 3, data); 1113 } 1114 } 1115 1116 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, 1117 struct drm_printer *p) 1118 { 1119 int i; 1120 1121 for (i = 0; i < a6xx_state->nr_debugbus; i++) { 1122 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; 1123 1124 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1125 } 1126 1127 if (a6xx_state->vbif_debugbus) { 1128 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; 1129 1130 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); 1131 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); 1132 1133 /* vbif debugbus data is in dwords. Confusing, huh? */ 1134 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); 1135 } 1136 1137 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { 1138 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; 1139 1140 a6xx_show_debugbus_block(obj->handle, obj->data, p); 1141 } 1142 } 1143 1144 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, 1145 struct drm_printer *p) 1146 { 1147 struct a6xx_gpu_state *a6xx_state = container_of(state, 1148 struct a6xx_gpu_state, base); 1149 int i; 1150 1151 if (IS_ERR_OR_NULL(state)) 1152 return; 1153 1154 adreno_show(gpu, state, p); 1155 1156 drm_puts(p, "registers:\n"); 1157 for (i = 0; i < a6xx_state->nr_registers; i++) { 1158 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; 1159 const struct a6xx_registers *regs = obj->handle; 1160 1161 if (!obj->handle) 1162 continue; 1163 1164 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1165 } 1166 1167 drm_puts(p, "registers-gmu:\n"); 1168 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { 1169 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; 1170 const struct a6xx_registers *regs = obj->handle; 1171 1172 if (!obj->handle) 1173 continue; 1174 1175 a6xx_show_registers(regs->registers, obj->data, regs->count, p); 1176 } 1177 1178 drm_puts(p, "indexed-registers:\n"); 1179 for (i = 0; i < a6xx_state->nr_indexed_regs; i++) 1180 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); 1181 1182 drm_puts(p, "shader-blocks:\n"); 1183 for (i = 0; i < a6xx_state->nr_shaders; i++) 1184 a6xx_show_shader(&a6xx_state->shaders[i], p); 1185 1186 drm_puts(p, "clusters:\n"); 1187 for (i = 0; i < a6xx_state->nr_clusters; i++) 1188 a6xx_show_cluster(&a6xx_state->clusters[i], p); 1189 1190 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) 1191 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); 1192 1193 drm_puts(p, "debugbus:\n"); 1194 a6xx_show_debugbus(a6xx_state, p); 1195 } 1196