// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

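/*
 * Crashdumper scripts are sequences of two-word (u64) entries. A write
 * entry carries the value to write in the first word and encodes the
 * register offset (in dwords) plus a write flag in the second; a read
 * entry carries the target iova in the first word and the register
 * offset and dword count in the second. Two zero words end the script.
 */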
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

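/*
 * Every crash-state allocation is linked onto the objs list so that
 * a6xx_gpu_state_destroy() can free them all in a single pass.
 */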
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

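/*
 * Kick off a crashdumper pass: point the CP at the script, start the
 * dump via CRASH_DUMP_CNTL and poll the status register until the done
 * bit is set. SPTPRAC must be on or the dumper cannot reach the GX
 * register space.
 */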
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* Read a 64-bit value from the GX debug bus; returns the number of dwords read */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

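/*
 * The CX debug bus controller lives outside the GPU's register space,
 * so it is accessed through a separately mapped pointer. Its register
 * offsets are in dwords, hence the << 2.
 */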
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* Read a 64-bit value from the CX debug bus; returns the number of dwords read */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

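/*
 * Dumping the VBIF debug bus means forcing the test bus clock on, then
 * walking the AXI arbiter and XIN AXI blocks on test bus 2 followed by
 * the XIN core blocks on test bus 1.
 */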
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has the same debugbus as the other GPU blocks and
		 * uses exactly the same block ID as VBIF, so fall back to
		 * the default path when the GPU has a GBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}

		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
				a6xx_get_debugbus_block(gpu,
					a6xx_state,
					&a650_debugbus_blocks[i],
					&a6xx_state->debugbus[a6xx_state->nr_debugbus + i]);

			a6xx_state->nr_debugbus += ARRAY_SIZE(a650_debugbus_blocks);
		}
	}

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
					ARRAY_SIZE(a6xx_cx_debugbus_blocks),
					sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		iounmap(cxdbg);
	}
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
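/* Register lists are { start, end } pairs; RANGE() is the inclusive count */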

/* Read a data cluster from behind the AHB aperture */
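/*
 * Each context of the cluster is selected by programming its statetype
 * into HLSQ_DBG_READ_SEL; the registers then show up in the AHB read
 * aperture at their dword offset from the cluster base.
 */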
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;
	u32 id = cluster->id;

	/* Skip registers that are not present on older generations */
	if (!adreno_is_a660_family(adreno_gpu) &&
			cluster->registers == a660_fe_cluster)
		return;

	if (adreno_is_a650_family(adreno_gpu) &&
			cluster->registers == a6xx_ps_cluster)
		id = CLUSTER_VPC_PS;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
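/* Each block is dumped once per shader bank, selected via HLSQ_DBG_READ_SEL */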
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		/* Advance the target iova per bank so banks don't overwrite each other */
		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, out);

		out += block->size * sizeof(u32);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	/* Skip unsupported registers on older generations */
	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
			(regs->registers == a660_registers))
		return;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB, then the RSCC registers */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];

		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;

			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}

#define A6XX_GBIF_REGLIST_SIZE  1
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);

	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
{
	/* The value at [16:31] is in 4dword units. Convert it to dwords */
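	/* i.e. ((val >> 16) << 2), folded into a single shift */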
	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	if (indexed->count_fn)
		indexed->count = indexed->count_fn(gpu);

	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
		u32 val;

		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
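		/* Bit 2 of CP_CHICKEN_DBG appears to gate mempool dumping on a650 family parts */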

		/* Get the contents of the CP mempool */
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
			&a6xx_state->indexed_regs[i]);

		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
		a6xx_state->nr_indexed_regs = count;
		return;
	}

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent (skip it if the dump allocation failed)
	 */
	if (a6xx_state->indexed_regs[i].data)
		a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
}


struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		a6xx_get_gmu_registers(gpu, a6xx_state);

		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
	}

	/* If GX isn't on, the rest of the data isn't going to be accessible */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled. The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
	    !a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	a6xx_get_registers(gpu, a6xx_state, dumper);

	if (dumper) {
		a6xx_get_shaders(gpu, a6xx_state, dumper);
		a6xx_get_clusters(gpu, a6xx_state, dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

		msm_gem_kernel_put(dumper->bo, gpu->aspace);
	}

	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return &a6xx_state->base;
}


static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
		struct a6xx_gpu_state, base);

	if (a6xx_state->gmu_log)
		kvfree(a6xx_state->gmu_log->data);

	if (a6xx_state->gmu_hfi)
		kvfree(a6xx_state->gmu_hfi->data);

	if (a6xx_state->gmu_debug)
		kvfree(a6xx_state->gmu_debug->data);

	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
		list_del(&obj->node);
		kvfree(obj);
	}

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

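/* Trailing zero dwords are trimmed before encoding to keep dumps small */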
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, " data: !!ascii85 |\n");
	drm_puts(p, " ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, " - bank: %d\n", i);
		drm_printf(p, " size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, " - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, " - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

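/*
 * Emit the captured state in the YAML-like text format consumed by the
 * crash dump tooling.
 */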
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct a6xx_gpu_state *a6xx_state = container_of(state,
		struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}