1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10 
/*
 * One captured piece of GPU state: the static descriptor it was captured
 * from plus the raw dwords that were read back.
 */
struct a6xx_gpu_state_obj {
	/* Descriptor (register list, cluster, block, ...) that @data belongs to */
	const void *handle;
	/* Captured dwords; stays NULL when the allocation or the capture failed */
	u32 *data;
};
15 
/*
 * Full a6xx crash state.  Each array below is allocated with state_kcalloc()
 * and paired with the number of entries that were set up for it.
 */
struct a6xx_gpu_state {
	/* Generic adreno state; the public handle handed back to callers */
	struct msm_gpu_state base;

	/* GMU register blocks */
	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	/* GPU register blocks (AHB, GBIF/VBIF, crashdumper and HLSQ lists) */
	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	/* Shader / debug blocks read through the HLSQ aperture */
	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	/* Clusters read through the CP aperture */
	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	/* Clusters read from behind the AHB aperture */
	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	/* Indexed register banks, the CP mempool being the last entry */
	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	/* GX debug bus blocks */
	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single VBIF debug bus object, only captured on non-GBIF targets */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	/* CX debug bus blocks */
	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	/* Copies of the GMU log, HFI and debug buffers */
	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	/* Per-queue HFI history, unrolled starting at each queue's history_idx */
	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* Every state_kcalloc() allocation, so destroy can free them in one pass */
	struct list_head objs;

	/* Set to !gpu->needs_hw_init at the end of a full capture */
	bool gpu_initialized;
};
55 
56 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
57 {
58 	in[0] = val;
59 	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
60 
61 	return 2;
62 }
63 
64 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
65 {
66 	in[0] = target;
67 	in[1] = (((u64) reg) << 44 | dwords);
68 
69 	return 2;
70 }
71 
72 static inline int CRASHDUMP_FINI(u64 *in)
73 {
74 	in[0] = 0;
75 	in[1] = 0;
76 
77 	return 2;
78 }
79 
/* Scratch buffer shared between the CPU and the crashdumper */
struct a6xx_crashdumper {
	/* Kernel mapping of the buffer; an ERR_PTR if the allocation failed */
	void *ptr;
	/* Backing GEM object */
	struct drm_gem_object *bo;
	/* GPU address of the buffer, programmed as the crash script base */
	u64 iova;
};
85 
/* Header prepended to every state_kcalloc() allocation */
struct a6xx_state_memobj {
	/* Link in a6xx_gpu_state::objs */
	struct list_head node;
	/* Start of the memory handed back to the caller */
	unsigned long long data[];
};
90 
91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
92 {
93 	struct a6xx_state_memobj *obj =
94 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
95 
96 	if (!obj)
97 		return NULL;
98 
99 	list_add_tail(&obj->node, &a6xx_state->objs);
100 	return &obj->data;
101 }
102 
103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
104 		size_t size)
105 {
106 	void *dst = state_kcalloc(a6xx_state, 1, size);
107 
108 	if (dst)
109 		memcpy(dst, src, size);
110 	return dst;
111 }
112 
113 /*
114  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
115  * the rest for the data
116  */
117 #define A6XX_CD_DATA_OFFSET 8192
118 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
119 
120 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
121 		struct a6xx_crashdumper *dumper)
122 {
123 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
124 		SZ_1M, MSM_BO_WC, gpu->aspace,
125 		&dumper->bo, &dumper->iova);
126 
127 	if (!IS_ERR(dumper->ptr))
128 		msm_gem_object_set_name(dumper->bo, "crashdump");
129 
130 	return PTR_ERR_OR_ZERO(dumper->ptr);
131 }
132 
133 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
134 		struct a6xx_crashdumper *dumper)
135 {
136 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
137 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
138 	u32 val;
139 	int ret;
140 
141 	if (IS_ERR_OR_NULL(dumper->ptr))
142 		return -EINVAL;
143 
144 	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
145 		return -EINVAL;
146 
147 	/* Make sure all pending memory writes are posted */
148 	wmb();
149 
150 	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
151 
152 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
153 
154 	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
155 		val & 0x02, 100, 10000);
156 
157 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
158 
159 	return ret;
160 }
161 
162 /* read a value from the GX debug bus */
163 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
164 		u32 *data)
165 {
166 	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
167 		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
168 
169 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
170 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
171 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
172 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
173 
174 	/* Wait 1 us to make sure the data is flowing */
175 	udelay(1);
176 
177 	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
178 	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
179 
180 	return 2;
181 }
182 
183 #define cxdbg_write(ptr, offset, val) \
184 	msm_writel((val), (ptr) + ((offset) << 2))
185 
186 #define cxdbg_read(ptr, offset) \
187 	msm_readl((ptr) + ((offset) << 2))
188 
189 /* read a value from the CX debug bus */
190 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
191 		u32 *data)
192 {
193 	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
194 		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
195 
196 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
197 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
198 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
199 	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
200 
201 	/* Wait 1 us to make sure the data is flowing */
202 	udelay(1);
203 
204 	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
205 	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
206 
207 	return 2;
208 }
209 
210 /* Read a chunk of data from the VBIF debug bus */
211 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
212 		u32 reg, int count, u32 *data)
213 {
214 	int i;
215 
216 	gpu_write(gpu, ctrl0, reg);
217 
218 	for (i = 0; i < count; i++) {
219 		gpu_write(gpu, ctrl1, i);
220 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
221 	}
222 
223 	return count;
224 }
225 
226 #define AXI_ARB_BLOCKS 2
227 #define XIN_AXI_BLOCKS 5
228 #define XIN_CORE_BLOCKS 4
229 
230 #define VBIF_DEBUGBUS_BLOCK_SIZE \
231 	((16 * AXI_ARB_BLOCKS) + \
232 	 (18 * XIN_AXI_BLOCKS) + \
233 	 (12 * XIN_CORE_BLOCKS))
234 
235 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
236 		struct a6xx_gpu_state *a6xx_state,
237 		struct a6xx_gpu_state_obj *obj)
238 {
239 	u32 clk, *ptr;
240 	int i;
241 
242 	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
243 		sizeof(u32));
244 	if (!obj->data)
245 		return;
246 
247 	obj->handle = NULL;
248 
249 	/* Get the current clock setting */
250 	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
251 
252 	/* Force on the bus so we can read it */
253 	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
254 		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
255 
256 	/* We will read from BUS2 first, so disable BUS1 */
257 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
258 
259 	/* Enable the VBIF bus for reading */
260 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
261 
262 	ptr = obj->data;
263 
264 	for (i = 0; i < AXI_ARB_BLOCKS; i++)
265 		ptr += vbif_debugbus_read(gpu,
266 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
267 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
268 			1 << (i + 16), 16, ptr);
269 
270 	for (i = 0; i < XIN_AXI_BLOCKS; i++)
271 		ptr += vbif_debugbus_read(gpu,
272 			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
273 			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
274 			1 << i, 18, ptr);
275 
276 	/* Stop BUS2 so we can turn on BUS1 */
277 	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
278 
279 	for (i = 0; i < XIN_CORE_BLOCKS; i++)
280 		ptr += vbif_debugbus_read(gpu,
281 			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
282 			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
283 			1 << i, 12, ptr);
284 
285 	/* Restore the VBIF clock setting */
286 	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
287 }
288 
289 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
290 		struct a6xx_gpu_state *a6xx_state,
291 		const struct a6xx_debugbus_block *block,
292 		struct a6xx_gpu_state_obj *obj)
293 {
294 	int i;
295 	u32 *ptr;
296 
297 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
298 	if (!obj->data)
299 		return;
300 
301 	obj->handle = block;
302 
303 	for (ptr = obj->data, i = 0; i < block->count; i++)
304 		ptr += debugbus_read(gpu, block->id, i, ptr);
305 }
306 
307 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
308 		struct a6xx_gpu_state *a6xx_state,
309 		const struct a6xx_debugbus_block *block,
310 		struct a6xx_gpu_state_obj *obj)
311 {
312 	int i;
313 	u32 *ptr;
314 
315 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
316 	if (!obj->data)
317 		return;
318 
319 	obj->handle = block;
320 
321 	for (ptr = obj->data, i = 0; i < block->count; i++)
322 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
323 }
324 
325 static void a6xx_get_debugbus(struct msm_gpu *gpu,
326 		struct a6xx_gpu_state *a6xx_state)
327 {
328 	struct resource *res;
329 	void __iomem *cxdbg = NULL;
330 	int nr_debugbus_blocks;
331 
332 	/* Set up the GX debug bus */
333 
334 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
335 		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
336 
337 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
338 		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
339 
340 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
341 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
342 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
343 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
344 
345 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
346 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
347 
348 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
349 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
350 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
351 	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
352 
353 	/* Set up the CX debug bus - it lives elsewhere in the system so do a
354 	 * temporary ioremap for the registers
355 	 */
356 	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
357 			"cx_dbgc");
358 
359 	if (res)
360 		cxdbg = ioremap(res->start, resource_size(res));
361 
362 	if (cxdbg) {
363 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
364 			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
365 
366 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
367 			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
368 
369 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
370 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
371 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
372 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
373 
374 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
375 			0x76543210);
376 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
377 			0xFEDCBA98);
378 
379 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
380 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
381 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
382 		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
383 	}
384 
385 	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
386 		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
387 
388 	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
389 			sizeof(*a6xx_state->debugbus));
390 
391 	if (a6xx_state->debugbus) {
392 		int i;
393 
394 		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
395 			a6xx_get_debugbus_block(gpu,
396 				a6xx_state,
397 				&a6xx_debugbus_blocks[i],
398 				&a6xx_state->debugbus[i]);
399 
400 		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
401 
402 		/*
403 		 * GBIF has same debugbus as of other GPU blocks, fall back to
404 		 * default path if GPU uses GBIF, also GBIF uses exactly same
405 		 * ID as of VBIF.
406 		 */
407 		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
408 			a6xx_get_debugbus_block(gpu, a6xx_state,
409 				&a6xx_gbif_debugbus_block,
410 				&a6xx_state->debugbus[i]);
411 
412 			a6xx_state->nr_debugbus += 1;
413 		}
414 	}
415 
416 	/*  Dump the VBIF debugbus on applicable targets */
417 	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
418 		a6xx_state->vbif_debugbus =
419 			state_kcalloc(a6xx_state, 1,
420 					sizeof(*a6xx_state->vbif_debugbus));
421 
422 		if (a6xx_state->vbif_debugbus)
423 			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
424 					a6xx_state->vbif_debugbus);
425 	}
426 
427 	if (cxdbg) {
428 		a6xx_state->cx_debugbus =
429 			state_kcalloc(a6xx_state,
430 			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
431 			sizeof(*a6xx_state->cx_debugbus));
432 
433 		if (a6xx_state->cx_debugbus) {
434 			int i;
435 
436 			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
437 				a6xx_get_cx_debugbus_block(cxdbg,
438 					a6xx_state,
439 					&a6xx_cx_debugbus_blocks[i],
440 					&a6xx_state->cx_debugbus[i]);
441 
442 			a6xx_state->nr_cx_debugbus =
443 				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
444 		}
445 
446 		iounmap(cxdbg);
447 	}
448 }
449 
450 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
451 
452 /* Read a data cluster from behind the AHB aperture */
453 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
454 		struct a6xx_gpu_state *a6xx_state,
455 		const struct a6xx_dbgahb_cluster *dbgahb,
456 		struct a6xx_gpu_state_obj *obj,
457 		struct a6xx_crashdumper *dumper)
458 {
459 	u64 *in = dumper->ptr;
460 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
461 	size_t datasize;
462 	int i, regcount = 0;
463 
464 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
465 		int j;
466 
467 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
468 			(dbgahb->statetype + i * 2) << 8);
469 
470 		for (j = 0; j < dbgahb->count; j += 2) {
471 			int count = RANGE(dbgahb->registers, j);
472 			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
473 				dbgahb->registers[j] - (dbgahb->base >> 2);
474 
475 			in += CRASHDUMP_READ(in, offset, count, out);
476 
477 			out += count * sizeof(u32);
478 
479 			if (i == 0)
480 				regcount += count;
481 		}
482 	}
483 
484 	CRASHDUMP_FINI(in);
485 
486 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
487 
488 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
489 		return;
490 
491 	if (a6xx_crashdumper_run(gpu, dumper))
492 		return;
493 
494 	obj->handle = dbgahb;
495 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
496 		datasize);
497 }
498 
499 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
500 		struct a6xx_gpu_state *a6xx_state,
501 		struct a6xx_crashdumper *dumper)
502 {
503 	int i;
504 
505 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
506 		ARRAY_SIZE(a6xx_dbgahb_clusters),
507 		sizeof(*a6xx_state->dbgahb_clusters));
508 
509 	if (!a6xx_state->dbgahb_clusters)
510 		return;
511 
512 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
513 
514 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
515 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
516 			&a6xx_dbgahb_clusters[i],
517 			&a6xx_state->dbgahb_clusters[i], dumper);
518 }
519 
520 /* Read a data cluster from the CP aperture with the crashdumper */
521 static void a6xx_get_cluster(struct msm_gpu *gpu,
522 		struct a6xx_gpu_state *a6xx_state,
523 		const struct a6xx_cluster *cluster,
524 		struct a6xx_gpu_state_obj *obj,
525 		struct a6xx_crashdumper *dumper)
526 {
527 	u64 *in = dumper->ptr;
528 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
529 	size_t datasize;
530 	int i, regcount = 0;
531 
532 	/* Some clusters need a selector register to be programmed too */
533 	if (cluster->sel_reg)
534 		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
535 
536 	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
537 		int j;
538 
539 		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
540 			(cluster->id << 8) | (i << 4) | i);
541 
542 		for (j = 0; j < cluster->count; j += 2) {
543 			int count = RANGE(cluster->registers, j);
544 
545 			in += CRASHDUMP_READ(in, cluster->registers[j],
546 				count, out);
547 
548 			out += count * sizeof(u32);
549 
550 			if (i == 0)
551 				regcount += count;
552 		}
553 	}
554 
555 	CRASHDUMP_FINI(in);
556 
557 	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
558 
559 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
560 		return;
561 
562 	if (a6xx_crashdumper_run(gpu, dumper))
563 		return;
564 
565 	obj->handle = cluster;
566 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
567 		datasize);
568 }
569 
570 static void a6xx_get_clusters(struct msm_gpu *gpu,
571 		struct a6xx_gpu_state *a6xx_state,
572 		struct a6xx_crashdumper *dumper)
573 {
574 	int i;
575 
576 	a6xx_state->clusters = state_kcalloc(a6xx_state,
577 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
578 
579 	if (!a6xx_state->clusters)
580 		return;
581 
582 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
583 
584 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
585 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
586 			&a6xx_state->clusters[i], dumper);
587 }
588 
589 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
590 static void a6xx_get_shader_block(struct msm_gpu *gpu,
591 		struct a6xx_gpu_state *a6xx_state,
592 		const struct a6xx_shader_block *block,
593 		struct a6xx_gpu_state_obj *obj,
594 		struct a6xx_crashdumper *dumper)
595 {
596 	u64 *in = dumper->ptr;
597 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
598 	int i;
599 
600 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
601 		return;
602 
603 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
604 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
605 			(block->type << 8) | i);
606 
607 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
608 			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
609 	}
610 
611 	CRASHDUMP_FINI(in);
612 
613 	if (a6xx_crashdumper_run(gpu, dumper))
614 		return;
615 
616 	obj->handle = block;
617 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
618 		datasize);
619 }
620 
621 static void a6xx_get_shaders(struct msm_gpu *gpu,
622 		struct a6xx_gpu_state *a6xx_state,
623 		struct a6xx_crashdumper *dumper)
624 {
625 	int i;
626 
627 	a6xx_state->shaders = state_kcalloc(a6xx_state,
628 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
629 
630 	if (!a6xx_state->shaders)
631 		return;
632 
633 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
634 
635 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
636 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
637 			&a6xx_state->shaders[i], dumper);
638 }
639 
640 /* Read registers from behind the HLSQ aperture with the crashdumper */
641 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
642 		struct a6xx_gpu_state *a6xx_state,
643 		const struct a6xx_registers *regs,
644 		struct a6xx_gpu_state_obj *obj,
645 		struct a6xx_crashdumper *dumper)
646 
647 {
648 	u64 *in = dumper->ptr;
649 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
650 	int i, regcount = 0;
651 
652 	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
653 
654 	for (i = 0; i < regs->count; i += 2) {
655 		u32 count = RANGE(regs->registers, i);
656 		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
657 			regs->registers[i] - (regs->val0 >> 2);
658 
659 		in += CRASHDUMP_READ(in, offset, count, out);
660 
661 		out += count * sizeof(u32);
662 		regcount += count;
663 	}
664 
665 	CRASHDUMP_FINI(in);
666 
667 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
668 		return;
669 
670 	if (a6xx_crashdumper_run(gpu, dumper))
671 		return;
672 
673 	obj->handle = regs;
674 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
675 		regcount * sizeof(u32));
676 }
677 
678 /* Read a block of registers using the crashdumper */
679 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
680 		struct a6xx_gpu_state *a6xx_state,
681 		const struct a6xx_registers *regs,
682 		struct a6xx_gpu_state_obj *obj,
683 		struct a6xx_crashdumper *dumper)
684 
685 {
686 	u64 *in = dumper->ptr;
687 	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
688 	int i, regcount = 0;
689 
690 	/* Some blocks might need to program a selector register first */
691 	if (regs->val0)
692 		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
693 
694 	for (i = 0; i < regs->count; i += 2) {
695 		u32 count = RANGE(regs->registers, i);
696 
697 		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
698 
699 		out += count * sizeof(u32);
700 		regcount += count;
701 	}
702 
703 	CRASHDUMP_FINI(in);
704 
705 	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
706 		return;
707 
708 	if (a6xx_crashdumper_run(gpu, dumper))
709 		return;
710 
711 	obj->handle = regs;
712 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
713 		regcount * sizeof(u32));
714 }
715 
716 /* Read a block of registers via AHB */
717 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
718 		struct a6xx_gpu_state *a6xx_state,
719 		const struct a6xx_registers *regs,
720 		struct a6xx_gpu_state_obj *obj)
721 {
722 	int i, regcount = 0, index = 0;
723 
724 	for (i = 0; i < regs->count; i += 2)
725 		regcount += RANGE(regs->registers, i);
726 
727 	obj->handle = (const void *) regs;
728 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
729 	if (!obj->data)
730 		return;
731 
732 	for (i = 0; i < regs->count; i += 2) {
733 		u32 count = RANGE(regs->registers, i);
734 		int j;
735 
736 		for (j = 0; j < count; j++)
737 			obj->data[index++] = gpu_read(gpu,
738 				regs->registers[i] + j);
739 	}
740 }
741 
742 /* Read a block of GMU registers */
743 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
744 		struct a6xx_gpu_state *a6xx_state,
745 		const struct a6xx_registers *regs,
746 		struct a6xx_gpu_state_obj *obj,
747 		bool rscc)
748 {
749 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
750 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
751 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
752 	int i, regcount = 0, index = 0;
753 
754 	for (i = 0; i < regs->count; i += 2)
755 		regcount += RANGE(regs->registers, i);
756 
757 	obj->handle = (const void *) regs;
758 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
759 	if (!obj->data)
760 		return;
761 
762 	for (i = 0; i < regs->count; i += 2) {
763 		u32 count = RANGE(regs->registers, i);
764 		int j;
765 
766 		for (j = 0; j < count; j++) {
767 			u32 offset = regs->registers[i] + j;
768 			u32 val;
769 
770 			if (rscc)
771 				val = gmu_read_rscc(gmu, offset);
772 			else
773 				val = gmu_read(gmu, offset);
774 
775 			obj->data[index++] = val;
776 		}
777 	}
778 }
779 
780 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
781 		struct a6xx_gpu_state *a6xx_state)
782 {
783 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
784 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
785 
786 	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
787 		3, sizeof(*a6xx_state->gmu_registers));
788 
789 	if (!a6xx_state->gmu_registers)
790 		return;
791 
792 	a6xx_state->nr_gmu_registers = 3;
793 
794 	/* Get the CX GMU registers from AHB */
795 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
796 		&a6xx_state->gmu_registers[0], false);
797 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
798 		&a6xx_state->gmu_registers[1], true);
799 
800 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
801 		return;
802 
803 	/* Set the fence to ALLOW mode so we can access the registers */
804 	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
805 
806 	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
807 		&a6xx_state->gmu_registers[2], false);
808 }
809 
810 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
811 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
812 {
813 	struct msm_gpu_state_bo *snapshot;
814 
815 	if (!bo->size)
816 		return NULL;
817 
818 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
819 	if (!snapshot)
820 		return NULL;
821 
822 	snapshot->iova = bo->iova;
823 	snapshot->size = bo->size;
824 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
825 	if (!snapshot->data)
826 		return NULL;
827 
828 	memcpy(snapshot->data, bo->virt, bo->size);
829 
830 	return snapshot;
831 }
832 
833 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
834 					  struct a6xx_gpu_state *a6xx_state)
835 {
836 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
837 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
838 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
839 	unsigned i, j;
840 
841 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
842 
843 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
844 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
845 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
846 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
847 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
848 		}
849 	}
850 }
851 
852 #define A6XX_GBIF_REGLIST_SIZE   1
853 static void a6xx_get_registers(struct msm_gpu *gpu,
854 		struct a6xx_gpu_state *a6xx_state,
855 		struct a6xx_crashdumper *dumper)
856 {
857 	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
858 		ARRAY_SIZE(a6xx_reglist) +
859 		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
860 	int index = 0;
861 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
862 
863 	a6xx_state->registers = state_kcalloc(a6xx_state,
864 		count, sizeof(*a6xx_state->registers));
865 
866 	if (!a6xx_state->registers)
867 		return;
868 
869 	a6xx_state->nr_registers = count;
870 
871 	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
872 		a6xx_get_ahb_gpu_registers(gpu,
873 			a6xx_state, &a6xx_ahb_reglist[i],
874 			&a6xx_state->registers[index++]);
875 
876 	if (a6xx_has_gbif(adreno_gpu))
877 		a6xx_get_ahb_gpu_registers(gpu,
878 				a6xx_state, &a6xx_gbif_reglist,
879 				&a6xx_state->registers[index++]);
880 	else
881 		a6xx_get_ahb_gpu_registers(gpu,
882 				a6xx_state, &a6xx_vbif_reglist,
883 				&a6xx_state->registers[index++]);
884 	if (!dumper) {
885 		/*
886 		 * We can't use the crashdumper when the SMMU is stalled,
887 		 * because the GPU has no memory access until we resume
888 		 * translation (but we don't want to do that until after
889 		 * we have captured as much useful GPU state as possible).
890 		 * So instead collect registers via the CPU:
891 		 */
892 		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
893 			a6xx_get_ahb_gpu_registers(gpu,
894 				a6xx_state, &a6xx_reglist[i],
895 				&a6xx_state->registers[index++]);
896 		return;
897 	}
898 
899 	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
900 		a6xx_get_crashdumper_registers(gpu,
901 			a6xx_state, &a6xx_reglist[i],
902 			&a6xx_state->registers[index++],
903 			dumper);
904 
905 	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
906 		a6xx_get_crashdumper_hlsq_registers(gpu,
907 			a6xx_state, &a6xx_hlsq_reglist[i],
908 			&a6xx_state->registers[index++],
909 			dumper);
910 }
911 
912 /* Read a block of data from an indexed register pair */
913 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
914 		struct a6xx_gpu_state *a6xx_state,
915 		const struct a6xx_indexed_registers *indexed,
916 		struct a6xx_gpu_state_obj *obj)
917 {
918 	int i;
919 
920 	obj->handle = (const void *) indexed;
921 	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
922 	if (!obj->data)
923 		return;
924 
925 	/* All the indexed banks start at address 0 */
926 	gpu_write(gpu, indexed->addr, 0);
927 
928 	/* Read the data - each read increments the internal address by 1 */
929 	for (i = 0; i < indexed->count; i++)
930 		obj->data[i] = gpu_read(gpu, indexed->data);
931 }
932 
933 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
934 		struct a6xx_gpu_state *a6xx_state)
935 {
936 	u32 mempool_size;
937 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
938 	int i;
939 
940 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
941 		sizeof(*a6xx_state->indexed_regs));
942 	if (!a6xx_state->indexed_regs)
943 		return;
944 
945 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
946 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
947 			&a6xx_state->indexed_regs[i]);
948 
949 	/* Set the CP mempool size to 0 to stabilize it while dumping */
950 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
951 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
952 
953 	/* Get the contents of the CP mempool */
954 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
955 		&a6xx_state->indexed_regs[i]);
956 
957 	/*
958 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
959 	 * so the data is consistent
960 	 */
961 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
962 
963 	/* Restore the size in the hardware */
964 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
965 
966 	a6xx_state->nr_indexed_regs = count;
967 }
968 
969 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
970 {
971 	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
972 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
973 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
974 	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
975 		GFP_KERNEL);
976 	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
977 			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
978 
979 	if (!a6xx_state)
980 		return ERR_PTR(-ENOMEM);
981 
982 	INIT_LIST_HEAD(&a6xx_state->objs);
983 
984 	/* Get the generic state from the adreno core */
985 	adreno_gpu_state_get(gpu, &a6xx_state->base);
986 
987 	a6xx_get_gmu_registers(gpu, a6xx_state);
988 
989 	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
990 	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
991 	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
992 
993 	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
994 
995 	/* If GX isn't on the rest of the data isn't going to be accessible */
996 	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
997 		return &a6xx_state->base;
998 
999 	/* Get the banks of indexed registers */
1000 	a6xx_get_indexed_registers(gpu, a6xx_state);
1001 
1002 	/*
1003 	 * Try to initialize the crashdumper, if we are not dumping state
1004 	 * with the SMMU stalled.  The crashdumper needs memory access to
1005 	 * write out GPU state, so we need to skip this when the SMMU is
1006 	 * stalled in response to an iova fault
1007 	 */
1008 	if (!stalled && !gpu->needs_hw_init &&
1009 	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1010 		dumper = &_dumper;
1011 	}
1012 
1013 	a6xx_get_registers(gpu, a6xx_state, dumper);
1014 
1015 	if (dumper) {
1016 		a6xx_get_shaders(gpu, a6xx_state, dumper);
1017 		a6xx_get_clusters(gpu, a6xx_state, dumper);
1018 		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1019 
1020 		msm_gem_kernel_put(dumper->bo, gpu->aspace);
1021 	}
1022 
1023 	if (snapshot_debugbus)
1024 		a6xx_get_debugbus(gpu, a6xx_state);
1025 
1026 	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1027 
1028 	return  &a6xx_state->base;
1029 }
1030 
1031 static void a6xx_gpu_state_destroy(struct kref *kref)
1032 {
1033 	struct a6xx_state_memobj *obj, *tmp;
1034 	struct msm_gpu_state *state = container_of(kref,
1035 			struct msm_gpu_state, ref);
1036 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1037 			struct a6xx_gpu_state, base);
1038 
1039 	if (a6xx_state->gmu_log)
1040 		kvfree(a6xx_state->gmu_log->data);
1041 
1042 	if (a6xx_state->gmu_hfi)
1043 		kvfree(a6xx_state->gmu_hfi->data);
1044 
1045 	if (a6xx_state->gmu_debug)
1046 		kvfree(a6xx_state->gmu_debug->data);
1047 
1048 	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1049 		list_del(&obj->node);
1050 		kvfree(obj);
1051 	}
1052 
1053 	adreno_gpu_state_destroy(state);
1054 	kfree(a6xx_state);
1055 }
1056 
1057 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1058 {
1059 	if (IS_ERR_OR_NULL(state))
1060 		return 1;
1061 
1062 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1063 }
1064 
1065 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1066 		struct drm_printer *p)
1067 {
1068 	int i, index = 0;
1069 
1070 	if (!data)
1071 		return;
1072 
1073 	for (i = 0; i < count; i += 2) {
1074 		u32 count = RANGE(registers, i);
1075 		u32 offset = registers[i];
1076 		int j;
1077 
1078 		for (j = 0; j < count; index++, offset++, j++) {
1079 			if (data[index] == 0xdeafbead)
1080 				continue;
1081 
1082 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1083 				offset << 2, data[index]);
1084 		}
1085 	}
1086 }
1087 
1088 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1089 {
1090 	char out[ASCII85_BUFSZ];
1091 	long i, l, datalen = 0;
1092 
1093 	for (i = 0; i < len >> 2; i++) {
1094 		if (data[i])
1095 			datalen = (i + 1) << 2;
1096 	}
1097 
1098 	if (datalen == 0)
1099 		return;
1100 
1101 	drm_puts(p, "    data: !!ascii85 |\n");
1102 	drm_puts(p, "      ");
1103 
1104 
1105 	l = ascii85_encode_len(datalen);
1106 
1107 	for (i = 0; i < l; i++)
1108 		drm_puts(p, ascii85_encode(data[i], out));
1109 
1110 	drm_puts(p, "\n");
1111 }
1112 
/*
 * Print @fmt, then @name, then a newline.  All three pieces go through
 * drm_puts() rather than drm_printf(), in exactly that order.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	const char *pieces[] = { fmt, name, "\n" };
	unsigned int i;

	for (i = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++)
		drm_puts(p, pieces[i]);
}
1119 
1120 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1121 		struct drm_printer *p)
1122 {
1123 	const struct a6xx_shader_block *block = obj->handle;
1124 	int i;
1125 
1126 	if (!obj->handle)
1127 		return;
1128 
1129 	print_name(p, "  - type: ", block->name);
1130 
1131 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1132 		drm_printf(p, "    - bank: %d\n", i);
1133 		drm_printf(p, "      size: %d\n", block->size);
1134 
1135 		if (!obj->data)
1136 			continue;
1137 
1138 		print_ascii85(p, block->size << 2,
1139 			obj->data + (block->size * i));
1140 	}
1141 }
1142 
1143 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1144 		struct drm_printer *p)
1145 {
1146 	int ctx, index = 0;
1147 
1148 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1149 		int j;
1150 
1151 		drm_printf(p, "    - context: %d\n", ctx);
1152 
1153 		for (j = 0; j < size; j += 2) {
1154 			u32 count = RANGE(registers, j);
1155 			u32 offset = registers[j];
1156 			int k;
1157 
1158 			for (k = 0; k < count; index++, offset++, k++) {
1159 				if (data[index] == 0xdeafbead)
1160 					continue;
1161 
1162 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1163 					offset << 2, data[index]);
1164 			}
1165 		}
1166 	}
1167 }
1168 
1169 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1170 		struct drm_printer *p)
1171 {
1172 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1173 
1174 	if (dbgahb) {
1175 		print_name(p, "  - cluster-name: ", dbgahb->name);
1176 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1177 			obj->data, p);
1178 	}
1179 }
1180 
1181 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1182 		struct drm_printer *p)
1183 {
1184 	const struct a6xx_cluster *cluster = obj->handle;
1185 
1186 	if (cluster) {
1187 		print_name(p, "  - cluster-name: ", cluster->name);
1188 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1189 			obj->data, p);
1190 	}
1191 }
1192 
1193 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1194 		struct drm_printer *p)
1195 {
1196 	const struct a6xx_indexed_registers *indexed = obj->handle;
1197 
1198 	if (!indexed)
1199 		return;
1200 
1201 	print_name(p, "  - regs-name: ", indexed->name);
1202 	drm_printf(p, "    dwords: %d\n", indexed->count);
1203 
1204 	print_ascii85(p, indexed->count << 2, obj->data);
1205 }
1206 
1207 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1208 		u32 *data, struct drm_printer *p)
1209 {
1210 	if (block) {
1211 		print_name(p, "  - debugbus-block: ", block->name);
1212 
1213 		/*
1214 		 * count for regular debugbus data is in quadwords,
1215 		 * but print the size in dwords for consistency
1216 		 */
1217 		drm_printf(p, "    count: %d\n", block->count << 1);
1218 
1219 		print_ascii85(p, block->count << 3, data);
1220 	}
1221 }
1222 
1223 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1224 		struct drm_printer *p)
1225 {
1226 	int i;
1227 
1228 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1229 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1230 
1231 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1232 	}
1233 
1234 	if (a6xx_state->vbif_debugbus) {
1235 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1236 
1237 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1238 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1239 
1240 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1241 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1242 	}
1243 
1244 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1245 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1246 
1247 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1248 	}
1249 }
1250 
1251 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1252 		struct drm_printer *p)
1253 {
1254 	struct a6xx_gpu_state *a6xx_state = container_of(state,
1255 			struct a6xx_gpu_state, base);
1256 	int i;
1257 
1258 	if (IS_ERR_OR_NULL(state))
1259 		return;
1260 
1261 	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1262 
1263 	adreno_show(gpu, state, p);
1264 
1265 	drm_puts(p, "gmu-log:\n");
1266 	if (a6xx_state->gmu_log) {
1267 		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1268 
1269 		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1270 		drm_printf(p, "    size: %zu\n", gmu_log->size);
1271 		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1272 				&gmu_log->encoded);
1273 	}
1274 
1275 	drm_puts(p, "gmu-hfi:\n");
1276 	if (a6xx_state->gmu_hfi) {
1277 		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1278 		unsigned i, j;
1279 
1280 		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1281 		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1282 		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1283 			drm_printf(p, "    queue-history[%u]:", i);
1284 			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1285 				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1286 			}
1287 			drm_printf(p, "\n");
1288 		}
1289 		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1290 				&gmu_hfi->encoded);
1291 	}
1292 
1293 	drm_puts(p, "gmu-debug:\n");
1294 	if (a6xx_state->gmu_debug) {
1295 		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1296 
1297 		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1298 		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1299 		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1300 				&gmu_debug->encoded);
1301 	}
1302 
1303 	drm_puts(p, "registers:\n");
1304 	for (i = 0; i < a6xx_state->nr_registers; i++) {
1305 		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1306 		const struct a6xx_registers *regs = obj->handle;
1307 
1308 		if (!obj->handle)
1309 			continue;
1310 
1311 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1312 	}
1313 
1314 	drm_puts(p, "registers-gmu:\n");
1315 	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1316 		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1317 		const struct a6xx_registers *regs = obj->handle;
1318 
1319 		if (!obj->handle)
1320 			continue;
1321 
1322 		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1323 	}
1324 
1325 	drm_puts(p, "indexed-registers:\n");
1326 	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1327 		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1328 
1329 	drm_puts(p, "shader-blocks:\n");
1330 	for (i = 0; i < a6xx_state->nr_shaders; i++)
1331 		a6xx_show_shader(&a6xx_state->shaders[i], p);
1332 
1333 	drm_puts(p, "clusters:\n");
1334 	for (i = 0; i < a6xx_state->nr_clusters; i++)
1335 		a6xx_show_cluster(&a6xx_state->clusters[i], p);
1336 
1337 	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1338 		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1339 
1340 	drm_puts(p, "debugbus:\n");
1341 	a6xx_show_debugbus(a6xx_state, p);
1342 }
1343