1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3 
#include <linux/ascii85.h>
#include <linux/overflow.h>

#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"
10 
/*
 * One captured snapshot object: 'handle' points at the static descriptor
 * (register list, block definition, ...) that produced the dump, and 'data'
 * at the captured dwords.  handle may be NULL for descriptor-less dumps
 * (e.g. the VBIF debugbus block).
 */
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};
15 
struct a6xx_gpu_state {
	struct msm_gpu_state base;

	/* GMU register dumps (AHB, RSCC and, if GX is up, GX-side lists) */
	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	/* GPU register dumps (AHB reads and/or crashdumper captures) */
	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	/* Shader blocks read through the HLSQ aperture via the crashdumper */
	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	/* Per-context register clusters read via the CP aperture */
	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	/* Per-context clusters read from behind the AHB aperture */
	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	/* Banks of indexed registers (address/data register pairs) */
	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	/* GX debug bus samples */
	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single VBIF debug bus dump; only set on targets without GBIF */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	/* CX debug bus samples (read through a temporary ioremap) */
	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	/* Copies of the GMU log / HFI / debug buffer objects */
	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	/* Recent HFI queue activity per queue, oldest entry first */
	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* Every state_kcalloc() allocation; freed in a6xx_gpu_state_destroy() */
	struct list_head objs;

	/* False if the GPU still needed hw init when the state was captured */
	bool gpu_initialized;
};
55 
/*
 * Emit a crashdumper script instruction that writes 'val' to register 'reg'.
 * in[0] carries the value; in[1] packs the register offset into the top bits
 * plus control bits (bit 21 appears to select write mode, the low bits are a
 * dword count of 1 -- inferred from the encoding, not from documentation).
 * Returns the number of u64 script slots consumed (always 2).
 */
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}
63 
/*
 * Emit a crashdumper script instruction that reads 'dwords' registers
 * starting at 'reg' into GPU memory at iova 'target'.  Returns the number
 * of u64 script slots consumed (always 2).
 */
static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}
71 
/*
 * Terminate a crashdumper script with an all-zero instruction.  Returns the
 * number of u64 script slots consumed (always 2).
 */
static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}
79 
/*
 * Scratch GEM object shared with the crashdumper engine: 'ptr' is the kernel
 * mapping used to build the script, 'iova' the GPU address handed to the
 * hardware (script at offset 0, captured data at A6XX_CD_DATA_OFFSET).
 */
struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};
85 
/*
 * Bookkeeping header prepended to every state_kcalloc() allocation so all
 * snapshot memory can be found and freed from the a6xx_gpu_state::objs list.
 */
struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];	/* caller-visible payload, u64-aligned */
};
90 
91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
92 {
93 	struct a6xx_state_memobj *obj =
94 		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
95 
96 	if (!obj)
97 		return NULL;
98 
99 	list_add_tail(&obj->node, &a6xx_state->objs);
100 	return &obj->data;
101 }
102 
103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
104 		size_t size)
105 {
106 	void *dst = state_kcalloc(a6xx_state, 1, size);
107 
108 	if (dst)
109 		memcpy(dst, src, size);
110 	return dst;
111 }
112 
113 /*
114  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
115  * the rest for the data
116  */
117 #define A6XX_CD_DATA_OFFSET 8192
118 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
119 
120 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
121 		struct a6xx_crashdumper *dumper)
122 {
123 	dumper->ptr = msm_gem_kernel_new(gpu->dev,
124 		SZ_1M, MSM_BO_WC, gpu->aspace,
125 		&dumper->bo, &dumper->iova);
126 
127 	if (!IS_ERR(dumper->ptr))
128 		msm_gem_object_set_name(dumper->bo, "crashdump");
129 
130 	return PTR_ERR_OR_ZERO(dumper->ptr);
131 }
132 
/*
 * Point the hardware crashdumper at the prepared script and run it to
 * completion.  Returns 0 on success or a negative error (invalid dumper,
 * power domain down, or the gpu_poll_timeout() result on timeout).
 */
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	/* The dumper can't run unless the SPTPRAC power rail is up */
	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	/* Kick off execution of the script */
	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	/* Poll (up to 10ms) for bit 1 of the status register, which appears
	 * to signal completion */
	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	/* Always disarm the dumper again, even on timeout */
	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}
162 
/*
 * Read one sample (two dwords) from the GX debug bus for the given block
 * and offset.  Returns the number of dwords written to 'data' (always 2).
 */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	/* Program the same selection into all four selector registers */
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}
183 
/* Accessors for the temporarily ioremapped CX debug bus registers; 'offset'
 * is a dword register offset, converted here to a byte offset */
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))
189 
/*
 * Read one sample (two dwords) from the CX debug bus through the
 * ioremapped 'cxdbg' register window.  Returns the number of dwords
 * written to 'data' (always 2).
 */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	/* Program the same selection into all four selector registers */
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}
210 
211 /* Read a chunk of data from the VBIF debug bus */
212 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
213 		u32 reg, int count, u32 *data)
214 {
215 	int i;
216 
217 	gpu_write(gpu, ctrl0, reg);
218 
219 	for (i = 0; i < count; i++) {
220 		gpu_write(gpu, ctrl1, i);
221 		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
222 	}
223 
224 	return count;
225 }
226 
227 #define AXI_ARB_BLOCKS 2
228 #define XIN_AXI_BLOCKS 5
229 #define XIN_CORE_BLOCKS 4
230 
231 #define VBIF_DEBUGBUS_BLOCK_SIZE \
232 	((16 * AXI_ARB_BLOCKS) + \
233 	 (18 * XIN_AXI_BLOCKS) + \
234 	 (12 * XIN_CORE_BLOCKS))
235 
/*
 * Capture the whole VBIF debug bus (AXI arbiter, XIN AXI and XIN core
 * blocks) into one VBIF_DEBUGBUS_BLOCK_SIZE-dword buffer.  The bus clock
 * is forced on for the duration and restored afterwards.
 */
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	/* No static descriptor for this dump; NULL marks it as VBIF data */
	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	/* 16 dwords per AXI arbiter block, read via BUS2 */
	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	/* 18 dwords per XIN AXI block, read via BUS2 */
	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	/* 12 dwords per XIN core block, read via BUS1 */
	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}
289 
290 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
291 		struct a6xx_gpu_state *a6xx_state,
292 		const struct a6xx_debugbus_block *block,
293 		struct a6xx_gpu_state_obj *obj)
294 {
295 	int i;
296 	u32 *ptr;
297 
298 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
299 	if (!obj->data)
300 		return;
301 
302 	obj->handle = block;
303 
304 	for (ptr = obj->data, i = 0; i < block->count; i++)
305 		ptr += debugbus_read(gpu, block->id, i, ptr);
306 }
307 
308 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
309 		struct a6xx_gpu_state *a6xx_state,
310 		const struct a6xx_debugbus_block *block,
311 		struct a6xx_gpu_state_obj *obj)
312 {
313 	int i;
314 	u32 *ptr;
315 
316 	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
317 	if (!obj->data)
318 		return;
319 
320 	obj->handle = block;
321 
322 	for (ptr = obj->data, i = 0; i < block->count; i++)
323 		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
324 }
325 
/*
 * Configure and capture all applicable debug buses: the GX bus, the GBIF
 * or VBIF bus depending on target, and (if its register window can be
 * mapped) the CX bus.
 */
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	/* Identity byte-lane mapping for the trace data */
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	/* Mirror the GX bus configuration onto the CX bus, if mapped */
	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	/* GBIF targets get one extra block on top of the common list */
	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has same debugbus as of other GPU blocks, fall back to
		 * default path if GPU uses GBIF, also GBIF uses exactly same
		 * ID as of VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}
	}

	/*  Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
			sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		/* Drop the temporary mapping again */
		iounmap(cxdbg);
	}
}
450 
/* Number of registers in the inclusive [start, end] pair at reg[a], reg[a+1] */
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
452 
/*
 * Read one register cluster from behind the AHB aperture with the
 * crashdumper, once per hardware context.  The dump layout is
 * regcount dwords per context, contexts back to back.
 */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		/* Select the statetype for this context */
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			/* Translate the register offset into the aperture */
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			/* Each context reads the same ranges; count them once */
			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}
499 
500 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
501 		struct a6xx_gpu_state *a6xx_state,
502 		struct a6xx_crashdumper *dumper)
503 {
504 	int i;
505 
506 	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
507 		ARRAY_SIZE(a6xx_dbgahb_clusters),
508 		sizeof(*a6xx_state->dbgahb_clusters));
509 
510 	if (!a6xx_state->dbgahb_clusters)
511 		return;
512 
513 	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
514 
515 	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
516 		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
517 			&a6xx_dbgahb_clusters[i],
518 			&a6xx_state->dbgahb_clusters[i], dumper);
519 }
520 
/*
 * Read one register cluster from the CP aperture with the crashdumper,
 * once per hardware context.  The dump layout is regcount dwords per
 * context, contexts back to back.
 */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		/* Point the aperture at this cluster/context combination */
		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(cluster->id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			/* Each context reads the same ranges; count them once */
			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}
570 
571 static void a6xx_get_clusters(struct msm_gpu *gpu,
572 		struct a6xx_gpu_state *a6xx_state,
573 		struct a6xx_crashdumper *dumper)
574 {
575 	int i;
576 
577 	a6xx_state->clusters = state_kcalloc(a6xx_state,
578 		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
579 
580 	if (!a6xx_state->clusters)
581 		return;
582 
583 	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
584 
585 	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
586 		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
587 			&a6xx_state->clusters[i], dumper);
588 }
589 
590 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
591 static void a6xx_get_shader_block(struct msm_gpu *gpu,
592 		struct a6xx_gpu_state *a6xx_state,
593 		const struct a6xx_shader_block *block,
594 		struct a6xx_gpu_state_obj *obj,
595 		struct a6xx_crashdumper *dumper)
596 {
597 	u64 *in = dumper->ptr;
598 	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
599 	int i;
600 
601 	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
602 		return;
603 
604 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
605 		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
606 			(block->type << 8) | i);
607 
608 		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
609 			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
610 	}
611 
612 	CRASHDUMP_FINI(in);
613 
614 	if (a6xx_crashdumper_run(gpu, dumper))
615 		return;
616 
617 	obj->handle = block;
618 	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
619 		datasize);
620 }
621 
622 static void a6xx_get_shaders(struct msm_gpu *gpu,
623 		struct a6xx_gpu_state *a6xx_state,
624 		struct a6xx_crashdumper *dumper)
625 {
626 	int i;
627 
628 	a6xx_state->shaders = state_kcalloc(a6xx_state,
629 		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
630 
631 	if (!a6xx_state->shaders)
632 		return;
633 
634 	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
635 
636 	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
637 		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
638 			&a6xx_state->shaders[i], dumper);
639 }
640 
/*
 * Read a register list from behind the HLSQ aperture with the crashdumper.
 * regs->val1 selects the statetype; regs->val0 holds the aperture base
 * used to translate register offsets.
 */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		/* Translate the register offset into the aperture */
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}
678 
/*
 * Read a plain register list using the crashdumper.  regs->val0/val1 are
 * an optional selector register/value pair programmed before the reads.
 */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}
716 
717 /* Read a block of registers via AHB */
718 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
719 		struct a6xx_gpu_state *a6xx_state,
720 		const struct a6xx_registers *regs,
721 		struct a6xx_gpu_state_obj *obj)
722 {
723 	int i, regcount = 0, index = 0;
724 
725 	for (i = 0; i < regs->count; i += 2)
726 		regcount += RANGE(regs->registers, i);
727 
728 	obj->handle = (const void *) regs;
729 	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
730 	if (!obj->data)
731 		return;
732 
733 	for (i = 0; i < regs->count; i += 2) {
734 		u32 count = RANGE(regs->registers, i);
735 		int j;
736 
737 		for (j = 0; j < count; j++)
738 			obj->data[index++] = gpu_read(gpu,
739 				regs->registers[i] + j);
740 	}
741 }
742 
/*
 * Read a GMU register list.  'rscc' selects the RSCC register space
 * accessor instead of the regular GMU one.
 */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	/* Total register count across all [start, end] ranges */
	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}
780 
/*
 * Capture the three GMU register lists: CX via AHB, RSCC, and - only if
 * the GX domain is powered - the GX-side list.
 */
static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	/* nr is set up front; entries whose capture fails stay zeroed */
	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}
810 
811 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
812 		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
813 {
814 	struct msm_gpu_state_bo *snapshot;
815 
816 	if (!bo->size)
817 		return NULL;
818 
819 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
820 	if (!snapshot)
821 		return NULL;
822 
823 	snapshot->iova = bo->iova;
824 	snapshot->size = bo->size;
825 	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
826 	if (!snapshot->data)
827 		return NULL;
828 
829 	memcpy(snapshot->data, bo->virt, bo->size);
830 
831 	return snapshot;
832 }
833 
834 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
835 					  struct a6xx_gpu_state *a6xx_state)
836 {
837 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
838 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
839 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
840 	unsigned i, j;
841 
842 	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
843 
844 	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
845 		struct a6xx_hfi_queue *queue = &gmu->queues[i];
846 		for (j = 0; j < HFI_HISTORY_SZ; j++) {
847 			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
848 			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
849 		}
850 	}
851 }
852 
/* Exactly one of the GBIF/VBIF lists is captured, hence one slot */
#define A6XX_GBIF_REGLIST_SIZE   1
/*
 * Capture all GPU register lists.  AHB lists and the GBIF-or-VBIF list
 * are always read by the CPU; the remaining lists go through the
 * crashdumper when one is available, otherwise the main lists are read
 * via AHB too and the HLSQ lists are skipped.
 */
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	/* nr covers all slots; unfilled ones (no dumper) stay zeroed */
	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_gbif_reglist,
				&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_vbif_reglist,
				&a6xx_state->registers[index++]);
	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}
912 
/*
 * Read 'indexed->count' dwords from an indexed register pair: write 0 to
 * the address register, then read the data register repeatedly (the
 * hardware auto-increments the internal address).  Leaves obj->data NULL
 * if the snapshot allocation fails; callers must tolerate that.
 */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}
933 
934 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
935 		struct a6xx_gpu_state *a6xx_state)
936 {
937 	u32 mempool_size;
938 	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
939 	int i;
940 
941 	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
942 		sizeof(*a6xx_state->indexed_regs));
943 	if (!a6xx_state->indexed_regs)
944 		return;
945 
946 	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
947 		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
948 			&a6xx_state->indexed_regs[i]);
949 
950 	/* Set the CP mempool size to 0 to stabilize it while dumping */
951 	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
952 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
953 
954 	/* Get the contents of the CP mempool */
955 	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
956 		&a6xx_state->indexed_regs[i]);
957 
958 	/*
959 	 * Offset 0x2000 in the mempool is the size - copy the saved size over
960 	 * so the data is consistent
961 	 */
962 	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
963 
964 	/* Restore the size in the hardware */
965 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
966 
967 	a6xx_state->nr_indexed_regs = count;
968 }
969 
/*
 * Capture a full a6xx crash state: generic adreno state, GMU state, and -
 * if the GX power domain is up - registers, indexed banks, crashdumper
 * captures and debug buses.  Returns the embedded msm_gpu_state (callers
 * release it with a6xx_gpu_state_put()) or ERR_PTR(-ENOMEM).
 */
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	/* An SMMU stall means the GPU has no memory access (see below) */
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	a6xx_get_gmu_registers(gpu, a6xx_state);

	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled.  The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
	    !a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	a6xx_get_registers(gpu, a6xx_state, dumper);

	/* These captures require the crashdumper; skipped without one */
	if (dumper) {
		a6xx_get_shaders(gpu, a6xx_state, dumper);
		a6xx_get_clusters(gpu, a6xx_state, dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

		msm_gem_kernel_put(dumper->bo, gpu->aspace);
	}

	/* Debug bus capture is opt-in via the module parameter */
	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return  &a6xx_state->base;
}
1031 
/*
 * kref release callback: free the separately kvzalloc'ed GMU BO copies,
 * every tracked state_kcalloc() allocation, the generic adreno state and
 * finally the state container itself.
 */
static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
			struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);

	if (a6xx_state->gmu_log)
		kvfree(a6xx_state->gmu_log->data);

	if (a6xx_state->gmu_hfi)
		kvfree(a6xx_state->gmu_hfi->data);

	if (a6xx_state->gmu_debug)
		kvfree(a6xx_state->gmu_debug->data);

	/* The descriptors above live on this list, so free them last */
	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
		list_del(&obj->node);
		kvfree(obj);
	}

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}
1057 
1058 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1059 {
1060 	if (IS_ERR_OR_NULL(state))
1061 		return 1;
1062 
1063 	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1064 }
1065 
1066 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1067 		struct drm_printer *p)
1068 {
1069 	int i, index = 0;
1070 
1071 	if (!data)
1072 		return;
1073 
1074 	for (i = 0; i < count; i += 2) {
1075 		u32 count = RANGE(registers, i);
1076 		u32 offset = registers[i];
1077 		int j;
1078 
1079 		for (j = 0; j < count; index++, offset++, j++) {
1080 			if (data[index] == 0xdeafbead)
1081 				continue;
1082 
1083 			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1084 				offset << 2, data[index]);
1085 		}
1086 	}
1087 }
1088 
1089 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1090 {
1091 	char out[ASCII85_BUFSZ];
1092 	long i, l, datalen = 0;
1093 
1094 	for (i = 0; i < len >> 2; i++) {
1095 		if (data[i])
1096 			datalen = (i + 1) << 2;
1097 	}
1098 
1099 	if (datalen == 0)
1100 		return;
1101 
1102 	drm_puts(p, "    data: !!ascii85 |\n");
1103 	drm_puts(p, "      ");
1104 
1105 
1106 	l = ascii85_encode_len(datalen);
1107 
1108 	for (i = 0; i < l; i++)
1109 		drm_puts(p, ascii85_encode(data[i], out));
1110 
1111 	drm_puts(p, "\n");
1112 }
1113 
/* Print a label (@fmt) immediately followed by @name and a newline */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_printf(p, "%s%s\n", fmt, name);
}
1120 
1121 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1122 		struct drm_printer *p)
1123 {
1124 	const struct a6xx_shader_block *block = obj->handle;
1125 	int i;
1126 
1127 	if (!obj->handle)
1128 		return;
1129 
1130 	print_name(p, "  - type: ", block->name);
1131 
1132 	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1133 		drm_printf(p, "    - bank: %d\n", i);
1134 		drm_printf(p, "      size: %d\n", block->size);
1135 
1136 		if (!obj->data)
1137 			continue;
1138 
1139 		print_ascii85(p, block->size << 2,
1140 			obj->data + (block->size * i));
1141 	}
1142 }
1143 
1144 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1145 		struct drm_printer *p)
1146 {
1147 	int ctx, index = 0;
1148 
1149 	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1150 		int j;
1151 
1152 		drm_printf(p, "    - context: %d\n", ctx);
1153 
1154 		for (j = 0; j < size; j += 2) {
1155 			u32 count = RANGE(registers, j);
1156 			u32 offset = registers[j];
1157 			int k;
1158 
1159 			for (k = 0; k < count; index++, offset++, k++) {
1160 				if (data[index] == 0xdeafbead)
1161 					continue;
1162 
1163 				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1164 					offset << 2, data[index]);
1165 			}
1166 		}
1167 	}
1168 }
1169 
1170 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1171 		struct drm_printer *p)
1172 {
1173 	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1174 
1175 	if (dbgahb) {
1176 		print_name(p, "  - cluster-name: ", dbgahb->name);
1177 		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1178 			obj->data, p);
1179 	}
1180 }
1181 
1182 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1183 		struct drm_printer *p)
1184 {
1185 	const struct a6xx_cluster *cluster = obj->handle;
1186 
1187 	if (cluster) {
1188 		print_name(p, "  - cluster-name: ", cluster->name);
1189 		a6xx_show_cluster_data(cluster->registers, cluster->count,
1190 			obj->data, p);
1191 	}
1192 }
1193 
1194 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1195 		struct drm_printer *p)
1196 {
1197 	const struct a6xx_indexed_registers *indexed = obj->handle;
1198 
1199 	if (!indexed)
1200 		return;
1201 
1202 	print_name(p, "  - regs-name: ", indexed->name);
1203 	drm_printf(p, "    dwords: %d\n", indexed->count);
1204 
1205 	print_ascii85(p, indexed->count << 2, obj->data);
1206 }
1207 
1208 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1209 		u32 *data, struct drm_printer *p)
1210 {
1211 	if (block) {
1212 		print_name(p, "  - debugbus-block: ", block->name);
1213 
1214 		/*
1215 		 * count for regular debugbus data is in quadwords,
1216 		 * but print the size in dwords for consistency
1217 		 */
1218 		drm_printf(p, "    count: %d\n", block->count << 1);
1219 
1220 		print_ascii85(p, block->count << 3, data);
1221 	}
1222 }
1223 
1224 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1225 		struct drm_printer *p)
1226 {
1227 	int i;
1228 
1229 	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1230 		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1231 
1232 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1233 	}
1234 
1235 	if (a6xx_state->vbif_debugbus) {
1236 		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1237 
1238 		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1239 		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1240 
1241 		/* vbif debugbus data is in dwords.  Confusing, huh? */
1242 		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1243 	}
1244 
1245 	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1246 		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1247 
1248 		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1249 	}
1250 }
1251 
/*
 * Top-level crashdump printer for a6xx: emit the captured GPU state as a
 * YAML-ish text document via @p.  Sections appear in a fixed order (GMU
 * buffers, registers, indexed registers, shaders, clusters, debugbus) and
 * section headers are printed even when a section captured nothing.
 */
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	/*
	 * container_of() is pure pointer arithmetic, so computing a6xx_state
	 * before the IS_ERR_OR_NULL() check below does not dereference a bad
	 * pointer; it is only used after the check.
	 */
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	/* Common adreno state (ringbuffers, BOs, etc.) first */
	adreno_show(gpu, state, p);

	/* GMU log buffer snapshot, if one was captured */
	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, "    size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	/* GMU HFI buffer plus the recent per-queue history indices */
	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, "    queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	/* GMU debug buffer snapshot, if one was captured */
	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, "    size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	/* GPU register blocks (captured via AHB or the crashdumper) */
	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	/* GMU register blocks */
	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	/* dbgahb clusters are listed under the same "clusters:" heading */
	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}
1344