1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  */
24 #include "nv50.h"
25 
26 #include <core/client.h>
27 #include <core/handle.h>
28 #include <engine/fifo.h>
29 #include <subdev/timer.h>
30 
/* Private PGRAPH engine state for NV50-family GPUs. */
struct nv50_gr {
	struct nvkm_gr base;	/* common graphics engine base, must be first */
	spinlock_t lock;	/* serialises the TLB flush sequence (g84_gr_tlb_flush) */
	u32 size;		/* context image size, filled in by nv50_grctx_init() */
};
36 
/* Per-channel PGRAPH context object. */
struct nv50_gr_chan {
	struct nvkm_gr_chan base;	/* common per-channel context base */
};
40 
41 static u64
42 nv50_gr_units(struct nvkm_gr *gr)
43 {
44 	return nvkm_rd32(gr->engine.subdev.device, 0x1540);
45 }
46 
47 /*******************************************************************************
48  * Graphics object classes
49  ******************************************************************************/
50 
/* Construct a software graphics object: a 16-byte gpuobj whose first
 * word holds the object class, with the remaining words zeroed. */
static int
nv50_gr_object_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
		    struct nvkm_oclass *oclass, void *data, u32 size,
		    struct nvkm_object **pobject)
{
	struct nvkm_gpuobj *obj;
	int ret;

	ret = nvkm_gpuobj_create(parent, engine, oclass, 0, parent,
				 16, 16, 0, &obj);
	/* *pobject is published even on failure so the core can clean up */
	*pobject = nv_object(obj);
	if (ret)
		return ret;

	nv_wo32(obj, 0x00, nv_mclass(obj));	/* object class id */
	nv_wo32(obj, 0x04, 0x00000000);
	nv_wo32(obj, 0x08, 0x00000000);
	nv_wo32(obj, 0x0c, 0x00000000);
	return 0;
}
71 
/* Function table shared by all software graphics object classes below;
 * everything except construction uses the generic gpuobj helpers. */
static struct nvkm_ofuncs
nv50_gr_ofuncs = {
	.ctor = nv50_gr_object_ctor,
	.dtor = _nvkm_gpuobj_dtor,
	.init = _nvkm_gpuobj_init,
	.fini = _nvkm_gpuobj_fini,
	.rd32 = _nvkm_gpuobj_rd32,
	.wr32 = _nvkm_gpuobj_wr32,
};
81 
/* Object classes exposed on original NV50. */
static struct nvkm_oclass
nv50_gr_sclass[] = {
	{ 0x0030, &nv50_gr_ofuncs }, /* null */
	{ 0x502d, &nv50_gr_ofuncs }, /* 2d */
	{ 0x5039, &nv50_gr_ofuncs }, /* m2mf */
	{ 0x5097, &nv50_gr_ofuncs }, /* 3d (tesla) */
	{ 0x50c0, &nv50_gr_ofuncs }, /* compute */
	{}
};
91 
/* Object classes exposed on G84-class chipsets (0x84..0x98). */
static struct nvkm_oclass
g84_gr_sclass[] = {
	{ 0x0030, &nv50_gr_ofuncs }, /* null */
	{ 0x502d, &nv50_gr_ofuncs }, /* 2d */
	{ 0x5039, &nv50_gr_ofuncs }, /* m2mf */
	{ 0x50c0, &nv50_gr_ofuncs }, /* compute */
	{ 0x8297, &nv50_gr_ofuncs }, /* 3d (tesla) */
	{}
};
101 
/* Object classes exposed on GT200-class chipsets (0xa0/0xaa/0xac). */
static struct nvkm_oclass
gt200_gr_sclass[] = {
	{ 0x0030, &nv50_gr_ofuncs }, /* null */
	{ 0x502d, &nv50_gr_ofuncs }, /* 2d */
	{ 0x5039, &nv50_gr_ofuncs }, /* m2mf */
	{ 0x50c0, &nv50_gr_ofuncs }, /* compute */
	{ 0x8397, &nv50_gr_ofuncs }, /* 3d (tesla) */
	{}
};
111 
/* Object classes exposed on GT215-class chipsets (0xa3/0xa5/0xa8). */
static struct nvkm_oclass
gt215_gr_sclass[] = {
	{ 0x0030, &nv50_gr_ofuncs }, /* null */
	{ 0x502d, &nv50_gr_ofuncs }, /* 2d */
	{ 0x5039, &nv50_gr_ofuncs }, /* m2mf */
	{ 0x50c0, &nv50_gr_ofuncs }, /* compute */
	{ 0x8597, &nv50_gr_ofuncs }, /* 3d (tesla) */
	{ 0x85c0, &nv50_gr_ofuncs }, /* compute */
	{}
};
122 
/* Object classes exposed on MCP89 (chipset 0xaf). */
static struct nvkm_oclass
mcp89_gr_sclass[] = {
	{ 0x0030, &nv50_gr_ofuncs }, /* null */
	{ 0x502d, &nv50_gr_ofuncs }, /* 2d */
	{ 0x5039, &nv50_gr_ofuncs }, /* m2mf */
	{ 0x50c0, &nv50_gr_ofuncs }, /* compute */
	{ 0x85c0, &nv50_gr_ofuncs }, /* compute */
	{ 0x8697, &nv50_gr_ofuncs }, /* 3d (tesla) */
	{}
};
133 
134 /*******************************************************************************
135  * PGRAPH context
136  ******************************************************************************/
137 
/* Allocate a zero-initialised per-channel context image (gr->size bytes,
 * as determined by nv50_grctx_init() during engine init) and fill in its
 * initial register values. */
static int
nv50_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
		     struct nvkm_oclass *oclass, void *data, u32 size,
		     struct nvkm_object **pobject)
{
	struct nv50_gr *gr = (void *)engine;
	struct nv50_gr_chan *chan;
	int ret;

	ret = nvkm_gr_context_create(parent, engine, oclass, NULL, gr->size,
				     0, NVOBJ_FLAG_ZERO_ALLOC, &chan);
	/* *pobject is published even on failure so the core can clean up */
	*pobject = nv_object(chan);
	if (ret)
		return ret;

	nv50_grctx_fill(nv_device(gr), nv_gpuobj(chan));
	return 0;
}
156 
/* PGRAPH per-channel context class; only construction is specialised. */
static struct nvkm_oclass
nv50_gr_cclass = {
	.handle = NV_ENGCTX(GR, 0x50),
	.ofuncs = &(struct nvkm_ofuncs) {
		.ctor = nv50_gr_context_ctor,
		.dtor = _nvkm_gr_context_dtor,
		.init = _nvkm_gr_context_init,
		.fini = _nvkm_gr_context_fini,
		.rd32 = _nvkm_gr_context_rd32,
		.wr32 = _nvkm_gr_context_wr32,
	},
};
169 
170 /*******************************************************************************
171  * PGRAPH engine/subdev functions
172  ******************************************************************************/
173 
/* Bit names for PGRAPH_STATUS (0x400700). */
static const struct nvkm_bitfield nv50_gr_status[] = {
	{ 0x00000001, "BUSY" }, /* set when any bit is set */
	{ 0x00000002, "DISPATCH" },
	{ 0x00000004, "UNK2" },
	{ 0x00000008, "UNK3" },
	{ 0x00000010, "UNK4" },
	{ 0x00000020, "UNK5" },
	{ 0x00000040, "M2MF" },
	{ 0x00000080, "UNK7" },
	{ 0x00000100, "CTXPROG" },
	{ 0x00000200, "VFETCH" },
	{ 0x00000400, "CCACHE_PREGEOM" },
	{ 0x00000800, "STRMOUT_VATTR_POSTGEOM" },
	{ 0x00001000, "VCLIP" },
	{ 0x00002000, "RATTR_APLANE" },
	{ 0x00004000, "TRAST" },
	{ 0x00008000, "CLIPID" },
	{ 0x00010000, "ZCULL" },
	{ 0x00020000, "ENG2D" },
	{ 0x00040000, "RMASK" },
	{ 0x00080000, "TPC_RAST" },
	{ 0x00100000, "TPC_PROP" },
	{ 0x00200000, "TPC_TEX" },
	{ 0x00400000, "TPC_GEOM" },
	{ 0x00800000, "TPC_MP" },
	{ 0x01000000, "ROP" },
	{}
};
202 
/* Unit names for PGRAPH_VSTATUS0 (0x400380), one per 3-bit field. */
static const char *const nv50_gr_vstatus_0[] = {
	"VFETCH", "CCACHE", "PREGEOM", "POSTGEOM", "VATTR", "STRMOUT", "VCLIP",
	NULL
};
207 
/* Unit names for PGRAPH_VSTATUS1 (0x400384), one per 3-bit field. */
static const char *const nv50_gr_vstatus_1[] = {
	"TPC_RAST", "TPC_PROP", "TPC_TEX", "TPC_GEOM", "TPC_MP", NULL
};
211 
/* Unit names for PGRAPH_VSTATUS2 (0x400388), one per 3-bit field. */
static const char *const nv50_gr_vstatus_2[] = {
	"RATTR", "APLANE", "TRAST", "CLIPID", "ZCULL", "ENG2D", "RMASK",
	"ROP", NULL
};
216 
217 static void
218 nvkm_gr_vstatus_print(struct nv50_gr *gr, int r,
219 		       const char *const units[], u32 status)
220 {
221 	int i;
222 
223 	nv_error(gr, "PGRAPH_VSTATUS%d: 0x%08x", r, status);
224 
225 	for (i = 0; units[i] && status; i++) {
226 		if ((status & 7) == 1)
227 			pr_cont(" %s", units[i]);
228 		status >>= 3;
229 	}
230 	if (status)
231 		pr_cont(" (invalid: 0x%x)", status);
232 	pr_cont("\n");
233 }
234 
/* TLB flush with hardware-bug workaround: PGRAPH must be idled (with
 * fifo access blocked) before kicking the flush at 0x100c80, otherwise
 * the flush can wedge.  Returns -EBUSY if PGRAPH failed to go idle
 * within 2000000000 timer ticks (presumably ns, i.e. ~2s -- matches
 * the timer API elsewhere; confirm). */
static int
g84_gr_tlb_flush(struct nvkm_engine *engine)
{
	struct nv50_gr *gr = (void *)engine;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_timer *tmr = device->timer;
	bool idle, timeout = false;
	unsigned long flags;
	u64 start;
	u32 tmp;

	spin_lock_irqsave(&gr->lock, flags);
	/* block fifo access to PGRAPH while waiting for it to idle */
	nvkm_mask(device, 0x400500, 0x00000001, 0x00000000);

	start = tmr->read(tmr);
	do {
		idle = true;

		/* scan all three VSTATUS registers: a 3-bit field value
		 * of 1 means the corresponding unit is still busy */
		for (tmp = nvkm_rd32(device, 0x400380); tmp && idle; tmp >>= 3) {
			if ((tmp & 7) == 1)
				idle = false;
		}

		for (tmp = nvkm_rd32(device, 0x400384); tmp && idle; tmp >>= 3) {
			if ((tmp & 7) == 1)
				idle = false;
		}

		for (tmp = nvkm_rd32(device, 0x400388); tmp && idle; tmp >>= 3) {
			if ((tmp & 7) == 1)
				idle = false;
		}
	} while (!idle &&
		 !(timeout = tmr->read(tmr) - start > 2000000000));

	if (timeout) {
		nv_error(gr, "PGRAPH TLB flush idle timeout fail\n");

		/* dump status/vstatus to aid debugging the hang */
		tmp = nvkm_rd32(device, 0x400700);
		nv_error(gr, "PGRAPH_STATUS  : 0x%08x", tmp);
		nvkm_bitfield_print(nv50_gr_status, tmp);
		pr_cont("\n");

		nvkm_gr_vstatus_print(gr, 0, nv50_gr_vstatus_0,
				       nvkm_rd32(device, 0x400380));
		nvkm_gr_vstatus_print(gr, 1, nv50_gr_vstatus_1,
				       nvkm_rd32(device, 0x400384));
		nvkm_gr_vstatus_print(gr, 2, nv50_gr_vstatus_2,
				       nvkm_rd32(device, 0x400388));
	}


	/* trigger the flush (even if idling timed out), wait up to 2ms
	 * for the hardware to clear the busy bit */
	nvkm_wr32(device, 0x100c80, 0x00000001);
	nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x100c80) & 0x00000001))
			break;
	);
	/* restore fifo access to PGRAPH */
	nvkm_mask(device, 0x400500, 0x00000001, 0x00000001);
	spin_unlock_irqrestore(&gr->lock, flags);
	return timeout ? -EBUSY : 0;
}
296 
/* MP execution trap status bits (printed as TRAP_MP_EXEC). */
static const struct nvkm_bitfield nv50_mp_exec_errors[] = {
	{ 0x01, "STACK_UNDERFLOW" },
	{ 0x02, "STACK_MISMATCH" },
	{ 0x04, "QUADON_ACTIVE" },
	{ 0x08, "TIMEOUT" },
	{ 0x10, "INVALID_OPCODE" },
	{ 0x20, "PM_OVERFLOW" },
	{ 0x40, "BREAKPOINT" },
	{}
};
307 
/* MPC trap status bits (TRAP_MP, type 7 in nv50_gr_tp_trap()). */
static const struct nvkm_bitfield nv50_mpc_traps[] = {
	{ 0x0000001, "LOCAL_LIMIT_READ" },
	{ 0x0000010, "LOCAL_LIMIT_WRITE" },
	{ 0x0000040, "STACK_LIMIT" },
	{ 0x0000100, "GLOBAL_LIMIT_READ" },
	{ 0x0001000, "GLOBAL_LIMIT_WRITE" },
	{ 0x0010000, "MP0" },
	{ 0x0020000, "MP1" },
	{ 0x0040000, "GLOBAL_LIMIT_RED" },
	{ 0x0400000, "GLOBAL_LIMIT_ATOM" },
	{ 0x4000000, "MP2" },
	{}
};
321 
/* Texture unit trap status bits (TRAP_TEXTURE, type 6 in nv50_gr_tp_trap()). */
static const struct nvkm_bitfield nv50_tex_traps[] = {
	{ 0x00000001, "" }, /* any bit set? */
	{ 0x00000002, "FAULT" },
	{ 0x00000004, "STORAGE_TYPE_MISMATCH" },
	{ 0x00000008, "LINEAR_MISMATCH" },
	{ 0x00000020, "WRONG_MEMTYPE" },
	{}
};
330 
/* M2MF trap status bits (register 0x406800). */
static const struct nvkm_bitfield nv50_gr_trap_m2mf[] = {
	{ 0x00000001, "NOTIFY" },
	{ 0x00000002, "IN" },
	{ 0x00000004, "OUT" },
	{}
};
337 
/* VFETCH trap status bits (register 0x400c04). */
static const struct nvkm_bitfield nv50_gr_trap_vfetch[] = {
	{ 0x00000001, "FAULT" },
	{}
};
342 
/* STRMOUT trap status bits (register 0x401800). */
static const struct nvkm_bitfield nv50_gr_trap_strmout[] = {
	{ 0x00000001, "FAULT" },
	{}
};
347 
/* CCACHE trap status bits (register 0x405018). */
static const struct nvkm_bitfield nv50_gr_trap_ccache[] = {
	{ 0x00000001, "FAULT" },
	{}
};
352 
/* There must be a *lot* of these. Will take some time to gather them up. */
/* DATA_ERROR code names, read from register 0x400110 on interrupt. */
const struct nvkm_enum nv50_data_error_names[] = {
	{ 0x00000003, "INVALID_OPERATION", NULL },
	{ 0x00000004, "INVALID_VALUE", NULL },
	{ 0x00000005, "INVALID_ENUM", NULL },
	{ 0x00000008, "INVALID_OBJECT", NULL },
	{ 0x00000009, "READ_ONLY_OBJECT", NULL },
	{ 0x0000000a, "SUPERVISOR_OBJECT", NULL },
	{ 0x0000000b, "INVALID_ADDRESS_ALIGNMENT", NULL },
	{ 0x0000000c, "INVALID_BITFIELD", NULL },
	{ 0x0000000d, "BEGIN_END_ACTIVE", NULL },
	{ 0x0000000e, "SEMANTIC_COLOR_BACK_OVER_LIMIT", NULL },
	{ 0x0000000f, "VIEWPORT_ID_NEEDS_GP", NULL },
	{ 0x00000010, "RT_DOUBLE_BIND", NULL },
	{ 0x00000011, "RT_TYPES_MISMATCH", NULL },
	{ 0x00000012, "RT_LINEAR_WITH_ZETA", NULL },
	{ 0x00000015, "FP_TOO_FEW_REGS", NULL },
	{ 0x00000016, "ZETA_FORMAT_CSAA_MISMATCH", NULL },
	{ 0x00000017, "RT_LINEAR_WITH_MSAA", NULL },
	{ 0x00000018, "FP_INTERPOLANT_START_OVER_LIMIT", NULL },
	{ 0x00000019, "SEMANTIC_LAYER_OVER_LIMIT", NULL },
	{ 0x0000001a, "RT_INVALID_ALIGNMENT", NULL },
	{ 0x0000001b, "SAMPLER_OVER_LIMIT", NULL },
	{ 0x0000001c, "TEXTURE_OVER_LIMIT", NULL },
	{ 0x0000001e, "GP_TOO_MANY_OUTPUTS", NULL },
	{ 0x0000001f, "RT_BPP128_WITH_MS8", NULL },
	{ 0x00000021, "Z_OUT_OF_BOUNDS", NULL },
	{ 0x00000023, "XY_OUT_OF_BOUNDS", NULL },
	{ 0x00000024, "VP_ZERO_INPUTS", NULL },
	{ 0x00000027, "CP_MORE_PARAMS_THAN_SHARED", NULL },
	{ 0x00000028, "CP_NO_REG_SPACE_STRIPED", NULL },
	{ 0x00000029, "CP_NO_REG_SPACE_PACKED", NULL },
	{ 0x0000002a, "CP_NOT_ENOUGH_WARPS", NULL },
	{ 0x0000002b, "CP_BLOCK_SIZE_MISMATCH", NULL },
	{ 0x0000002c, "CP_NOT_ENOUGH_LOCAL_WARPS", NULL },
	{ 0x0000002d, "CP_NOT_ENOUGH_STACK_WARPS", NULL },
	{ 0x0000002e, "CP_NO_BLOCKDIM_LATCH", NULL },
	{ 0x00000031, "ENG2D_FORMAT_MISMATCH", NULL },
	{ 0x0000003f, "PRIMITIVE_ID_NEEDS_GP", NULL },
	{ 0x00000044, "SEMANTIC_VIEWPORT_OVER_LIMIT", NULL },
	{ 0x00000045, "SEMANTIC_COLOR_FRONT_OVER_LIMIT", NULL },
	{ 0x00000046, "LAYER_ID_NEEDS_GP", NULL },
	{ 0x00000047, "SEMANTIC_CLIP_OVER_LIMIT", NULL },
	{ 0x00000048, "SEMANTIC_PTSZ_OVER_LIMIT", NULL },
	{}
};
399 
/* PGRAPH interrupt status bit names (register 0x400100). */
static const struct nvkm_bitfield nv50_gr_intr_name[] = {
	{ 0x00000001, "NOTIFY" },
	{ 0x00000002, "COMPUTE_QUERY" },
	{ 0x00000010, "ILLEGAL_MTHD" },
	{ 0x00000020, "ILLEGAL_CLASS" },
	{ 0x00000040, "DOUBLE_NOTIFY" },
	{ 0x00001000, "CONTEXT_SWITCH" },
	{ 0x00010000, "BUFFER_NOTIFY" },
	{ 0x00100000, "DATA_ERROR" },
	{ 0x00200000, "TRAP" },
	{ 0x01000000, "SINGLE_STEP" },
	{}
};
413 
/* PROP trap status bits (TRAP_PROP, type 8 in nv50_gr_tp_trap()). */
static const struct nvkm_bitfield nv50_gr_trap_prop[] = {
	{ 0x00000004, "SURF_WIDTH_OVERRUN" },
	{ 0x00000008, "SURF_HEIGHT_OVERRUN" },
	{ 0x00000010, "DST2D_FAULT" },
	{ 0x00000020, "ZETA_FAULT" },
	{ 0x00000040, "RT_FAULT" },
	{ 0x00000080, "CUDA_FAULT" },
	{ 0x00000100, "DST2D_STORAGE_TYPE_MISMATCH" },
	{ 0x00000200, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000800, "DST2D_LINEAR_MISMATCH" },
	{ 0x00001000, "RT_LINEAR_MISMATCH" },
	{}
};
428 
/* Decode and report a PROP trap for TP 'tp'.  The registers following
 * ustatus_addr carry fault details; e10/e14 appear to hold the faulting
 * address (low/high words) -- naming follows the register offsets. */
static void
nv50_gr_prop_trap(struct nv50_gr *gr,
		    u32 ustatus_addr, u32 ustatus, u32 tp)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 e0c = nvkm_rd32(device, ustatus_addr + 0x04);
	u32 e10 = nvkm_rd32(device, ustatus_addr + 0x08);
	u32 e14 = nvkm_rd32(device, ustatus_addr + 0x0c);
	u32 e18 = nvkm_rd32(device, ustatus_addr + 0x10);
	u32 e1c = nvkm_rd32(device, ustatus_addr + 0x14);
	u32 e20 = nvkm_rd32(device, ustatus_addr + 0x18);
	u32 e24 = nvkm_rd32(device, ustatus_addr + 0x1c);

	/* CUDA memory: l[], g[] or stack. */
	if (ustatus & 0x00000080) {
		if (e18 & 0x80000000) {
			/* g[] read fault? */
			nv_error(gr, "TRAP_PROP - TP %d - CUDA_FAULT - Global read fault at address %02x%08x\n",
					 tp, e14, e10 | ((e18 >> 24) & 0x1f));
			e18 &= ~0x1f000000;
		} else if (e18 & 0xc) {
			/* g[] write fault? */
			nv_error(gr, "TRAP_PROP - TP %d - CUDA_FAULT - Global write fault at address %02x%08x\n",
				 tp, e14, e10 | ((e18 >> 7) & 0x1f));
			e18 &= ~0x00000f80;
		} else {
			nv_error(gr, "TRAP_PROP - TP %d - Unknown CUDA fault at address %02x%08x\n",
				 tp, e14, e10);
		}
		ustatus &= ~0x00000080;
	}
	/* report any remaining (non-CUDA) trap bits via the name table */
	if (ustatus) {
		nv_error(gr, "TRAP_PROP - TP %d -", tp);
		nvkm_bitfield_print(nv50_gr_trap_prop, ustatus);
		pr_cont(" - Address %02x%08x\n", e14, e10);
	}
	/* always dump the raw detail registers for debugging */
	nv_error(gr, "TRAP_PROP - TP %d - e0c: %08x, e18: %08x, e1c: %08x, e20: %08x, e24: %08x\n",
		 tp, e0c, e18, e1c, e20, e24);
}
468 
/* Report and acknowledge MP execution traps for every MP present in TP
 * 'tpid' (MP presence is taken from bits 24+ of register 0x1540).  If
 * 'display' is clear the traps are acked silently. */
static void
nv50_gr_mp_trap(struct nv50_gr *gr, int tpid, int display)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 units = nvkm_rd32(device, 0x1540);
	u32 addr, mp10, status, pc, oplow, ophigh;
	int i;
	int mps = 0;
	for (i = 0; i < 4; i++) {
		if (!(units & 1 << (i+24)))
			continue;
		/* per-MP register stride/base differs pre/post NVA0 */
		if (nv_device(gr)->chipset < 0xa0)
			addr = 0x408200 + (tpid << 12) + (i << 7);
		else
			addr = 0x408100 + (tpid << 11) + (i << 7);
		mp10 = nvkm_rd32(device, addr + 0x10);
		status = nvkm_rd32(device, addr + 0x14);
		if (!status)
			continue;
		if (display) {
			/* read at +0x20 is presumably required to latch
			 * pc/opcode below -- value itself is unused */
			nvkm_rd32(device, addr + 0x20);
			pc = nvkm_rd32(device, addr + 0x24);
			oplow = nvkm_rd32(device, addr + 0x70);
			ophigh = nvkm_rd32(device, addr + 0x74);
			nv_error(gr, "TRAP_MP_EXEC - "
					"TP %d MP %d:", tpid, i);
			nvkm_bitfield_print(nv50_mp_exec_errors, status);
			pr_cont(" at %06x warp %d, opcode %08x %08x\n",
					pc&0xffffff, pc >> 24,
					oplow, ophigh);
		}
		/* acknowledge the trap */
		nvkm_wr32(device, addr + 0x10, mp10);
		nvkm_wr32(device, addr + 0x14, 0);
		mps++;
	}
	if (!mps && display)
		nv_error(gr, "TRAP_MP_EXEC - TP %d: "
				"No MPs claiming errors?\n", tpid);
}
508 
/* Handle a per-TP trap of the given 'type' (6 = texture, 7 = MP,
 * 8 = PROP) for every TP present (bits 0..15 of register 0x1540).
 * ustatus_old/ustatus_new give the per-TP status register base for
 * pre/post-NVA0 layouts; each handled TP is acked with 0xc0000000. */
static void
nv50_gr_tp_trap(struct nv50_gr *gr, int type, u32 ustatus_old,
		  u32 ustatus_new, int display, const char *name)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 units = nvkm_rd32(device, 0x1540);
	int tps = 0;
	int i, r;
	u32 ustatus_addr, ustatus;
	for (i = 0; i < 16; i++) {
		if (!(units & (1 << i)))
			continue;
		/* per-TP register stride differs pre/post NVA0 */
		if (nv_device(gr)->chipset < 0xa0)
			ustatus_addr = ustatus_old + (i << 12);
		else
			ustatus_addr = ustatus_new + (i << 11);
		ustatus = nvkm_rd32(device, ustatus_addr) & 0x7fffffff;
		if (!ustatus)
			continue;
		tps++;
		switch (type) {
		case 6: /* texture error... unknown for now */
			if (display) {
				nv_error(gr, "magic set %d:\n", i);
				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
					nv_error(gr, "\t0x%08x: 0x%08x\n", r,
						nvkm_rd32(device, r));
				if (ustatus) {
					nv_error(gr, "%s - TP%d:", name, i);
					nvkm_bitfield_print(nv50_tex_traps,
							       ustatus);
					pr_cont("\n");
					ustatus = 0;
				}
			}
			break;
		case 7: /* MP error */
			if (ustatus & 0x04030000) {
				nv50_gr_mp_trap(gr, i, display);
				ustatus &= ~0x04030000;
			}
			if (ustatus && display) {
				nv_error(gr, "%s - TP%d:", name, i);
				nvkm_bitfield_print(nv50_mpc_traps, ustatus);
				pr_cont("\n");
				ustatus = 0;
			}
			break;
		case 8: /* PROP error */
			if (display)
				nv50_gr_prop_trap(
						gr, ustatus_addr, ustatus, i);
			ustatus = 0;
			break;
		}
		/* anything not consumed above is unknown to us */
		if (ustatus) {
			if (display)
				nv_error(gr, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus);
		}
		/* acknowledge the trap regardless */
		nvkm_wr32(device, ustatus_addr, 0xc0000000);
	}

	if (!tps && display)
		nv_warn(gr, "%s - No TPs claiming errors?\n", name);
}
574 
575 static int
576 nv50_gr_trap_handler(struct nv50_gr *gr, u32 display,
577 		     int chid, u64 inst, struct nvkm_object *engctx)
578 {
579 	struct nvkm_device *device = gr->base.engine.subdev.device;
580 	u32 status = nvkm_rd32(device, 0x400108);
581 	u32 ustatus;
582 
583 	if (!status && display) {
584 		nv_error(gr, "TRAP: no units reporting traps?\n");
585 		return 1;
586 	}
587 
588 	/* DISPATCH: Relays commands to other units and handles NOTIFY,
589 	 * COND, QUERY. If you get a trap from it, the command is still stuck
590 	 * in DISPATCH and you need to do something about it. */
591 	if (status & 0x001) {
592 		ustatus = nvkm_rd32(device, 0x400804) & 0x7fffffff;
593 		if (!ustatus && display) {
594 			nv_error(gr, "TRAP_DISPATCH - no ustatus?\n");
595 		}
596 
597 		nvkm_wr32(device, 0x400500, 0x00000000);
598 
599 		/* Known to be triggered by screwed up NOTIFY and COND... */
600 		if (ustatus & 0x00000001) {
601 			u32 addr = nvkm_rd32(device, 0x400808);
602 			u32 subc = (addr & 0x00070000) >> 16;
603 			u32 mthd = (addr & 0x00001ffc);
604 			u32 datal = nvkm_rd32(device, 0x40080c);
605 			u32 datah = nvkm_rd32(device, 0x400810);
606 			u32 class = nvkm_rd32(device, 0x400814);
607 			u32 r848 = nvkm_rd32(device, 0x400848);
608 
609 			nv_error(gr, "TRAP DISPATCH_FAULT\n");
610 			if (display && (addr & 0x80000000)) {
611 				nv_error(gr,
612 					 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x%08x 400808 0x%08x 400848 0x%08x\n",
613 					 chid, inst,
614 					 nvkm_client_name(engctx), subc,
615 					 class, mthd, datah, datal, addr, r848);
616 			} else
617 			if (display) {
618 				nv_error(gr, "no stuck command?\n");
619 			}
620 
621 			nvkm_wr32(device, 0x400808, 0);
622 			nvkm_wr32(device, 0x4008e8, nvkm_rd32(device, 0x4008e8) & 3);
623 			nvkm_wr32(device, 0x400848, 0);
624 			ustatus &= ~0x00000001;
625 		}
626 
627 		if (ustatus & 0x00000002) {
628 			u32 addr = nvkm_rd32(device, 0x40084c);
629 			u32 subc = (addr & 0x00070000) >> 16;
630 			u32 mthd = (addr & 0x00001ffc);
631 			u32 data = nvkm_rd32(device, 0x40085c);
632 			u32 class = nvkm_rd32(device, 0x400814);
633 
634 			nv_error(gr, "TRAP DISPATCH_QUERY\n");
635 			if (display && (addr & 0x80000000)) {
636 				nv_error(gr,
637 					 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x 40084c 0x%08x\n",
638 					 chid, inst,
639 					 nvkm_client_name(engctx), subc,
640 					 class, mthd, data, addr);
641 			} else
642 			if (display) {
643 				nv_error(gr, "no stuck command?\n");
644 			}
645 
646 			nvkm_wr32(device, 0x40084c, 0);
647 			ustatus &= ~0x00000002;
648 		}
649 
650 		if (ustatus && display) {
651 			nv_error(gr, "TRAP_DISPATCH (unknown "
652 				      "0x%08x)\n", ustatus);
653 		}
654 
655 		nvkm_wr32(device, 0x400804, 0xc0000000);
656 		nvkm_wr32(device, 0x400108, 0x001);
657 		status &= ~0x001;
658 		if (!status)
659 			return 0;
660 	}
661 
662 	/* M2MF: Memory to memory copy engine. */
663 	if (status & 0x002) {
664 		u32 ustatus = nvkm_rd32(device, 0x406800) & 0x7fffffff;
665 		if (display) {
666 			nv_error(gr, "TRAP_M2MF");
667 			nvkm_bitfield_print(nv50_gr_trap_m2mf, ustatus);
668 			pr_cont("\n");
669 			nv_error(gr, "TRAP_M2MF %08x %08x %08x %08x\n",
670 				nvkm_rd32(device, 0x406804), nvkm_rd32(device, 0x406808),
671 				nvkm_rd32(device, 0x40680c), nvkm_rd32(device, 0x406810));
672 
673 		}
674 
675 		/* No sane way found yet -- just reset the bugger. */
676 		nvkm_wr32(device, 0x400040, 2);
677 		nvkm_wr32(device, 0x400040, 0);
678 		nvkm_wr32(device, 0x406800, 0xc0000000);
679 		nvkm_wr32(device, 0x400108, 0x002);
680 		status &= ~0x002;
681 	}
682 
683 	/* VFETCH: Fetches data from vertex buffers. */
684 	if (status & 0x004) {
685 		u32 ustatus = nvkm_rd32(device, 0x400c04) & 0x7fffffff;
686 		if (display) {
687 			nv_error(gr, "TRAP_VFETCH");
688 			nvkm_bitfield_print(nv50_gr_trap_vfetch, ustatus);
689 			pr_cont("\n");
690 			nv_error(gr, "TRAP_VFETCH %08x %08x %08x %08x\n",
691 				nvkm_rd32(device, 0x400c00), nvkm_rd32(device, 0x400c08),
692 				nvkm_rd32(device, 0x400c0c), nvkm_rd32(device, 0x400c10));
693 		}
694 
695 		nvkm_wr32(device, 0x400c04, 0xc0000000);
696 		nvkm_wr32(device, 0x400108, 0x004);
697 		status &= ~0x004;
698 	}
699 
700 	/* STRMOUT: DirectX streamout / OpenGL transform feedback. */
701 	if (status & 0x008) {
702 		ustatus = nvkm_rd32(device, 0x401800) & 0x7fffffff;
703 		if (display) {
704 			nv_error(gr, "TRAP_STRMOUT");
705 			nvkm_bitfield_print(nv50_gr_trap_strmout, ustatus);
706 			pr_cont("\n");
707 			nv_error(gr, "TRAP_STRMOUT %08x %08x %08x %08x\n",
708 				nvkm_rd32(device, 0x401804), nvkm_rd32(device, 0x401808),
709 				nvkm_rd32(device, 0x40180c), nvkm_rd32(device, 0x401810));
710 
711 		}
712 
713 		/* No sane way found yet -- just reset the bugger. */
714 		nvkm_wr32(device, 0x400040, 0x80);
715 		nvkm_wr32(device, 0x400040, 0);
716 		nvkm_wr32(device, 0x401800, 0xc0000000);
717 		nvkm_wr32(device, 0x400108, 0x008);
718 		status &= ~0x008;
719 	}
720 
721 	/* CCACHE: Handles code and c[] caches and fills them. */
722 	if (status & 0x010) {
723 		ustatus = nvkm_rd32(device, 0x405018) & 0x7fffffff;
724 		if (display) {
725 			nv_error(gr, "TRAP_CCACHE");
726 			nvkm_bitfield_print(nv50_gr_trap_ccache, ustatus);
727 			pr_cont("\n");
728 			nv_error(gr, "TRAP_CCACHE %08x %08x %08x %08x"
729 				     " %08x %08x %08x\n",
730 				nvkm_rd32(device, 0x405000), nvkm_rd32(device, 0x405004),
731 				nvkm_rd32(device, 0x405008), nvkm_rd32(device, 0x40500c),
732 				nvkm_rd32(device, 0x405010), nvkm_rd32(device, 0x405014),
733 				nvkm_rd32(device, 0x40501c));
734 
735 		}
736 
737 		nvkm_wr32(device, 0x405018, 0xc0000000);
738 		nvkm_wr32(device, 0x400108, 0x010);
739 		status &= ~0x010;
740 	}
741 
742 	/* Unknown, not seen yet... 0x402000 is the only trap status reg
743 	 * remaining, so try to handle it anyway. Perhaps related to that
744 	 * unknown DMA slot on tesla? */
745 	if (status & 0x20) {
746 		ustatus = nvkm_rd32(device, 0x402000) & 0x7fffffff;
747 		if (display)
748 			nv_error(gr, "TRAP_UNKC04 0x%08x\n", ustatus);
749 		nvkm_wr32(device, 0x402000, 0xc0000000);
750 		/* no status modifiction on purpose */
751 	}
752 
753 	/* TEXTURE: CUDA texturing units */
754 	if (status & 0x040) {
755 		nv50_gr_tp_trap(gr, 6, 0x408900, 0x408600, display,
756 				    "TRAP_TEXTURE");
757 		nvkm_wr32(device, 0x400108, 0x040);
758 		status &= ~0x040;
759 	}
760 
761 	/* MP: CUDA execution engines. */
762 	if (status & 0x080) {
763 		nv50_gr_tp_trap(gr, 7, 0x408314, 0x40831c, display,
764 				    "TRAP_MP");
765 		nvkm_wr32(device, 0x400108, 0x080);
766 		status &= ~0x080;
767 	}
768 
769 	/* PROP:  Handles TP-initiated uncached memory accesses:
770 	 * l[], g[], stack, 2d surfaces, render targets. */
771 	if (status & 0x100) {
772 		nv50_gr_tp_trap(gr, 8, 0x408e08, 0x408708, display,
773 				    "TRAP_PROP");
774 		nvkm_wr32(device, 0x400108, 0x100);
775 		status &= ~0x100;
776 	}
777 
778 	if (status) {
779 		if (display)
780 			nv_error(gr, "TRAP: unknown 0x%08x\n", status);
781 		nvkm_wr32(device, 0x400108, status);
782 	}
783 
784 	return 1;
785 }
786 
/* PGRAPH interrupt handler: decode the interrupt status (0x400100),
 * dispatch software methods, report DATA_ERROR and TRAP conditions,
 * then acknowledge and re-enable the engine. */
static void
nv50_gr_intr(struct nvkm_subdev *subdev)
{
	struct nv50_gr *gr = (void *)subdev;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_fifo *fifo = device->fifo;
	struct nvkm_engine *engine = nv_engine(subdev);
	struct nvkm_object *engctx;
	struct nvkm_handle *handle = NULL;
	u32 stat = nvkm_rd32(device, 0x400100);	/* pending interrupts */
	u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff;
	u32 addr = nvkm_rd32(device, 0x400704);	/* trapped addr/subc/mthd */
	u32 subc = (addr & 0x00070000) >> 16;
	u32 mthd = (addr & 0x00001ffc);
	u32 data = nvkm_rd32(device, 0x400708);
	u32 class = nvkm_rd32(device, 0x400814);
	u32 show = stat, show_bitfield = stat;
	int chid;

	/* resolve context instance to a channel id for reporting */
	engctx = nvkm_engctx_get(engine, inst);
	chid   = fifo->chid(fifo, engctx);

	/* ILLEGAL_MTHD: try to handle it as a software method first */
	if (stat & 0x00000010) {
		handle = nvkm_handle_get_class(engctx, class);
		if (handle && !nv_call(handle->object, mthd, data))
			show &= ~0x00000010;
		nvkm_handle_put(handle);
	}

	if (show & 0x00100000) {
		u32 ecode = nvkm_rd32(device, 0x400110);
		nv_error(gr, "DATA_ERROR ");
		nvkm_enum_print(nv50_data_error_names, ecode);
		pr_cont("\n");
		show_bitfield &= ~0x00100000;
	}

	if (stat & 0x00200000) {
		if (!nv50_gr_trap_handler(gr, show, chid, (u64)inst << 12,
					  engctx))
			show &= ~0x00200000;
		show_bitfield &= ~0x00200000;
	}

	/* ack all pending interrupts and re-enable PGRAPH fifo access */
	nvkm_wr32(device, 0x400100, stat);
	nvkm_wr32(device, 0x400500, 0x00010001);

	if (show) {
		show &= show_bitfield;
		if (show) {
			nv_error(gr, "%s", "");
			nvkm_bitfield_print(nv50_gr_intr_name, show);
			pr_cont("\n");
		}
		nv_error(gr,
			 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n",
			 chid, (u64)inst << 12, nvkm_client_name(engctx),
			 subc, class, mthd, data);
	}

	/* clear bit 31 of 0x400824 if set -- purpose unclear; presumably
	 * another hardware quirk workaround (confirm against hwdocs) */
	if (nvkm_rd32(device, 0x400824) & (1 << 31))
		nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31));

	nvkm_engctx_put(engctx);
}
852 
/* Engine constructor: set up interrupt routing, context class, and the
 * chipset-specific object class list. */
static int
nv50_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
	     struct nvkm_oclass *oclass, void *data, u32 size,
	     struct nvkm_object **pobject)
{
	struct nv50_gr *gr;
	int ret;

	ret = nvkm_gr_create(parent, engine, oclass, true, &gr);
	*pobject = nv_object(gr);
	if (ret)
		return ret;

	nv_subdev(gr)->unit = 0x00201000;	/* PMC interrupt/enable bits */
	nv_subdev(gr)->intr = nv50_gr_intr;
	nv_engine(gr)->cclass = &nv50_gr_cclass;

	gr->base.units = nv50_gr_units;

	/* NOTE(review): chipsets not listed below leave sclass unset --
	 * presumably unreachable for this oclass; confirm */
	switch (nv_device(gr)->chipset) {
	case 0x50:
		nv_engine(gr)->sclass = nv50_gr_sclass;
		break;
	case 0x84:
	case 0x86:
	case 0x92:
	case 0x94:
	case 0x96:
	case 0x98:
		nv_engine(gr)->sclass = g84_gr_sclass;
		break;
	case 0xa0:
	case 0xaa:
	case 0xac:
		nv_engine(gr)->sclass = gt200_gr_sclass;
		break;
	case 0xa3:
	case 0xa5:
	case 0xa8:
		nv_engine(gr)->sclass = gt215_gr_sclass;
		break;
	case 0xaf:
		nv_engine(gr)->sclass = mcp89_gr_sclass;
		break;

	}

	/* unfortunate hw bug workaround... */
	if (nv_device(gr)->chipset != 0x50 &&
	    nv_device(gr)->chipset != 0xac)
		nv_engine(gr)->tlb_flush = g84_gr_tlb_flush;

	spin_lock_init(&gr->lock);
	return 0;
}
908 
/* Engine init: enable hardware context switching, arm trap/interrupt
 * reporting for every unit and TP, upload the context program, and
 * clear the zcull state. */
static int
nv50_gr_init(struct nvkm_object *object)
{
	struct nv50_gr *gr = (void *)object;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, units, i;

	ret = nvkm_gr_init(&gr->base);
	if (ret)
		return ret;

	/* NV_PGRAPH_DEBUG_3_HW_CTX_SWITCH_ENABLED */
	nvkm_wr32(device, 0x40008c, 0x00000004);

	/* reset/enable traps and interrupts */
	nvkm_wr32(device, 0x400804, 0xc0000000);	/* DISPATCH */
	nvkm_wr32(device, 0x406800, 0xc0000000);	/* M2MF */
	nvkm_wr32(device, 0x400c04, 0xc0000000);	/* VFETCH */
	nvkm_wr32(device, 0x401800, 0xc0000000);	/* STRMOUT */
	nvkm_wr32(device, 0x405018, 0xc0000000);	/* CCACHE */
	nvkm_wr32(device, 0x402000, 0xc0000000);	/* UNKC04 */

	/* per-TP trap enables; register layout differs pre/post NVA0 */
	units = nvkm_rd32(device, 0x001540);
	for (i = 0; i < 16; i++) {
		if (!(units & (1 << i)))
			continue;

		if (nv_device(gr)->chipset < 0xa0) {
			nvkm_wr32(device, 0x408900 + (i << 12), 0xc0000000);
			nvkm_wr32(device, 0x408e08 + (i << 12), 0xc0000000);
			nvkm_wr32(device, 0x408314 + (i << 12), 0xc0000000);
		} else {
			nvkm_wr32(device, 0x408600 + (i << 11), 0xc0000000);
			nvkm_wr32(device, 0x408708 + (i << 11), 0xc0000000);
			nvkm_wr32(device, 0x40831c + (i << 11), 0xc0000000);
		}
	}

	/* clear pending and unmask all trap/interrupt sources */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
	nvkm_wr32(device, 0x400500, 0x00010001);

	/* upload context program, initialise ctxctl defaults */
	ret = nv50_grctx_init(nv_device(gr), &gr->size);
	if (ret)
		return ret;

	nvkm_wr32(device, 0x400824, 0x00000000);
	nvkm_wr32(device, 0x400828, 0x00000000);
	nvkm_wr32(device, 0x40082c, 0x00000000);
	nvkm_wr32(device, 0x400830, 0x00000000);
	nvkm_wr32(device, 0x40032c, 0x00000000);
	nvkm_wr32(device, 0x400330, 0x00000000);

	/* some unknown zcull magic */
	switch (nv_device(gr)->chipset & 0xf0) {
	case 0x50:
	case 0x80:
	case 0x90:
		nvkm_wr32(device, 0x402ca8, 0x00000800);
		break;
	case 0xa0:
	default:
		if (nv_device(gr)->chipset == 0xa0 ||
		    nv_device(gr)->chipset == 0xaa ||
		    nv_device(gr)->chipset == 0xac) {
			nvkm_wr32(device, 0x402ca8, 0x00000802);
		} else {
			nvkm_wr32(device, 0x402cc0, 0x00000000);
			nvkm_wr32(device, 0x402ca8, 0x00000002);
		}

		break;
	}

	/* zero out zcull regions */
	for (i = 0; i < 8; i++) {
		nvkm_wr32(device, 0x402c20 + (i * 0x10), 0x00000000);
		nvkm_wr32(device, 0x402c24 + (i * 0x10), 0x00000000);
		nvkm_wr32(device, 0x402c28 + (i * 0x10), 0x00000000);
		nvkm_wr32(device, 0x402c2c + (i * 0x10), 0x00000000);
	}
	return 0;
}
995 
/* Public engine class for NV50 PGRAPH; fini/dtor use the generic
 * graphics engine helpers. */
struct nvkm_oclass
nv50_gr_oclass = {
	.handle = NV_ENGINE(GR, 0x50),
	.ofuncs = &(struct nvkm_ofuncs) {
		.ctor = nv50_gr_ctor,
		.dtor = _nvkm_gr_dtor,
		.init = nv50_gr_init,
		.fini = _nvkm_gr_fini,
	},
};
1006