1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  */
24 #include "gf100.h"
25 #include "ctxgf100.h"
26 #include "fuc/os.h"
27 
28 #include <core/client.h>
29 #include <core/option.h>
30 #include <core/firmware.h>
31 #include <subdev/secboot.h>
32 #include <subdev/fb.h>
33 #include <subdev/mc.h>
34 #include <subdev/pmu.h>
35 #include <subdev/timer.h>
36 #include <engine/fifo.h>
37 
38 #include <nvif/class.h>
39 #include <nvif/cl9097.h>
40 #include <nvif/unpack.h>
41 
42 /*******************************************************************************
43  * Zero Bandwidth Clear
44  ******************************************************************************/
45 
46 static void
47 gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
48 {
49 	struct nvkm_device *device = gr->base.engine.subdev.device;
50 	if (gr->zbc_color[zbc].format) {
51 		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
52 		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
53 		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
54 		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
55 	}
56 	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
57 	nvkm_wr32(device, 0x405820, zbc);
58 	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
59 }
60 
61 static int
62 gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
63 		       const u32 ds[4], const u32 l2[4])
64 {
65 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
66 	int zbc = -ENOSPC, i;
67 
68 	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
69 		if (gr->zbc_color[i].format) {
70 			if (gr->zbc_color[i].format != format)
71 				continue;
72 			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
73 				   gr->zbc_color[i].ds)))
74 				continue;
75 			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
76 				   gr->zbc_color[i].l2))) {
77 				WARN_ON(1);
78 				return -EINVAL;
79 			}
80 			return i;
81 		} else {
82 			zbc = (zbc < 0) ? i : zbc;
83 		}
84 	}
85 
86 	if (zbc < 0)
87 		return zbc;
88 
89 	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
90 	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
91 	gr->zbc_color[zbc].format = format;
92 	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
93 	gf100_gr_zbc_clear_color(gr, zbc);
94 	return zbc;
95 }
96 
97 static void
98 gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
99 {
100 	struct nvkm_device *device = gr->base.engine.subdev.device;
101 	if (gr->zbc_depth[zbc].format)
102 		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
103 	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
104 	nvkm_wr32(device, 0x405820, zbc);
105 	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
106 }
107 
108 static int
109 gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
110 		       const u32 ds, const u32 l2)
111 {
112 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
113 	int zbc = -ENOSPC, i;
114 
115 	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
116 		if (gr->zbc_depth[i].format) {
117 			if (gr->zbc_depth[i].format != format)
118 				continue;
119 			if (gr->zbc_depth[i].ds != ds)
120 				continue;
121 			if (gr->zbc_depth[i].l2 != l2) {
122 				WARN_ON(1);
123 				return -EINVAL;
124 			}
125 			return i;
126 		} else {
127 			zbc = (zbc < 0) ? i : zbc;
128 		}
129 	}
130 
131 	if (zbc < 0)
132 		return zbc;
133 
134 	gr->zbc_depth[zbc].format = format;
135 	gr->zbc_depth[zbc].ds = ds;
136 	gr->zbc_depth[zbc].l2 = l2;
137 	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
138 	gf100_gr_zbc_clear_depth(gr, zbc);
139 	return zbc;
140 }
141 
142 /*******************************************************************************
143  * Graphics object classes
144  ******************************************************************************/
/* Convert a pointer to the embedded nvkm_object into its gf100_gr_object. */
#define gf100_gr_object(p) container_of((p), struct gf100_gr_object, object)

/*
 * Graphics class object: the base nvkm_object plus a pointer to the channel
 * context it was created under (set in gf100_gr_object_new()).
 */
struct gf100_gr_object {
	struct nvkm_object object;
	struct gf100_gr_chan *chan;
};
151 
152 static int
153 gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
154 {
155 	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
156 	union {
157 		struct fermi_a_zbc_color_v0 v0;
158 	} *args = data;
159 	int ret = -ENOSYS;
160 
161 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
162 		switch (args->v0.format) {
163 		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
164 		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
165 		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
166 		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
167 		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
168 		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
169 		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
170 		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
171 		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
172 		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
173 		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
174 		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
175 		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
176 		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
177 		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
178 		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
179 		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
180 		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
181 		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
182 			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
183 							   args->v0.ds,
184 							   args->v0.l2);
185 			if (ret >= 0) {
186 				args->v0.index = ret;
187 				return 0;
188 			}
189 			break;
190 		default:
191 			return -EINVAL;
192 		}
193 	}
194 
195 	return ret;
196 }
197 
198 static int
199 gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
200 {
201 	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
202 	union {
203 		struct fermi_a_zbc_depth_v0 v0;
204 	} *args = data;
205 	int ret = -ENOSYS;
206 
207 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
208 		switch (args->v0.format) {
209 		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
210 			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
211 							   args->v0.ds,
212 							   args->v0.l2);
213 			return (ret >= 0) ? 0 : -ENOSPC;
214 		default:
215 			return -EINVAL;
216 		}
217 	}
218 
219 	return ret;
220 }
221 
222 static int
223 gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
224 {
225 	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
226 	switch (mthd) {
227 	case FERMI_A_ZBC_COLOR:
228 		return gf100_fermi_mthd_zbc_color(object, data, size);
229 	case FERMI_A_ZBC_DEPTH:
230 		return gf100_fermi_mthd_zbc_depth(object, data, size);
231 	default:
232 		break;
233 	}
234 	return -EINVAL;
235 }
236 
/*
 * Object function table for the Fermi graphics class; only the method
 * hook is provided (gf100_fermi_mthd handles the ZBC methods).
 */
const struct nvkm_object_func
gf100_fermi = {
	.mthd = gf100_fermi_mthd,
};
241 
242 static void
243 gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
244 {
245 	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
246 	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
247 }
248 
249 static bool
250 gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
251 {
252 	switch (class & 0x00ff) {
253 	case 0x97:
254 	case 0xc0:
255 		switch (mthd) {
256 		case 0x1528:
257 			gf100_gr_mthd_set_shader_exceptions(device, data);
258 			return true;
259 		default:
260 			break;
261 		}
262 		break;
263 	default:
264 		break;
265 	}
266 	return false;
267 }
268 
/*
 * Empty object function table; gf100_gr_object_new() falls back to it when
 * the class does not supply its own functions.
 */
static const struct nvkm_object_func
gf100_gr_object_func = {
};
272 
273 static int
274 gf100_gr_object_new(const struct nvkm_oclass *oclass, void *data, u32 size,
275 		    struct nvkm_object **pobject)
276 {
277 	struct gf100_gr_chan *chan = gf100_gr_chan(oclass->parent);
278 	struct gf100_gr_object *object;
279 
280 	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
281 		return -ENOMEM;
282 	*pobject = &object->object;
283 
284 	nvkm_object_ctor(oclass->base.func ? oclass->base.func :
285 			 &gf100_gr_object_func, oclass, &object->object);
286 	object->chan = chan;
287 	return 0;
288 }
289 
290 static int
291 gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
292 {
293 	struct gf100_gr *gr = gf100_gr(base);
294 	int c = 0;
295 
296 	while (gr->func->sclass[c].oclass) {
297 		if (c++ == index) {
298 			*sclass = gr->func->sclass[index];
299 			sclass->ctor = gf100_gr_object_new;
300 			return index;
301 		}
302 	}
303 
304 	return c;
305 }
306 
307 /*******************************************************************************
308  * PGRAPH context
309  ******************************************************************************/
310 
311 static int
312 gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
313 		   int align, struct nvkm_gpuobj **pgpuobj)
314 {
315 	struct gf100_gr_chan *chan = gf100_gr_chan(object);
316 	struct gf100_gr *gr = chan->gr;
317 	int ret, i;
318 
319 	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
320 			      align, false, parent, pgpuobj);
321 	if (ret)
322 		return ret;
323 
324 	nvkm_kmap(*pgpuobj);
325 	for (i = 0; i < gr->size; i += 4)
326 		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);
327 
328 	if (!gr->firmware) {
329 		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
330 		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
331 	} else {
332 		nvkm_wo32(*pgpuobj, 0xf4, 0);
333 		nvkm_wo32(*pgpuobj, 0xf8, 0);
334 		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
335 		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
336 		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
337 		nvkm_wo32(*pgpuobj, 0x1c, 1);
338 		nvkm_wo32(*pgpuobj, 0x20, 0);
339 		nvkm_wo32(*pgpuobj, 0x28, 0);
340 		nvkm_wo32(*pgpuobj, 0x2c, 0);
341 	}
342 	nvkm_done(*pgpuobj);
343 	return 0;
344 }
345 
346 static void *
347 gf100_gr_chan_dtor(struct nvkm_object *object)
348 {
349 	struct gf100_gr_chan *chan = gf100_gr_chan(object);
350 	int i;
351 
352 	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
353 		if (chan->data[i].vma.node) {
354 			nvkm_vm_unmap(&chan->data[i].vma);
355 			nvkm_vm_put(&chan->data[i].vma);
356 		}
357 		nvkm_memory_del(&chan->data[i].mem);
358 	}
359 
360 	if (chan->mmio_vma.node) {
361 		nvkm_vm_unmap(&chan->mmio_vma);
362 		nvkm_vm_put(&chan->mmio_vma);
363 	}
364 	nvkm_memory_del(&chan->mmio);
365 	return chan;
366 }
367 
/*
 * Object function table for channel contexts: destructor and context
 * binding hooks.
 */
static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};
373 
374 static int
375 gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
376 		  const struct nvkm_oclass *oclass,
377 		  struct nvkm_object **pobject)
378 {
379 	struct gf100_gr *gr = gf100_gr(base);
380 	struct gf100_gr_data *data = gr->mmio_data;
381 	struct gf100_gr_mmio *mmio = gr->mmio_list;
382 	struct gf100_gr_chan *chan;
383 	struct nvkm_device *device = gr->base.engine.subdev.device;
384 	int ret, i;
385 
386 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
387 		return -ENOMEM;
388 	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
389 	chan->gr = gr;
390 	*pobject = &chan->object;
391 
392 	/* allocate memory for a "mmio list" buffer that's used by the HUB
393 	 * fuc to modify some per-context register settings on first load
394 	 * of the context.
395 	 */
396 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
397 			      false, &chan->mmio);
398 	if (ret)
399 		return ret;
400 
401 	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
402 			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
403 	if (ret)
404 		return ret;
405 
406 	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);
407 
408 	/* allocate buffers referenced by mmio list */
409 	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
410 		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
411 				      data->size, data->align, false,
412 				      &chan->data[i].mem);
413 		if (ret)
414 			return ret;
415 
416 		ret = nvkm_vm_get(fifoch->vm,
417 				  nvkm_memory_size(chan->data[i].mem), 12,
418 				  data->access, &chan->data[i].vma);
419 		if (ret)
420 			return ret;
421 
422 		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
423 		data++;
424 	}
425 
426 	/* finally, fill in the mmio list and point the context at it */
427 	nvkm_kmap(chan->mmio);
428 	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
429 		u32 addr = mmio->addr;
430 		u32 data = mmio->data;
431 
432 		if (mmio->buffer >= 0) {
433 			u64 info = chan->data[mmio->buffer].vma.offset;
434 			data |= info >> mmio->shift;
435 		}
436 
437 		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
438 		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
439 		mmio++;
440 	}
441 	nvkm_done(chan->mmio);
442 	return 0;
443 }
444 
445 /*******************************************************************************
446  * PGRAPH register lists
447  ******************************************************************************/
448 
/*
 * Register initialisation lists.
 *
 * Each list is an array of struct gf100_gr_init terminated by an empty
 * entry.  Walkers such as gf100_gr_mmio() read the fields addr, count,
 * pitch and data from each entry and write `data` to `count` registers
 * starting at `addr`, `pitch` bytes apart.
 * NOTE(review): the positional initialisers below are presumably in the
 * order { addr, count, pitch, data } - confirm against the definition of
 * struct gf100_gr_init.
 */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814,   3, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c,   1, 0x04, 0x00000000 },
	{}
};

/* Unlike its neighbours, this list has internal linkage (static). */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};
668 
/*
 * The GF100 MMIO register lists gathered into one pack, terminated by an
 * empty entry.  The list order here is the order in which a walker using
 * pack_for_each_init() visits them.
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};
700 
701 /*******************************************************************************
702  * PGRAPH engine/subdev functions
703  ******************************************************************************/
704 
705 static bool
706 gf100_gr_chsw_load(struct nvkm_gr *base)
707 {
708 	struct gf100_gr *gr = gf100_gr(base);
709 	if (!gr->firmware) {
710 		u32 trace = nvkm_rd32(gr->base.engine.subdev.device, 0x40981c);
711 		if (trace & 0x00000040)
712 			return true;
713 	} else {
714 		u32 mthd = nvkm_rd32(gr->base.engine.subdev.device, 0x409808);
715 		if (mthd & 0x00080000)
716 			return true;
717 	}
718 	return false;
719 }
720 
721 int
722 gf100_gr_rops(struct gf100_gr *gr)
723 {
724 	struct nvkm_device *device = gr->base.engine.subdev.device;
725 	return (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
726 }
727 
728 void
729 gf100_gr_zbc_init(struct gf100_gr *gr)
730 {
731 	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
732 			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
733 	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
734 			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
735 	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
736 			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
737 	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
738 			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
739 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
740 	int index;
741 
742 	if (!gr->zbc_color[0].format) {
743 		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
744 		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
745 		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
746 		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
747 		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
748 		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
749 	}
750 
751 	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
752 		gf100_gr_zbc_clear_color(gr, index);
753 	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
754 		gf100_gr_zbc_clear_depth(gr, index);
755 }
756 
757 /**
758  * Wait until GR goes idle. GR is considered idle if it is disabled by the
759  * MC (0x200) register, or GR is not busy and a context switch is not in
760  * progress.
761  */
762 int
763 gf100_gr_wait_idle(struct gf100_gr *gr)
764 {
765 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
766 	struct nvkm_device *device = subdev->device;
767 	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
768 	bool gr_enabled, ctxsw_active, gr_busy;
769 
770 	do {
771 		/*
772 		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
773 		 * up-to-date
774 		 */
775 		nvkm_rd32(device, 0x400700);
776 
777 		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
778 		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
779 		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
780 
781 		if (!gr_enabled || (!gr_busy && !ctxsw_active))
782 			return 0;
783 	} while (time_before(jiffies, end_jiffies));
784 
785 	nvkm_error(subdev,
786 		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
787 		   gr_enabled, ctxsw_active, gr_busy);
788 	return -EAGAIN;
789 }
790 
791 void
792 gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
793 {
794 	struct nvkm_device *device = gr->base.engine.subdev.device;
795 	const struct gf100_gr_pack *pack;
796 	const struct gf100_gr_init *init;
797 
798 	pack_for_each_init(init, pack, p) {
799 		u32 next = init->addr + init->count * init->pitch;
800 		u32 addr = init->addr;
801 		while (addr < next) {
802 			nvkm_wr32(device, addr, init->data);
803 			addr += init->pitch;
804 		}
805 	}
806 }
807 
808 void
809 gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
810 {
811 	struct nvkm_device *device = gr->base.engine.subdev.device;
812 	const struct gf100_gr_pack *pack;
813 	const struct gf100_gr_init *init;
814 	u32 data = 0;
815 
816 	nvkm_wr32(device, 0x400208, 0x80000000);
817 
818 	pack_for_each_init(init, pack, p) {
819 		u32 next = init->addr + init->count * init->pitch;
820 		u32 addr = init->addr;
821 
822 		if ((pack == p && init == p->init) || data != init->data) {
823 			nvkm_wr32(device, 0x400204, init->data);
824 			data = init->data;
825 		}
826 
827 		while (addr < next) {
828 			nvkm_wr32(device, 0x400200, addr);
829 			/**
830 			 * Wait for GR to go idle after submitting a
831 			 * GO_IDLE bundle
832 			 */
833 			if ((addr & 0xffff) == 0xe100)
834 				gf100_gr_wait_idle(gr);
835 			nvkm_msec(device, 2000,
836 				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
837 					break;
838 			);
839 			addr += init->pitch;
840 		}
841 	}
842 
843 	nvkm_wr32(device, 0x400208, 0x00000000);
844 }
845 
846 void
847 gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
848 {
849 	struct nvkm_device *device = gr->base.engine.subdev.device;
850 	const struct gf100_gr_pack *pack;
851 	const struct gf100_gr_init *init;
852 	u32 data = 0;
853 
854 	pack_for_each_init(init, pack, p) {
855 		u32 ctrl = 0x80000000 | pack->type;
856 		u32 next = init->addr + init->count * init->pitch;
857 		u32 addr = init->addr;
858 
859 		if ((pack == p && init == p->init) || data != init->data) {
860 			nvkm_wr32(device, 0x40448c, init->data);
861 			data = init->data;
862 		}
863 
864 		while (addr < next) {
865 			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
866 			addr += init->pitch;
867 		}
868 	}
869 }
870 
871 u64
872 gf100_gr_units(struct nvkm_gr *base)
873 {
874 	struct gf100_gr *gr = gf100_gr(base);
875 	u64 cfg;
876 
877 	cfg  = (u32)gr->gpc_nr;
878 	cfg |= (u32)gr->tpc_total << 8;
879 	cfg |= (u64)gr->rop_nr << 32;
880 
881 	return cfg;
882 }
883 
/*
 * Bit-to-name tables for decoding error/trap status words into readable
 * strings; each table ends with an empty entry.  gf100_gpc_rop_error is
 * consumed by gf100_gr_trap_gpc_rop() through nvkm_snprintbf().
 * NOTE(review): the remaining tables are presumably used the same way by
 * trap handlers elsewhere in this file - confirm.
 */
static const struct nvkm_bitfield gf100_dispatch_error[] = {
	{ 0x00000001, "INJECTED_BUNDLE_ERROR" },
	{ 0x00000002, "CLASS_SUBCH_MISMATCH" },
	{ 0x00000004, "SUBCHSW_DURING_NOTIFY" },
	{}
};

static const struct nvkm_bitfield gf100_m2mf_error[] = {
	{ 0x00000001, "PUSH_TOO_MUCH_DATA" },
	{ 0x00000002, "PUSH_NOT_ENOUGH_DATA" },
	{}
};

static const struct nvkm_bitfield gf100_unk6_error[] = {
	{ 0x00000001, "TEMP_TOO_SMALL" },
	{}
};

static const struct nvkm_bitfield gf100_ccache_error[] = {
	{ 0x00000001, "INTR" },
	{ 0x00000002, "LDCONST_OOB" },
	{}
};

static const struct nvkm_bitfield gf100_macro_error[] = {
	{ 0x00000001, "TOO_FEW_PARAMS" },
	{ 0x00000002, "TOO_MANY_PARAMS" },
	{ 0x00000004, "ILLEGAL_OPCODE" },
	{ 0x00000008, "DOUBLE_BRANCH" },
	{ 0x00000010, "WATCHDOG" },
	{}
};

static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000040, "CTA_RESUME" },
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x00800000, "CTA_THREAD_DIMENSION_ZERO" },
	{ 0x01000000, "MEMORY_WINDOW_OVERLAP" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

/* Decoded from GPC register 0x0420 in gf100_gr_trap_gpc_rop(). */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};
944 
945 static void
946 gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
947 {
948 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
949 	struct nvkm_device *device = subdev->device;
950 	char error[128];
951 	u32 trap[4];
952 
953 	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
954 	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
955 	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
956 	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
957 
958 	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
959 
960 	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
961 			   "format = %x, storage type = %x\n",
962 		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
963 		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
964 	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
965 }
966 
/*
 * Value-to-name table for the MP warp error code; looked up with
 * nvkm_enum_find() in gf100_gr_trap_mp() using the low 16 bits of TPC
 * register 0x648.  Terminated by an empty entry.
 */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x01, "STACK_ERROR" },
	{ 0x02, "API_STACK_ERROR" },
	{ 0x03, "RET_EMPTY_STACK_ERROR" },
	{ 0x04, "PC_WRAP" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x06, "PC_OVERFLOW" },
	{ 0x07, "MISALIGNED_IMMC_ADDR" },
	{ 0x08, "MISALIGNED_REG" },
	{ 0x09, "ILLEGAL_INSTR_ENCODING" },
	{ 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
	{ 0x0b, "ILLEGAL_INSTR_PARAM" },
	{ 0x0c, "INVALID_CONST_ADDR" },
	{ 0x0d, "OOR_REG" },
	{ 0x0e, "OOR_ADDR" },
	{ 0x0f, "MISALIGNED_ADDR" },
	{ 0x10, "INVALID_ADDR_SPACE" },
	{ 0x11, "ILLEGAL_INSTR_PARAM2" },
	{ 0x12, "INVALID_CONST_ADDR_LDC" },
	{ 0x13, "GEOMETRY_SM_ERROR" },
	{ 0x14, "DIVERGENT" },
	{ 0x15, "WARP_EXIT" },
	{}
};

/*
 * Bit-to-name table for the MP global error word (TPC register 0x650);
 * decoded with nvkm_snprintbf() in gf100_gr_trap_mp().  Terminated by an
 * empty entry.
 */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000001, "SM_TO_SM_FAULT" },
	{ 0x00000002, "L1_ERROR" },
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
	{ 0x00000010, "BPT_INT" },
	{ 0x00000020, "BPT_PAUSE" },
	{ 0x00000040, "SINGLE_STEP_COMPLETE" },
	{ 0x20000000, "ECC_SEC_ERROR" },
	{ 0x40000000, "ECC_DED_ERROR" },
	{ 0x80000000, "TIMEOUT" },
	{}
};
1005 
1006 static void
1007 gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
1008 {
1009 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1010 	struct nvkm_device *device = subdev->device;
1011 	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
1012 	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
1013 	const struct nvkm_enum *warp;
1014 	char glob[128];
1015 
1016 	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
1017 	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);
1018 
1019 	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
1020 			   "global %08x [%s] warp %04x [%s]\n",
1021 		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
1022 
1023 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
1024 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
1025 }
1026 
1027 static void
1028 gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
1029 {
1030 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1031 	struct nvkm_device *device = subdev->device;
1032 	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
1033 
1034 	if (stat & 0x00000001) {
1035 		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
1036 		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
1037 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
1038 		stat &= ~0x00000001;
1039 	}
1040 
1041 	if (stat & 0x00000002) {
1042 		gf100_gr_trap_mp(gr, gpc, tpc);
1043 		stat &= ~0x00000002;
1044 	}
1045 
1046 	if (stat & 0x00000004) {
1047 		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
1048 		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
1049 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
1050 		stat &= ~0x00000004;
1051 	}
1052 
1053 	if (stat & 0x00000008) {
1054 		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
1055 		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
1056 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
1057 		stat &= ~0x00000008;
1058 	}
1059 
1060 	if (stat & 0x00000010) {
1061 		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0430));
1062 		nvkm_error(subdev, "GPC%d/TPC%d/MPC: %08x\n", gpc, tpc, trap);
1063 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0430), 0xc0000000);
1064 		stat &= ~0x00000010;
1065 	}
1066 
1067 	if (stat) {
1068 		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
1069 	}
1070 }
1071 
1072 static void
1073 gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
1074 {
1075 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1076 	struct nvkm_device *device = subdev->device;
1077 	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
1078 	int tpc;
1079 
1080 	if (stat & 0x00000001) {
1081 		gf100_gr_trap_gpc_rop(gr, gpc);
1082 		stat &= ~0x00000001;
1083 	}
1084 
1085 	if (stat & 0x00000002) {
1086 		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
1087 		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
1088 		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
1089 		stat &= ~0x00000002;
1090 	}
1091 
1092 	if (stat & 0x00000004) {
1093 		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
1094 		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
1095 		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
1096 		stat &= ~0x00000004;
1097 	}
1098 
1099 	if (stat & 0x00000008) {
1100 		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
1101 		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
1102 		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
1103 		stat &= ~0x00000009;
1104 	}
1105 
1106 	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1107 		u32 mask = 0x00010000 << tpc;
1108 		if (stat & mask) {
1109 			gf100_gr_trap_tpc(gr, gpc, tpc);
1110 			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
1111 			stat &= ~mask;
1112 		}
1113 	}
1114 
1115 	if (stat) {
1116 		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1117 	}
1118 }
1119 
/*
 * Decode, log and acknowledge every source flagged in the trap status
 * register (0x400108).
 *
 * @gr: graphics engine instance
 *
 * Each handled source is acknowledged by writing its bit back to 0x400108
 * and is cleared from the local copy of the status.  Whatever is left at
 * the end is logged as unhandled and acknowledged with a single write.
 */
static void
gf100_gr_trap_intr(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
	u32 trap = nvkm_rd32(device, 0x400108);
	int rop, gpc;

	/* bit 0: DISPATCH, status in 0x404000 */
	if (trap & 0x00000001) {
		u32 stat = nvkm_rd32(device, 0x404000);

		nvkm_snprintbf(error, sizeof(error), gf100_dispatch_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "DISPATCH %08x [%s]\n", stat, error);
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
		trap &= ~0x00000001;
	}

	/* bit 1: M2MF, status in 0x404600 */
	if (trap & 0x00000002) {
		u32 stat = nvkm_rd32(device, 0x404600);

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "M2MF %08x [%s]\n", stat, error);

		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
		trap &= ~0x00000002;
	}

	/* bit 3: CCACHE, status in 0x408030 */
	if (trap & 0x00000008) {
		u32 stat = nvkm_rd32(device, 0x408030);

		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
		trap &= ~0x00000008;
	}

	/*
	 * bit 4: SHADER, status in 0x405840; the low 24 bits are logged as
	 * "sph" and bits 24-29 as "stage"
	 */
	if (trap & 0x00000010) {
		u32 stat = nvkm_rd32(device, 0x405840);
		nvkm_error(subdev, "SHADER %08x, sph: 0x%06x, stage: 0x%02x\n",
			   stat, stat & 0xffffff, (stat >> 24) & 0x3f);
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
		trap &= ~0x00000010;
	}

	/* bit 6: logged as UNK6, status in 0x40601c */
	if (trap & 0x00000040) {
		u32 stat = nvkm_rd32(device, 0x40601c);

		nvkm_snprintbf(error, sizeof(error), gf100_unk6_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "UNK6 %08x [%s]\n", stat, error);

		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
		trap &= ~0x00000040;
	}

	/*
	 * bit 7: MACRO, status in 0x404490 with pc (0x404494) and op
	 * (0x40449c).  Note the decode mask is 0x1fffffff here, one bit
	 * narrower than for the other units.
	 */
	if (trap & 0x00000080) {
		u32 stat = nvkm_rd32(device, 0x404490);
		u32 pc = nvkm_rd32(device, 0x404494);
		u32 op = nvkm_rd32(device, 0x40449c);

		nvkm_snprintbf(error, sizeof(error), gf100_macro_error,
			       stat & 0x1fffffff);
		/* pc is flagged "(invalid)" when bit 28 of the pc word is clear */
		nvkm_error(subdev, "MACRO %08x [%s], pc: 0x%03x%s, op: 0x%08x\n",
			   stat, error, pc & 0x7ff,
			   (pc & 0x10000000) ? "" : " (invalid)",
			   op);

		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
		trap &= ~0x00000080;
	}

	/*
	 * bit 8: SKED, status in 0x407020.  The value is masked before it is
	 * logged, and the unit register is only written (with 0x40000000)
	 * when at least one error bit was set.
	 */
	if (trap & 0x00000100) {
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;

		nvkm_snprintbf(error, sizeof(error), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, error);

		if (stat)
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
		trap &= ~0x00000100;
	}

	/*
	 * bit 24: GPC traps.  0x400118 carries one pending bit per GPC; each
	 * set bit is serviced by gf100_gr_trap_gpc() and then written back.
	 * The loop stops early once no pending bits remain.
	 */
	if (trap & 0x01000000) {
		u32 stat = nvkm_rd32(device, 0x400118);
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
				gf100_gr_trap_gpc(gr, gpc);
				nvkm_wr32(device, 0x400118, mask);
				stat &= ~mask;
			}
		}
		nvkm_wr32(device, 0x400108, 0x01000000);
		trap &= ~0x01000000;
	}

	/*
	 * bit 25: ROP traps.  There is no per-ROP pending mask here, so the
	 * two status words of every ROP are logged and written with
	 * 0xc0000000.
	 */
	if (trap & 0x02000000) {
		for (rop = 0; rop < gr->rop_nr; rop++) {
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
			nvkm_error(subdev, "ROP%d %08x %08x\n",
				 rop, statz, statc);
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		}
		nvkm_wr32(device, 0x400108, 0x02000000);
		trap &= ~0x02000000;
	}

	/* sources without a decoder: report them and acknowledge in one go */
	if (trap) {
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
		nvkm_wr32(device, 0x400108, trap);
	}
}
1245 
1246 static void
1247 gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1248 {
1249 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1250 	struct nvkm_device *device = subdev->device;
1251 	nvkm_error(subdev, "%06x - done %08x\n", base,
1252 		   nvkm_rd32(device, base + 0x400));
1253 	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
1254 		   nvkm_rd32(device, base + 0x800),
1255 		   nvkm_rd32(device, base + 0x804),
1256 		   nvkm_rd32(device, base + 0x808),
1257 		   nvkm_rd32(device, base + 0x80c));
1258 	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
1259 		   nvkm_rd32(device, base + 0x810),
1260 		   nvkm_rd32(device, base + 0x814),
1261 		   nvkm_rd32(device, base + 0x818),
1262 		   nvkm_rd32(device, base + 0x81c));
1263 }
1264 
1265 void
1266 gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1267 {
1268 	struct nvkm_device *device = gr->base.engine.subdev.device;
1269 	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1270 	u32 gpc;
1271 
1272 	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1273 	for (gpc = 0; gpc < gpcnr; gpc++)
1274 		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1275 }
1276 
1277 static void
1278 gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1279 {
1280 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1281 	struct nvkm_device *device = subdev->device;
1282 	u32 stat = nvkm_rd32(device, 0x409c18);
1283 
1284 	if (!gr->firmware && (stat & 0x00000001)) {
1285 		u32 code = nvkm_rd32(device, 0x409814);
1286 		if (code == E_BAD_FWMTHD) {
1287 			u32 class = nvkm_rd32(device, 0x409808);
1288 			u32  addr = nvkm_rd32(device, 0x40980c);
1289 			u32  subc = (addr & 0x00070000) >> 16;
1290 			u32  mthd = (addr & 0x00003ffc);
1291 			u32  data = nvkm_rd32(device, 0x409810);
1292 
1293 			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
1294 					   "mthd %04x data %08x\n",
1295 				   subc, class, mthd, data);
1296 		} else {
1297 			nvkm_error(subdev, "FECS ucode error %d\n", code);
1298 		}
1299 		nvkm_wr32(device, 0x409c20, 0x00000001);
1300 		stat &= ~0x00000001;
1301 	}
1302 
1303 	if (!gr->firmware && (stat & 0x00080000)) {
1304 		nvkm_error(subdev, "FECS watchdog timeout\n");
1305 		gf100_gr_ctxctl_debug(gr);
1306 		nvkm_wr32(device, 0x409c20, 0x00080000);
1307 		stat &= ~0x00080000;
1308 	}
1309 
1310 	if (stat) {
1311 		nvkm_error(subdev, "FECS %08x\n", stat);
1312 		gf100_gr_ctxctl_debug(gr);
1313 		nvkm_wr32(device, 0x409c20, stat);
1314 	}
1315 }
1316 
1317 static void
1318 gf100_gr_intr(struct nvkm_gr *base)
1319 {
1320 	struct gf100_gr *gr = gf100_gr(base);
1321 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1322 	struct nvkm_device *device = subdev->device;
1323 	struct nvkm_fifo_chan *chan;
1324 	unsigned long flags;
1325 	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
1326 	u32 stat = nvkm_rd32(device, 0x400100);
1327 	u32 addr = nvkm_rd32(device, 0x400704);
1328 	u32 mthd = (addr & 0x00003ffc);
1329 	u32 subc = (addr & 0x00070000) >> 16;
1330 	u32 data = nvkm_rd32(device, 0x400708);
1331 	u32 code = nvkm_rd32(device, 0x400110);
1332 	u32 class;
1333 	const char *name = "unknown";
1334 	int chid = -1;
1335 
1336 	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1337 	if (chan) {
1338 		name = chan->object.client->name;
1339 		chid = chan->chid;
1340 	}
1341 
1342 	if (device->card_type < NV_E0 || subc < 4)
1343 		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1344 	else
1345 		class = 0x0000;
1346 
1347 	if (stat & 0x00000001) {
1348 		/*
1349 		 * notifier interrupt, only needed for cyclestats
1350 		 * can be safely ignored
1351 		 */
1352 		nvkm_wr32(device, 0x400100, 0x00000001);
1353 		stat &= ~0x00000001;
1354 	}
1355 
1356 	if (stat & 0x00000010) {
1357 		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1358 			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
1359 				   "subc %d class %04x mthd %04x data %08x\n",
1360 				   chid, inst << 12, name, subc,
1361 				   class, mthd, data);
1362 		}
1363 		nvkm_wr32(device, 0x400100, 0x00000010);
1364 		stat &= ~0x00000010;
1365 	}
1366 
1367 	if (stat & 0x00000020) {
1368 		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
1369 			   "subc %d class %04x mthd %04x data %08x\n",
1370 			   chid, inst << 12, name, subc, class, mthd, data);
1371 		nvkm_wr32(device, 0x400100, 0x00000020);
1372 		stat &= ~0x00000020;
1373 	}
1374 
1375 	if (stat & 0x00100000) {
1376 		const struct nvkm_enum *en =
1377 			nvkm_enum_find(nv50_data_error_names, code);
1378 		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
1379 				   "subc %d class %04x mthd %04x data %08x\n",
1380 			   code, en ? en->name : "", chid, inst << 12,
1381 			   name, subc, class, mthd, data);
1382 		nvkm_wr32(device, 0x400100, 0x00100000);
1383 		stat &= ~0x00100000;
1384 	}
1385 
1386 	if (stat & 0x00200000) {
1387 		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1388 			   chid, inst << 12, name);
1389 		gf100_gr_trap_intr(gr);
1390 		nvkm_wr32(device, 0x400100, 0x00200000);
1391 		stat &= ~0x00200000;
1392 	}
1393 
1394 	if (stat & 0x00080000) {
1395 		gf100_gr_ctxctl_isr(gr);
1396 		nvkm_wr32(device, 0x400100, 0x00080000);
1397 		stat &= ~0x00080000;
1398 	}
1399 
1400 	if (stat) {
1401 		nvkm_error(subdev, "intr %08x\n", stat);
1402 		nvkm_wr32(device, 0x400100, stat);
1403 	}
1404 
1405 	nvkm_wr32(device, 0x400500, 0x00010001);
1406 	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1407 }
1408 
/*
 * Load an externally supplied firmware image into a falcon.
 *
 * @falcon: falcon to load
 * @code:   code segment, loaded into IMEM at offset 0
 * @data:   data segment, loaded into DMEM at offset 0
 *
 * The data segment is written before the code segment.
 */
static void
gf100_gr_init_fw(struct nvkm_falcon *falcon,
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
{
	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
}
1416 
/*
 * Encode a register list and upload it to a falcon through its
 * 0x01c0/0x01c4 register pair.
 *
 * @gr:       graphics engine instance
 * @pack:     register list to encode
 * @falcon:   MMIO base of the target falcon
 * @starstar: offset added to the control words written to falcon + 0x01c0
 * @base:     value subtracted from every register address in @pack
 *
 * Runs of registers spaced exactly 4 bytes apart are merged, up to 32
 * registers per run, into one word: (count - 1) << 26 | first offset.
 *
 * NOTE(review): 0x01c0 looks like an address/control port and 0x01c4 like
 * the matching data port of the falcon's data segment - confirm against
 * the falcon documentation.
 */
static void
gf100_gr_init_csdata(struct gf100_gr *gr,
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
	/* prev starts at ~0 so that "prev + 4" cannot match the first offset 0 */
	u32 addr = ~0, prev = ~0, xfer = 0;
	u32 star, temp;

	/* read two consecutive words and continue from the larger of the two */
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
	if (temp > star)
		star = temp;
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);

	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			/* start a new run on a gap or once 32 entries are queued */
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					/* flush the finished run; star tracks 4 bytes per word */
					u32 data = ((--xfer << 26) | addr);
					nvkm_wr32(device, falcon + 0x01c4, data);
					star += 4;
				}
				addr = head;
				xfer = 0;
			}
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
		}
	}

	/*
	 * Flush the last run, then write star + 4 (covering the final word)
	 * at offset starstar + 4.
	 * NOTE(review): with an empty list xfer is still 0 here and --xfer
	 * wraps - presumably callers never pass an empty list; confirm.
	 */
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
}
1458 
1459 /* Initialize context from an external (secure or not) firmware */
1460 static int
1461 gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
1462 {
1463 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1464 	struct nvkm_device *device = subdev->device;
1465 	struct nvkm_secboot *sb = device->secboot;
1466 	int ret = 0;
1467 
1468 	/* load fuc microcode */
1469 	nvkm_mc_unk260(device, 0);
1470 
1471 	/* securely-managed falcons must be reset using secure boot */
1472 	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
1473 		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
1474 	else
1475 		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
1476 	if (ret)
1477 		return ret;
1478 
1479 	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
1480 		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
1481 	else
1482 		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
1483 	if (ret)
1484 		return ret;
1485 
1486 	nvkm_mc_unk260(device, 1);
1487 
1488 	/* start both of them running */
1489 	nvkm_wr32(device, 0x409840, 0xffffffff);
1490 	nvkm_wr32(device, 0x41a10c, 0x00000000);
1491 	nvkm_wr32(device, 0x40910c, 0x00000000);
1492 
1493 	nvkm_falcon_start(gr->gpccs);
1494 	nvkm_falcon_start(gr->fecs);
1495 
1496 	if (nvkm_msec(device, 2000,
1497 		if (nvkm_rd32(device, 0x409800) & 0x00000001)
1498 			break;
1499 	) < 0)
1500 		return -EBUSY;
1501 
1502 	nvkm_wr32(device, 0x409840, 0xffffffff);
1503 	nvkm_wr32(device, 0x409500, 0x7fffffff);
1504 	nvkm_wr32(device, 0x409504, 0x00000021);
1505 
1506 	nvkm_wr32(device, 0x409840, 0xffffffff);
1507 	nvkm_wr32(device, 0x409500, 0x00000000);
1508 	nvkm_wr32(device, 0x409504, 0x00000010);
1509 	if (nvkm_msec(device, 2000,
1510 		if ((gr->size = nvkm_rd32(device, 0x409800)))
1511 			break;
1512 	) < 0)
1513 		return -EBUSY;
1514 
1515 	nvkm_wr32(device, 0x409840, 0xffffffff);
1516 	nvkm_wr32(device, 0x409500, 0x00000000);
1517 	nvkm_wr32(device, 0x409504, 0x00000016);
1518 	if (nvkm_msec(device, 2000,
1519 		if (nvkm_rd32(device, 0x409800))
1520 			break;
1521 	) < 0)
1522 		return -EBUSY;
1523 
1524 	nvkm_wr32(device, 0x409840, 0xffffffff);
1525 	nvkm_wr32(device, 0x409500, 0x00000000);
1526 	nvkm_wr32(device, 0x409504, 0x00000025);
1527 	if (nvkm_msec(device, 2000,
1528 		if (nvkm_rd32(device, 0x409800))
1529 			break;
1530 	) < 0)
1531 		return -EBUSY;
1532 
1533 	if (device->chipset >= 0xe0) {
1534 		nvkm_wr32(device, 0x409800, 0x00000000);
1535 		nvkm_wr32(device, 0x409500, 0x00000001);
1536 		nvkm_wr32(device, 0x409504, 0x00000030);
1537 		if (nvkm_msec(device, 2000,
1538 			if (nvkm_rd32(device, 0x409800))
1539 				break;
1540 		) < 0)
1541 			return -EBUSY;
1542 
1543 		nvkm_wr32(device, 0x409810, 0xb00095c8);
1544 		nvkm_wr32(device, 0x409800, 0x00000000);
1545 		nvkm_wr32(device, 0x409500, 0x00000001);
1546 		nvkm_wr32(device, 0x409504, 0x00000031);
1547 		if (nvkm_msec(device, 2000,
1548 			if (nvkm_rd32(device, 0x409800))
1549 				break;
1550 		) < 0)
1551 			return -EBUSY;
1552 
1553 		nvkm_wr32(device, 0x409810, 0x00080420);
1554 		nvkm_wr32(device, 0x409800, 0x00000000);
1555 		nvkm_wr32(device, 0x409500, 0x00000001);
1556 		nvkm_wr32(device, 0x409504, 0x00000032);
1557 		if (nvkm_msec(device, 2000,
1558 			if (nvkm_rd32(device, 0x409800))
1559 				break;
1560 		) < 0)
1561 			return -EBUSY;
1562 
1563 		nvkm_wr32(device, 0x409614, 0x00000070);
1564 		nvkm_wr32(device, 0x409614, 0x00000770);
1565 		nvkm_wr32(device, 0x40802c, 0x00000001);
1566 	}
1567 
1568 	if (gr->data == NULL) {
1569 		int ret = gf100_grctx_generate(gr);
1570 		if (ret) {
1571 			nvkm_error(subdev, "failed to construct context\n");
1572 			return ret;
1573 		}
1574 	}
1575 
1576 	return 0;
1577 }
1578 
1579 static int
1580 gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
1581 {
1582 	const struct gf100_grctx_func *grctx = gr->func->grctx;
1583 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1584 	struct nvkm_device *device = subdev->device;
1585 
1586 	if (!gr->func->fecs.ucode) {
1587 		return -ENOSYS;
1588 	}
1589 
1590 	/* load HUB microcode */
1591 	nvkm_mc_unk260(device, 0);
1592 	nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0,
1593 			      gr->func->fecs.ucode->data.size, 0);
1594 	nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0,
1595 			      gr->func->fecs.ucode->code.size, 0, 0, false);
1596 
1597 	/* load GPC microcode */
1598 	nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0,
1599 			      gr->func->gpccs.ucode->data.size, 0);
1600 	nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0,
1601 			      gr->func->gpccs.ucode->code.size, 0, 0, false);
1602 	nvkm_mc_unk260(device, 1);
1603 
1604 	/* load register lists */
1605 	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
1606 	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
1607 	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
1608 	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1609 
1610 	/* start HUB ucode running, it'll init the GPCs */
1611 	nvkm_wr32(device, 0x40910c, 0x00000000);
1612 	nvkm_wr32(device, 0x409100, 0x00000002);
1613 	if (nvkm_msec(device, 2000,
1614 		if (nvkm_rd32(device, 0x409800) & 0x80000000)
1615 			break;
1616 	) < 0) {
1617 		gf100_gr_ctxctl_debug(gr);
1618 		return -EBUSY;
1619 	}
1620 
1621 	gr->size = nvkm_rd32(device, 0x409804);
1622 	if (gr->data == NULL) {
1623 		int ret = gf100_grctx_generate(gr);
1624 		if (ret) {
1625 			nvkm_error(subdev, "failed to construct context\n");
1626 			return ret;
1627 		}
1628 	}
1629 
1630 	return 0;
1631 }
1632 
1633 int
1634 gf100_gr_init_ctxctl(struct gf100_gr *gr)
1635 {
1636 	int ret;
1637 
1638 	if (gr->firmware)
1639 		ret = gf100_gr_init_ctxctl_ext(gr);
1640 	else
1641 		ret = gf100_gr_init_ctxctl_int(gr);
1642 
1643 	return ret;
1644 }
1645 
1646 static int
1647 gf100_gr_oneinit(struct nvkm_gr *base)
1648 {
1649 	struct gf100_gr *gr = gf100_gr(base);
1650 	struct nvkm_device *device = gr->base.engine.subdev.device;
1651 	int i, j;
1652 
1653 	nvkm_pmu_pgob(device->pmu, false);
1654 
1655 	gr->rop_nr = gr->func->rops(gr);
1656 	gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f;
1657 	for (i = 0; i < gr->gpc_nr; i++) {
1658 		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
1659 		gr->tpc_total += gr->tpc_nr[i];
1660 		gr->ppc_nr[i]  = gr->func->ppc_nr;
1661 		for (j = 0; j < gr->ppc_nr[i]; j++) {
1662 			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
1663 			if (mask)
1664 				gr->ppc_mask[i] |= (1 << j);
1665 			gr->ppc_tpc_nr[i][j] = hweight8(mask);
1666 		}
1667 	}
1668 
1669 	/*XXX: these need figuring out... though it might not even matter */
1670 	switch (device->chipset) {
1671 	case 0xc0:
1672 		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
1673 			gr->screen_tile_row_offset = 0x07;
1674 		} else
1675 		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
1676 			gr->screen_tile_row_offset = 0x05;
1677 		} else
1678 		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
1679 			gr->screen_tile_row_offset = 0x06;
1680 		}
1681 		break;
1682 	case 0xc3: /* 450, 4/0/0/0, 2 */
1683 		gr->screen_tile_row_offset = 0x03;
1684 		break;
1685 	case 0xc4: /* 460, 3/4/0/0, 4 */
1686 		gr->screen_tile_row_offset = 0x01;
1687 		break;
1688 	case 0xc1: /* 2/0/0/0, 1 */
1689 		gr->screen_tile_row_offset = 0x01;
1690 		break;
1691 	case 0xc8: /* 4/4/3/4, 5 */
1692 		gr->screen_tile_row_offset = 0x06;
1693 		break;
1694 	case 0xce: /* 4/4/0/0, 4 */
1695 		gr->screen_tile_row_offset = 0x03;
1696 		break;
1697 	case 0xcf: /* 4/0/0/0, 3 */
1698 		gr->screen_tile_row_offset = 0x03;
1699 		break;
1700 	case 0xd7:
1701 	case 0xd9: /* 1/0/0/0, 1 */
1702 	case 0xea: /* gk20a */
1703 	case 0x12b: /* gm20b */
1704 		gr->screen_tile_row_offset = 0x01;
1705 		break;
1706 	}
1707 
1708 	return 0;
1709 }
1710 
1711 static int
1712 gf100_gr_init_(struct nvkm_gr *base)
1713 {
1714 	struct gf100_gr *gr = gf100_gr(base);
1715 	struct nvkm_subdev *subdev = &base->engine.subdev;
1716 	u32 ret;
1717 
1718 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
1719 
1720 	ret = nvkm_falcon_get(gr->fecs, subdev);
1721 	if (ret)
1722 		return ret;
1723 
1724 	ret = nvkm_falcon_get(gr->gpccs, subdev);
1725 	if (ret)
1726 		return ret;
1727 
1728 	return gr->func->init(gr);
1729 }
1730 
1731 static int
1732 gf100_gr_fini_(struct nvkm_gr *base, bool suspend)
1733 {
1734 	struct gf100_gr *gr = gf100_gr(base);
1735 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1736 	nvkm_falcon_put(gr->gpccs, subdev);
1737 	nvkm_falcon_put(gr->fecs, subdev);
1738 	return 0;
1739 }
1740 
1741 void
1742 gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
1743 {
1744 	kfree(fuc->data);
1745 	fuc->data = NULL;
1746 }
1747 
/*
 * Release a register pack with vfree().
 *
 * @pack: pack to free.  gf100_gr_dtor() passes the gr->fuc_* packs.
 */
static void
gf100_gr_dtor_init(struct gf100_gr_pack *pack)
{
	vfree(pack);
}
1753 
/*
 * Destructor hook: release everything owned by the gf100_gr instance.
 *
 * @base: generic gr object embedded in the gf100_gr instance
 *
 * Returns the gf100_gr pointer itself; this function does not free it.
 * NOTE(review): presumably the caller frees the returned pointer - confirm
 * against the nvkm_gr destructor.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);

	/* give the chip-specific code a chance to clean up first */
	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	nvkm_falcon_del(&gr->gpccs);
	nvkm_falcon_del(&gr->fecs);

	/* external firmware segments (only populated when gr->firmware is set) */
	gf100_gr_dtor_fw(&gr->fuc409c);
	gf100_gr_dtor_fw(&gr->fuc409d);
	gf100_gr_dtor_fw(&gr->fuc41ac);
	gf100_gr_dtor_fw(&gr->fuc41ad);

	/* register packs */
	gf100_gr_dtor_init(gr->fuc_bundle);
	gf100_gr_dtor_init(gr->fuc_method);
	gf100_gr_dtor_init(gr->fuc_sw_ctx);
	gf100_gr_dtor_init(gr->fuc_sw_nonctx);

	return gr;
}
1778 
/*
 * Hooks through which the generic gr layer drives this implementation.
 * gf100_gr_ctor() passes this table to nvkm_gr_ctor().
 */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.fini = gf100_gr_fini_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
	.chsw_load = gf100_gr_chsw_load,
};
1791 
1792 int
1793 gf100_gr_ctor_fw_legacy(struct gf100_gr *gr, const char *fwname,
1794 			struct gf100_gr_fuc *fuc, int ret)
1795 {
1796 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1797 	struct nvkm_device *device = subdev->device;
1798 	const struct firmware *fw;
1799 	char f[32];
1800 
1801 	/* see if this firmware has a legacy path */
1802 	if (!strcmp(fwname, "fecs_inst"))
1803 		fwname = "fuc409c";
1804 	else if (!strcmp(fwname, "fecs_data"))
1805 		fwname = "fuc409d";
1806 	else if (!strcmp(fwname, "gpccs_inst"))
1807 		fwname = "fuc41ac";
1808 	else if (!strcmp(fwname, "gpccs_data"))
1809 		fwname = "fuc41ad";
1810 	else {
1811 		/* nope, let's just return the error we got */
1812 		nvkm_error(subdev, "failed to load %s\n", fwname);
1813 		return ret;
1814 	}
1815 
1816 	/* yes, try to load from the legacy path */
1817 	nvkm_debug(subdev, "%s: falling back to legacy path\n", fwname);
1818 
1819 	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname);
1820 	ret = request_firmware(&fw, f, device->dev);
1821 	if (ret) {
1822 		snprintf(f, sizeof(f), "nouveau/%s", fwname);
1823 		ret = request_firmware(&fw, f, device->dev);
1824 		if (ret) {
1825 			nvkm_error(subdev, "failed to load %s\n", fwname);
1826 			return ret;
1827 		}
1828 	}
1829 
1830 	fuc->size = fw->size;
1831 	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
1832 	release_firmware(fw);
1833 	return (fuc->data != NULL) ? 0 : -ENOMEM;
1834 }
1835 
1836 int
1837 gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
1838 		 struct gf100_gr_fuc *fuc)
1839 {
1840 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1841 	struct nvkm_device *device = subdev->device;
1842 	const struct firmware *fw;
1843 	int ret;
1844 
1845 	ret = nvkm_firmware_get(device, fwname, &fw);
1846 	if (ret)
1847 		return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret);
1848 
1849 	fuc->size = fw->size;
1850 	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
1851 	nvkm_firmware_put(fw);
1852 	return (fuc->data != NULL) ? 0 : -ENOMEM;
1853 }
1854 
1855 int
1856 gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
1857 	      int index, struct gf100_gr *gr)
1858 {
1859 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
1860 	int ret;
1861 
1862 	gr->func = func;
1863 	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
1864 				    func->fecs.ucode == NULL);
1865 
1866 	ret = nvkm_gr_ctor(&gf100_gr_, device, index,
1867 			   gr->firmware || func->fecs.ucode != NULL,
1868 			   &gr->base);
1869 	if (ret)
1870 		return ret;
1871 
1872 	ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs);
1873 	if (ret)
1874 		return ret;
1875 
1876 	return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs);
1877 }
1878 
1879 int
1880 gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
1881 	      int index, struct nvkm_gr **pgr)
1882 {
1883 	struct gf100_gr *gr;
1884 	int ret;
1885 
1886 	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
1887 		return -ENOMEM;
1888 	*pgr = &gr->base;
1889 
1890 	ret = gf100_gr_ctor(func, device, index, gr);
1891 	if (ret)
1892 		return ret;
1893 
1894 	if (gr->firmware) {
1895 		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
1896 		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
1897 		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
1898 		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
1899 			return -ENODEV;
1900 	}
1901 
1902 	return 0;
1903 }
1904 
/*
 * Chip-level hardware initialisation for GF100.
 *
 * @gr: graphics engine instance; the topology fields (gpc_nr, tpc_nr[],
 *      tpc_total, rop_nr, screen_tile_row_offset) are read here and are
 *      filled in by gf100_gr_oneinit()
 *
 * Programs the broadcast GPC registers, applies the chip's mmio list,
 * distributes the TPCs across the GPCs, writes the interrupt/trap related
 * registers of every unit, initialises ZBC and finally brings up context
 * control.
 *
 * Returns the result of gf100_gr_init_ctxctl().
 */
int
gf100_gr_init(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_fb *fb = device->fb;
	/* 0x00800000 divided by the TPC total, rounded up */
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
	int i;

	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
	/* addresses of the fb mmu_wr/mmu_rd objects, in units of 256 bytes */
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8);

	gf100_gr_mmio(gr, gr->func->mmio);

	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);

	/*
	 * Walk the GPCs round-robin, skipping those with no TPCs left, and
	 * give global TPC i the next unused local TPC index of the chosen
	 * GPC.  The local indices are packed as 4-bit fields, eight per word.
	 */
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	/* per-GPC: tile row offset with TPC count, TPC total, magic value */
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
							 gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	/* chipset 0xd7 takes the same value at a different broadcast offset */
	if (device->chipset != 0xd7)
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
	else
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));

	nvkm_wr32(device, 0x400500, 0x00010001);

	/* 0x400100 is the interrupt status that gf100_gr_intr() acknowledges */
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	/*
	 * NOTE(review): most of the 0xc0000000 writes below target the same
	 * registers, with the same value, that the trap handlers write after
	 * logging a trap - presumably this resets/arms trap reporting for
	 * each unit; confirm.
	 */
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);

	/* same treatment for every GPC and each of its TPCs */
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
		}
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
	}

	/* and for every ROP */
	for (rop = 0; rop < gr->rop_nr; rop++) {
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
	}

	/* 0x400108 and 0x400118 are the trap registers read by gf100_gr_trap_intr() */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	nvkm_wr32(device, 0x400054, 0x34ce3464);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}
2014 
2015 #include "fuc/hubgf100.fuc3.h"
2016 
/*
 * Built-in FECS (HUB) microcode image, wrapping the gf100_grhub_code and
 * gf100_grhub_data arrays (presumably provided by the fuc/hubgf100.fuc3.h
 * include just above).  Referenced from the gf100_gr chip description.
 */
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};
2024 
2025 #include "fuc/gpcgf100.fuc3.h"
2026 
/*
 * Built-in GPCCS (GPC) microcode image, wrapping the gf100_grgpc_code and
 * gf100_grgpc_data arrays (presumably provided by the fuc/gpcgf100.fuc3.h
 * include just above).  Referenced from the gf100_gr chip description.
 */
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};
2034 
/*
 * Chip description for GF100: init hook, mmio list, built-in FECS/GPCCS
 * microcode, ROP counting hook, context description and the object
 * classes exposed.  gf100_gr_new() passes it to gf100_gr_new_().
 */
static const struct gf100_gr_func
gf100_gr = {
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
	.rops = gf100_gr_rops,
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};
2051 
/*
 * Create the gr engine for a GF100 device.
 *
 * @device: owning device
 * @index:  subdev index
 * @pgr:    receives the new object
 *
 * Thin wrapper that supplies the GF100 chip description to gf100_gr_new_()
 * and returns its result.
 */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}
2057