1 /*
2  * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 #include "gk20a.h"
23 #include "ctxgf100.h"
24 
25 #include <nvif/class.h>
26 #include <subdev/timer.h>
27 
/*
 * Object classes offered by this engine; referenced from the .sclass member
 * of gk20a_gr_oclass.  Each entry gives a class id and the object functions
 * used for it.  The last two classes also carry a third initialiser
 * (gf100_gr_9097_omthds / gf100_gr_90c0_omthds).
 *
 * NOTE(review): the trailing empty entry presumably acts as the list
 * terminator -- confirm against the code that walks this table.
 */
static struct nvkm_oclass
gk20a_gr_sclass[] = {
	{ FERMI_TWOD_A, &nvkm_object_ofuncs },
	{ KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs },
	{ KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds },
	{ KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds },
	{}
};
36 
/*
 * Release a pack built by one of the gk20a_gr_*_to_*() converters in this
 * file.  Those converters place the pack headers and the init entries in a
 * single vzalloc() allocation, so one vfree() releases everything.
 *
 * NOTE(review): the pointer is handed to vfree() unchecked, so callers must
 * pass either NULL or a valid pack, never an ERR_PTR() value.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	vfree(pack);
}
42 
43 struct gk20a_fw_av
44 {
45 	u32 addr;
46 	u32 data;
47 };
48 
49 static struct gf100_gr_pack *
50 gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
51 {
52 	struct gf100_gr_init *init;
53 	struct gf100_gr_pack *pack;
54 	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
55 	int i;
56 
57 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
58 	if (!pack)
59 		return ERR_PTR(-ENOMEM);
60 
61 	init = (void *)(pack + 2);
62 
63 	pack[0].init = init;
64 
65 	for (i = 0; i < nent; i++) {
66 		struct gf100_gr_init *ent = &init[i];
67 		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
68 
69 		ent->addr = av->addr;
70 		ent->data = av->data;
71 		ent->count = 1;
72 		ent->pitch = 1;
73 	}
74 
75 	return pack;
76 }
77 
78 struct gk20a_fw_aiv
79 {
80 	u32 addr;
81 	u32 index;
82 	u32 data;
83 };
84 
85 static struct gf100_gr_pack *
86 gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
87 {
88 	struct gf100_gr_init *init;
89 	struct gf100_gr_pack *pack;
90 	const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
91 	int i;
92 
93 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
94 	if (!pack)
95 		return ERR_PTR(-ENOMEM);
96 
97 	init = (void *)(pack + 2);
98 
99 	pack[0].init = init;
100 
101 	for (i = 0; i < nent; i++) {
102 		struct gf100_gr_init *ent = &init[i];
103 		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];
104 
105 		ent->addr = av->addr;
106 		ent->data = av->data;
107 		ent->count = 1;
108 		ent->pitch = 1;
109 	}
110 
111 	return pack;
112 }
113 
114 static struct gf100_gr_pack *
115 gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
116 {
117 	struct gf100_gr_init *init;
118 	struct gf100_gr_pack *pack;
119 	/* We don't suppose we will initialize more than 16 classes here... */
120 	static const unsigned int max_classes = 16;
121 	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
122 	int i, classidx = 0;
123 	u32 prevclass = 0;
124 
125 	pack = vzalloc((sizeof(*pack) * max_classes) +
126 		       (sizeof(*init) * (nent + 1)));
127 	if (!pack)
128 		return ERR_PTR(-ENOMEM);
129 
130 	init = (void *)(pack + max_classes);
131 
132 	for (i = 0; i < nent; i++) {
133 		struct gf100_gr_init *ent = &init[i];
134 		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
135 		u32 class = av->addr & 0xffff;
136 		u32 addr = (av->addr & 0xffff0000) >> 14;
137 
138 		if (prevclass != class) {
139 			pack[classidx].init = ent;
140 			pack[classidx].type = class;
141 			prevclass = class;
142 			if (++classidx >= max_classes) {
143 				vfree(pack);
144 				return ERR_PTR(-ENOSPC);
145 			}
146 		}
147 
148 		ent->addr = addr;
149 		ent->data = av->data;
150 		ent->count = 1;
151 		ent->pitch = 1;
152 	}
153 
154 	return pack;
155 }
156 
157 int
158 gk20a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
159 	      struct nvkm_oclass *oclass, void *data, u32 size,
160 	      struct nvkm_object **pobject)
161 {
162 	int err;
163 	struct gf100_gr *gr;
164 	struct gf100_gr_fuc fuc;
165 
166 	err = gf100_gr_ctor(parent, engine, oclass, data, size, pobject);
167 	if (err)
168 		return err;
169 
170 	gr = (void *)*pobject;
171 
172 	err = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc);
173 	if (err)
174 		return err;
175 	gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
176 	gf100_gr_dtor_fw(&fuc);
177 	if (IS_ERR(gr->fuc_sw_nonctx))
178 		return PTR_ERR(gr->fuc_sw_nonctx);
179 
180 	err = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc);
181 	if (err)
182 		return err;
183 	gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
184 	gf100_gr_dtor_fw(&fuc);
185 	if (IS_ERR(gr->fuc_sw_ctx))
186 		return PTR_ERR(gr->fuc_sw_ctx);
187 
188 	err = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc);
189 	if (err)
190 		return err;
191 	gr->fuc_bundle = gk20a_gr_av_to_init(&fuc);
192 	gf100_gr_dtor_fw(&fuc);
193 	if (IS_ERR(gr->fuc_bundle))
194 		return PTR_ERR(gr->fuc_bundle);
195 
196 	err = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc);
197 	if (err)
198 		return err;
199 	gr->fuc_method = gk20a_gr_av_to_method(&fuc);
200 	gf100_gr_dtor_fw(&fuc);
201 	if (IS_ERR(gr->fuc_method))
202 		return PTR_ERR(gr->fuc_method);
203 
204 	return 0;
205 }
206 
/*
 * Destroy the gk20a graphics engine object: release the four firmware packs
 * attached by gk20a_gr_ctor(), then run the common gf100 destructor.
 *
 * NOTE(review): every pack pointer is passed on to vfree() unchecked, so each
 * of them has to be either NULL or a valid pack when this runs.
 */
void
gk20a_gr_dtor(struct nvkm_object *object)
{
	struct gf100_gr *gr = (void *)object;

	/* Packs are released in the reverse of the order the ctor loads them. */
	gk20a_gr_init_dtor(gr->fuc_method);
	gk20a_gr_init_dtor(gr->fuc_bundle);
	gk20a_gr_init_dtor(gr->fuc_sw_ctx);
	gk20a_gr_init_dtor(gr->fuc_sw_nonctx);

	gf100_gr_dtor(object);
}
219 
220 static int
221 gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr)
222 {
223 	struct nvkm_device *device = gr->base.engine.subdev.device;
224 
225 	if (nvkm_msec(device, 2000,
226 		if (!(nvkm_rd32(device, 0x40910c) & 0x00000006))
227 			break;
228 	) < 0) {
229 		nv_error(gr, "FECS mem scrubbing timeout\n");
230 		return -ETIMEDOUT;
231 	}
232 
233 	if (nvkm_msec(device, 2000,
234 		if (!(nvkm_rd32(device, 0x41a10c) & 0x00000006))
235 			break;
236 	) < 0) {
237 		nv_error(gr, "GPCCS mem scrubbing timeout\n");
238 		return -ETIMEDOUT;
239 	}
240 
241 	return 0;
242 }
243 
/*
 * Write the gk20a values into registers 0x419e44 and 0x419e4c.  Installed as
 * the set_hww_esr_report_mask hook of gk20a_gr_oclass and called from
 * gk20a_gr_init() when that hook is set.
 *
 * NOTE(review): going by the function name these are the hardware-warning
 * (HWW) ESR report masks -- confirm against the register documentation.
 */
static void
gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	nvkm_wr32(device, 0x419e44, 0x1ffffe);
	nvkm_wr32(device, 0x419e4c, 0x7f);
}
251 
/*
 * Bring the gk20a graphics engine up.
 *
 * Steps, in order: common nvkm_gr_init(); SCC RAM clear; the "sw_nonctx"
 * pack handed to gf100_gr_mmio(); wait for memory scrubbing and for the
 * engine to go idle; static register setup (MMU debug buffers, optional GPC
 * MMU hook, zcull TPC map, per-GPC values, FIFO access, interrupt and
 * exception enables, optional HWW ESR report mask hook); ZBC init; and
 * finally gf100_gr_init_ctxctl().
 *
 * Returns the first non-zero result of nvkm_gr_init(),
 * gk20a_gr_wait_mem_scrubbing() or gf100_gr_wait_idle(); otherwise the
 * result of gf100_gr_init_ctxctl().
 */
int
gk20a_gr_init(struct nvkm_object *object)
{
	struct gk20a_gr_oclass *oclass = (void *)object->oclass;
	struct gf100_gr *gr = (void *)object;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	/* 0x00800000 divided by the total TPC count, rounded up. */
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	/* Zcull map: one 4-bit TPC index per slot, eight slots per word. */
	u32 data[TPC_MAX / 8] = {};
	/* Working copy of gr->tpc_nr, counted down as TPCs are handed out. */
	u8  tpcnr[GPC_MAX];
	int gpc, tpc;
	int ret, i;

	ret = nvkm_gr_init(&gr->base);
	if (ret)
		return ret;

	/* Clear SCC RAM */
	nvkm_wr32(device, 0x40802c, 0x1);

	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);

	ret = gk20a_gr_wait_mem_scrubbing(gr);
	if (ret)
		return ret;

	ret = gf100_gr_wait_idle(gr);
	if (ret)
		return ret;

	/* MMU debug buffer */
	nvkm_wr32(device, 0x100cc8, gr->unk4188b4->addr >> 8);
	nvkm_wr32(device, 0x100ccc, gr->unk4188b8->addr >> 8);

	/* Optional per-chip hook; gk20a_gr_oclass in this file leaves it unset. */
	if (oclass->init_gpc_mmu)
		oclass->init_gpc_mmu(gr);

	/* Set the PE as stream master */
	nvkm_mask(device, 0x503018, 0x1, 0x1);

	/* Zcull init */
	/* data[] is already zeroed by its initialiser; this memset is redundant. */
	memset(data, 0x00, sizeof(data));
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	/*
	 * Hand out TPCs round-robin across the GPCs, skipping GPCs that have
	 * none left, and store the per-GPC TPC index for every global slot i.
	 */
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	/* Exactly four map words are written -- assumes TPC_MAX / 8 >= 4. */
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	/* Per-GPC values: TPC count of that GPC, total TPC count, magicgpc918. */
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			  gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	/* Enable FIFO access */
	nvkm_wr32(device, 0x400500, 0x00010001);

	/* Enable interrupts */
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	/* Enable FECS error interrupts */
	nvkm_wr32(device, 0x409c24, 0x000f0000);

	/* Enable hardware warning exceptions */
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);

	/* Optional per-chip hook; gk20a_gr_oclass sets it in this file. */
	if (oclass->set_hww_esr_report_mask)
		oclass->set_hww_esr_report_mask(gr);

	/* Enable TPC exceptions per GPC */
	nvkm_wr32(device, 0x419d0c, 0x2);
	/* One bit per TPC, limited to eight bits, placed at bits 16-23. */
	nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);

	/* Reset and enable all exceptions */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}
351 
/*
 * Engine class descriptor for the gk20a graphics engine.  It is built as a
 * struct gk20a_gr_oclass compound literal and exported through a pointer to
 * the nvkm_oclass embedded in it (.gf100.base).  gk20a_gr_init() casts
 * object->oclass back to struct gk20a_gr_oclass to reach the optional hooks.
 */
struct nvkm_oclass *
gk20a_gr_oclass = &(struct gk20a_gr_oclass) {
	.gf100 = {
		.base.handle = NV_ENGINE(GR, 0xea),
		.base.ofuncs = &(struct nvkm_ofuncs) {
			.ctor = gk20a_gr_ctor,
			.dtor = gk20a_gr_dtor,
			.init = gk20a_gr_init,
			.fini = _nvkm_gr_fini,
		},
		.cclass = &gk20a_grctx_oclass,
		.sclass = gk20a_gr_sclass,
		.ppc_nr = 1,
	},
	/* init_gpc_mmu is left unset, so gk20a_gr_init() skips that hook. */
	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
}.gf100.base;
368