1 /*
2  * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 #include "gk20a.h"
23 #include "ctxgf100.h"
24 
25 #include <nvif/class.h>
26 #include <subdev/timer.h>
27 
28 static struct nvkm_oclass
29 gk20a_gr_sclass[] = {
30 	{ FERMI_TWOD_A, &nvkm_object_ofuncs },
31 	{ KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs },
32 	{ KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds },
33 	{ KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds },
34 	{}
35 };
36 
/*
 * Release an init pack produced by one of the gk20a_gr_*_to_*() converters.
 *
 * The converters signal failure with ERR_PTR(), so a pack slot handed to this
 * function may hold an encoded error rather than an allocation.  Such a value
 * is not a vmalloc address and must never reach vfree(), hence the guard.
 * NULL needs no special handling: vfree(NULL) does nothing.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	if (IS_ERR(pack))
		return;

	vfree(pack);
}
42 
43 struct gk20a_fw_av
44 {
45 	u32 addr;
46 	u32 data;
47 };
48 
49 static struct gf100_gr_pack *
50 gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
51 {
52 	struct gf100_gr_init *init;
53 	struct gf100_gr_pack *pack;
54 	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
55 	int i;
56 
57 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
58 	if (!pack)
59 		return ERR_PTR(-ENOMEM);
60 
61 	init = (void *)(pack + 2);
62 
63 	pack[0].init = init;
64 
65 	for (i = 0; i < nent; i++) {
66 		struct gf100_gr_init *ent = &init[i];
67 		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
68 
69 		ent->addr = av->addr;
70 		ent->data = av->data;
71 		ent->count = 1;
72 		ent->pitch = 1;
73 	}
74 
75 	return pack;
76 }
77 
78 struct gk20a_fw_aiv
79 {
80 	u32 addr;
81 	u32 index;
82 	u32 data;
83 };
84 
85 static struct gf100_gr_pack *
86 gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
87 {
88 	struct gf100_gr_init *init;
89 	struct gf100_gr_pack *pack;
90 	const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
91 	int i;
92 
93 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
94 	if (!pack)
95 		return ERR_PTR(-ENOMEM);
96 
97 	init = (void *)(pack + 2);
98 
99 	pack[0].init = init;
100 
101 	for (i = 0; i < nent; i++) {
102 		struct gf100_gr_init *ent = &init[i];
103 		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];
104 
105 		ent->addr = av->addr;
106 		ent->data = av->data;
107 		ent->count = 1;
108 		ent->pitch = 1;
109 	}
110 
111 	return pack;
112 }
113 
114 static struct gf100_gr_pack *
115 gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
116 {
117 	struct gf100_gr_init *init;
118 	struct gf100_gr_pack *pack;
119 	/* We don't suppose we will initialize more than 16 classes here... */
120 	static const unsigned int max_classes = 16;
121 	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
122 	int i, classidx = 0;
123 	u32 prevclass = 0;
124 
125 	pack = vzalloc((sizeof(*pack) * max_classes) +
126 		       (sizeof(*init) * (nent + 1)));
127 	if (!pack)
128 		return ERR_PTR(-ENOMEM);
129 
130 	init = (void *)(pack + max_classes);
131 
132 	for (i = 0; i < nent; i++) {
133 		struct gf100_gr_init *ent = &init[i];
134 		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
135 		u32 class = av->addr & 0xffff;
136 		u32 addr = (av->addr & 0xffff0000) >> 14;
137 
138 		if (prevclass != class) {
139 			pack[classidx].init = ent;
140 			pack[classidx].type = class;
141 			prevclass = class;
142 			if (++classidx >= max_classes) {
143 				vfree(pack);
144 				return ERR_PTR(-ENOSPC);
145 			}
146 		}
147 
148 		ent->addr = addr;
149 		ent->data = av->data;
150 		ent->count = 1;
151 		ent->pitch = 1;
152 	}
153 
154 	return pack;
155 }
156 
157 int
158 gk20a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
159 	      struct nvkm_oclass *oclass, void *data, u32 size,
160 	      struct nvkm_object **pobject)
161 {
162 	int err;
163 	struct gf100_gr_priv *priv;
164 	struct gf100_gr_fuc fuc;
165 
166 	err = gf100_gr_ctor(parent, engine, oclass, data, size, pobject);
167 	if (err)
168 		return err;
169 
170 	priv = (void *)*pobject;
171 
172 	err = gf100_gr_ctor_fw(priv, "sw_nonctx", &fuc);
173 	if (err)
174 		return err;
175 	priv->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
176 	gf100_gr_dtor_fw(&fuc);
177 	if (IS_ERR(priv->fuc_sw_nonctx))
178 		return PTR_ERR(priv->fuc_sw_nonctx);
179 
180 	err = gf100_gr_ctor_fw(priv, "sw_ctx", &fuc);
181 	if (err)
182 		return err;
183 	priv->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
184 	gf100_gr_dtor_fw(&fuc);
185 	if (IS_ERR(priv->fuc_sw_ctx))
186 		return PTR_ERR(priv->fuc_sw_ctx);
187 
188 	err = gf100_gr_ctor_fw(priv, "sw_bundle_init", &fuc);
189 	if (err)
190 		return err;
191 	priv->fuc_bundle = gk20a_gr_av_to_init(&fuc);
192 	gf100_gr_dtor_fw(&fuc);
193 	if (IS_ERR(priv->fuc_bundle))
194 		return PTR_ERR(priv->fuc_bundle);
195 
196 	err = gf100_gr_ctor_fw(priv, "sw_method_init", &fuc);
197 	if (err)
198 		return err;
199 	priv->fuc_method = gk20a_gr_av_to_method(&fuc);
200 	gf100_gr_dtor_fw(&fuc);
201 	if (IS_ERR(priv->fuc_method))
202 		return PTR_ERR(priv->fuc_method);
203 
204 	return 0;
205 }
206 
207 void
208 gk20a_gr_dtor(struct nvkm_object *object)
209 {
210 	struct gf100_gr_priv *priv = (void *)object;
211 
212 	gk20a_gr_init_dtor(priv->fuc_method);
213 	gk20a_gr_init_dtor(priv->fuc_bundle);
214 	gk20a_gr_init_dtor(priv->fuc_sw_ctx);
215 	gk20a_gr_init_dtor(priv->fuc_sw_nonctx);
216 
217 	gf100_gr_dtor(object);
218 }
219 
220 static int
221 gk20a_gr_wait_mem_scrubbing(struct gf100_gr_priv *priv)
222 {
223 	if (!nv_wait(priv, 0x40910c, 0x6, 0x0)) {
224 		nv_error(priv, "FECS mem scrubbing timeout\n");
225 		return -ETIMEDOUT;
226 	}
227 
228 	if (!nv_wait(priv, 0x41a10c, 0x6, 0x0)) {
229 		nv_error(priv, "GPCCS mem scrubbing timeout\n");
230 		return -ETIMEDOUT;
231 	}
232 
233 	return 0;
234 }
235 
/*
 * Program the two hardware-warning ESR report mask registers.  Installed as
 * the set_hww_esr_report_mask hook of gk20a_gr_oclass.
 * NOTE(review): the meaning of the individual mask bits is not visible in
 * this file.
 */
static void
gk20a_gr_set_hww_esr_report_mask(struct gf100_gr_priv *priv)
{
	/* Register 0x419e44: bits 1 through 20 set. */
	nv_wr32(priv, 0x419e44, 0x1ffffe);

	/* Register 0x419e4c: bits 0 through 6 set. */
	nv_wr32(priv, 0x419e4c, 0x7f);
}
242 
243 int
244 gk20a_gr_init(struct nvkm_object *object)
245 {
246 	struct gk20a_gr_oclass *oclass = (void *)object->oclass;
247 	struct gf100_gr_priv *priv = (void *)object;
248 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total);
249 	u32 data[TPC_MAX / 8] = {};
250 	u8  tpcnr[GPC_MAX];
251 	int gpc, tpc;
252 	int ret, i;
253 
254 	ret = nvkm_gr_init(&priv->base);
255 	if (ret)
256 		return ret;
257 
258 	/* Clear SCC RAM */
259 	nv_wr32(priv, 0x40802c, 0x1);
260 
261 	gf100_gr_mmio(priv, priv->fuc_sw_nonctx);
262 
263 	ret = gk20a_gr_wait_mem_scrubbing(priv);
264 	if (ret)
265 		return ret;
266 
267 	ret = gf100_gr_wait_idle(priv);
268 	if (ret)
269 		return ret;
270 
271 	/* MMU debug buffer */
272 	nv_wr32(priv, 0x100cc8, priv->unk4188b4->addr >> 8);
273 	nv_wr32(priv, 0x100ccc, priv->unk4188b8->addr >> 8);
274 
275 	if (oclass->init_gpc_mmu)
276 		oclass->init_gpc_mmu(priv);
277 
278 	/* Set the PE as stream master */
279 	nv_mask(priv, 0x503018, 0x1, 0x1);
280 
281 	/* Zcull init */
282 	memset(data, 0x00, sizeof(data));
283 	memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr));
284 	for (i = 0, gpc = -1; i < priv->tpc_total; i++) {
285 		do {
286 			gpc = (gpc + 1) % priv->gpc_nr;
287 		} while (!tpcnr[gpc]);
288 		tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--;
289 
290 		data[i / 8] |= tpc << ((i % 8) * 4);
291 	}
292 
293 	nv_wr32(priv, GPC_BCAST(0x0980), data[0]);
294 	nv_wr32(priv, GPC_BCAST(0x0984), data[1]);
295 	nv_wr32(priv, GPC_BCAST(0x0988), data[2]);
296 	nv_wr32(priv, GPC_BCAST(0x098c), data[3]);
297 
298 	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
299 		nv_wr32(priv, GPC_UNIT(gpc, 0x0914),
300 			priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]);
301 		nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 |
302 			priv->tpc_total);
303 		nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918);
304 	}
305 
306 	nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918);
307 
308 	/* Enable FIFO access */
309 	nv_wr32(priv, 0x400500, 0x00010001);
310 
311 	/* Enable interrupts */
312 	nv_wr32(priv, 0x400100, 0xffffffff);
313 	nv_wr32(priv, 0x40013c, 0xffffffff);
314 
315 	/* Enable FECS error interrupts */
316 	nv_wr32(priv, 0x409c24, 0x000f0000);
317 
318 	/* Enable hardware warning exceptions */
319 	nv_wr32(priv, 0x404000, 0xc0000000);
320 	nv_wr32(priv, 0x404600, 0xc0000000);
321 
322 	if (oclass->set_hww_esr_report_mask)
323 		oclass->set_hww_esr_report_mask(priv);
324 
325 	/* Enable TPC exceptions per GPC */
326 	nv_wr32(priv, 0x419d0c, 0x2);
327 	nv_wr32(priv, 0x41ac94, (((1 << priv->tpc_total) - 1) & 0xff) << 16);
328 
329 	/* Reset and enable all exceptions */
330 	nv_wr32(priv, 0x400108, 0xffffffff);
331 	nv_wr32(priv, 0x400138, 0xffffffff);
332 	nv_wr32(priv, 0x400118, 0xffffffff);
333 	nv_wr32(priv, 0x400130, 0xffffffff);
334 	nv_wr32(priv, 0x40011c, 0xffffffff);
335 	nv_wr32(priv, 0x400134, 0xffffffff);
336 
337 	gf100_gr_zbc_init(priv);
338 
339 	return gf100_gr_init_ctxctl(priv);
340 }
341 
342 struct nvkm_oclass *
343 gk20a_gr_oclass = &(struct gk20a_gr_oclass) {
344 	.gf100 = {
345 		.base.handle = NV_ENGINE(GR, 0xea),
346 		.base.ofuncs = &(struct nvkm_ofuncs) {
347 			.ctor = gk20a_gr_ctor,
348 			.dtor = gk20a_gr_dtor,
349 			.init = gk20a_gr_init,
350 			.fini = _nvkm_gr_fini,
351 		},
352 		.cclass = &gk20a_grctx_oclass,
353 		.sclass = gk20a_gr_sclass,
354 		.ppc_nr = 1,
355 	},
356 	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
357 }.gf100.base;
358