1 /*
2  * Copyright 2018 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #include "gk104.h"
23 #include "cgrp.h"
24 #include "changk104.h"
25 #include "user.h"
26 
27 #include <core/client.h>
28 #include <core/gpuobj.h>
29 #include <subdev/bar.h>
30 #include <subdev/fault.h>
31 #include <subdev/top.h>
32 #include <subdev/timer.h>
33 #include <engine/sw.h>
34 
35 #include <nvif/class.h>
36 
37 static void
38 tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl,
39 			  struct nvkm_memory *mem, int nr)
40 {
41 	struct nvkm_device *device = fifo->base.engine.subdev.device;
42 	u64 addr = nvkm_memory_addr(mem);
43 	/*XXX: target? */
44 
45 	nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr));
46 	nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr));
47 	nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr);
48 
49 	/*XXX: how to wait? can you even wait? */
50 }
51 
52 const struct gk104_fifo_runlist_func
53 tu102_fifo_runlist = {
54 	.size = 16,
55 	.cgrp = gv100_fifo_runlist_cgrp,
56 	.chan = gv100_fifo_runlist_chan,
57 	.commit = tu102_fifo_runlist_commit,
58 };
59 
60 static const struct nvkm_enum
61 tu102_fifo_fault_engine[] = {
62 	{ 0x01, "DISPLAY" },
63 	{ 0x03, "PTP" },
64 	{ 0x06, "PWR_PMU" },
65 	{ 0x08, "IFB", NULL, NVKM_ENGINE_IFB },
66 	{ 0x09, "PERF" },
67 	{ 0x1f, "PHYSICAL" },
68 	{ 0x20, "HOST0" },
69 	{ 0x21, "HOST1" },
70 	{ 0x22, "HOST2" },
71 	{ 0x23, "HOST3" },
72 	{ 0x24, "HOST4" },
73 	{ 0x25, "HOST5" },
74 	{ 0x26, "HOST6" },
75 	{ 0x27, "HOST7" },
76 	{ 0x28, "HOST8" },
77 	{ 0x29, "HOST9" },
78 	{ 0x2a, "HOST10" },
79 	{ 0x2b, "HOST11" },
80 	{ 0x2c, "HOST12" },
81 	{ 0x2d, "HOST13" },
82 	{ 0x2e, "HOST14" },
83 	{ 0x80, "BAR1", NULL, NVKM_SUBDEV_BAR },
84 	{ 0xc0, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
85 	{}
86 };
87 
88 static void
89 tu102_fifo_pbdma_init(struct gk104_fifo *fifo)
90 {
91 	struct nvkm_device *device = fifo->base.engine.subdev.device;
92 	const u32 mask = (1 << fifo->pbdma_nr) - 1;
93 	/*XXX: this is a bit of a guess at this point in time. */
94 	nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask);
95 }
96 
97 static const struct gk104_fifo_pbdma_func
98 tu102_fifo_pbdma = {
99 	.nr = gm200_fifo_pbdma_nr,
100 	.init = tu102_fifo_pbdma_init,
101 	.init_timeout = gk208_fifo_pbdma_init_timeout,
102 };
103 
104 static const struct gk104_fifo_func
105 tu102_fifo = {
106 	.pbdma = &tu102_fifo_pbdma,
107 	.fault.access = gv100_fifo_fault_access,
108 	.fault.engine = tu102_fifo_fault_engine,
109 	.fault.reason = gv100_fifo_fault_reason,
110 	.fault.hubclient = gv100_fifo_fault_hubclient,
111 	.fault.gpcclient = gv100_fifo_fault_gpcclient,
112 	.runlist = &tu102_fifo_runlist,
113 	.user = {{-1,-1,VOLTA_USERMODE_A       }, tu102_fifo_user_new   },
114 	.chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new },
115 	.cgrp_force = true,
116 };
117 
118 static void
119 tu102_fifo_recover_work(struct work_struct *w)
120 {
121 	struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
122 	struct nvkm_device *device = fifo->base.engine.subdev.device;
123 	struct nvkm_engine *engine;
124 	unsigned long flags;
125 	u32 engm, runm, todo;
126 	int engn, runl;
127 
128 	spin_lock_irqsave(&fifo->base.lock, flags);
129 	runm = fifo->recover.runm;
130 	engm = fifo->recover.engm;
131 	fifo->recover.engm = 0;
132 	fifo->recover.runm = 0;
133 	spin_unlock_irqrestore(&fifo->base.lock, flags);
134 
135 	nvkm_mask(device, 0x002630, runm, runm);
136 
137 	for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
138 		if ((engine = fifo->engine[engn].engine)) {
139 			nvkm_subdev_fini(&engine->subdev, false);
140 			WARN_ON(nvkm_subdev_init(&engine->subdev));
141 		}
142 	}
143 
144 	for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
145 		gk104_fifo_runlist_update(fifo, runl);
146 
147 	nvkm_mask(device, 0x002630, runm, 0x00000000);
148 }
149 
150 static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
151 
152 static void
153 tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
154 {
155 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
156 	struct nvkm_device *device = subdev->device;
157 	const u32 runm = BIT(runl);
158 
159 	assert_spin_locked(&fifo->base.lock);
160 	if (fifo->recover.runm & runm)
161 		return;
162 	fifo->recover.runm |= runm;
163 
164 	/* Block runlist to prevent channel assignment(s) from changing. */
165 	nvkm_mask(device, 0x002630, runm, runm);
166 
167 	/* Schedule recovery. */
168 	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
169 	schedule_work(&fifo->recover.work);
170 }
171 
172 static struct gk104_fifo_chan *
173 tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
174 {
175 	struct gk104_fifo_chan *chan;
176 	struct nvkm_fifo_cgrp *cgrp;
177 
178 	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
179 		if (chan->base.chid == chid) {
180 			list_del_init(&chan->head);
181 			return chan;
182 		}
183 	}
184 
185 	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
186 		if (cgrp->id == chid) {
187 			chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
188 			list_del_init(&chan->head);
189 			if (!--cgrp->chan_nr)
190 				list_del_init(&cgrp->head);
191 			return chan;
192 		}
193 	}
194 
195 	return NULL;
196 }
197 
198 static void
199 tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid)
200 {
201 	struct gk104_fifo *fifo = gk104_fifo(base);
202 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
203 	struct nvkm_device *device = subdev->device;
204 	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
205 	const u32  runl = (stat & 0x000f0000) >> 16;
206 	const bool used = (stat & 0x00000001);
207 	unsigned long engn, engm = fifo->runlist[runl].engm;
208 	struct gk104_fifo_chan *chan;
209 
210 	assert_spin_locked(&fifo->base.lock);
211 	if (!used)
212 		return;
213 
214 	/* Lookup SW state for channel, and mark it as dead. */
215 	chan = tu102_fifo_recover_chid(fifo, runl, chid);
216 	if (chan) {
217 		chan->killed = true;
218 		nvkm_fifo_kevent(&fifo->base, chid);
219 	}
220 
221 	/* Disable channel. */
222 	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
223 	nvkm_warn(subdev, "channel %d: killed\n", chid);
224 
225 	/* Block channel assignments from changing during recovery. */
226 	tu102_fifo_recover_runl(fifo, runl);
227 
228 	/* Schedule recovery for any engines the channel is on. */
229 	for_each_set_bit(engn, &engm, fifo->engine_nr) {
230 		struct gk104_fifo_engine_status status;
231 
232 		gk104_fifo_engine_status(fifo, engn, &status);
233 		if (!status.chan || status.chan->id != chid)
234 			continue;
235 		tu102_fifo_recover_engn(fifo, engn);
236 	}
237 }
238 
239 static void
240 tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
241 {
242 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
243 	struct nvkm_device *device = subdev->device;
244 	const u32 runl = fifo->engine[engn].runl;
245 	const u32 engm = BIT(engn);
246 	struct gk104_fifo_engine_status status;
247 
248 	assert_spin_locked(&fifo->base.lock);
249 	if (fifo->recover.engm & engm)
250 		return;
251 	fifo->recover.engm |= engm;
252 
253 	/* Block channel assignments from changing during recovery. */
254 	tu102_fifo_recover_runl(fifo, runl);
255 
256 	/* Determine which channel (if any) is currently on the engine. */
257 	gk104_fifo_engine_status(fifo, engn, &status);
258 	if (status.chan) {
259 		/* The channel is not longer viable, kill it. */
260 		tu102_fifo_recover_chan(&fifo->base, status.chan->id);
261 	}
262 
263 	/* Preempt the runlist */
264 	nvkm_wr32(device, 0x2638, BIT(runl));
265 
266 	/* Schedule recovery. */
267 	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
268 	schedule_work(&fifo->recover.work);
269 }
270 
271 static void
272 tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
273 {
274 	struct gk104_fifo *fifo = gk104_fifo(base);
275 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
276 	struct nvkm_device *device = subdev->device;
277 	const struct nvkm_enum *er, *ee, *ec, *ea;
278 	struct nvkm_engine *engine = NULL;
279 	struct nvkm_fifo_chan *chan;
280 	unsigned long flags;
281 	char ct[8] = "HUB/", en[16] = "";
282 	int engn;
283 
284 	er = nvkm_enum_find(fifo->func->fault.reason, info->reason);
285 	ee = nvkm_enum_find(fifo->func->fault.engine, info->engine);
286 	if (info->hub) {
287 		ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client);
288 	} else {
289 		ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client);
290 		snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
291 	}
292 	ea = nvkm_enum_find(fifo->func->fault.access, info->access);
293 
294 	if (ee && ee->data2) {
295 		switch (ee->data2) {
296 		case NVKM_SUBDEV_BAR:
297 			nvkm_bar_bar1_reset(device);
298 			break;
299 		case NVKM_SUBDEV_INSTMEM:
300 			nvkm_bar_bar2_reset(device);
301 			break;
302 		case NVKM_ENGINE_IFB:
303 			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
304 			break;
305 		default:
306 			engine = nvkm_device_engine(device, ee->data2, 0);
307 			break;
308 		}
309 	}
310 
311 	if (ee == NULL) {
312 		enum nvkm_devidx engidx = nvkm_top_fault(device, info->engine);
313 
314 		if (engidx < NVKM_SUBDEV_NR) {
315 			const char *src = nvkm_subdev_type[engidx];
316 			char *dst = en;
317 
318 			do {
319 				*dst++ = toupper(*src++);
320 			} while (*src);
321 			engine = nvkm_device_engine(device, engidx, 0);
322 		}
323 	} else {
324 		snprintf(en, sizeof(en), "%s", ee->name);
325 	}
326 
327 	spin_lock_irqsave(&fifo->base.lock, flags);
328 	chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst);
329 
330 	nvkm_error(subdev,
331 		   "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
332 		   "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
333 		   info->access, ea ? ea->name : "", info->addr,
334 		   info->engine, ee ? ee->name : en,
335 		   info->client, ct, ec ? ec->name : "",
336 		   info->reason, er ? er->name : "", chan ? chan->chid : -1,
337 		   info->inst, chan ? chan->object.client->name : "unknown");
338 
339 	/* Kill the channel that caused the fault. */
340 	if (chan)
341 		tu102_fifo_recover_chan(&fifo->base, chan->chid);
342 
343 	/* Channel recovery will probably have already done this for the
344 	 * correct engine(s), but just in case we can't find the channel
345 	 * information...
346 	 */
347 	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
348 		if (fifo->engine[engn].engine == engine) {
349 			tu102_fifo_recover_engn(fifo, engn);
350 			break;
351 		}
352 	}
353 
354 	spin_unlock_irqrestore(&fifo->base.lock, flags);
355 }
356 
357 static void
358 tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo)
359 {
360 	struct nvkm_device *device = fifo->base.engine.subdev.device;
361 	unsigned long flags, engm;
362 	u32 engn;
363 
364 	spin_lock_irqsave(&fifo->base.lock, flags);
365 
366 	engm = nvkm_rd32(device, 0x2a30);
367 	nvkm_wr32(device, 0x2a30, engm);
368 
369 	for_each_set_bit(engn, &engm, 32)
370 		tu102_fifo_recover_engn(fifo, engn);
371 
372 	spin_unlock_irqrestore(&fifo->base.lock, flags);
373 }
374 
375 static void
376 tu102_fifo_intr_sched(struct gk104_fifo *fifo)
377 {
378 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
379 	struct nvkm_device *device = subdev->device;
380 	u32 intr = nvkm_rd32(device, 0x00254c);
381 	u32 code = intr & 0x000000ff;
382 
383 	nvkm_error(subdev, "SCHED_ERROR %02x\n", code);
384 }
385 
386 static void
387 tu102_fifo_intr(struct nvkm_fifo *base)
388 {
389 	struct gk104_fifo *fifo = gk104_fifo(base);
390 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
391 	struct nvkm_device *device = subdev->device;
392 	u32 mask = nvkm_rd32(device, 0x002140);
393 	u32 stat = nvkm_rd32(device, 0x002100) & mask;
394 
395 	if (stat & 0x00000001) {
396 		gk104_fifo_intr_bind(fifo);
397 		nvkm_wr32(device, 0x002100, 0x00000001);
398 		stat &= ~0x00000001;
399 	}
400 
401 	if (stat & 0x00000002) {
402 		tu102_fifo_intr_ctxsw_timeout(fifo);
403 		stat &= ~0x00000002;
404 	}
405 
406 	if (stat & 0x00000100) {
407 		tu102_fifo_intr_sched(fifo);
408 		nvkm_wr32(device, 0x002100, 0x00000100);
409 		stat &= ~0x00000100;
410 	}
411 
412 	if (stat & 0x00010000) {
413 		gk104_fifo_intr_chsw(fifo);
414 		nvkm_wr32(device, 0x002100, 0x00010000);
415 		stat &= ~0x00010000;
416 	}
417 
418 	if (stat & 0x20000000) {
419 		u32 mask = nvkm_rd32(device, 0x0025a0);
420 
421 		while (mask) {
422 			u32 unit = __ffs(mask);
423 
424 			gk104_fifo_intr_pbdma_0(fifo, unit);
425 			gk104_fifo_intr_pbdma_1(fifo, unit);
426 			nvkm_wr32(device, 0x0025a0, (1 << unit));
427 			mask &= ~(1 << unit);
428 		}
429 		stat &= ~0x20000000;
430 	}
431 
432 	if (stat & 0x40000000) {
433 		gk104_fifo_intr_runlist(fifo);
434 		stat &= ~0x40000000;
435 	}
436 
437 	if (stat & 0x80000000) {
438 		nvkm_wr32(device, 0x002100, 0x80000000);
439 		gk104_fifo_intr_engine(fifo);
440 		stat &= ~0x80000000;
441 	}
442 
443 	if (stat) {
444 		nvkm_error(subdev, "INTR %08x\n", stat);
445 		nvkm_mask(device, 0x002140, stat, 0x00000000);
446 		nvkm_wr32(device, 0x002100, stat);
447 	}
448 }
449 
450 static const struct nvkm_fifo_func
451 tu102_fifo_ = {
452 	.dtor = gk104_fifo_dtor,
453 	.oneinit = gk104_fifo_oneinit,
454 	.info = gk104_fifo_info,
455 	.init = gk104_fifo_init,
456 	.fini = gk104_fifo_fini,
457 	.intr = tu102_fifo_intr,
458 	.fault = tu102_fifo_fault,
459 	.uevent_init = gk104_fifo_uevent_init,
460 	.uevent_fini = gk104_fifo_uevent_fini,
461 	.recover_chan = tu102_fifo_recover_chan,
462 	.class_get = gk104_fifo_class_get,
463 	.class_new = gk104_fifo_class_new,
464 };
465 
466 int
467 tu102_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
468 {
469 	struct gk104_fifo *fifo;
470 
471 	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
472 		return -ENOMEM;
473 	fifo->func = &tu102_fifo;
474 	INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work);
475 	*pfifo = &fifo->base;
476 
477 	return nvkm_fifo_ctor(&tu102_fifo_, device, index, 4096, &fifo->base);
478 }
479