1 /* 2 * Copyright 2018 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include "chan.h" 23 #include "runl.h" 24 25 #include "gk104.h" 26 #include "cgrp.h" 27 #include "changk104.h" 28 29 #include <core/client.h> 30 #include <core/memory.h> 31 #include <subdev/bar.h> 32 #include <subdev/fault.h> 33 #include <subdev/top.h> 34 35 #include <nvif/class.h> 36 37 static const struct nvkm_chan_func 38 tu102_chan = { 39 }; 40 41 static void 42 tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, 43 struct nvkm_memory *mem, int nr) 44 { 45 struct nvkm_device *device = fifo->base.engine.subdev.device; 46 u64 addr = nvkm_memory_addr(mem); 47 /*XXX: target? */ 48 49 nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr)); 50 nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr)); 51 nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr); 52 53 /*XXX: how to wait? can you even wait? */ 54 } 55 56 static const struct gk104_fifo_runlist_func 57 tu102_fifo_runlist = { 58 .size = 16, 59 .cgrp = gv100_fifo_runlist_cgrp, 60 .chan = gv100_fifo_runlist_chan, 61 .commit = tu102_fifo_runlist_commit, 62 }; 63 64 static const struct nvkm_runl_func 65 tu102_runl = { 66 }; 67 68 static const struct nvkm_enum 69 tu102_fifo_fault_engine[] = { 70 { 0x01, "DISPLAY" }, 71 { 0x03, "PTP" }, 72 { 0x06, "PWR_PMU" }, 73 { 0x08, "IFB", NULL, NVKM_ENGINE_IFB }, 74 { 0x09, "PERF" }, 75 { 0x1f, "PHYSICAL" }, 76 { 0x20, "HOST0" }, 77 { 0x21, "HOST1" }, 78 { 0x22, "HOST2" }, 79 { 0x23, "HOST3" }, 80 { 0x24, "HOST4" }, 81 { 0x25, "HOST5" }, 82 { 0x26, "HOST6" }, 83 { 0x27, "HOST7" }, 84 { 0x28, "HOST8" }, 85 { 0x29, "HOST9" }, 86 { 0x2a, "HOST10" }, 87 { 0x2b, "HOST11" }, 88 { 0x2c, "HOST12" }, 89 { 0x2d, "HOST13" }, 90 { 0x2e, "HOST14" }, 91 { 0x80, "BAR1", NULL, NVKM_SUBDEV_BAR }, 92 { 0xc0, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, 93 {} 94 }; 95 96 static void 97 tu102_fifo_pbdma_init(struct gk104_fifo *fifo) 98 { 99 struct nvkm_device *device = fifo->base.engine.subdev.device; 100 const u32 mask = (1 << fifo->pbdma_nr) - 1; 101 /*XXX: this is a bit of a guess at this point in time. */ 102 nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask); 103 } 104 105 static const struct gk104_fifo_pbdma_func 106 tu102_fifo_pbdma = { 107 .init = tu102_fifo_pbdma_init, 108 .init_timeout = gk208_fifo_pbdma_init_timeout, 109 }; 110 111 static void 112 tu102_fifo_recover_work(struct work_struct *w) 113 { 114 struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work); 115 struct nvkm_device *device = fifo->base.engine.subdev.device; 116 struct nvkm_engine *engine; 117 unsigned long flags; 118 u32 engm, runm, todo; 119 int engn, runl; 120 121 spin_lock_irqsave(&fifo->base.lock, flags); 122 runm = fifo->recover.runm; 123 engm = fifo->recover.engm; 124 fifo->recover.engm = 0; 125 fifo->recover.runm = 0; 126 spin_unlock_irqrestore(&fifo->base.lock, flags); 127 128 nvkm_mask(device, 0x002630, runm, runm); 129 130 for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) { 131 if ((engine = fifo->engine[engn].engine)) { 132 nvkm_subdev_fini(&engine->subdev, false); 133 WARN_ON(nvkm_subdev_init(&engine->subdev)); 134 } 135 } 136 137 for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl)) 138 gk104_fifo_runlist_update(fifo, runl); 139 140 nvkm_mask(device, 0x002630, runm, 0x00000000); 141 } 142 143 static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn); 144 145 static void 146 tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl) 147 { 148 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 149 struct nvkm_device *device = subdev->device; 150 const u32 runm = BIT(runl); 151 152 assert_spin_locked(&fifo->base.lock); 153 if (fifo->recover.runm & runm) 154 return; 155 fifo->recover.runm |= runm; 156 157 /* Block runlist to prevent channel assignment(s) from changing. */ 158 nvkm_mask(device, 0x002630, runm, runm); 159 160 /* Schedule recovery. */ 161 nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl); 162 schedule_work(&fifo->recover.work); 163 } 164 165 static struct gk104_fifo_chan * 166 tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid) 167 { 168 struct gk104_fifo_chan *chan; 169 struct nvkm_fifo_cgrp *cgrp; 170 171 list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { 172 if (chan->base.chid == chid) { 173 list_del_init(&chan->head); 174 return chan; 175 } 176 } 177 178 list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { 179 if (cgrp->id == chid) { 180 chan = list_first_entry(&cgrp->chan, typeof(*chan), head); 181 list_del_init(&chan->head); 182 if (!--cgrp->chan_nr) 183 list_del_init(&cgrp->head); 184 return chan; 185 } 186 } 187 188 return NULL; 189 } 190 191 static void 192 tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid) 193 { 194 struct gk104_fifo *fifo = gk104_fifo(base); 195 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 196 struct nvkm_device *device = subdev->device; 197 const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08)); 198 const u32 runl = (stat & 0x000f0000) >> 16; 199 const bool used = (stat & 0x00000001); 200 unsigned long engn, engm = fifo->runlist[runl].engm; 201 struct gk104_fifo_chan *chan; 202 203 assert_spin_locked(&fifo->base.lock); 204 if (!used) 205 return; 206 207 /* Lookup SW state for channel, and mark it as dead. */ 208 chan = tu102_fifo_recover_chid(fifo, runl, chid); 209 if (chan) { 210 chan->killed = true; 211 nvkm_fifo_kevent(&fifo->base, chid); 212 } 213 214 /* Disable channel. */ 215 nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800); 216 nvkm_warn(subdev, "channel %d: killed\n", chid); 217 218 /* Block channel assignments from changing during recovery. */ 219 tu102_fifo_recover_runl(fifo, runl); 220 221 /* Schedule recovery for any engines the channel is on. */ 222 for_each_set_bit(engn, &engm, fifo->engine_nr) { 223 struct gk104_fifo_engine_status status; 224 225 gk104_fifo_engine_status(fifo, engn, &status); 226 if (!status.chan || status.chan->id != chid) 227 continue; 228 tu102_fifo_recover_engn(fifo, engn); 229 } 230 } 231 232 static void 233 tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn) 234 { 235 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 236 struct nvkm_device *device = subdev->device; 237 const u32 runl = fifo->engine[engn].runl; 238 const u32 engm = BIT(engn); 239 struct gk104_fifo_engine_status status; 240 241 assert_spin_locked(&fifo->base.lock); 242 if (fifo->recover.engm & engm) 243 return; 244 fifo->recover.engm |= engm; 245 246 /* Block channel assignments from changing during recovery. */ 247 tu102_fifo_recover_runl(fifo, runl); 248 249 /* Determine which channel (if any) is currently on the engine. */ 250 gk104_fifo_engine_status(fifo, engn, &status); 251 if (status.chan) { 252 /* The channel is not longer viable, kill it. */ 253 tu102_fifo_recover_chan(&fifo->base, status.chan->id); 254 } 255 256 /* Preempt the runlist */ 257 nvkm_wr32(device, 0x2638, BIT(runl)); 258 259 /* Schedule recovery. */ 260 nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn); 261 schedule_work(&fifo->recover.work); 262 } 263 264 static void 265 tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) 266 { 267 struct gk104_fifo *fifo = gk104_fifo(base); 268 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 269 struct nvkm_device *device = subdev->device; 270 const struct nvkm_enum *er, *ee, *ec, *ea; 271 struct nvkm_engine *engine = NULL; 272 struct nvkm_fifo_chan *chan; 273 unsigned long flags; 274 const char *en = ""; 275 char ct[8] = "HUB/"; 276 int engn; 277 278 er = nvkm_enum_find(fifo->func->fault.reason, info->reason); 279 ee = nvkm_enum_find(fifo->func->fault.engine, info->engine); 280 if (info->hub) { 281 ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client); 282 } else { 283 ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client); 284 snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); 285 } 286 ea = nvkm_enum_find(fifo->func->fault.access, info->access); 287 288 if (ee && ee->data2) { 289 switch (ee->data2) { 290 case NVKM_SUBDEV_BAR: 291 nvkm_bar_bar1_reset(device); 292 break; 293 case NVKM_SUBDEV_INSTMEM: 294 nvkm_bar_bar2_reset(device); 295 break; 296 case NVKM_ENGINE_IFB: 297 nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); 298 break; 299 default: 300 engine = nvkm_device_engine(device, ee->data2, 0); 301 break; 302 } 303 } 304 305 if (ee == NULL) { 306 struct nvkm_subdev *subdev = nvkm_top_fault(device, info->engine); 307 if (subdev) { 308 if (subdev->func == &nvkm_engine) 309 engine = container_of(subdev, typeof(*engine), subdev); 310 en = engine->subdev.name; 311 } 312 } else { 313 en = ee->name; 314 } 315 316 spin_lock_irqsave(&fifo->base.lock, flags); 317 chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst); 318 319 nvkm_error(subdev, 320 "fault %02x [%s] at %016llx engine %02x [%s] client %02x " 321 "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", 322 info->access, ea ? ea->name : "", info->addr, 323 info->engine, ee ? ee->name : en, 324 info->client, ct, ec ? ec->name : "", 325 info->reason, er ? er->name : "", chan ? chan->chid : -1, 326 info->inst, chan ? chan->object.client->name : "unknown"); 327 328 /* Kill the channel that caused the fault. */ 329 if (chan) 330 tu102_fifo_recover_chan(&fifo->base, chan->chid); 331 332 /* Channel recovery will probably have already done this for the 333 * correct engine(s), but just in case we can't find the channel 334 * information... 335 */ 336 for (engn = 0; engn < fifo->engine_nr && engine; engn++) { 337 if (fifo->engine[engn].engine == engine) { 338 tu102_fifo_recover_engn(fifo, engn); 339 break; 340 } 341 } 342 343 spin_unlock_irqrestore(&fifo->base.lock, flags); 344 } 345 346 const struct nvkm_fifo_func_mmu_fault 347 tu102_fifo_mmu_fault = { 348 .recover = tu102_fifo_fault, 349 }; 350 351 static void 352 tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo) 353 { 354 struct nvkm_device *device = fifo->base.engine.subdev.device; 355 unsigned long flags, engm; 356 u32 engn; 357 358 spin_lock_irqsave(&fifo->base.lock, flags); 359 360 engm = nvkm_rd32(device, 0x2a30); 361 nvkm_wr32(device, 0x2a30, engm); 362 363 for_each_set_bit(engn, &engm, 32) 364 tu102_fifo_recover_engn(fifo, engn); 365 366 spin_unlock_irqrestore(&fifo->base.lock, flags); 367 } 368 369 static void 370 tu102_fifo_intr_sched(struct gk104_fifo *fifo) 371 { 372 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 373 struct nvkm_device *device = subdev->device; 374 u32 intr = nvkm_rd32(device, 0x00254c); 375 u32 code = intr & 0x000000ff; 376 377 nvkm_error(subdev, "SCHED_ERROR %02x\n", code); 378 } 379 380 static void 381 tu102_fifo_intr(struct nvkm_fifo *base) 382 { 383 struct gk104_fifo *fifo = gk104_fifo(base); 384 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 385 struct nvkm_device *device = subdev->device; 386 u32 mask = nvkm_rd32(device, 0x002140); 387 u32 stat = nvkm_rd32(device, 0x002100) & mask; 388 389 if (stat & 0x00000001) { 390 gk104_fifo_intr_bind(fifo); 391 nvkm_wr32(device, 0x002100, 0x00000001); 392 stat &= ~0x00000001; 393 } 394 395 if (stat & 0x00000002) { 396 tu102_fifo_intr_ctxsw_timeout(fifo); 397 stat &= ~0x00000002; 398 } 399 400 if (stat & 0x00000100) { 401 tu102_fifo_intr_sched(fifo); 402 nvkm_wr32(device, 0x002100, 0x00000100); 403 stat &= ~0x00000100; 404 } 405 406 if (stat & 0x00010000) { 407 gk104_fifo_intr_chsw(fifo); 408 nvkm_wr32(device, 0x002100, 0x00010000); 409 stat &= ~0x00010000; 410 } 411 412 if (stat & 0x20000000) { 413 u32 mask = nvkm_rd32(device, 0x0025a0); 414 415 while (mask) { 416 u32 unit = __ffs(mask); 417 418 gk104_fifo_intr_pbdma_0(fifo, unit); 419 gk104_fifo_intr_pbdma_1(fifo, unit); 420 nvkm_wr32(device, 0x0025a0, (1 << unit)); 421 mask &= ~(1 << unit); 422 } 423 stat &= ~0x20000000; 424 } 425 426 if (stat & 0x40000000) { 427 gk104_fifo_intr_runlist(fifo); 428 stat &= ~0x40000000; 429 } 430 431 if (stat & 0x80000000) { 432 nvkm_wr32(device, 0x002100, 0x80000000); 433 gk104_fifo_intr_engine(fifo); 434 stat &= ~0x80000000; 435 } 436 437 if (stat) { 438 nvkm_error(subdev, "INTR %08x\n", stat); 439 nvkm_mask(device, 0x002140, stat, 0x00000000); 440 nvkm_wr32(device, 0x002100, stat); 441 } 442 } 443 444 static const struct nvkm_fifo_func 445 tu102_fifo = { 446 .dtor = gk104_fifo_dtor, 447 .oneinit = gk104_fifo_oneinit, 448 .chid_nr = gm200_fifo_chid_nr, 449 .chid_ctor = gk110_fifo_chid_ctor, 450 .runq_nr = gm200_fifo_runq_nr, 451 .runl_ctor = gk104_fifo_runl_ctor, 452 .init = gk104_fifo_init, 453 .fini = gk104_fifo_fini, 454 .intr = tu102_fifo_intr, 455 .mmu_fault = &tu102_fifo_mmu_fault, 456 .fault.access = gv100_fifo_fault_access, 457 .fault.engine = tu102_fifo_fault_engine, 458 .fault.reason = gv100_fifo_fault_reason, 459 .fault.hubclient = gv100_fifo_fault_hubclient, 460 .fault.gpcclient = gv100_fifo_fault_gpcclient, 461 .engine_id = gk104_fifo_engine_id, 462 .id_engine = gk104_fifo_id_engine, 463 .uevent_init = gk104_fifo_uevent_init, 464 .uevent_fini = gk104_fifo_uevent_fini, 465 .recover_chan = tu102_fifo_recover_chan, 466 .runlist = &tu102_fifo_runlist, 467 .pbdma = &tu102_fifo_pbdma, 468 .runl = &tu102_runl, 469 .runq = &gv100_runq, 470 .engn = &gv100_engn, 471 .engn_ce = &gv100_engn_ce, 472 .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp, .force = true }, 473 .chan = {{ 0, 0, TURING_CHANNEL_GPFIFO_A }, &tu102_chan, .ctor = tu102_fifo_gpfifo_new }, 474 }; 475 476 int 477 tu102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, 478 struct nvkm_fifo **pfifo) 479 { 480 struct gk104_fifo *fifo; 481 482 if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) 483 return -ENOMEM; 484 fifo->func = &tu102_fifo; 485 INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work); 486 *pfifo = &fifo->base; 487 488 return nvkm_fifo_ctor(&tu102_fifo, device, type, inst, &fifo->base); 489 } 490