1 /* 2 * Copyright 2018 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include "gk104.h" 23 #include "cgrp.h" 24 #include "changk104.h" 25 #include "user.h" 26 27 #include <core/client.h> 28 #include <core/gpuobj.h> 29 #include <subdev/bar.h> 30 #include <subdev/fault.h> 31 #include <subdev/top.h> 32 #include <subdev/timer.h> 33 #include <engine/sw.h> 34 35 #include <nvif/class.h> 36 37 static void 38 tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, 39 struct nvkm_memory *mem, int nr) 40 { 41 struct nvkm_device *device = fifo->base.engine.subdev.device; 42 u64 addr = nvkm_memory_addr(mem); 43 /*XXX: target? */ 44 45 nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr)); 46 nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr)); 47 nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr); 48 49 /*XXX: how to wait? can you even wait? */ 50 } 51 52 const struct gk104_fifo_runlist_func 53 tu102_fifo_runlist = { 54 .size = 16, 55 .cgrp = gv100_fifo_runlist_cgrp, 56 .chan = gv100_fifo_runlist_chan, 57 .commit = tu102_fifo_runlist_commit, 58 }; 59 60 static const struct nvkm_enum 61 tu102_fifo_fault_engine[] = { 62 { 0x01, "DISPLAY" }, 63 { 0x03, "PTP" }, 64 { 0x06, "PWR_PMU" }, 65 { 0x08, "IFB", NULL, NVKM_ENGINE_IFB }, 66 { 0x09, "PERF" }, 67 { 0x1f, "PHYSICAL" }, 68 { 0x20, "HOST0" }, 69 { 0x21, "HOST1" }, 70 { 0x22, "HOST2" }, 71 { 0x23, "HOST3" }, 72 { 0x24, "HOST4" }, 73 { 0x25, "HOST5" }, 74 { 0x26, "HOST6" }, 75 { 0x27, "HOST7" }, 76 { 0x28, "HOST8" }, 77 { 0x29, "HOST9" }, 78 { 0x2a, "HOST10" }, 79 { 0x2b, "HOST11" }, 80 { 0x2c, "HOST12" }, 81 { 0x2d, "HOST13" }, 82 { 0x2e, "HOST14" }, 83 { 0x80, "BAR1", NULL, NVKM_SUBDEV_BAR }, 84 { 0xc0, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, 85 {} 86 }; 87 88 static void 89 tu102_fifo_pbdma_init(struct gk104_fifo *fifo) 90 { 91 struct nvkm_device *device = fifo->base.engine.subdev.device; 92 const u32 mask = (1 << fifo->pbdma_nr) - 1; 93 /*XXX: this is a bit of a guess at this point in time. */ 94 nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask); 95 } 96 97 static const struct gk104_fifo_pbdma_func 98 tu102_fifo_pbdma = { 99 .nr = gm200_fifo_pbdma_nr, 100 .init = tu102_fifo_pbdma_init, 101 .init_timeout = gk208_fifo_pbdma_init_timeout, 102 }; 103 104 static const struct gk104_fifo_func 105 tu102_fifo = { 106 .pbdma = &tu102_fifo_pbdma, 107 .fault.access = gv100_fifo_fault_access, 108 .fault.engine = tu102_fifo_fault_engine, 109 .fault.reason = gv100_fifo_fault_reason, 110 .fault.hubclient = gv100_fifo_fault_hubclient, 111 .fault.gpcclient = gv100_fifo_fault_gpcclient, 112 .runlist = &tu102_fifo_runlist, 113 .user = {{-1,-1,VOLTA_USERMODE_A }, tu102_fifo_user_new }, 114 .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new }, 115 .cgrp_force = true, 116 }; 117 118 static void 119 tu102_fifo_recover_work(struct work_struct *w) 120 { 121 struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work); 122 struct nvkm_device *device = fifo->base.engine.subdev.device; 123 struct nvkm_engine *engine; 124 unsigned long flags; 125 u32 engm, runm, todo; 126 int engn, runl; 127 128 spin_lock_irqsave(&fifo->base.lock, flags); 129 runm = fifo->recover.runm; 130 engm = fifo->recover.engm; 131 fifo->recover.engm = 0; 132 fifo->recover.runm = 0; 133 spin_unlock_irqrestore(&fifo->base.lock, flags); 134 135 nvkm_mask(device, 0x002630, runm, runm); 136 137 for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) { 138 if ((engine = fifo->engine[engn].engine)) { 139 nvkm_subdev_fini(&engine->subdev, false); 140 WARN_ON(nvkm_subdev_init(&engine->subdev)); 141 } 142 } 143 144 for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl)) 145 gk104_fifo_runlist_update(fifo, runl); 146 147 nvkm_mask(device, 0x002630, runm, 0x00000000); 148 } 149 150 static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn); 151 152 static void 153 tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl) 154 { 155 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 156 struct nvkm_device *device = subdev->device; 157 const u32 runm = BIT(runl); 158 159 assert_spin_locked(&fifo->base.lock); 160 if (fifo->recover.runm & runm) 161 return; 162 fifo->recover.runm |= runm; 163 164 /* Block runlist to prevent channel assignment(s) from changing. */ 165 nvkm_mask(device, 0x002630, runm, runm); 166 167 /* Schedule recovery. */ 168 nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl); 169 schedule_work(&fifo->recover.work); 170 } 171 172 static struct gk104_fifo_chan * 173 tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid) 174 { 175 struct gk104_fifo_chan *chan; 176 struct nvkm_fifo_cgrp *cgrp; 177 178 list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { 179 if (chan->base.chid == chid) { 180 list_del_init(&chan->head); 181 return chan; 182 } 183 } 184 185 list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { 186 if (cgrp->id == chid) { 187 chan = list_first_entry(&cgrp->chan, typeof(*chan), head); 188 list_del_init(&chan->head); 189 if (!--cgrp->chan_nr) 190 list_del_init(&cgrp->head); 191 return chan; 192 } 193 } 194 195 return NULL; 196 } 197 198 static void 199 tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid) 200 { 201 struct gk104_fifo *fifo = gk104_fifo(base); 202 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 203 struct nvkm_device *device = subdev->device; 204 const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08)); 205 const u32 runl = (stat & 0x000f0000) >> 16; 206 const bool used = (stat & 0x00000001); 207 unsigned long engn, engm = fifo->runlist[runl].engm; 208 struct gk104_fifo_chan *chan; 209 210 assert_spin_locked(&fifo->base.lock); 211 if (!used) 212 return; 213 214 /* Lookup SW state for channel, and mark it as dead. */ 215 chan = tu102_fifo_recover_chid(fifo, runl, chid); 216 if (chan) { 217 chan->killed = true; 218 nvkm_fifo_kevent(&fifo->base, chid); 219 } 220 221 /* Disable channel. */ 222 nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800); 223 nvkm_warn(subdev, "channel %d: killed\n", chid); 224 225 /* Block channel assignments from changing during recovery. */ 226 tu102_fifo_recover_runl(fifo, runl); 227 228 /* Schedule recovery for any engines the channel is on. */ 229 for_each_set_bit(engn, &engm, fifo->engine_nr) { 230 struct gk104_fifo_engine_status status; 231 232 gk104_fifo_engine_status(fifo, engn, &status); 233 if (!status.chan || status.chan->id != chid) 234 continue; 235 tu102_fifo_recover_engn(fifo, engn); 236 } 237 } 238 239 static void 240 tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn) 241 { 242 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 243 struct nvkm_device *device = subdev->device; 244 const u32 runl = fifo->engine[engn].runl; 245 const u32 engm = BIT(engn); 246 struct gk104_fifo_engine_status status; 247 248 assert_spin_locked(&fifo->base.lock); 249 if (fifo->recover.engm & engm) 250 return; 251 fifo->recover.engm |= engm; 252 253 /* Block channel assignments from changing during recovery. */ 254 tu102_fifo_recover_runl(fifo, runl); 255 256 /* Determine which channel (if any) is currently on the engine. */ 257 gk104_fifo_engine_status(fifo, engn, &status); 258 if (status.chan) { 259 /* The channel is not longer viable, kill it. */ 260 tu102_fifo_recover_chan(&fifo->base, status.chan->id); 261 } 262 263 /* Preempt the runlist */ 264 nvkm_wr32(device, 0x2638, BIT(runl)); 265 266 /* Schedule recovery. */ 267 nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn); 268 schedule_work(&fifo->recover.work); 269 } 270 271 static void 272 tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) 273 { 274 struct gk104_fifo *fifo = gk104_fifo(base); 275 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 276 struct nvkm_device *device = subdev->device; 277 const struct nvkm_enum *er, *ee, *ec, *ea; 278 struct nvkm_engine *engine = NULL; 279 struct nvkm_fifo_chan *chan; 280 unsigned long flags; 281 char ct[8] = "HUB/", en[16] = ""; 282 int engn; 283 284 er = nvkm_enum_find(fifo->func->fault.reason, info->reason); 285 ee = nvkm_enum_find(fifo->func->fault.engine, info->engine); 286 if (info->hub) { 287 ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client); 288 } else { 289 ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client); 290 snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); 291 } 292 ea = nvkm_enum_find(fifo->func->fault.access, info->access); 293 294 if (ee && ee->data2) { 295 switch (ee->data2) { 296 case NVKM_SUBDEV_BAR: 297 nvkm_bar_bar1_reset(device); 298 break; 299 case NVKM_SUBDEV_INSTMEM: 300 nvkm_bar_bar2_reset(device); 301 break; 302 case NVKM_ENGINE_IFB: 303 nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); 304 break; 305 default: 306 engine = nvkm_device_engine(device, ee->data2, 0); 307 break; 308 } 309 } 310 311 if (ee == NULL) { 312 enum nvkm_devidx engidx = nvkm_top_fault(device, info->engine); 313 314 if (engidx < NVKM_SUBDEV_NR) { 315 const char *src = nvkm_subdev_type[engidx]; 316 char *dst = en; 317 318 do { 319 *dst++ = toupper(*src++); 320 } while (*src); 321 engine = nvkm_device_engine(device, engidx, 0); 322 } 323 } else { 324 snprintf(en, sizeof(en), "%s", ee->name); 325 } 326 327 spin_lock_irqsave(&fifo->base.lock, flags); 328 chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst); 329 330 nvkm_error(subdev, 331 "fault %02x [%s] at %016llx engine %02x [%s] client %02x " 332 "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", 333 info->access, ea ? ea->name : "", info->addr, 334 info->engine, ee ? ee->name : en, 335 info->client, ct, ec ? ec->name : "", 336 info->reason, er ? er->name : "", chan ? chan->chid : -1, 337 info->inst, chan ? chan->object.client->name : "unknown"); 338 339 /* Kill the channel that caused the fault. */ 340 if (chan) 341 tu102_fifo_recover_chan(&fifo->base, chan->chid); 342 343 /* Channel recovery will probably have already done this for the 344 * correct engine(s), but just in case we can't find the channel 345 * information... 346 */ 347 for (engn = 0; engn < fifo->engine_nr && engine; engn++) { 348 if (fifo->engine[engn].engine == engine) { 349 tu102_fifo_recover_engn(fifo, engn); 350 break; 351 } 352 } 353 354 spin_unlock_irqrestore(&fifo->base.lock, flags); 355 } 356 357 static void 358 tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo) 359 { 360 struct nvkm_device *device = fifo->base.engine.subdev.device; 361 unsigned long flags, engm; 362 u32 engn; 363 364 spin_lock_irqsave(&fifo->base.lock, flags); 365 366 engm = nvkm_rd32(device, 0x2a30); 367 nvkm_wr32(device, 0x2a30, engm); 368 369 for_each_set_bit(engn, &engm, 32) 370 tu102_fifo_recover_engn(fifo, engn); 371 372 spin_unlock_irqrestore(&fifo->base.lock, flags); 373 } 374 375 static void 376 tu102_fifo_intr_sched(struct gk104_fifo *fifo) 377 { 378 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 379 struct nvkm_device *device = subdev->device; 380 u32 intr = nvkm_rd32(device, 0x00254c); 381 u32 code = intr & 0x000000ff; 382 383 nvkm_error(subdev, "SCHED_ERROR %02x\n", code); 384 } 385 386 static void 387 tu102_fifo_intr(struct nvkm_fifo *base) 388 { 389 struct gk104_fifo *fifo = gk104_fifo(base); 390 struct nvkm_subdev *subdev = &fifo->base.engine.subdev; 391 struct nvkm_device *device = subdev->device; 392 u32 mask = nvkm_rd32(device, 0x002140); 393 u32 stat = nvkm_rd32(device, 0x002100) & mask; 394 395 if (stat & 0x00000001) { 396 gk104_fifo_intr_bind(fifo); 397 nvkm_wr32(device, 0x002100, 0x00000001); 398 stat &= ~0x00000001; 399 } 400 401 if (stat & 0x00000002) { 402 tu102_fifo_intr_ctxsw_timeout(fifo); 403 stat &= ~0x00000002; 404 } 405 406 if (stat & 0x00000100) { 407 tu102_fifo_intr_sched(fifo); 408 nvkm_wr32(device, 0x002100, 0x00000100); 409 stat &= ~0x00000100; 410 } 411 412 if (stat & 0x00010000) { 413 gk104_fifo_intr_chsw(fifo); 414 nvkm_wr32(device, 0x002100, 0x00010000); 415 stat &= ~0x00010000; 416 } 417 418 if (stat & 0x20000000) { 419 u32 mask = nvkm_rd32(device, 0x0025a0); 420 421 while (mask) { 422 u32 unit = __ffs(mask); 423 424 gk104_fifo_intr_pbdma_0(fifo, unit); 425 gk104_fifo_intr_pbdma_1(fifo, unit); 426 nvkm_wr32(device, 0x0025a0, (1 << unit)); 427 mask &= ~(1 << unit); 428 } 429 stat &= ~0x20000000; 430 } 431 432 if (stat & 0x40000000) { 433 gk104_fifo_intr_runlist(fifo); 434 stat &= ~0x40000000; 435 } 436 437 if (stat & 0x80000000) { 438 nvkm_wr32(device, 0x002100, 0x80000000); 439 gk104_fifo_intr_engine(fifo); 440 stat &= ~0x80000000; 441 } 442 443 if (stat) { 444 nvkm_error(subdev, "INTR %08x\n", stat); 445 nvkm_mask(device, 0x002140, stat, 0x00000000); 446 nvkm_wr32(device, 0x002100, stat); 447 } 448 } 449 450 static const struct nvkm_fifo_func 451 tu102_fifo_ = { 452 .dtor = gk104_fifo_dtor, 453 .oneinit = gk104_fifo_oneinit, 454 .info = gk104_fifo_info, 455 .init = gk104_fifo_init, 456 .fini = gk104_fifo_fini, 457 .intr = tu102_fifo_intr, 458 .fault = tu102_fifo_fault, 459 .uevent_init = gk104_fifo_uevent_init, 460 .uevent_fini = gk104_fifo_uevent_fini, 461 .recover_chan = tu102_fifo_recover_chan, 462 .class_get = gk104_fifo_class_get, 463 .class_new = gk104_fifo_class_new, 464 }; 465 466 int 467 tu102_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) 468 { 469 struct gk104_fifo *fifo; 470 471 if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) 472 return -ENOMEM; 473 fifo->func = &tu102_fifo; 474 INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work); 475 *pfifo = &fifo->base; 476 477 return nvkm_fifo_ctor(&tu102_fifo_, device, index, 4096, &fifo->base); 478 } 479