/*
 * Copyright 2021 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "runl.h"
#include "cgrp.h"
#include "chan.h"
#include "chid.h"
#include "priv.h"
#include "runq.h"

#include <core/gpuobj.h>
#include <subdev/timer.h>
#include <subdev/top.h>

static struct nvkm_cgrp *
nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
{
	struct nvkm_cgrp *cgrp = NULL;
	struct nvkm_chan *chan;
	bool cgid;
	int id;

	id = engn->func->cxid(engn, &cgid);
	if (id < 0)
		return NULL;

	if (!cgid) {
		chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
		if (chan)
			cgrp = chan->cgrp;
	} else {
		cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
	}

	WARN_ON(!cgrp);
	return cgrp;
}

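/* Process pending recovery (RC) for this runlist.  Runs with runl->mutex
 * held, either from the runlist workqueue or while polling for preempt/
 * update completion: channels in groups flagged for RC are errored-out and
 * removed, the runqs are given a chance to idle where the HW supports
 * runlist preemption, engines still bound to a flagged group are reset,
 * and the runlist is resubmitted before scheduling is unblocked again.
 */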
static void
nvkm_runl_rc(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_cgrp *cgrp, *gtmp;
	struct nvkm_chan *chan, *ctmp;
	struct nvkm_engn *engn;
	unsigned long flags;
	int rc, state, i;
	bool reset;

	/* Runlist is blocked before scheduling recovery - fetch count. */
	BUG_ON(!mutex_is_locked(&runl->mutex));
	rc = atomic_xchg(&runl->rc_pending, 0);
	if (!rc)
		return;

	/* Look for channel groups flagged for RC. */
	nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
		state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
		if (state == NVKM_CGRP_RC_PENDING) {
			/* Disable all channels in them, and remove from runlist. */
			nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
				nvkm_chan_error(chan, false);
				nvkm_chan_remove_locked(chan);
			}
		}
	}

	/* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
	if (runl->func->preempt) {
		for (i = 0; i < runl->runq_nr; i++) {
			struct nvkm_runq *runq = runl->runq[i];

			if (runq) {
				nvkm_msec(fifo->engine.subdev.device, 2000,
					if (runq->func->idle(runq))
						break;
				);
			}
		}
	}

	/* Look for engines that are still on flagged channel groups - reset them. */
	nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
		cgrp = nvkm_engn_cgrp_get(engn, &flags);
		if (!cgrp) {
			ENGN_DEBUG(engn, "cxid not valid");
			continue;
		}

		reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
		nvkm_cgrp_put(&cgrp, flags);
		if (!reset) {
			ENGN_DEBUG(engn, "cxid not in recovery");
			continue;
		}

		ENGN_DEBUG(engn, "resetting...");
		/*TODO: can we do something less of a potential catastrophe on failure? */
		WARN_ON(nvkm_engine_reset(engn->engine));
	}

	/* Submit runlist update, and clear any remaining exception state. */
	runl->func->update(runl);
	if (runl->func->fault_clear)
		runl->func->fault_clear(runl);

	/* Unblock runlist processing. */
	while (rc--)
		nvkm_runl_allow(runl);
	runl->func->wait(runl);
}

static void
nvkm_runl_rc_runl(struct nvkm_runl *runl)
{
	RUNL_ERROR(runl, "rc scheduled");

	nvkm_runl_block(runl);
	if (runl->func->preempt)
		runl->func->preempt(runl);

	atomic_inc(&runl->rc_pending);
	schedule_work(&runl->work);
}

void
nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
{
	if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
		return;

	CGRP_ERROR(cgrp, "rc scheduled");
	nvkm_runl_rc_runl(cgrp->runl);
}

void
nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
{
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	/* Lookup channel group currently on engine. */
	cgrp = nvkm_engn_cgrp_get(engn, &flags);
	if (!cgrp) {
		ENGN_DEBUG(engn, "rc skipped, not on channel");
		return;
	}

	nvkm_runl_rc_cgrp(cgrp);
	nvkm_cgrp_put(&cgrp, flags);
}

static void
nvkm_runl_work(struct work_struct *work)
{
	struct nvkm_runl *runl = container_of(work, typeof(*runl), work);

	mutex_lock(&runl->mutex);
	nvkm_runl_rc(runl);
	mutex_unlock(&runl->mutex);
}

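/* The lookup helpers below return their object with the owning channel
 * group's spinlock held, passing the saved IRQ flags back through
 * *pirqflags.  The cgrp lock is taken before the chid/cgid lock is
 * dropped, so the object cannot disappear between lookup and return;
 * the caller is responsible for dropping the lock and restoring the
 * saved flags (see the nvkm_cgrp_put() usage in nvkm_runl_rc()).
 */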
struct nvkm_chan *
nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;
	int id;

	spin_lock_irqsave(&chid->lock, flags);
	for_each_set_bit(id, chid->used, chid->nr) {
		chan = chid->data[id];
		if (likely(chan)) {
			if (chan->inst->addr == inst) {
				spin_lock(&chan->cgrp->lock);
				*pirqflags = flags;
				spin_unlock(&chid->lock);
				return chan;
			}
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

struct nvkm_chan *
nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;

	spin_lock_irqsave(&chid->lock, flags);
	if (!WARN_ON(id >= chid->nr)) {
		chan = chid->data[id];
		if (likely(chan)) {
			spin_lock(&chan->cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&chid->lock);
			return chan;
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

struct nvkm_cgrp *
nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *cgid = runl->cgid;
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	spin_lock_irqsave(&cgid->lock, flags);
	if (!WARN_ON(id >= cgid->nr)) {
		cgrp = cgid->data[id];
		if (likely(cgrp)) {
			spin_lock(&cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&cgid->lock);
			return cgrp;
		}
	}
	spin_unlock_irqrestore(&cgid->lock, flags);
	return NULL;
}

int
nvkm_runl_preempt_wait(struct nvkm_runl *runl)
{
	return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
		if (!runl->func->preempt_pending(runl))
			break;

		nvkm_runl_rc(runl);
		usleep_range(1, 2);
	) < 0 ? -ETIMEDOUT : 0;
}

bool
nvkm_runl_update_pending(struct nvkm_runl *runl)
{
	if (!runl->func->pending(runl))
		return false;

	nvkm_runl_rc(runl);
	return true;
}

void
nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
{
	if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
		runl->func->update(runl);
		if (wait)
			runl->func->wait(runl);
	}
}

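/* Runlist scheduling is gated by a nesting count under fifo->lock: only
 * the first nvkm_runl_block() actually stops submission to the HW, and
 * only the matching final nvkm_runl_allow() re-enables it.
 */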
void
nvkm_runl_allow(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!--runl->blocked) {
		RUNL_TRACE(runl, "running");
		runl->func->allow(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

void
nvkm_runl_block(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!runl->blocked++) {
		RUNL_TRACE(runl, "stopped");
		runl->func->block(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

void
nvkm_runl_fini(struct nvkm_runl *runl)
{
	if (runl->func->fini)
		runl->func->fini(runl);

	flush_work(&runl->work);
}

void
nvkm_runl_del(struct nvkm_runl *runl)
{
	struct nvkm_engn *engn, *engt;

	nvkm_memory_unref(&runl->mem);

	list_for_each_entry_safe(engn, engt, &runl->engns, head) {
		list_del(&engn->head);
		kfree(engn);
	}

	nvkm_chid_unref(&runl->chid);
	nvkm_chid_unref(&runl->cgid);

	list_del(&runl->head);
	mutex_destroy(&runl->mutex);
	kfree(runl);
}

struct nvkm_engn *
nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
	      enum nvkm_subdev_type type, int inst)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_device *device = fifo->engine.subdev.device;
	struct nvkm_engine *engine;
	struct nvkm_engn *engn;

	engine = nvkm_device_engine(device, type, inst);
	if (!engine) {
		RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
		return NULL;
	}

	if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
		return NULL;

	engn->func = func;
	engn->runl = runl;
	engn->id = engi;
	engn->engine = engine;
	engn->fault = -1;
	list_add_tail(&engn->head, &runl->engns);

	/* Lookup MMU engine ID for fault handling. */
	if (device->top)
		engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);

	if (engn->fault < 0 && fifo->func->mmu_fault) {
		const struct nvkm_enum *map = fifo->func->mmu_fault->engine;

		while (map->name) {
			if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
				engn->fault = map->value;
				break;
			}
			map++;
		}
	}

	return engn;
}

struct nvkm_runl *
nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
{
	struct nvkm_runl *runl;

	nvkm_runl_foreach(runl, fifo) {
		if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
			return runl;
	}

	return NULL;
}

struct nvkm_runl *
nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
{
	struct nvkm_subdev *subdev = &fifo->engine.subdev;
	struct nvkm_runl *runl;
	int ret;

	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
		return NULL;

	runl->func = fifo->func->runl;
	runl->fifo = fifo;
	runl->id = runi;
	runl->addr = addr;
	INIT_LIST_HEAD(&runl->engns);
	INIT_LIST_HEAD(&runl->cgrps);
	atomic_set(&runl->changed, 0);
	mutex_init(&runl->mutex);
	INIT_WORK(&runl->work, nvkm_runl_work);
	atomic_set(&runl->rc_triggered, 0);
	atomic_set(&runl->rc_pending, 0);
	list_add_tail(&runl->head, &fifo->runls);

	if (!fifo->chid) {
		if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
			RUNL_ERROR(runl, "cgid/chid: %d", ret);
			nvkm_runl_del(runl);
			return NULL;
		}
	} else {
		runl->cgid = nvkm_chid_ref(fifo->cgid);
		runl->chid = nvkm_chid_ref(fifo->chid);
	}

	return runl;
}