// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

/*
 * Default b->irq_enable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_enable() for the breadcrumbs' irq_engine and
 * returns its result.
 */
static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

/*
 * Default b->irq_disable hook (installed by intel_breadcrumbs_create()):
 * forwards to intel_engine_irq_disable() for the breadcrumbs' irq_engine.
 */
static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

/*
 * Arm the breadcrumb interrupt.
 *
 * The only caller in this file, intel_breadcrumbs_arm_irq(), invokes this
 * with b->irq_lock held and only when b->irq_armed is clear. Nothing is
 * changed if a GT pm reference cannot be obtained.
 */
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

/*
 * Arm the breadcrumb interrupt if it is not already armed.
 *
 * Does nothing for breadcrumbs without an irq_engine. The armed check and
 * the arming itself are serialised by b->irq_lock.
 */
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Disarm the breadcrumb interrupt.
 *
 * Drops one count from b->irq_enabled (which must be non-zero) and calls the
 * b->irq_disable hook when the count reaches zero, then clears b->irq_armed
 * and calls intel_gt_pm_put_async() on the engine's GT -- presumably
 * balancing the reference taken in __intel_breadcrumbs_arm_irq().
 *
 * The only caller in this file holds b->irq_lock.
 */
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

/*
 * Disarm the breadcrumb interrupt if it is currently armed; the check and
 * the disarm are serialised by b->irq_lock.
 */
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

/*
 * Publish @ce on the RCU-protected b->signalers list.
 *
 * The caller must hold ce->signal_lock; list modification itself is
 * serialised by b->signalers_lock.
 */
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

/*
 * Unlink @ce from b->signalers once it has no requests left to signal.
 *
 * The caller must hold ce->signal_lock.
 *
 * Returns false (and leaves @ce on the list) while ce->signals is non-empty.
 * Returns true after unlinking @ce; every caller in this file then drops a
 * context reference with intel_context_put().
 */
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

/*
 * Debug check (used only inside GEM_BUG_ON) that @rq sits at a valid
 * position on ce->signals.
 *
 * Returns false if @rq does not belong to @ce, if i915_seqno_passed()
 * reports that @rq's seqno has passed that of its successor on the list, or
 * that its predecessor's seqno has passed @rq's. Returns true otherwise.
 */
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

/*
 * Atomically set DMA_FENCE_FLAG_SIGNALED_BIT on @fence.
 *
 * Returns true if this call set the bit (it was previously clear), false if
 * the fence had already been marked as signaled.
 */
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

/*
 * Record @timestamp on @fence, set DMA_FENCE_FLAG_TIMESTAMP_BIT and emit the
 * dma_fence_signaled tracepoint.
 */
static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

/*
 * Invoke every callback on @list for @fence.
 *
 * The caller must hold fence->lock. Each node is re-initialised before its
 * callback is invoked, which is why the _safe iterator (next pointer sampled
 * up front) is used.
 */
static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

/*
 * Hand @tl to intel_engine_add_retire() on the breadcrumbs' irq_engine;
 * no-op when there is no irq_engine.
 */
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

/*
 * Push @node onto the front of the singly linked list starting at @head and
 * return the new head. Plain (non-atomic) store: only suitable for a list
 * private to the caller.
 */
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

/*
 * irq_work callback (registered in intel_breadcrumbs_create()) that signals
 * completed requests.
 *
 * In order, it:
 *  1. takes over everything queued on b->signaled_requests;
 *  2. disarms the irq if there was nothing queued and no signaling contexts;
 *  3. walks b->signalers under RCU, unlinking each completed request that
 *     still has I915_FENCE_FLAG_SIGNAL set and collecting those whose fence
 *     it marks as signaled onto a local list;
 *  4. for every collected request, stamps the fence and runs its callbacks
 *     under rq->lock, then drops the request reference;
 *  5. re-arms the irq if it is disarmed while signaling contexts remain.
 *
 * b->signaler_active is elevated for the duration of step 3 so that
 * intel_context_remove_breadcrumbs() can wait for the walk to finish.
 */
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			/* Stop at the first request that has not completed. */
			if (!__i915_request_is_complete(rq))
				break;

			/*
			 * The flag is also cleared by the cancel/remove paths;
			 * whoever clears it owns unlinking the request.
			 */
			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				/* Pairs with intel_context_get() in insert_breadcrumb(). */
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				/*
				 * Already marked signaled elsewhere: just drop
				 * the reference taken in insert_breadcrumb().
				 */
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		/* Optional scheduler hook, invoked before the callbacks run. */
		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		/* Detach the callbacks onto a local head before running them. */
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

/*
 * Allocate and initialise a breadcrumbs tracker.
 *
 * @irq_engine: engine whose interrupt is driven through the default
 *              irq_enable()/irq_disable() hooks. Other functions in this
 *              file test for a NULL irq_engine, so NULL appears to be
 *              permitted -- confirm against callers.
 *
 * Returns the new object holding a single kref, or NULL if the allocation
 * fails.
 */
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

/*
 * Re-apply the interrupt state implied by b->irq_enabled: call the enable
 * hook if the count is non-zero, the disable hook otherwise.
 *
 * Runs under b->irq_lock with local interrupts saved and disabled. No-op
 * when there is no irq_engine.
 */
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

/*
 * Drain the signalers and disarm the interrupt ahead of parking.
 *
 * Returns immediately if the irq is not armed. Otherwise waits for any
 * pending irq_work and then runs signal_irq_work() directly, with local
 * interrupts disabled, until the irq is disarmed or b->active becomes
 * non-zero.
 */
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

/*
 * kref release function for struct intel_breadcrumbs.
 *
 * Waits for any in-flight irq_work, asserts that no signaling contexts
 * remain and that the irq is disarmed, then frees the object.
 */
void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

/*
 * Mark @rq's fence as signaled and defer running its callbacks to the irq
 * worker.
 *
 * Does nothing if the fence was already marked signaled. Otherwise takes a
 * request reference (dropped by signal_irq_work() after the callbacks have
 * run) and pushes the request onto b->signaled_requests. llist_add() reports
 * whether the list was empty beforehand, so the worker is only queued by the
 * first addition.
 */
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

/*
 * Attach @rq to its context's list of requests awaiting a signal.
 *
 * The caller holds rq->context->signal_lock (taken by
 * i915_request_enable_breadcrumb(), asserted in add_signaling_context()).
 *
 * Nothing is done if I915_FENCE_FLAG_SIGNAL is already set. A request that
 * has already completed is passed straight to irq_signal_request(). Otherwise
 * the request is inserted into ce->signals (taking a request reference, and
 * a context reference when this is the context's first signaler), the flag
 * is set and the irq worker is queued.
 */
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	/* Reference held on behalf of the signal list. */
	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

/*
 * Request that @rq's fence be signaled upon completion.
 *
 * Nothing is attached if the fence is already signaled or the request is
 * not yet active. Otherwise the breadcrumb is inserted under the context's
 * signal_lock, after rechecking I915_FENCE_FLAG_ACTIVE under that lock.
 *
 * Always returns true.
 */
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

/*
 * Detach @rq from its context's signal list.
 *
 * No-op unless I915_FENCE_FLAG_SIGNAL was set. Otherwise the request is
 * unlinked under ce->signal_lock, the context reference is dropped if that
 * emptied the context's list, an already completed request is handed to
 * irq_signal_request() so its fence still gets signaled, and finally the
 * signal list's request reference is dropped.
 */
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

/*
 * Flush every request still attached to @ce's signal list through @b.
 *
 * All remaining requests are expected to be complete (GEM_BUG_ON otherwise).
 * Each one that still has I915_FENCE_FLAG_SIGNAL set is unlinked, handed to
 * irq_signal_request() and has the list's reference dropped; the context is
 * then removed from b->signalers if its list is empty.
 *
 * Before returning, spins until b->signaler_active reads zero, i.e. until
 * no signal_irq_work() is inside its walk of the signalers.
 */
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

/*
 * Print one line per request waiting on @b's signal lists: fence context,
 * seqno, a marker ("!" if complete, "*" if started, nothing otherwise) and
 * the time since rq->emitted_jiffies in milliseconds. The lists are walked
 * under rcu_read_lock().
 */
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

/*
 * Dump the breadcrumb state of @engine to @p: whether the irq is armed and,
 * if any contexts are signaling, the outstanding requests. No-op if the
 * engine has no breadcrumbs.
 */
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}