// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	clflush(vaddr + ofs);

	return 0;
}

static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);
	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * free'd. Rather than work too hard to seal this race, simply cleanup
	 * the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}
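
/*
 * __intel_timeline_create() allocates and initialises a new timeline. If
 * @global_hwsp is supplied, the timeline borrows a seqno slot at @offset
 * within that status page vma; otherwise a private HWSP page is allocated
 * via hwsp_alloc() and the timeline gains an initial breadcrumb.
 *
 * A minimal usage sketch (the caller and error handling shown here are
 * illustrative only):
 *
 *	tl = __intel_timeline_create(gt, NULL, 0);
 *	if (IS_ERR(tl))
 *		return PTR_ERR(tl);
 *	...
 *	intel_timeline_put(tl);
 */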
struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	clflush(hwsp_seqno);
}
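
/*
 * intel_timeline_enter() and intel_timeline_exit() bracket the period in
 * which requests may be in flight along a timeline. The first enter places
 * the timeline on the gt's active_list (refreshing the HWSP seqno in case
 * it was lost while idle); the final exit removes it again and discards the
 * now stale syncmap. Both are expected to be called with tl->mutex held;
 * see the serialisation notes inside intel_timeline_enter() for the
 * exceptions.
 */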
void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}
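
/*
 * intel_timeline_unpin() releases a pin taken by intel_timeline_pin(); only
 * the final unpin drops the i915_active reference and the HWSP GGTT binding
 * acquired by the first pin.
 *
 * A minimal sketch of the expected pairing, assuming the caller already has
 * a suitable ww context (hypothetical caller shown for illustration only):
 *
 *	err = intel_timeline_pin(tl, ww);
 *	if (err)
 *		return err;
 *	... emit and retire requests along tl ...
 *	intel_timeline_unpin(tl);
 */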
void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif