// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}
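
/*
 * Roughly, a timeline gets its HWSP in one of two ways (see
 * intel_timeline_init() below): either a global HWSP vma is supplied,
 * e.g. an engine status page, and the timeline borrows a
 * TIMELINE_SEQNO_BYTES slot at @offset within it, or no vma is given
 * and a private page is allocated via hwsp_alloc(), in which case the
 * timeline also carries an initial breadcrumb.
 */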

static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);
	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * freed. Rather than work too hard to seal this race, simply clean
	 * up the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}
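
/*
 * Illustrative pairing (a sketch only; the real callers live elsewhere
 * in the driver): the first pin binds the HWSP into the GGTT and kmaps
 * it, later pins merely take another reference, and every successful
 * intel_timeline_pin() must be balanced by intel_timeline_unpin():
 *
 *	err = intel_timeline_pin(tl, ww);
 *	if (err)
 *		return err;
 *	... emit requests against tl ...
 *	intel_timeline_unpin(tl);
 */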

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomics to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}
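
/*
 * Wraparound, in brief: HW semaphore waits sample the seqno slot
 * directly and cannot be relied upon to cope with a u32 seqno wrapping
 * back to zero, so rather than reuse the slot the timeline rotates to
 * the next TIMELINE_SEQNO_BYTES slot within its page, leaving the old
 * value in place for anyone still polling it (see "Replace the HWSP on
 * wraparound" in intel_timeline_get_seqno() below). As a worked example
 * of the MI_FLUSH_DW workaround: with TIMELINE_SEQNO_BYTES == 8, a
 * next_ofs of 0x20 has bit 5 set and is bumped by BIT(5) to 0x40.
 */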

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif