/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

struct intel_timeline_cacheline {
	struct i915_active active;
	struct intel_timeline_hwsp *hwsp;
	void *vaddr;
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp,
			 ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree(cl);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	if (i915_active_is_idle(&cl->active))
		__idle_cacheline_free(cl);
}

int intel_timeline_init(struct intel_timeline *timeline,
			struct intel_gt *gt,
			struct i915_vma *hwsp)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

static void timelines_init(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timelines_init(struct drm_i915_private *i915)
{
	timelines_init(&i915->gt);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

int intel_timeline_pin(struct intel_timeline *tl)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;
	unsigned long flags;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock_irqsave(&timelines->lock, flags);
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);
	spin_unlock_irqrestore(&timelines->lock, flags);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;
	unsigned long flags;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock_irqsave(&timelines->lock, flags);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock_irqrestore(&timelines->lock, flags);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);

	cacheline_acquire(cl);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();
	if (!tl) /* already completed */
		return 1;

	GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);

	err = -EBUSY;
	if (mutex_trylock(&tl->mutex)) {
		struct intel_timeline_cacheline *cl = from->hwsp_cacheline;

		if (i915_request_completed(from)) {
			err = 1;
			goto unlock;
		}

		err = cacheline_ref(cl, to);
		if (err)
			goto unlock;

		if (likely(cl == tl->hwsp_cacheline)) {
			*hwsp = tl->hwsp_offset;
		} else { /* across a seqno wrap, recover the original offset */
			*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
				ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
				CACHELINE_BYTES;
		}

unlock:
		mutex_unlock(&tl->mutex);
	}
	intel_timeline_put(tl);

	return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

static void timelines_fini(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

void intel_timelines_fini(struct drm_i915_private *i915)
{
	timelines_fini(&i915->gt);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif
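
/*
 * Illustrative sketch, not part of the driver: a rough outline of the call
 * order that the i915_request machinery follows when it consumes the API
 * above (create a timeline, pin its HWSP, advance the seqno, then release).
 * The function name and the free-standing @rq parameter are hypothetical and
 * exist only for demonstration.
 */
static int __maybe_unused example_timeline_seqno(struct intel_gt *gt,
						 struct i915_request *rq)
{
	struct intel_timeline *tl;
	u32 seqno;
	int err;

	/* NULL global_hwsp: allocate a private HWSP cacheline for this timeline */
	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	/* Pin the HWSP into the GGTT before handing out seqnos */
	err = intel_timeline_pin(tl);
	if (err)
		goto out_put;

	/* Advance the timeline; on wraparound the cacheline is replaced */
	mutex_lock(&tl->mutex);
	err = intel_timeline_get_seqno(tl, rq, &seqno);
	mutex_unlock(&tl->mutex);

	intel_timeline_unpin(tl);
out_put:
	intel_timeline_put(tl);
	return err;
}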