/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree_rcu(cl, rcu);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	if (i915_active_is_idle(&cl->active))
		__idle_cacheline_free(cl);
}

int intel_timeline_init(struct intel_timeline *timeline,
			struct intel_gt *gt,
			struct i915_vma *hwsp)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

int intel_timeline_pin(struct intel_timeline *tl)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);

	cacheline_acquire(cl);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif
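
Editor's note: the HWSP management above rests on two small tricks that are easy to miss in the driver code: a 64-bit free_bitmap that hands out 64-byte cachelines from a single 4 KiB page (hwsp_alloc()/__idle_hwsp_free(), with the BUILD_BUG_ON guaranteeing the page is large enough), and packing the cacheline index into the low CACHELINE_BITS of the page-aligned vaddr (page_pack_bits()/ptr_unmask_bits() in cacheline_alloc()). The standalone userspace sketch below is not part of the driver; it only illustrates those two ideas under the assumption of a 4 KiB page and 64-byte cachelines, and every name in it (demo_hwsp, demo_cacheline_alloc(), etc.) is hypothetical.

/* Standalone illustration (userspace, not kernel code). Build with a C11
 * compiler, e.g. gcc -std=c11 -Wall demo.c */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_CACHELINE_BYTES 64
#define DEMO_CACHELINE_BITS  6	/* 64 cachelines * 64 bytes == 4 KiB page */

struct demo_hwsp {
	void *page;		/* page-aligned backing store */
	uint64_t free_bitmap;	/* bit n set => cacheline n is free */
};

/* Claim the lowest free cacheline, mirroring __ffs64()/BIT_ULL() use above. */
static int demo_cacheline_alloc(struct demo_hwsp *hwsp)
{
	int cl;

	if (!hwsp->free_bitmap)
		return -1;

	cl = __builtin_ctzll(hwsp->free_bitmap);	/* ~ __ffs64() */
	hwsp->free_bitmap &= ~(1ull << cl);
	return cl;
}

/* Return a cacheline, mirroring __idle_hwsp_free(). */
static void demo_cacheline_free(struct demo_hwsp *hwsp, int cl)
{
	hwsp->free_bitmap |= 1ull << cl;
}

/* Pack the cacheline index into the low bits of the page-aligned pointer,
 * mirroring page_pack_bits()/ptr_unmask_bits(). */
static void *demo_pack(void *page, int cl)
{
	return (void *)((uintptr_t)page | (unsigned int)cl);
}

static int demo_unpack_cacheline(void *packed)
{
	return (uintptr_t)packed & ((1u << DEMO_CACHELINE_BITS) - 1);
}

static void *demo_unpack_page(void *packed)
{
	return (void *)((uintptr_t)packed &
			~(uintptr_t)((1u << DEMO_CACHELINE_BITS) - 1));
}

int main(void)
{
	struct demo_hwsp hwsp = { .free_bitmap = ~0ull };
	void *packed;
	int cl;

	hwsp.page = aligned_alloc(4096, 4096);
	if (!hwsp.page)
		return 1;

	cl = demo_cacheline_alloc(&hwsp);	/* first allocation: cacheline 0 */
	packed = demo_pack(hwsp.page, cl);

	assert(demo_unpack_page(packed) == hwsp.page);
	printf("cacheline %d at byte offset %d\n",
	       demo_unpack_cacheline(packed),
	       demo_unpack_cacheline(packed) * DEMO_CACHELINE_BYTES);

	demo_cacheline_free(&hwsp, cl);		/* page is fully free again */
	assert(hwsp.free_bitmap == ~0ull);

	free(hwsp.page);
	return 0;
}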