/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization. When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.
 */

/**
 * amdgpu_fence_write - write a fence value
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to write
 *
 * Writes a fence value to memory (all asics).
 */
static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;

	if (drv->cpu_addr)
		*drv->cpu_addr = cpu_to_le32(seq);
}

/**
 * amdgpu_fence_read - read a fence value
 *
 * @ring: ring the fence is associated with
 *
 * Reads a fence value from memory (all asics).
 * Returns the value of the fence read from memory.
 */
static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	u32 seq = 0;

	if (drv->cpu_addr)
		seq = le32_to_cpu(*drv->cpu_addr);
	else
		seq = lower_32_bits(atomic64_read(&drv->last_seq));

	return seq;
}

/**
 * amdgpu_fence_schedule_check - schedule lockup check
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Queues a delayed work item to check for lockups.
 */
static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
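	 * Use queue_delayed_work() instead, which is a no-op while the
	 * work item is still pending.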
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &ring->fence_drv.lockup_work,
			   AMDGPU_FENCE_JIFFIES_TIMEOUT);
}

/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @owner: creator of the fence
 * @fence: amdgpu fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
		      struct amdgpu_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
	(*fence)->ring = ring;
	(*fence)->owner = owner;
	fence_init(&(*fence)->base, &amdgpu_fence_ops,
		   &adev->fence_queue.lock, adev->fence_context + ring->idx,
		   (*fence)->seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       (*fence)->seq,
			       AMDGPU_FENCE_FLAG_INT);
	trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
	return 0;
}

/**
 * amdgpu_fence_recreate - recreate a fence from a user fence
 *
 * @ring: ring the fence is associated with
 * @owner: creator of the fence
 * @seq: user fence sequence number
 * @fence: resulting amdgpu fence object
 *
 * Recreates a fence object from the user fence sequence number (all asics).
 * Returns 0 on success, -EINVAL if @seq has not been emitted yet, or
 * -ENOMEM on allocation failure.
 */
int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
			  uint64_t seq, struct amdgpu_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;

	if (seq > ring->fence_drv.sync_seq[ring->idx])
		return -EINVAL;

	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
	if ((*fence) == NULL)
		return -ENOMEM;

	(*fence)->seq = seq;
	(*fence)->ring = ring;
	(*fence)->owner = owner;
	fence_init(&(*fence)->base, &amdgpu_fence_ops,
		   &adev->fence_queue.lock, adev->fence_context + ring->idx,
		   (*fence)->seq);
	return 0;
}

/**
 * amdgpu_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also used
 * for the fence locking itself, so unlocked variants are used for
 * fence_signal and remove_wait_queue.
 */
static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct amdgpu_fence *fence;
	struct amdgpu_device *adev;
	u64 seq;
	int ret;

	fence = container_of(wait, struct amdgpu_fence, fence_wake);
	adev = fence->ring->adev;

	/*
	 * We cannot use amdgpu_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
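	 * Rely on the last_seq value that amdgpu_fence_process() has
	 * already published instead.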
	 */
	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
	if (seq >= fence->seq) {
		ret = fence_signal_locked(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		amdgpu_irq_put(adev, fence->ring->fence_drv.irq_src,
			       fence->ring->fence_drv.irq_type);
		__remove_wait_queue(&adev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

/**
 * amdgpu_fence_activity - check for fence activity
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Checks the current fence value and calculates the last
 * signalled fence value. Returns true if activity occurred
 * on the ring, in which case the fence_queue should be woken up.
 */
static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop there need to be
	 * continuously new fences signaled, i.e. amdgpu_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchg's last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process sets as last seq must be higher than
	 * the seq value we just read. Which means that the current process
	 * needs to be interrupted after amdgpu_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * bail after 10 loops, just accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&ring->fence_drv.last_seq);
	do {
		last_emitted = ring->fence_drv.sync_seq[ring->idx];
		seq = amdgpu_fence_read(ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);

	if (seq < last_emitted)
		amdgpu_fence_schedule_check(ring);

	return wake;
}

/**
 * amdgpu_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes the
 * hardware to see whether a lockup occurred.
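 * If a lockup is detected, adev->needs_reset is set and all fence
 * waiters are woken up so they can return -EDEADLK.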
 */
static void amdgpu_fence_check_lockup(struct work_struct *work)
{
	struct amdgpu_fence_driver *fence_drv;
	struct amdgpu_ring *ring;

	fence_drv = container_of(work, struct amdgpu_fence_driver,
				 lockup_work.work);
	ring = fence_drv->ring;

	if (!down_read_trylock(&ring->adev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		amdgpu_fence_schedule_check(ring);
		return;
	}

	if (fence_drv->delayed_irq && ring->adev->ddev->irq_enabled) {
		fence_drv->delayed_irq = false;
		amdgpu_irq_update(ring->adev, fence_drv->irq_src,
				  fence_drv->irq_type);
	}

	if (amdgpu_fence_activity(ring))
		wake_up_all(&ring->adev->fence_queue);
	else if (amdgpu_ring_is_lockup(ring)) {
		/* good news we believe it's a lockup */
		dev_warn(ring->adev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring->idx], ring->idx);

		/* remember that we need a reset */
		ring->adev->needs_reset = true;
		wake_up_all(&ring->adev->fence_queue);
	}
	up_read(&ring->adev->exclusive_lock);
}

/**
 * amdgpu_fence_process - process a fence
 *
 * @ring: ring the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void amdgpu_fence_process(struct amdgpu_ring *ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop there need to be
	 * continuously new fences signaled, i.e. amdgpu_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchg's last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process sets as last seq must be higher than
	 * the seq value we just read. Which means that the current process
	 * needs to be interrupted after amdgpu_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * bail after 10 loops, just accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&ring->fence_drv.last_seq);
	do {
		last_emitted = ring->fence_drv.sync_seq[ring->idx];
		seq = amdgpu_fence_read(ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);

	if (wake)
		wake_up_all(&ring->adev->fence_queue);
}

/**
 * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value). Helper function for
 * amdgpu_fence_signaled().
 */
static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
{
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	/* poll new last sequence at least once */
	amdgpu_fence_process(ring);
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	return false;
}

static bool amdgpu_fence_is_signaled(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;
	struct amdgpu_device *adev = ring->adev;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return true;

	if (down_read_trylock(&adev->exclusive_lock)) {
		amdgpu_fence_process(ring);
		up_read(&adev->exclusive_lock);

		if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
			return true;
	}
	return false;
}

/**
 * amdgpu_fence_enable_signaling - enable signalling on a fence
 * @f: fence
 *
 * This function is called with the fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool amdgpu_fence_enable_signaling(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;
	struct amdgpu_device *adev = ring->adev;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&adev->exclusive_lock)) {
		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
		if (amdgpu_fence_activity(ring))
			wake_up_all_locked(&adev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) {
			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
			up_read(&adev->exclusive_lock);
			return false;
		}

		up_read(&adev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (amdgpu_irq_get_delayed(adev, ring->fence_drv.irq_src,
					   ring->fence_drv.irq_type))
			ring->fence_drv.delayed_irq = true;
		amdgpu_fence_schedule_check(ring);
	}

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = amdgpu_fence_check_signaled;
	__add_wait_queue(&adev->fence_queue, &fence->fence_wake);
	fence_get(f);
	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
	return true;
}

/**
 * amdgpu_fence_signaled - check if a fence has signaled
 *
 * @fence: amdgpu fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
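 * A NULL fence is treated as already signaled.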
 */
bool amdgpu_fence_signaled(struct amdgpu_fence *fence)
{
	if (!fence)
		return true;

	if (amdgpu_fence_seq_signaled(fence->ring, fence->seq)) {
		if (!fence_signal(&fence->base))
			FENCE_TRACE(&fence->base, "signaled from amdgpu_fence_signaled\n");
		return true;
	}

	return false;
}

/**
 * amdgpu_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @adev: amdgpu device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any of them has signaled (current value is >= requested
 * value) or false if none has. Helper function for
 * amdgpu_fence_wait_seq_timeout().
 */
static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		if (!adev->rings[i] || !seq[i])
			continue;

		if (amdgpu_fence_seq_signaled(adev->rings[i], seq[i]))
			return true;
	}

	return false;
}

/**
 * amdgpu_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @adev: amdgpu device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics). The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence numbers. Helper function
 * for amdgpu_fence_wait_*().
 * Returns the remaining time if the sequence numbers have passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
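 * The wait is performed in chunks of AMDGPU_FENCE_JIFFIES_TIMEOUT so
 * that ring lockups can be detected while waiting.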
 */
static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
					  u64 *target_seq, bool intr,
					  long timeout)
{
	uint64_t last_seq[AMDGPU_MAX_RINGS];
	bool signaled;
	int i;
	long r;

	if (timeout == 0) {
		return amdgpu_fence_any_seq_signaled(adev, target_seq);
	}

	while (!amdgpu_fence_any_seq_signaled(adev, target_seq)) {

		/* Save current sequence values, used to check for GPU lockups */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !target_seq[i])
				continue;

			last_seq[i] = atomic64_read(&ring->fence_drv.last_seq);
			trace_amdgpu_fence_wait_begin(adev->ddev, i, target_seq[i]);
			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
		}

		if (intr) {
			r = wait_event_interruptible_timeout(adev->fence_queue, (
				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
				 || adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
		} else {
			r = wait_event_timeout(adev->fence_queue, (
				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
				 || adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
		}

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !target_seq[i])
				continue;

			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
			trace_amdgpu_fence_wait_end(adev->ddev, i, target_seq[i]);
		}

		if (unlikely(r < 0))
			return r;

		if (unlikely(!signaled)) {

			if (adev->needs_reset)
				return -EDEADLK;

			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r)
				continue;

			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
				struct amdgpu_ring *ring = adev->rings[i];

				if (!ring || !target_seq[i])
					continue;

				if (last_seq[i] != atomic64_read(&ring->fence_drv.last_seq))
					break;
			}

			if (i != AMDGPU_MAX_RINGS)
				continue;

			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
				if (!adev->rings[i] || !target_seq[i])
					continue;

				if (amdgpu_ring_is_lockup(adev->rings[i]))
					break;
			}

			if (i < AMDGPU_MAX_RINGS) {
				/* good news we believe it's a lockup */
				dev_warn(adev->dev, "GPU lockup (waiting for "
					 "0x%016llx last fence id 0x%016llx on"
					 " ring %d)\n",
					 target_seq[i], last_seq[i], i);

				/* remember that we need a reset */
				adev->needs_reset = true;
				wake_up_all(&adev->fence_queue);
				return -EDEADLK;
			}

			if (timeout < MAX_SCHEDULE_TIMEOUT) {
				timeout -= AMDGPU_FENCE_JIFFIES_TIMEOUT;
				if (timeout <= 0) {
					return 0;
				}
			}
		}
	}
	return timeout;
}

/**
 * amdgpu_fence_wait - wait for a fence to signal
 *
 * @fence: amdgpu fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
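 *
 * Typical usage (a sketch only; error handling trimmed and the caller is
 * assumed to already hold the required ring locks):
 *
 *	struct amdgpu_fence *fence;
 *
 *	r = amdgpu_fence_emit(ring, owner, &fence);
 *	...
 *	r = amdgpu_fence_wait(fence, false);
 *	amdgpu_fence_unref(&fence);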
 */
int amdgpu_fence_wait(struct amdgpu_fence *fence, bool intr)
{
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[fence->ring->idx] = fence->seq;
	r = amdgpu_fence_wait_seq_timeout(fence->ring->adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}

/**
 * amdgpu_fence_wait_any - wait for a fence to signal on any ring
 *
 * @adev: amdgpu device pointer
 * @fences: amdgpu fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics). The fence
 * array is indexed by ring id. @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int amdgpu_fence_wait_any(struct amdgpu_device *adev,
			  struct amdgpu_fence **fences,
			  bool intr)
{
	uint64_t seq[AMDGPU_MAX_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for? */
	if (num_rings == 0)
		return -ENOENT;

	r = amdgpu_fence_wait_seq_timeout(adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}

/**
 * amdgpu_fence_wait_next - wait for the next fence to signal
 *
 * @ring: ring the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
{
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[ring->idx] = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
	if (seq[ring->idx] >= ring->fence_drv.sync_seq[ring->idx]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	r = amdgpu_fence_wait_seq_timeout(ring->adev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring the fences are associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[ring->idx] = ring->fence_drv.sync_seq[ring->idx];
	if (!seq[ring->idx])
		return 0;

	r = amdgpu_fence_wait_seq_timeout(adev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(adev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring->idx, r);
	}
	return 0;
}

/**
 * amdgpu_fence_ref - take a ref on a fence
 *
 * @fence: amdgpu fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
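 * The reference must be dropped again with amdgpu_fence_unref().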
 */
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}

/**
 * amdgpu_fence_unref - remove a ref on a fence
 *
 * @fence: amdgpu fence object
 *
 * Remove a reference on a fence (all asics).
 */
void amdgpu_fence_unref(struct amdgpu_fence **fence)
{
	struct amdgpu_fence *tmp = *fence;

	*fence = NULL;
	if (tmp)
		fence_put(&tmp->base);
}

/**
 * amdgpu_fence_count_emitted - get the count of emitted fences
 *
 * @ring: ring the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring. Used by the
 * dynpm code to track ring activity.
 */
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last sequence
	 * but it's ok to report a slightly wrong fence count here.
	 */
	amdgpu_fence_process(ring);
	emitted = ring->fence_drv.sync_seq[ring->idx]
		- atomic64_read(&ring->fence_drv.last_seq);
	/* to avoid a 32-bit wrap around */
	if (emitted > 0x10000000)
		emitted = 0x10000000;

	return (unsigned)emitted;
}

/**
 * amdgpu_fence_need_sync - do we need a semaphore
 *
 * @fence: amdgpu fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics). If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *fdrv;

	if (!fence)
		return false;

	if (fence->ring == dst_ring)
		return false;

	/* we are protected by the ring mutex */
	fdrv = &dst_ring->fence_drv;
	if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
		return false;

	return true;
}

/**
 * amdgpu_fence_note_sync - record the sync point
 *
 * @fence: amdgpu fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *dst, *src;
	unsigned i;

	if (!fence)
		return;

	if (fence->ring == dst_ring)
		return;

	/* we are protected by the ring mutex */
	src = &fence->ring->fence_drv;
	dst = &dst_ring->fence_drv;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		if (i == dst_ring->idx)
			continue;

		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @ring: ring to start the fence driver on
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
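 * The fence value lives either in a device writeback slot or, for the
 * UVD ring, directly behind the UVD firmware image.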
 */
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned irq_type)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring != &adev->uvd.ring) {
		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
	} else {
		/* put fence directly behind firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
	ring->fence_drv.initialized = true;
	ring->fence_drv.irq_src = irq_src;
	ring->fence_drv.irq_type = irq_type;
	dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
		 "cpu addr 0x%p\n", ring->idx,
		 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
	return 0;
}

/**
 * amdgpu_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_init().
 */
void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	int i;

	ring->fence_drv.cpu_addr = NULL;
	ring->fence_drv.gpu_addr = 0;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		ring->fence_drv.sync_seq[i] = 0;

	atomic64_set(&ring->fence_drv.last_seq, 0);
	ring->fence_drv.initialized = false;

	INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
			  amdgpu_fence_check_lockup);
	ring->fence_drv.ring = ring;
}

/**
 * amdgpu_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 * Returns 0 for success.
 */
int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{
	init_waitqueue_head(&adev->fence_queue);
	if (amdgpu_debugfs_fence_init(adev))
		dev_err(adev->dev, "fence debugfs file creation failed\n");

	return 0;
}

/**
 * amdgpu_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
	int i, r;

	mutex_lock(&adev->ring_lock);
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;
		r = amdgpu_fence_wait_empty(ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			amdgpu_fence_driver_force_completion(adev);
		}
		wake_up_all(&adev->fence_queue);
		ring->fence_drv.initialized = false;
	}
	mutex_unlock(&adev->ring_lock);
}

/**
 * amdgpu_fence_driver_force_completion - force all fence waiters to complete
 *
 * @adev: amdgpu device pointer
 *
 * In case of GPU reset failure make sure no process keeps waiting on a
 * fence that will never complete.
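 * This simply writes the last emitted sequence number of each ring to
 * its fence location, so every outstanding fence appears signaled.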
 */
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_process(ring);

		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   ring->fence_drv.sync_seq[i]);

		for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
			struct amdgpu_ring *other = adev->rings[j];
			if (i != j && other && other->fence_drv.initialized &&
			    ring->fence_drv.sync_seq[j])
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, ring->fence_drv.sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list amdgpu_debugfs_fence_list[] = {
	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
};
#endif

int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
#else
	return 0;
#endif
}

static const char *amdgpu_fence_get_driver_name(struct fence *fence)
{
	return "amdgpu";
}

static const char *amdgpu_fence_get_timeline_name(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	return (const char *)fence->ring->name;
}

static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
{
	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct amdgpu_wait_cb {
	struct fence_cb base;
	struct task_struct *task;
};

static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
	struct amdgpu_wait_cb *wait =
		container_of(cb, struct amdgpu_wait_cb, base);
	wake_up_process(wait->task);
}

static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_device *adev = fence->ring->adev;
	struct amdgpu_wait_cb cb;

	cb.task = current;

	if (fence_add_callback(f, &cb.base, amdgpu_fence_wait_cb))
		return t;

	while (t > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * amdgpu_test_signaled must be called after
		 * set_current_state to prevent a race with wake_up_process
		 */
		if (amdgpu_test_signaled(fence))
			break;

		if (adev->needs_reset) {
			t = -EDEADLK;
			break;
		}

		t = schedule_timeout(t);

		if (t > 0 && intr && signal_pending(current))
			t = -ERESTARTSYS;
	}

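	/* restore the task state and remove our callback before returning */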
	__set_current_state(TASK_RUNNING);
	fence_remove_callback(f, &cb.base);

	return t;
}

const struct fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
	.enable_signaling = amdgpu_fence_enable_signaling,
	.signaled = amdgpu_fence_is_signaled,
	.wait = amdgpu_fence_default_wait,
	.release = NULL,
};