/*
 * Copyright (C) 2007 Ben Skeggs.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "drmP.h"
#include "drm.h"

#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"

#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \
                       nouveau_private(dev)->card_type < NV_C0)

struct nouveau_fence {
        struct nouveau_channel *channel;
        struct kref refcount;
        struct list_head entry;

        uint32_t sequence;
        bool signalled;

        void (*work)(void *priv, bool signalled);
        void *priv;
};

struct nouveau_semaphore {
        struct kref ref;
        struct drm_device *dev;
        struct drm_mm_node *mem;
};

static inline struct nouveau_fence *
nouveau_fence(void *sync_obj)
{
        return (struct nouveau_fence *)sync_obj;
}

static void
nouveau_fence_del(struct kref *ref)
{
        struct nouveau_fence *fence =
                container_of(ref, struct nouveau_fence, refcount);

        nouveau_channel_ref(NULL, &fence->channel);
        kfree(fence);
}

void
nouveau_fence_update(struct nouveau_channel *chan)
{
        struct drm_device *dev = chan->dev;
        struct nouveau_fence *tmp, *fence;
        uint32_t sequence;

        spin_lock(&chan->fence.lock);

        /* Fetch the last sequence if the channel is still up and running */
        if (likely(!list_empty(&chan->fence.pending))) {
                if (USE_REFCNT(dev))
                        sequence = nvchan_rd32(chan, 0x48);
                else
                        sequence = atomic_read(&chan->fence.last_sequence_irq);

                if (chan->fence.sequence_ack == sequence)
                        goto out;
                chan->fence.sequence_ack = sequence;
        }

        list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
                sequence = fence->sequence;
                fence->signalled = true;
                list_del(&fence->entry);

                if (unlikely(fence->work))
                        fence->work(fence->priv, true);

                kref_put(&fence->refcount, nouveau_fence_del);

                if (sequence == chan->fence.sequence_ack)
                        break;
        }
out:
        spin_unlock(&chan->fence.lock);
}
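
/*
 * Allocate a fence on @chan and, if @emit is set, emit it to the ring
 * immediately.  The fence starts with a single reference owned by the
 * caller (returned through @pfence); nouveau_fence_emit() takes a second
 * reference for as long as the fence sits on the channel's pending list.
 */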
int
nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
                  bool emit)
{
        struct nouveau_fence *fence;
        int ret = 0;

        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return -ENOMEM;
        kref_init(&fence->refcount);
        nouveau_channel_ref(chan, &fence->channel);

        if (emit)
                ret = nouveau_fence_emit(fence);

        if (ret)
                nouveau_fence_unref(&fence);
        *pfence = fence;
        return ret;
}

struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
        return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}

int
nouveau_fence_emit(struct nouveau_fence *fence)
{
        struct nouveau_channel *chan = fence->channel;
        struct drm_device *dev = chan->dev;
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        int ret;

        ret = RING_SPACE(chan, 2);
        if (ret)
                return ret;

        if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
                nouveau_fence_update(chan);

                BUG_ON(chan->fence.sequence ==
                       chan->fence.sequence_ack - 1);
        }

        fence->sequence = ++chan->fence.sequence;

        kref_get(&fence->refcount);
        spin_lock(&chan->fence.lock);
        list_add_tail(&fence->entry, &chan->fence.pending);
        spin_unlock(&chan->fence.lock);

        if (USE_REFCNT(dev)) {
                if (dev_priv->card_type < NV_C0)
                        BEGIN_RING(chan, NvSubSw, 0x0050, 1);
                else
                        BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0050, 1);
        } else {
                BEGIN_RING(chan, NvSubSw, 0x0150, 1);
        }
        OUT_RING (chan, fence->sequence);
        FIRE_RING(chan);

        return 0;
}

void
nouveau_fence_work(struct nouveau_fence *fence,
                   void (*work)(void *priv, bool signalled),
                   void *priv)
{
        BUG_ON(fence->work);

        spin_lock(&fence->channel->fence.lock);

        if (fence->signalled) {
                work(priv, true);
        } else {
                fence->work = work;
                fence->priv = priv;
        }

        spin_unlock(&fence->channel->fence.lock);
}

void
__nouveau_fence_unref(void **sync_obj)
{
        struct nouveau_fence *fence = nouveau_fence(*sync_obj);

        if (fence)
                kref_put(&fence->refcount, nouveau_fence_del);
        *sync_obj = NULL;
}

void *
__nouveau_fence_ref(void *sync_obj)
{
        struct nouveau_fence *fence = nouveau_fence(sync_obj);

        kref_get(&fence->refcount);
        return sync_obj;
}

bool
__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
        struct nouveau_fence *fence = nouveau_fence(sync_obj);
        struct nouveau_channel *chan = fence->channel;

        if (fence->signalled)
                return true;

        nouveau_fence_update(chan);
        return fence->signalled;
}

int
__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
        unsigned long timeout = jiffies + (3 * DRM_HZ);
        unsigned long sleep_time = jiffies + 1;
        int ret = 0;

        while (1) {
                if (__nouveau_fence_signalled(sync_obj, sync_arg))
                        break;

                if (time_after_eq(jiffies, timeout)) {
                        ret = -EBUSY;
                        break;
                }

                __set_current_state(intr ? TASK_INTERRUPTIBLE
                                         : TASK_UNINTERRUPTIBLE);
                if (lazy && time_after_eq(jiffies, sleep_time))
                        schedule_timeout(1);

                if (intr && signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
        }

        __set_current_state(TASK_RUNNING);

        return ret;
}
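
/*
 * Illustrative sketch of how the fence API above is typically driven
 * (error handling trimmed; nouveau_fence_wait() and nouveau_fence_unref()
 * are the same wrappers this file itself calls in nouveau_fence_sync()
 * below):
 *
 *      struct nouveau_fence *fence;
 *
 *      if (nouveau_fence_new(chan, &fence, true) == 0) {
 *              nouveau_fence_wait(fence, true, false);
 *              nouveau_fence_unref(&fence);
 *      }
 */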

static struct nouveau_semaphore *
alloc_semaphore(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_semaphore *sema;
        int ret;

        if (!USE_SEMA(dev))
                return NULL;

        sema = kmalloc(sizeof(*sema), GFP_KERNEL);
        if (!sema)
                goto fail;

        ret = drm_mm_pre_get(&dev_priv->fence.heap);
        if (ret)
                goto fail;

        spin_lock(&dev_priv->fence.lock);
        sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
        if (sema->mem)
                sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
        spin_unlock(&dev_priv->fence.lock);

        if (!sema->mem)
                goto fail;

        kref_init(&sema->ref);
        sema->dev = dev;
        nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);

        return sema;
fail:
        kfree(sema);
        return NULL;
}

static void
free_semaphore(struct kref *ref)
{
        struct nouveau_semaphore *sema =
                container_of(ref, struct nouveau_semaphore, ref);
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

        spin_lock(&dev_priv->fence.lock);
        drm_mm_put_block(sema->mem);
        spin_unlock(&dev_priv->fence.lock);

        kfree(sema);
}

static void
semaphore_work(void *priv, bool signalled)
{
        struct nouveau_semaphore *sema = priv;
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

        if (unlikely(!signalled))
                nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);

        kref_put(&sema->ref, free_semaphore);
}
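
/*
 * Emit a semaphore ACQUIRE or RELEASE on @chan for the 32-bit word @sema
 * owns inside the shared fence buffer object.  A fence is emitted behind
 * the semaphore methods so the semaphore's storage is only freed (via
 * semaphore_work()) once the channel has actually processed them; if the
 * channel dies first, semaphore_work() releases any waiters by writing 1
 * to the semaphore word.
 */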
static int
emit_semaphore(struct nouveau_channel *chan, int method,
               struct nouveau_semaphore *sema)
{
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
        struct nouveau_fence *fence;
        bool smart = (dev_priv->card_type >= NV_50);
        int ret;

        ret = RING_SPACE(chan, smart ? 8 : 4);
        if (ret)
                return ret;

        if (smart) {
                BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
                OUT_RING(chan, NvSema);
        }
        BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
        OUT_RING(chan, sema->mem->start);

        if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
                /*
                 * NV50 tries to be too smart and context-switch
                 * between semaphores instead of doing a "first come,
                 * first served" strategy like previous cards
                 * do.
                 *
                 * That's bad because the ACQUIRE latency can get as
                 * large as the PFIFO context time slice in the
                 * typical DRI2 case where you have several
                 * outstanding semaphores at the same moment.
                 *
                 * If we're going to ACQUIRE, force the card to
                 * context switch before, just in case the matching
                 * RELEASE is already scheduled to be executed in
                 * another channel.
                 */
                BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
                OUT_RING(chan, 0);
        }

        BEGIN_RING(chan, NvSubSw, method, 1);
        OUT_RING(chan, 1);

        if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
                /*
                 * Force the card to context switch, there may be
                 * another channel waiting for the semaphore we just
                 * released.
                 */
                BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
                OUT_RING(chan, 0);
        }

        /* Delay semaphore destruction until its work is done */
        ret = nouveau_fence_new(chan, &fence, true);
        if (ret)
                return ret;

        kref_get(&sema->ref);
        nouveau_fence_work(fence, semaphore_work, sema);
        nouveau_fence_unref(&fence);

        return 0;
}

int
nouveau_fence_sync(struct nouveau_fence *fence,
                   struct nouveau_channel *wchan)
{
        struct nouveau_channel *chan = nouveau_fence_channel(fence);
        struct drm_device *dev = wchan->dev;
        struct nouveau_semaphore *sema;
        int ret = 0;

        if (likely(!chan || chan == wchan ||
                   nouveau_fence_signalled(fence)))
                goto out;

        sema = alloc_semaphore(dev);
        if (!sema) {
                /* Early card or broken userspace, fall back to
                 * software sync. */
                ret = nouveau_fence_wait(fence, true, false);
                goto out;
        }

        /* try to take chan's mutex, if we can't take it right away
         * we have to fallback to software sync to prevent locking
         * order issues
         */
        if (!mutex_trylock(&chan->mutex)) {
                ret = nouveau_fence_wait(fence, true, false);
                goto out_unref;
        }

        /* Make wchan wait until it gets signalled */
        ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
        if (ret)
                goto out_unlock;

        /* Signal the semaphore from chan */
        ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);

out_unlock:
        mutex_unlock(&chan->mutex);
out_unref:
        kref_put(&sema->ref, free_semaphore);
out:
        if (chan)
                nouveau_channel_put_unlocked(&chan);
        return ret;
}

int
__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
        return 0;
}
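
/*
 * Per-channel fence setup: bind an NV_SW object for the software methods
 * used above and, on chipsets where USE_SEMA() is true, an NvSema DMA
 * object covering the shared sync buffer allocated in nouveau_fence_init().
 */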
int
nouveau_fence_channel_init(struct nouveau_channel *chan)
{
        struct drm_device *dev = chan->dev;
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_gpuobj *obj = NULL;
        int ret;

        /* Create an NV_SW object for various sync purposes */
        ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
        if (ret)
                return ret;

        /* we leave subchannel empty for nvc0 */
        if (dev_priv->card_type < NV_C0) {
                ret = RING_SPACE(chan, 2);
                if (ret)
                        return ret;
                BEGIN_RING(chan, NvSubSw, 0, 1);
                OUT_RING(chan, NvSw);
        }

        /* Create a DMA object for the shared cross-channel sync area. */
        if (USE_SEMA(dev)) {
                struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;

                ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
                                             mem->start << PAGE_SHIFT,
                                             mem->size, NV_MEM_ACCESS_RW,
                                             NV_MEM_TARGET_VRAM, &obj);
                if (ret)
                        return ret;

                ret = nouveau_ramht_insert(chan, NvSema, obj);
                nouveau_gpuobj_ref(NULL, &obj);
                if (ret)
                        return ret;

                ret = RING_SPACE(chan, 2);
                if (ret)
                        return ret;
                BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
                OUT_RING(chan, NvSema);
        }

        FIRE_RING(chan);

        INIT_LIST_HEAD(&chan->fence.pending);
        spin_lock_init(&chan->fence.lock);
        atomic_set(&chan->fence.last_sequence_irq, 0);

        return 0;
}

void
nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
        struct nouveau_fence *tmp, *fence;

        spin_lock(&chan->fence.lock);

        list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
                fence->signalled = true;
                list_del(&fence->entry);

                if (unlikely(fence->work))
                        fence->work(fence->priv, false);

                kref_put(&fence->refcount, nouveau_fence_del);
        }

        spin_unlock(&chan->fence.lock);
}

int
nouveau_fence_init(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        int ret;

        /* Create a shared VRAM heap for cross-channel sync. */
        if (USE_SEMA(dev)) {
                ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
                                     0, 0, false, true, &dev_priv->fence.bo);
                if (ret)
                        return ret;

                ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
                if (ret)
                        goto fail;

                ret = nouveau_bo_map(dev_priv->fence.bo);
                if (ret)
                        goto fail;

                ret = drm_mm_init(&dev_priv->fence.heap, 0,
                                  dev_priv->fence.bo->bo.mem.size);
                if (ret)
                        goto fail;

                spin_lock_init(&dev_priv->fence.lock);
        }

        return 0;
fail:
        nouveau_bo_unmap(dev_priv->fence.bo);
        nouveau_bo_ref(NULL, &dev_priv->fence.bo);
        return ret;
}

void
nouveau_fence_fini(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;

        if (USE_SEMA(dev)) {
                drm_mm_takedown(&dev_priv->fence.heap);
                nouveau_bo_unmap(dev_priv->fence.bo);
                nouveau_bo_unpin(dev_priv->fence.bo);
                nouveau_bo_ref(NULL, &dev_priv->fence.bo);
        }
}