1 /* 2 * Copyright (C) 2007 Ben Skeggs. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 */ 26 27 #include "drmP.h" 28 #include "drm.h" 29 30 #include "nouveau_drv.h" 31 #include "nouveau_ramht.h" 32 #include "nouveau_dma.h" 33 34 #define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10) 35 #define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17) 36 37 struct nouveau_fence { 38 struct nouveau_channel *channel; 39 struct kref refcount; 40 struct list_head entry; 41 42 uint32_t sequence; 43 bool signalled; 44 45 void (*work)(void *priv, bool signalled); 46 void *priv; 47 }; 48 49 struct nouveau_semaphore { 50 struct kref ref; 51 struct drm_device *dev; 52 struct drm_mm_node *mem; 53 }; 54 55 static inline struct nouveau_fence * 56 nouveau_fence(void *sync_obj) 57 { 58 return (struct nouveau_fence *)sync_obj; 59 } 60 61 static void 62 nouveau_fence_del(struct kref *ref) 63 { 64 struct nouveau_fence *fence = 65 container_of(ref, struct nouveau_fence, refcount); 66 67 kfree(fence); 68 } 69 70 void 71 nouveau_fence_update(struct nouveau_channel *chan) 72 { 73 struct drm_device *dev = chan->dev; 74 struct nouveau_fence *tmp, *fence; 75 uint32_t sequence; 76 77 spin_lock(&chan->fence.lock); 78 79 if (USE_REFCNT(dev)) 80 sequence = nvchan_rd32(chan, 0x48); 81 else 82 sequence = atomic_read(&chan->fence.last_sequence_irq); 83 84 if (chan->fence.sequence_ack == sequence) 85 goto out; 86 chan->fence.sequence_ack = sequence; 87 88 list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) { 89 sequence = fence->sequence; 90 fence->signalled = true; 91 list_del(&fence->entry); 92 93 if (unlikely(fence->work)) 94 fence->work(fence->priv, true); 95 96 kref_put(&fence->refcount, nouveau_fence_del); 97 98 if (sequence == chan->fence.sequence_ack) 99 break; 100 } 101 out: 102 spin_unlock(&chan->fence.lock); 103 } 104 105 int 106 nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence, 107 bool emit) 108 { 109 struct nouveau_fence *fence; 110 int ret = 0; 111 112 fence = kzalloc(sizeof(*fence), GFP_KERNEL); 113 if (!fence) 114 return -ENOMEM; 115 kref_init(&fence->refcount); 116 fence->channel = chan; 117 118 if (emit) 119 ret = nouveau_fence_emit(fence); 120 121 if (ret) 122 nouveau_fence_unref((void *)&fence); 123 *pfence = fence; 124 return ret; 125 } 126 127 struct nouveau_channel * 128 nouveau_fence_channel(struct nouveau_fence *fence) 129 { 130 return fence ? fence->channel : NULL; 131 } 132 133 int 134 nouveau_fence_emit(struct nouveau_fence *fence) 135 { 136 struct nouveau_channel *chan = fence->channel; 137 struct drm_device *dev = chan->dev; 138 int ret; 139 140 ret = RING_SPACE(chan, 2); 141 if (ret) 142 return ret; 143 144 if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) { 145 nouveau_fence_update(chan); 146 147 BUG_ON(chan->fence.sequence == 148 chan->fence.sequence_ack - 1); 149 } 150 151 fence->sequence = ++chan->fence.sequence; 152 153 kref_get(&fence->refcount); 154 spin_lock(&chan->fence.lock); 155 list_add_tail(&fence->entry, &chan->fence.pending); 156 spin_unlock(&chan->fence.lock); 157 158 BEGIN_RING(chan, NvSubSw, USE_REFCNT(dev) ? 0x0050 : 0x0150, 1); 159 OUT_RING(chan, fence->sequence); 160 FIRE_RING(chan); 161 162 return 0; 163 } 164 165 void 166 nouveau_fence_work(struct nouveau_fence *fence, 167 void (*work)(void *priv, bool signalled), 168 void *priv) 169 { 170 BUG_ON(fence->work); 171 172 spin_lock(&fence->channel->fence.lock); 173 174 if (fence->signalled) { 175 work(priv, true); 176 } else { 177 fence->work = work; 178 fence->priv = priv; 179 } 180 181 spin_unlock(&fence->channel->fence.lock); 182 } 183 184 void 185 nouveau_fence_unref(void **sync_obj) 186 { 187 struct nouveau_fence *fence = nouveau_fence(*sync_obj); 188 189 if (fence) 190 kref_put(&fence->refcount, nouveau_fence_del); 191 *sync_obj = NULL; 192 } 193 194 void * 195 nouveau_fence_ref(void *sync_obj) 196 { 197 struct nouveau_fence *fence = nouveau_fence(sync_obj); 198 199 kref_get(&fence->refcount); 200 return sync_obj; 201 } 202 203 bool 204 nouveau_fence_signalled(void *sync_obj, void *sync_arg) 205 { 206 struct nouveau_fence *fence = nouveau_fence(sync_obj); 207 struct nouveau_channel *chan = fence->channel; 208 209 if (fence->signalled) 210 return true; 211 212 nouveau_fence_update(chan); 213 return fence->signalled; 214 } 215 216 int 217 nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr) 218 { 219 unsigned long timeout = jiffies + (3 * DRM_HZ); 220 int ret = 0; 221 222 while (1) { 223 if (nouveau_fence_signalled(sync_obj, sync_arg)) 224 break; 225 226 if (time_after_eq(jiffies, timeout)) { 227 ret = -EBUSY; 228 break; 229 } 230 231 __set_current_state(intr ? TASK_INTERRUPTIBLE 232 : TASK_UNINTERRUPTIBLE); 233 if (lazy) 234 schedule_timeout(1); 235 236 if (intr && signal_pending(current)) { 237 ret = -ERESTARTSYS; 238 break; 239 } 240 } 241 242 __set_current_state(TASK_RUNNING); 243 244 return ret; 245 } 246 247 static struct nouveau_semaphore * 248 alloc_semaphore(struct drm_device *dev) 249 { 250 struct drm_nouveau_private *dev_priv = dev->dev_private; 251 struct nouveau_semaphore *sema; 252 253 if (!USE_SEMA(dev)) 254 return NULL; 255 256 sema = kmalloc(sizeof(*sema), GFP_KERNEL); 257 if (!sema) 258 goto fail; 259 260 spin_lock(&dev_priv->fence.lock); 261 sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0); 262 if (sema->mem) 263 sema->mem = drm_mm_get_block(sema->mem, 4, 0); 264 spin_unlock(&dev_priv->fence.lock); 265 266 if (!sema->mem) 267 goto fail; 268 269 kref_init(&sema->ref); 270 sema->dev = dev; 271 nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0); 272 273 return sema; 274 fail: 275 kfree(sema); 276 return NULL; 277 } 278 279 static void 280 free_semaphore(struct kref *ref) 281 { 282 struct nouveau_semaphore *sema = 283 container_of(ref, struct nouveau_semaphore, ref); 284 struct drm_nouveau_private *dev_priv = sema->dev->dev_private; 285 286 spin_lock(&dev_priv->fence.lock); 287 drm_mm_put_block(sema->mem); 288 spin_unlock(&dev_priv->fence.lock); 289 290 kfree(sema); 291 } 292 293 static void 294 semaphore_work(void *priv, bool signalled) 295 { 296 struct nouveau_semaphore *sema = priv; 297 struct drm_nouveau_private *dev_priv = sema->dev->dev_private; 298 299 if (unlikely(!signalled)) 300 nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1); 301 302 kref_put(&sema->ref, free_semaphore); 303 } 304 305 static int 306 emit_semaphore(struct nouveau_channel *chan, int method, 307 struct nouveau_semaphore *sema) 308 { 309 struct drm_nouveau_private *dev_priv = sema->dev->dev_private; 310 struct nouveau_fence *fence; 311 bool smart = (dev_priv->card_type >= NV_50); 312 int ret; 313 314 ret = RING_SPACE(chan, smart ? 8 : 4); 315 if (ret) 316 return ret; 317 318 if (smart) { 319 BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); 320 OUT_RING(chan, NvSema); 321 } 322 BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); 323 OUT_RING(chan, sema->mem->start); 324 325 if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) { 326 /* 327 * NV50 tries to be too smart and context-switch 328 * between semaphores instead of doing a "first come, 329 * first served" strategy like previous cards 330 * do. 331 * 332 * That's bad because the ACQUIRE latency can get as 333 * large as the PFIFO context time slice in the 334 * typical DRI2 case where you have several 335 * outstanding semaphores at the same moment. 336 * 337 * If we're going to ACQUIRE, force the card to 338 * context switch before, just in case the matching 339 * RELEASE is already scheduled to be executed in 340 * another channel. 341 */ 342 BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); 343 OUT_RING(chan, 0); 344 } 345 346 BEGIN_RING(chan, NvSubSw, method, 1); 347 OUT_RING(chan, 1); 348 349 if (smart && method == NV_SW_SEMAPHORE_RELEASE) { 350 /* 351 * Force the card to context switch, there may be 352 * another channel waiting for the semaphore we just 353 * released. 354 */ 355 BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); 356 OUT_RING(chan, 0); 357 } 358 359 /* Delay semaphore destruction until its work is done */ 360 ret = nouveau_fence_new(chan, &fence, true); 361 if (ret) 362 return ret; 363 364 kref_get(&sema->ref); 365 nouveau_fence_work(fence, semaphore_work, sema); 366 nouveau_fence_unref((void *)&fence); 367 368 return 0; 369 } 370 371 int 372 nouveau_fence_sync(struct nouveau_fence *fence, 373 struct nouveau_channel *wchan) 374 { 375 struct nouveau_channel *chan = nouveau_fence_channel(fence); 376 struct drm_device *dev = wchan->dev; 377 struct nouveau_semaphore *sema; 378 int ret; 379 380 if (likely(!fence || chan == wchan || 381 nouveau_fence_signalled(fence, NULL))) 382 return 0; 383 384 sema = alloc_semaphore(dev); 385 if (!sema) { 386 /* Early card or broken userspace, fall back to 387 * software sync. */ 388 return nouveau_fence_wait(fence, NULL, false, false); 389 } 390 391 /* Make wchan wait until it gets signalled */ 392 ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); 393 if (ret) 394 goto out; 395 396 /* Signal the semaphore from chan */ 397 ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); 398 out: 399 kref_put(&sema->ref, free_semaphore); 400 return ret; 401 } 402 403 int 404 nouveau_fence_flush(void *sync_obj, void *sync_arg) 405 { 406 return 0; 407 } 408 409 int 410 nouveau_fence_channel_init(struct nouveau_channel *chan) 411 { 412 struct drm_device *dev = chan->dev; 413 struct drm_nouveau_private *dev_priv = dev->dev_private; 414 struct nouveau_gpuobj *obj = NULL; 415 int ret; 416 417 /* Create an NV_SW object for various sync purposes */ 418 ret = nouveau_gpuobj_sw_new(chan, NV_SW, &obj); 419 if (ret) 420 return ret; 421 422 ret = nouveau_ramht_insert(chan, NvSw, obj); 423 nouveau_gpuobj_ref(NULL, &obj); 424 if (ret) 425 return ret; 426 427 ret = RING_SPACE(chan, 2); 428 if (ret) 429 return ret; 430 BEGIN_RING(chan, NvSubSw, 0, 1); 431 OUT_RING(chan, NvSw); 432 433 /* Create a DMA object for the shared cross-channel sync area. */ 434 if (USE_SEMA(dev)) { 435 struct drm_mm_node *mem = dev_priv->fence.bo->bo.mem.mm_node; 436 437 ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 438 mem->start << PAGE_SHIFT, 439 mem->size << PAGE_SHIFT, 440 NV_DMA_ACCESS_RW, 441 NV_DMA_TARGET_VIDMEM, &obj); 442 if (ret) 443 return ret; 444 445 ret = nouveau_ramht_insert(chan, NvSema, obj); 446 nouveau_gpuobj_ref(NULL, &obj); 447 if (ret) 448 return ret; 449 450 ret = RING_SPACE(chan, 2); 451 if (ret) 452 return ret; 453 BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); 454 OUT_RING(chan, NvSema); 455 } 456 457 FIRE_RING(chan); 458 459 INIT_LIST_HEAD(&chan->fence.pending); 460 spin_lock_init(&chan->fence.lock); 461 atomic_set(&chan->fence.last_sequence_irq, 0); 462 463 return 0; 464 } 465 466 void 467 nouveau_fence_channel_fini(struct nouveau_channel *chan) 468 { 469 struct nouveau_fence *tmp, *fence; 470 471 list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) { 472 fence->signalled = true; 473 list_del(&fence->entry); 474 475 if (unlikely(fence->work)) 476 fence->work(fence->priv, false); 477 478 kref_put(&fence->refcount, nouveau_fence_del); 479 } 480 } 481 482 int 483 nouveau_fence_init(struct drm_device *dev) 484 { 485 struct drm_nouveau_private *dev_priv = dev->dev_private; 486 int ret; 487 488 /* Create a shared VRAM heap for cross-channel sync. */ 489 if (USE_SEMA(dev)) { 490 ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM, 491 0, 0, false, true, &dev_priv->fence.bo); 492 if (ret) 493 return ret; 494 495 ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM); 496 if (ret) 497 goto fail; 498 499 ret = nouveau_bo_map(dev_priv->fence.bo); 500 if (ret) 501 goto fail; 502 503 ret = drm_mm_init(&dev_priv->fence.heap, 0, 504 dev_priv->fence.bo->bo.mem.size); 505 if (ret) 506 goto fail; 507 508 spin_lock_init(&dev_priv->fence.lock); 509 } 510 511 return 0; 512 fail: 513 nouveau_bo_unmap(dev_priv->fence.bo); 514 nouveau_bo_ref(NULL, &dev_priv->fence.bo); 515 return ret; 516 } 517 518 void 519 nouveau_fence_fini(struct drm_device *dev) 520 { 521 struct drm_nouveau_private *dev_priv = dev->dev_private; 522 523 if (USE_SEMA(dev)) { 524 drm_mm_takedown(&dev_priv->fence.heap); 525 nouveau_bo_unmap(dev_priv->fence.bo); 526 nouveau_bo_unpin(dev_priv->fence.bo); 527 nouveau_bo_ref(NULL, &dev_priv->fence.bo); 528 } 529 } 530