/*
 * Copyright (C) 2007 Ben Skeggs.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "drmP.h"
#include "drm.h"

#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"

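/* USE_REFCNT: NV10 and later channels expose a reference counter
 * (read back at offset 0x48) that reflects the last fence sequence
 * the GPU has processed; earlier chips rely on a software sequence
 * updated from the IRQ handler (fence.last_sequence_irq).
 *
 * USE_SEMA: NV17 up to (but not including) NVC0 can synchronise
 * channels through a DMA semaphore in a shared VRAM buffer.
 */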
#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17 && \
		       nouveau_private(dev)->card_type < NV_C0)

struct nouveau_fence {
	struct nouveau_channel *channel;
	struct kref refcount;
	struct list_head entry;

	uint32_t sequence;
	bool signalled;

	void (*work)(void *priv, bool signalled);
	void *priv;
};

struct nouveau_semaphore {
	struct kref ref;
	struct drm_device *dev;
	struct drm_mm_node *mem;
};

static inline struct nouveau_fence *
nouveau_fence(void *sync_obj)
{
	return (struct nouveau_fence *)sync_obj;
}

static void
nouveau_fence_del(struct kref *ref)
{
	struct nouveau_fence *fence =
		container_of(ref, struct nouveau_fence, refcount);

	nouveau_channel_ref(NULL, &fence->channel);
	kfree(fence);
}

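/* Walk the channel's pending list and retire every fence whose sequence
 * number has been reached: mark it signalled, run its work callback if
 * one is attached, and drop the list's reference.
 */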
void
nouveau_fence_update(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct nouveau_fence *tmp, *fence;
	uint32_t sequence;

	spin_lock(&chan->fence.lock);

	/* Fetch the last sequence if the channel is still up and running */
	if (likely(!list_empty(&chan->fence.pending))) {
		if (USE_REFCNT(dev))
			sequence = nvchan_rd32(chan, 0x48);
		else
			sequence = atomic_read(&chan->fence.last_sequence_irq);

		if (chan->fence.sequence_ack == sequence)
			goto out;
		chan->fence.sequence_ack = sequence;
	}

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		sequence = fence->sequence;
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, true);

		kref_put(&fence->refcount, nouveau_fence_del);

		if (sequence == chan->fence.sequence_ack)
			break;
	}
out:
	spin_unlock(&chan->fence.lock);
}

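/* Allocate a fence on "chan" and, if "emit" is set, emit it straight
 * away.  On failure the fence is dropped and *pfence is set to NULL.
 */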
int
nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
		  bool emit)
{
	struct nouveau_fence *fence;
	int ret = 0;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	kref_init(&fence->refcount);
	nouveau_channel_ref(chan, &fence->channel);

	if (emit)
		ret = nouveau_fence_emit(fence);

	if (ret)
		nouveau_fence_unref(&fence);
	*pfence = fence;
	return ret;
}

struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
	return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}

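/* Assign the fence the channel's next sequence number, add it to the
 * pending list and emit the method that makes that sequence visible to
 * nouveau_fence_update() once the GPU has caught up.
 */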
int
nouveau_fence_emit(struct nouveau_fence *fence)
{
	struct nouveau_channel *chan = fence->channel;
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	ret = RING_SPACE(chan, 2);
	if (ret)
		return ret;

	if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
		nouveau_fence_update(chan);

		BUG_ON(chan->fence.sequence ==
		       chan->fence.sequence_ack - 1);
	}

	fence->sequence = ++chan->fence.sequence;

	kref_get(&fence->refcount);
	spin_lock(&chan->fence.lock);
	list_add_tail(&fence->entry, &chan->fence.pending);
	spin_unlock(&chan->fence.lock);

	if (USE_REFCNT(dev)) {
		if (dev_priv->card_type < NV_C0)
			BEGIN_RING(chan, NvSubSw, 0x0050, 1);
		else
			BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0050, 1);
	} else {
		BEGIN_RING(chan, NvSubSw, 0x0150, 1);
	}
	OUT_RING (chan, fence->sequence);
	FIRE_RING(chan);

	return 0;
}

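/* Attach a callback to be run once when the fence completes.  If the
 * fence has already signalled the callback runs immediately; otherwise
 * it is called from nouveau_fence_update() (signalled == true) or from
 * channel teardown (signalled == false).  Only one callback per fence.
 */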
void
nouveau_fence_work(struct nouveau_fence *fence,
		   void (*work)(void *priv, bool signalled),
		   void *priv)
{
	BUG_ON(fence->work);

	spin_lock(&fence->channel->fence.lock);

	if (fence->signalled) {
		work(priv, true);
	} else {
		fence->work = work;
		fence->priv = priv;
	}

	spin_unlock(&fence->channel->fence.lock);
}

void
__nouveau_fence_unref(void **sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(*sync_obj);

	if (fence)
		kref_put(&fence->refcount, nouveau_fence_del);
	*sync_obj = NULL;
}

void *
__nouveau_fence_ref(void *sync_obj)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);

	kref_get(&fence->refcount);
	return sync_obj;
}

bool
__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
	struct nouveau_fence *fence = nouveau_fence(sync_obj);
	struct nouveau_channel *chan = fence->channel;

	if (fence->signalled)
		return true;

	nouveau_fence_update(chan);
	return fence->signalled;
}

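/* Poll a fence until it signals, for up to three seconds.  With "lazy"
 * the caller sleeps a jiffy between polls instead of busy-waiting; with
 * "intr" a pending signal aborts the wait with -ERESTARTSYS.  Returns
 * -EBUSY on timeout.
 */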
int
__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
	unsigned long timeout = jiffies + (3 * DRM_HZ);
	unsigned long sleep_time = jiffies + 1;
	int ret = 0;

	while (1) {
		if (__nouveau_fence_signalled(sync_obj, sync_arg))
			break;

		if (time_after_eq(jiffies, timeout)) {
			ret = -EBUSY;
			break;
		}

		__set_current_state(intr ? TASK_INTERRUPTIBLE
			: TASK_UNINTERRUPTIBLE);
		if (lazy && time_after_eq(jiffies, sleep_time))
			schedule_timeout(1);

		if (intr && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
	}

	__set_current_state(TASK_RUNNING);

	return ret;
}

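/* Carve a 4-byte semaphore slot out of the shared VRAM heap and
 * initialise it to zero.  Returns NULL on chipsets without semaphore
 * support or when the heap is exhausted.
 */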
static struct nouveau_semaphore *
alloc_semaphore(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_semaphore *sema;
	int ret;

	if (!USE_SEMA(dev))
		return NULL;

	sema = kmalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		goto fail;

	ret = drm_mm_pre_get(&dev_priv->fence.heap);
	if (ret)
		goto fail;

	spin_lock(&dev_priv->fence.lock);
	sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
	if (sema->mem)
		sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
	spin_unlock(&dev_priv->fence.lock);

	if (!sema->mem)
		goto fail;

	kref_init(&sema->ref);
	sema->dev = dev;
	nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);

	return sema;
fail:
	kfree(sema);
	return NULL;
}

static void
free_semaphore(struct kref *ref)
{
	struct nouveau_semaphore *sema =
		container_of(ref, struct nouveau_semaphore, ref);
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	spin_lock(&dev_priv->fence.lock);
	drm_mm_put_block(sema->mem);
	spin_unlock(&dev_priv->fence.lock);

	kfree(sema);
}

static void
semaphore_work(void *priv, bool signalled)
{
	struct nouveau_semaphore *sema = priv;
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

	if (unlikely(!signalled))
		nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);

	kref_put(&sema->ref, free_semaphore);
}

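/* Emit a semaphore ACQUIRE or RELEASE on "chan", followed by a fence
 * whose work callback drops our semaphore reference (and, if the channel
 * went down before signalling, releases the semaphore from the CPU so
 * the acquiring channel isn't left stuck).
 */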
static int
emit_semaphore(struct nouveau_channel *chan, int method,
	       struct nouveau_semaphore *sema)
{
	struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
	struct nouveau_fence *fence;
	bool smart = (dev_priv->card_type >= NV_50);
	int ret;

	ret = RING_SPACE(chan, smart ? 8 : 4);
	if (ret)
		return ret;

	if (smart) {
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}
	BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
	OUT_RING(chan, sema->mem->start);

	if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
		/*
		 * NV50 tries to be too smart and context-switches
		 * between semaphores instead of using the "first
		 * come, first served" strategy of previous cards.
		 *
		 * That's bad because the ACQUIRE latency can get as
		 * large as the PFIFO context time slice in the
		 * typical DRI2 case, where several semaphores are
		 * outstanding at the same moment.
		 *
		 * If we're going to ACQUIRE, force the card to
		 * context-switch first, in case the matching RELEASE
		 * is already scheduled to be executed in another
		 * channel.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	BEGIN_RING(chan, NvSubSw, method, 1);
	OUT_RING(chan, 1);

	if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
		/*
		 * Force the card to context-switch; there may be
		 * another channel waiting for the semaphore we just
		 * released.
		 */
		BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
		OUT_RING(chan, 0);
	}

	/* Delay semaphore destruction until its work is done */
	ret = nouveau_fence_new(chan, &fence, true);
	if (ret)
		return ret;

	kref_get(&sema->ref);
	nouveau_fence_work(fence, semaphore_work, sema);
	nouveau_fence_unref(&fence);

	return 0;
}

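/* Make "wchan" wait for a fence emitted on another channel by having
 * wchan ACQUIRE a hardware semaphore that the fence's channel RELEASEs.
 * If semaphores aren't available, or the other channel's mutex can't be
 * taken without risking lock-order problems, fall back to a CPU wait.
 */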
int
nouveau_fence_sync(struct nouveau_fence *fence,
		   struct nouveau_channel *wchan)
{
	struct nouveau_channel *chan = nouveau_fence_channel(fence);
	struct drm_device *dev = wchan->dev;
	struct nouveau_semaphore *sema;
	int ret = 0;

	if (likely(!chan || chan == wchan ||
		   nouveau_fence_signalled(fence)))
		goto out;

	sema = alloc_semaphore(dev);
	if (!sema) {
		/* Early card or broken userspace, fall back to
		 * software sync. */
		ret = nouveau_fence_wait(fence, true, false);
		goto out;
	}

	/* Try to take chan's mutex; if we can't get it right away,
	 * fall back to software sync to avoid locking-order issues.
	 */
	if (!mutex_trylock(&chan->mutex)) {
		ret = nouveau_fence_wait(fence, true, false);
		goto out_unref;
	}

	/* Make wchan wait until it gets signalled */
	ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
	if (ret)
		goto out_unlock;

	/* Signal the semaphore from chan */
	ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);

out_unlock:
	mutex_unlock(&chan->mutex);
out_unref:
	kref_put(&sema->ref, free_semaphore);
out:
	if (chan)
		nouveau_channel_put_unlocked(&chan);
	return ret;
}

int
__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
	return 0;
}

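/* Per-channel fence setup: create the NV_SW object used for the software
 * fence/semaphore methods, bind it to its subchannel (except on NVC0),
 * and, where semaphores are supported, expose the shared sync buffer to
 * the channel through the NvSema DMA object.
 */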
int
nouveau_fence_channel_init(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *obj = NULL;
	int ret;

	/* Create an NV_SW object for various sync purposes */
	ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
	if (ret)
		return ret;

	/* we leave subchannel empty for nvc0 */
	if (dev_priv->card_type < NV_C0) {
		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, 0, 1);
		OUT_RING(chan, NvSw);
	}

	/* Create a DMA object for the shared cross-channel sync area. */
	if (USE_SEMA(dev)) {
		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;

		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
					     mem->start << PAGE_SHIFT,
					     mem->size, NV_MEM_ACCESS_RW,
					     NV_MEM_TARGET_VRAM, &obj);
		if (ret)
			return ret;

		ret = nouveau_ramht_insert(chan, NvSema, obj);
		nouveau_gpuobj_ref(NULL, &obj);
		if (ret)
			return ret;

		ret = RING_SPACE(chan, 2);
		if (ret)
			return ret;
		BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
		OUT_RING(chan, NvSema);
	}

	FIRE_RING(chan);

	INIT_LIST_HEAD(&chan->fence.pending);
	spin_lock_init(&chan->fence.lock);
	atomic_set(&chan->fence.last_sequence_irq, 0);

	return 0;
}

void
nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
	struct nouveau_fence *tmp, *fence;

	spin_lock(&chan->fence.lock);

	list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
		fence->signalled = true;
		list_del(&fence->entry);

		if (unlikely(fence->work))
			fence->work(fence->priv, false);

		kref_put(&fence->refcount, nouveau_fence_del);
	}

	spin_unlock(&chan->fence.lock);
}

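/* Device-wide setup: on chipsets with semaphore support, allocate, pin
 * and map a small VRAM buffer and run a drm_mm heap on top of it; each
 * semaphore later takes a 4-byte slot from this heap.
 */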
int
nouveau_fence_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	int ret;

	/* Create a shared VRAM heap for cross-channel sync. */
	if (USE_SEMA(dev)) {
		ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
				     0, 0, false, true, &dev_priv->fence.bo);
		if (ret)
			return ret;

		ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
		if (ret)
			goto fail;

		ret = nouveau_bo_map(dev_priv->fence.bo);
		if (ret)
			goto fail;

		ret = drm_mm_init(&dev_priv->fence.heap, 0,
				  dev_priv->fence.bo->bo.mem.size);
		if (ret)
			goto fail;

		spin_lock_init(&dev_priv->fence.lock);
	}

	return 0;
fail:
	nouveau_bo_unmap(dev_priv->fence.bo);
	nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	return ret;
}

void
nouveau_fence_fini(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	if (USE_SEMA(dev)) {
		drm_mm_takedown(&dev_priv->fence.heap);
		nouveau_bo_unmap(dev_priv->fence.bo);
		nouveau_bo_unpin(dev_priv->fence.bo);
		nouveau_bo_ref(NULL, &dev_priv->fence.bo);
	}
}