xref: /openbmc/linux/drivers/gpu/host1x/cdma.c (revision f79e4d5f92a129a1159c973735007d4ddc8541f3)
1 /*
2  * Tegra host1x Command DMA
3  *
4  * Copyright (c) 2010-2013, NVIDIA Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 
20 #include <asm/cacheflush.h>
21 #include <linux/device.h>
22 #include <linux/dma-mapping.h>
23 #include <linux/host1x.h>
24 #include <linux/interrupt.h>
25 #include <linux/kernel.h>
26 #include <linux/kfifo.h>
27 #include <linux/slab.h>
28 #include <trace/events/host1x.h>
29 
30 #include "cdma.h"
31 #include "channel.h"
32 #include "dev.h"
33 #include "debug.h"
34 #include "job.h"
35 
/*
 * push_buffer
 *
 * A circular array of words that command DMA fetches from. Unlike the sync
 * queue, fence == pos here means the push buffer is full rather than empty.
 *
 * Each slot holds two 32-bit words (8 bytes). Positions in the buffer wrap by
 * masking with (size - 1), so the slot count must stay a power of two.
 */

#define HOST1X_PUSHBUFFER_SLOTS	512
45 
46 /*
47  * Clean up push buffer resources
48  */
49 static void host1x_pushbuffer_destroy(struct push_buffer *pb)
50 {
51 	struct host1x_cdma *cdma = pb_to_cdma(pb);
52 	struct host1x *host1x = cdma_to_host1x(cdma);
53 
54 	if (!pb->mapped)
55 		return;
56 
57 	if (host1x->domain) {
58 		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
59 		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
60 	}
61 
62 	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
63 
64 	pb->mapped = NULL;
65 	pb->phys = 0;
66 }
67 
68 /*
69  * Init push buffer resources
70  */
71 static int host1x_pushbuffer_init(struct push_buffer *pb)
72 {
73 	struct host1x_cdma *cdma = pb_to_cdma(pb);
74 	struct host1x *host1x = cdma_to_host1x(cdma);
75 	struct iova *alloc;
76 	u32 size;
77 	int err;
78 
79 	pb->mapped = NULL;
80 	pb->phys = 0;
81 	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
82 
83 	size = pb->size + 4;
84 
85 	/* initialize buffer pointers */
86 	pb->fence = pb->size - 8;
87 	pb->pos = 0;
88 
89 	if (host1x->domain) {
90 		unsigned long shift;
91 
92 		size = iova_align(&host1x->iova, size);
93 
94 		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
95 					  GFP_KERNEL);
96 		if (!pb->mapped)
97 			return -ENOMEM;
98 
99 		shift = iova_shift(&host1x->iova);
100 		alloc = alloc_iova(&host1x->iova, size >> shift,
101 				   host1x->iova_end >> shift, true);
102 		if (!alloc) {
103 			err = -ENOMEM;
104 			goto iommu_free_mem;
105 		}
106 
107 		pb->dma = iova_dma_addr(&host1x->iova, alloc);
108 		err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
109 				IOMMU_READ);
110 		if (err)
111 			goto iommu_free_iova;
112 	} else {
113 		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
114 					  GFP_KERNEL);
115 		if (!pb->mapped)
116 			return -ENOMEM;
117 
118 		pb->dma = pb->phys;
119 	}
120 
121 	pb->alloc_size = size;
122 
123 	host1x_hw_pushbuffer_init(host1x, pb);
124 
125 	return 0;
126 
127 iommu_free_iova:
128 	__free_iova(&host1x->iova, alloc);
129 iommu_free_mem:
130 	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
131 
132 	return err;
133 }
134 
135 /*
136  * Push two words to the push buffer
137  * Caller must ensure push buffer is not full
138  */
139 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
140 {
141 	u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
142 
143 	WARN_ON(pb->pos == pb->fence);
144 	*(p++) = op1;
145 	*(p++) = op2;
146 	pb->pos = (pb->pos + 8) & (pb->size - 1);
147 }
148 
149 /*
150  * Pop a number of two word slots from the push buffer
151  * Caller must ensure push buffer is not empty
152  */
153 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
154 {
155 	/* Advance the next write position */
156 	pb->fence = (pb->fence + slots * 8) & (pb->size - 1);
157 }
158 
159 /*
160  * Return the number of two word slots free in the push buffer
161  */
162 static u32 host1x_pushbuffer_space(struct push_buffer *pb)
163 {
164 	return ((pb->fence - pb->pos) & (pb->size - 1)) / 8;
165 }
166 
167 /*
168  * Sleep (if necessary) until the requested event happens
169  *   - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
170  *     - Returns 1
171  *   - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
172  *     - Return the amount of space (> 0)
173  * Must be called with the cdma lock held.
174  */
175 unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
176 				     enum cdma_event event)
177 {
178 	for (;;) {
179 		struct push_buffer *pb = &cdma->push_buffer;
180 		unsigned int space;
181 
182 		switch (event) {
183 		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
184 			space = list_empty(&cdma->sync_queue) ? 1 : 0;
185 			break;
186 
187 		case CDMA_EVENT_PUSH_BUFFER_SPACE:
188 			space = host1x_pushbuffer_space(pb);
189 			break;
190 
191 		default:
192 			WARN_ON(1);
193 			return -EINVAL;
194 		}
195 
196 		if (space)
197 			return space;
198 
199 		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
200 				       event);
201 
202 		/* If somebody has managed to already start waiting, yield */
203 		if (cdma->event != CDMA_EVENT_NONE) {
204 			mutex_unlock(&cdma->lock);
205 			schedule();
206 			mutex_lock(&cdma->lock);
207 			continue;
208 		}
209 
210 		cdma->event = event;
211 
212 		mutex_unlock(&cdma->lock);
213 		down(&cdma->sem);
214 		mutex_lock(&cdma->lock);
215 	}
216 
217 	return 0;
218 }
219 
220 /*
221  * Start timer that tracks the time spent by the job.
222  * Must be called with the cdma lock held.
223  */
224 static void cdma_start_timer_locked(struct host1x_cdma *cdma,
225 				    struct host1x_job *job)
226 {
227 	struct host1x *host = cdma_to_host1x(cdma);
228 
229 	if (cdma->timeout.client) {
230 		/* timer already started */
231 		return;
232 	}
233 
234 	cdma->timeout.client = job->client;
235 	cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
236 	cdma->timeout.syncpt_val = job->syncpt_end;
237 	cdma->timeout.start_ktime = ktime_get();
238 
239 	schedule_delayed_work(&cdma->timeout.wq,
240 			      msecs_to_jiffies(job->timeout));
241 }
242 
243 /*
244  * Stop timer when a buffer submission completes.
245  * Must be called with the cdma lock held.
246  */
247 static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
248 {
249 	cancel_delayed_work(&cdma->timeout.wq);
250 	cdma->timeout.client = NULL;
251 }
252 
253 /*
254  * For all sync queue entries that have already finished according to the
255  * current sync point registers:
256  *  - unpin & unref their mems
257  *  - pop their push buffer slots
258  *  - remove them from the sync queue
259  * This is normally called from the host code's worker thread, but can be
260  * called manually if necessary.
261  * Must be called with the cdma lock held.
262  */
263 static void update_cdma_locked(struct host1x_cdma *cdma)
264 {
265 	bool signal = false;
266 	struct host1x *host1x = cdma_to_host1x(cdma);
267 	struct host1x_job *job, *n;
268 
269 	/* If CDMA is stopped, queue is cleared and we can return */
270 	if (!cdma->running)
271 		return;
272 
273 	/*
274 	 * Walk the sync queue, reading the sync point registers as necessary,
275 	 * to consume as many sync queue entries as possible without blocking
276 	 */
277 	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
278 		struct host1x_syncpt *sp =
279 			host1x_syncpt_get(host1x, job->syncpt_id);
280 
281 		/* Check whether this syncpt has completed, and bail if not */
282 		if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
283 			/* Start timer on next pending syncpt */
284 			if (job->timeout)
285 				cdma_start_timer_locked(cdma, job);
286 
287 			break;
288 		}
289 
290 		/* Cancel timeout, when a buffer completes */
291 		if (cdma->timeout.client)
292 			stop_cdma_timer_locked(cdma);
293 
294 		/* Unpin the memory */
295 		host1x_job_unpin(job);
296 
297 		/* Pop push buffer slots */
298 		if (job->num_slots) {
299 			struct push_buffer *pb = &cdma->push_buffer;
300 
301 			host1x_pushbuffer_pop(pb, job->num_slots);
302 
303 			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
304 				signal = true;
305 		}
306 
307 		list_del(&job->list);
308 		host1x_job_put(job);
309 	}
310 
311 	if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
312 	    list_empty(&cdma->sync_queue))
313 		signal = true;
314 
315 	if (signal) {
316 		cdma->event = CDMA_EVENT_NONE;
317 		up(&cdma->sem);
318 	}
319 }
320 
321 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
322 				   struct device *dev)
323 {
324 	struct host1x *host1x = cdma_to_host1x(cdma);
325 	u32 restart_addr, syncpt_incrs, syncpt_val;
326 	struct host1x_job *job = NULL;
327 
328 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
329 
330 	dev_dbg(dev, "%s: starting cleanup (thresh %d)\n",
331 		__func__, syncpt_val);
332 
333 	/*
334 	 * Move the sync_queue read pointer to the first entry that hasn't
335 	 * completed based on the current HW syncpt value. It's likely there
336 	 * won't be any (i.e. we're still at the head), but covers the case
337 	 * where a syncpt incr happens just prior/during the teardown.
338 	 */
339 
340 	dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n",
341 		__func__);
342 
343 	list_for_each_entry(job, &cdma->sync_queue, list) {
344 		if (syncpt_val < job->syncpt_end)
345 			break;
346 
347 		host1x_job_dump(dev, job);
348 	}
349 
350 	/*
351 	 * Walk the sync_queue, first incrementing with the CPU syncpts that
352 	 * are partially executed (the first buffer) or fully skipped while
353 	 * still in the current context (slots are also NOP-ed).
354 	 *
355 	 * At the point contexts are interleaved, syncpt increments must be
356 	 * done inline with the pushbuffer from a GATHER buffer to maintain
357 	 * the order (slots are modified to be a GATHER of syncpt incrs).
358 	 *
359 	 * Note: save in restart_addr the location where the timed out buffer
360 	 * started in the PB, so we can start the refetch from there (with the
361 	 * modified NOP-ed PB slots). This lets things appear to have completed
362 	 * properly for this buffer and resources are freed.
363 	 */
364 
365 	dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n",
366 		__func__);
367 
368 	if (!list_empty(&cdma->sync_queue))
369 		restart_addr = job->first_get;
370 	else
371 		restart_addr = cdma->last_pos;
372 
373 	/* do CPU increments as long as this context continues */
374 	list_for_each_entry_from(job, &cdma->sync_queue, list) {
375 		/* different context, gets us out of this loop */
376 		if (job->client != cdma->timeout.client)
377 			break;
378 
379 		/* won't need a timeout when replayed */
380 		job->timeout = 0;
381 
382 		syncpt_incrs = job->syncpt_end - syncpt_val;
383 		dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs);
384 
385 		host1x_job_dump(dev, job);
386 
387 		/* safe to use CPU to incr syncpts */
388 		host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
389 						syncpt_incrs, job->syncpt_end,
390 						job->num_slots);
391 
392 		syncpt_val += syncpt_incrs;
393 	}
394 
395 	/*
396 	 * The following sumbits from the same client may be dependent on the
397 	 * failed submit and therefore they may fail. Force a small timeout
398 	 * to make the queue cleanup faster.
399 	 */
400 
401 	list_for_each_entry_from(job, &cdma->sync_queue, list)
402 		if (job->client == cdma->timeout.client)
403 			job->timeout = min_t(unsigned int, job->timeout, 500);
404 
405 	dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);
406 
407 	/* roll back DMAGET and start up channel again */
408 	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
409 }
410 
411 /*
412  * Create a cdma
413  */
414 int host1x_cdma_init(struct host1x_cdma *cdma)
415 {
416 	int err;
417 
418 	mutex_init(&cdma->lock);
419 	sema_init(&cdma->sem, 0);
420 
421 	INIT_LIST_HEAD(&cdma->sync_queue);
422 
423 	cdma->event = CDMA_EVENT_NONE;
424 	cdma->running = false;
425 	cdma->torndown = false;
426 
427 	err = host1x_pushbuffer_init(&cdma->push_buffer);
428 	if (err)
429 		return err;
430 
431 	return 0;
432 }
433 
434 /*
435  * Destroy a cdma
436  */
437 int host1x_cdma_deinit(struct host1x_cdma *cdma)
438 {
439 	struct push_buffer *pb = &cdma->push_buffer;
440 	struct host1x *host1x = cdma_to_host1x(cdma);
441 
442 	if (cdma->running) {
443 		pr_warn("%s: CDMA still running\n", __func__);
444 		return -EBUSY;
445 	}
446 
447 	host1x_pushbuffer_destroy(pb);
448 	host1x_hw_cdma_timeout_destroy(host1x, cdma);
449 
450 	return 0;
451 }
452 
453 /*
454  * Begin a cdma submit
455  */
456 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
457 {
458 	struct host1x *host1x = cdma_to_host1x(cdma);
459 
460 	mutex_lock(&cdma->lock);
461 
462 	if (job->timeout) {
463 		/* init state on first submit with timeout value */
464 		if (!cdma->timeout.initialized) {
465 			int err;
466 
467 			err = host1x_hw_cdma_timeout_init(host1x, cdma,
468 							  job->syncpt_id);
469 			if (err) {
470 				mutex_unlock(&cdma->lock);
471 				return err;
472 			}
473 		}
474 	}
475 
476 	if (!cdma->running)
477 		host1x_hw_cdma_start(host1x, cdma);
478 
479 	cdma->slots_free = 0;
480 	cdma->slots_used = 0;
481 	cdma->first_get = cdma->push_buffer.pos;
482 
483 	trace_host1x_cdma_begin(dev_name(job->channel->dev));
484 	return 0;
485 }
486 
487 /*
488  * Push two words into a push buffer slot
489  * Blocks as necessary if the push buffer is full.
490  */
491 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
492 {
493 	struct host1x *host1x = cdma_to_host1x(cdma);
494 	struct push_buffer *pb = &cdma->push_buffer;
495 	u32 slots_free = cdma->slots_free;
496 
497 	if (host1x_debug_trace_cmdbuf)
498 		trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
499 				       op1, op2);
500 
501 	if (slots_free == 0) {
502 		host1x_hw_cdma_flush(host1x, cdma);
503 		slots_free = host1x_cdma_wait_locked(cdma,
504 						CDMA_EVENT_PUSH_BUFFER_SPACE);
505 	}
506 
507 	cdma->slots_free = slots_free - 1;
508 	cdma->slots_used++;
509 	host1x_pushbuffer_push(pb, op1, op2);
510 }
511 
512 /*
513  * End a cdma submit
514  * Kick off DMA, add job to the sync queue, and a number of slots to be freed
515  * from the pushbuffer. The handles for a submit must all be pinned at the same
516  * time, but they can be unpinned in smaller chunks.
517  */
518 void host1x_cdma_end(struct host1x_cdma *cdma,
519 		     struct host1x_job *job)
520 {
521 	struct host1x *host1x = cdma_to_host1x(cdma);
522 	bool idle = list_empty(&cdma->sync_queue);
523 
524 	host1x_hw_cdma_flush(host1x, cdma);
525 
526 	job->first_get = cdma->first_get;
527 	job->num_slots = cdma->slots_used;
528 	host1x_job_get(job);
529 	list_add_tail(&job->list, &cdma->sync_queue);
530 
531 	/* start timer on idle -> active transitions */
532 	if (job->timeout && idle)
533 		cdma_start_timer_locked(cdma, job);
534 
535 	trace_host1x_cdma_end(dev_name(job->channel->dev));
536 	mutex_unlock(&cdma->lock);
537 }
538 
539 /*
540  * Update cdma state according to current sync point values
541  */
542 void host1x_cdma_update(struct host1x_cdma *cdma)
543 {
544 	mutex_lock(&cdma->lock);
545 	update_cdma_locked(cdma);
546 	mutex_unlock(&cdma->lock);
547 }
548