/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    u32 num_waitchks)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_cmdbufs + num_relocs;
	u64 total;
	void *mem;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_waitchks * sizeof(struct host1x_waitchk) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocarray = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->waitchk = num_waitchks ? mem : NULL;
	mem += num_waitchks * sizeof(struct host1x_waitchk);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
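
/*
 * A minimal usage sketch of the job API in this file (illustrative only:
 * "client", "cmdbuf_bo" and "num_words" are hypothetical driver state,
 * error handling is elided, and submission itself goes through
 * host1x_job_submit() from <linux/host1x.h>):
 *
 *	struct host1x_job *job;
 *
 *	job = host1x_job_alloc(client->channel, 1, 0, 0);
 *	if (!job)
 *		return -ENOMEM;
 *
 *	host1x_job_add_gather(job, cmdbuf_bo, num_words, 0);
 *
 *	err = host1x_job_pin(job, client->dev);
 *	if (!err)
 *		err = host1x_job_submit(job);
 *
 *	host1x_job_put(job);
 */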

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   u32 words, u32 offset)
{
	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];

	cur_gather->words = words;
	cur_gather->bo = bo;
	cur_gather->offset = offset;
	job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

/*
 * NULL an already satisfied WAIT_SYNCPT host method by patching its
 * args in the command stream. The method data is changed to reference
 * a reserved (never given out or incremented) HOST1X_SYNCPT_RESERVED
 * syncpt with a matching threshold value of 0, so it is guaranteed to
 * be popped by the host HW.
 */
static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
				       struct host1x_bo *h, u32 offset)
{
	void *patch_addr = NULL;

	/* patch the wait */
	patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
	if (patch_addr) {
		host1x_syncpt_patch_wait(sp,
					 patch_addr + (offset & ~PAGE_MASK));
		host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
	} else {
		pr_err("Could not map cmdbuf for wait check\n");
	}
}
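
/*
 * Worked example for the mapping math above, assuming 4 KiB pages: for
 * offset = 0x1008, "offset >> PAGE_SHIFT" selects page 1 of the buffer
 * for host1x_bo_kmap(), and "offset & ~PAGE_MASK" yields 0x8, the byte
 * offset of the wait's method data within that mapped page.
 */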

/*
 * Check driver-supplied waitchk structs for syncpt thresholds
 * that have already been satisfied and NULL the comparison (to
 * avoid a wrap condition in the HW).
 */
static int do_waitchks(struct host1x_job *job, struct host1x *host,
		       struct host1x_job_gather *g)
{
	struct host1x_bo *patch = g->bo;
	int i;

	/* compare syncpt vs wait threshold */
	for (i = 0; i < job->num_waitchk; i++) {
		struct host1x_waitchk *wait = &job->waitchk[i];
		struct host1x_syncpt *sp;

		/* validate the syncpt id (valid ids are 0 to nb_pts - 1) */
		if (wait->syncpt_id >= host1x_syncpt_nb_pts(host))
			continue;

		/* skip all other gathers */
		if (patch != wait->bo)
			continue;

		sp = host1x_syncpt_get(host, wait->syncpt_id);

		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
					       wait->syncpt_id, wait->thresh,
					       host1x_syncpt_read_min(sp));

		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
			dev_dbg(host->dev,
				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
				wait->syncpt_id, sp->name, wait->thresh,
				host1x_syncpt_read_min(sp));

			host1x_syncpt_patch_offset(sp, patch,
						   g->offset + wait->offset);
		}

		wait->bo = NULL;
	}

	return 0;
}

static int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocarray[i];
		struct sg_table *sgt;
		dma_addr_t phys_addr;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(g->bo, &sgt);

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			for_each_sg(sgt->sgl, sg, sgt->nents, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto unpin;
			}

			err = iommu_map_sg(host->domain,
					iova_dma_addr(&host->iova, alloc),
					sgt->sgl, sgt->nents, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto unpin;
			}

			job->addr_phys[job->num_unpins] =
				iova_dma_addr(&host->iova, alloc);
			job->unpins[job->num_unpins].size = gather_size;
		} else {
			job->addr_phys[job->num_unpins] = phys_addr;
		}

		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

unpin:
	host1x_job_unpin(job);
	return err;
}
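
/*
 * Note on the IOVA sizing in pin_job() above: gather_size is the sum of
 * the scatterlist segment lengths, rounded up to the IOVA allocator's
 * granule by iova_align(). Assuming a 4 KiB granule (iova_shift() == 12)
 * purely for illustration, a 6000-byte gather rounds up to 8192 bytes
 * and "gather_size >> shift" requests two granule-sized pages from
 * alloc_iova().
 */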

static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	int i = 0;
	u32 last_page = ~0;
	void *cmdbuf_page_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocarray[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
						g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
			if (cmdbuf_page_addr)
				host1x_bo_kunmap(cmdbuf, last_page,
						 cmdbuf_page_addr);

			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
					reloc->cmdbuf.offset >> PAGE_SHIFT);
			last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;

			if (unlikely(!cmdbuf_page_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_page_addr)
		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);

	return 0;
}
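
/*
 * Worked example for the patching in do_relocs() above: with a pinned
 * target at job->reloc_addr_phys[i] = 0x80001000, target.offset = 0x100
 * and shift = 0, the u32 written into the command stream is 0x80001100;
 * a non-zero shift would instead store the address in (1 << shift)-byte
 * units.
 */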

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
		       unsigned int offset)
{
	offset *= sizeof(u32);

	if (wait->bo != cmdbuf || wait->offset != offset)
		return false;

	return true;
}

struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	unsigned int num_waitchks;
	struct host1x_waitchk *waitchk;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};
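
/*
 * The firewall consumes the driver-supplied reloc and waitchk tables
 * strictly in order: check_register() below compares only the next
 * unconsumed entry against the current command-stream position and
 * advances the cursor on a match, so the tables must be sorted in the
 * order in which the command stream references them.
 */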

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
		if (fw->class != HOST1X_CLASS_HOST1X)
			return -EINVAL;

		if (!fw->num_waitchks)
			return -EINVAL;

		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_waitchks--;
		fw->waitchk++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;
		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;
		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
		case 14:
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}
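
/*
 * Command-word layout decoded by validate() above, with the opcode in
 * bits 31:28:
 *
 *	opcode 0 (SETCLASS): offset 27:16, class 15:6, mask 5:0
 *	opcode 1 (INCR):     offset 27:16, count 15:0
 *	opcode 2 (NONINCR):  offset 27:16, count 15:0
 *	opcode 3 (MASK):     offset 27:16, mask 15:0
 *
 * Opcodes 4 and 14 are passed through without register checks; any
 * other opcode fails validation.
 */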

static inline int copy_gathers(struct host1x_job *job, struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocarray;
	fw.num_relocs = job->num_relocs;
	fw.waitchk = job->waitchk;
	fw.num_waitchks = job->num_waitchk;
	fw.class = job->class;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher-priority pool first,
	 * as waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher-priority allocation failed, try the generic blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(dev, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs or waitchks should remain at this point */
	if (fw.num_relocs || fw.num_waitchks)
		return -EINVAL;

	return 0;
}
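
/*
 * Copying the gathers into a kernel-owned buffer before validating them
 * is what makes the firewall effective: g->base is redirected to the
 * copy, so the hardware executes the validated words rather than the
 * user-visible BO, and user space cannot rewrite a gather between
 * validation and submission.
 */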

int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);
	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));

	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
	for (i = 0; i < job->num_waitchk; i++) {
		u32 syncpt_id = job->waitchk[i].syncpt_id;

		if (syncpt_id < host1x_syncpt_nb_pts(host))
			set_bit(syncpt_id, waitchk_mask);
	}

	/* get current syncpt values for waitchk */
	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
		host1x_syncpt_load(host->syncpt + i);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets the gather base if the firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;

		err = do_waitchks(job, host, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(unpin->bo, unpin->sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(job->channel->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}