xref: /openbmc/linux/drivers/gpu/host1x/job.c (revision 06490bb99e1840ab2b6814af7356e8b4ab0e3ee6)
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_cmdbufs + num_relocs;
	u64 total;
	void *mem;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
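
/*
 * Memory layout of the single allocation made above:
 *
 *   [struct host1x_job][relocs][unpins][gathers][addr_phys]
 *
 * addr_phys holds num_relocs relocation addresses followed by num_cmdbufs
 * gather addresses, which is why reloc_addr_phys aliases the start of the
 * array while gather_addr_phys begins at index num_relocs.
 */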

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);
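
/*
 * Reference-counting sketch (illustrative only): host1x_job_alloc()
 * initializes the refcount to one via kref_init(), so a typical owner does
 *
 *   job = host1x_job_alloc(channel, num_cmdbufs, num_relocs);
 *   host1x_job_get(job);    - extra reference, e.g. for async completion
 *   ...
 *   host1x_job_put(job);    - drop the extra reference
 *   host1x_job_put(job);    - drop the initial reference; job_free() runs
 */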

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   u32 words, u32 offset)
{
	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];

	cur_gather->words = words;
	cur_gather->bo = bo;
	cur_gather->offset = offset;
	job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
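
/*
 * Usage sketch (illustrative; "bo" is a hypothetical buffer object):
 * append a gather of 128 words starting at byte offset 0 of bo:
 *
 *   host1x_job_add_gather(job, bo, 128, 0);
 *
 * Callers must not add more gathers than the num_cmdbufs value passed to
 * host1x_job_alloc(), since job->gathers has exactly that many slots.
 */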

static int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned int i;
	int err;

	job->num_unpins = 0;

	/* pin the relocation targets; they fill addr_phys[0..num_relocs) */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		struct sg_table *sgt;
		dma_addr_t phys_addr;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	/*
	 * pin the gathers, mapping them through the IOMMU if the firewall
	 * is disabled and an IOMMU domain is attached
	 */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(g->bo, &sgt);

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			for_each_sg(sgt->sgl, sg, sgt->nents, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto unpin;
			}

			err = iommu_map_sg(host->domain,
					iova_dma_addr(&host->iova, alloc),
					sgt->sgl, sgt->nents, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto unpin;
			}

			job->addr_phys[job->num_unpins] =
				iova_dma_addr(&host->iova, alloc);
			job->unpins[job->num_unpins].size = gather_size;
		} else {
			job->addr_phys[job->num_unpins] = phys_addr;
		}

		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

unpin:
	host1x_job_unpin(job);
	return err;
}
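
/*
 * Worked example for the IOVA sizing above (illustrative numbers): with a
 * 4 KiB IOVA granule, iova_shift() is 12, so a gather whose scatterlist
 * adds up to 6 KiB is padded by iova_align() to 8 KiB and alloc_iova() is
 * asked for 2 granules below host->iova_end; the final "true" requests a
 * size-aligned allocation.
 */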

static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	u32 last_page = ~0;
	void *cmdbuf_page_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip relocs that belong to other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
						g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
			if (cmdbuf_page_addr)
				host1x_bo_kunmap(cmdbuf, last_page,
						 cmdbuf_page_addr);

			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
					reloc->cmdbuf.offset >> PAGE_SHIFT);
			last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;

			if (unlikely(!cmdbuf_page_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_page_addr)
		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);

	return 0;
}
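
/*
 * Worked example (illustrative numbers): with reloc_addr_phys[i] =
 * 0x80001000, target.offset = 0x100 and shift = 0, the patched value is
 * 0x80001100, written over the word at byte offset reloc->cmdbuf.offset
 * inside the command buffer (or inside its firewall copy).
 */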

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};
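
/*
 * The firewall walks each gather word by word: validate() decodes opcodes
 * and the check_*() helpers below consume the data words they cover,
 * verifying via check_register() that every write to an address register
 * (as reported by the client's is_addr_reg() callback) is backed by the
 * next relocation in submission order.
 */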

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}
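
/*
 * Example (illustrative): mask = 0b101 with reg = 0x50 consumes two data
 * words, checking them as writes to registers 0x50 and 0x52; bit 1 of the
 * mask is clear, so register 0x51 is skipped and no word is consumed for
 * it.
 */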

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0: /* SETCL: switch class, write a masked register set */
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1: /* INCR: write count words at incrementing offsets */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2: /* NONINCR: write count words to a single register */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3: /* MASK: write the registers selected by a 16-bit mask */
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4: /* IMM */
		case 14: /* EXTEND: single-word opcodes, no payload to check */
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}
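
/*
 * Decode example (illustrative): word = 0x15000003 has opcode 1 (INCR),
 * reg = 0x500 and count = 3, so check_incr() verifies the next three data
 * words as writes to registers 0x500, 0x501 and 0x502.
 */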

static inline int copy_gathers(struct host1x_job *job, struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pool first,
	 * since waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the non-blocking allocation failed, fall back to a blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(dev, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}
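
/*
 * Copying the gathers into a kernel-owned buffer before validation closes
 * a time-of-check/time-of-use hole: validate() and do_relocs() operate on
 * gather_copy_mapped, so userspace cannot rewrite a command buffer after
 * the firewall has approved it.
 */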

int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather buffer only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets the gather base if the firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);
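
/*
 * Client-side flow sketch (illustrative; error handling omitted, and
 * "client->dev" is a hypothetical device pointer; host1x_job_submit() is
 * implemented elsewhere in the host1x core):
 *
 *   job = host1x_job_alloc(channel, num_cmdbufs, num_relocs);
 *   host1x_job_add_gather(job, bo, words, offset);
 *   err = host1x_job_pin(job, client->dev);
 *   err = host1x_job_submit(job);
 *   host1x_job_put(job);
 *
 * host1x_job_unpin() is the inverse of host1x_job_pin(); it runs when
 * pinning fails and after the job completes.
 */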

void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];

		/* relocation pins never allocate IOVA space, so their size is 0 */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
		    unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(unpin->bo, unpin->sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(job->channel->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}
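
/*
 * Example output of host1x_job_dump() (illustrative values), with dynamic
 * debug enabled for this file:
 *
 *     SYNCPT_ID   12
 *     SYNCPT_VAL  340
 *     FIRST_GET   0x1000
 *     TIMEOUT     10000
 *     NUM_SLOTS   5
 *     NUM_HANDLES 3
 */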