xref: /openbmc/linux/drivers/gpu/host1x/job.c (revision a0ae2562c6c4b2721d9fddba63b7286c13517d9f)
1 /*
2  * Tegra host1x Job
3  *
4  * Copyright (c) 2010-2015, NVIDIA Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include <linux/dma-mapping.h>
20 #include <linux/err.h>
21 #include <linux/host1x.h>
22 #include <linux/kref.h>
23 #include <linux/module.h>
24 #include <linux/scatterlist.h>
25 #include <linux/slab.h>
26 #include <linux/vmalloc.h>
27 #include <trace/events/host1x.h>
28 
29 #include "channel.h"
30 #include "dev.h"
31 #include "job.h"
32 #include "syncpt.h"
33 
/*
 * NOTE(review): this constant is not referenced by any code visible in this
 * file; presumably it is a register offset used by wait-syncpt handling
 * elsewhere or a leftover - confirm before relying on it.
 */
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
35 
36 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
37 				    u32 num_cmdbufs, u32 num_relocs)
38 {
39 	struct host1x_job *job = NULL;
40 	unsigned int num_unpins = num_cmdbufs + num_relocs;
41 	u64 total;
42 	void *mem;
43 
44 	/* Check that we're not going to overflow */
45 	total = sizeof(struct host1x_job) +
46 		(u64)num_relocs * sizeof(struct host1x_reloc) +
47 		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
48 		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
49 		(u64)num_unpins * sizeof(dma_addr_t) +
50 		(u64)num_unpins * sizeof(u32 *);
51 	if (total > ULONG_MAX)
52 		return NULL;
53 
54 	mem = job = kzalloc(total, GFP_KERNEL);
55 	if (!job)
56 		return NULL;
57 
58 	kref_init(&job->ref);
59 	job->channel = ch;
60 
61 	/* Redistribute memory to the structs  */
62 	mem += sizeof(struct host1x_job);
63 	job->relocs = num_relocs ? mem : NULL;
64 	mem += num_relocs * sizeof(struct host1x_reloc);
65 	job->unpins = num_unpins ? mem : NULL;
66 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
67 	job->gathers = num_cmdbufs ? mem : NULL;
68 	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
69 	job->addr_phys = num_unpins ? mem : NULL;
70 
71 	job->reloc_addr_phys = job->addr_phys;
72 	job->gather_addr_phys = &job->addr_phys[num_relocs];
73 
74 	return job;
75 }
76 EXPORT_SYMBOL(host1x_job_alloc);
77 
78 struct host1x_job *host1x_job_get(struct host1x_job *job)
79 {
80 	kref_get(&job->ref);
81 	return job;
82 }
83 EXPORT_SYMBOL(host1x_job_get);
84 
85 static void job_free(struct kref *ref)
86 {
87 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
88 
89 	kfree(job);
90 }
91 
92 void host1x_job_put(struct host1x_job *job)
93 {
94 	kref_put(&job->ref, job_free);
95 }
96 EXPORT_SYMBOL(host1x_job_put);
97 
98 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
99 			   unsigned int words, unsigned int offset)
100 {
101 	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
102 
103 	gather->words = words;
104 	gather->bo = bo;
105 	gather->offset = offset;
106 
107 	job->num_gathers++;
108 }
109 EXPORT_SYMBOL(host1x_job_add_gather);
110 
111 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
112 {
113 	unsigned int i;
114 	int err;
115 
116 	job->num_unpins = 0;
117 
118 	for (i = 0; i < job->num_relocs; i++) {
119 		struct host1x_reloc *reloc = &job->relocs[i];
120 		struct sg_table *sgt;
121 		dma_addr_t phys_addr;
122 
123 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
124 		if (!reloc->target.bo) {
125 			err = -EINVAL;
126 			goto unpin;
127 		}
128 
129 		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
130 
131 		job->addr_phys[job->num_unpins] = phys_addr;
132 		job->unpins[job->num_unpins].bo = reloc->target.bo;
133 		job->unpins[job->num_unpins].sgt = sgt;
134 		job->num_unpins++;
135 	}
136 
137 	for (i = 0; i < job->num_gathers; i++) {
138 		struct host1x_job_gather *g = &job->gathers[i];
139 		size_t gather_size = 0;
140 		struct scatterlist *sg;
141 		struct sg_table *sgt;
142 		dma_addr_t phys_addr;
143 		unsigned long shift;
144 		struct iova *alloc;
145 		unsigned int j;
146 
147 		g->bo = host1x_bo_get(g->bo);
148 		if (!g->bo) {
149 			err = -EINVAL;
150 			goto unpin;
151 		}
152 
153 		phys_addr = host1x_bo_pin(g->bo, &sgt);
154 
155 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
156 			for_each_sg(sgt->sgl, sg, sgt->nents, j)
157 				gather_size += sg->length;
158 			gather_size = iova_align(&host->iova, gather_size);
159 
160 			shift = iova_shift(&host->iova);
161 			alloc = alloc_iova(&host->iova, gather_size >> shift,
162 					   host->iova_end >> shift, true);
163 			if (!alloc) {
164 				err = -ENOMEM;
165 				goto unpin;
166 			}
167 
168 			err = iommu_map_sg(host->domain,
169 					iova_dma_addr(&host->iova, alloc),
170 					sgt->sgl, sgt->nents, IOMMU_READ);
171 			if (err == 0) {
172 				__free_iova(&host->iova, alloc);
173 				err = -EINVAL;
174 				goto unpin;
175 			}
176 
177 			job->addr_phys[job->num_unpins] =
178 				iova_dma_addr(&host->iova, alloc);
179 			job->unpins[job->num_unpins].size = gather_size;
180 		} else {
181 			job->addr_phys[job->num_unpins] = phys_addr;
182 		}
183 
184 		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
185 
186 		job->unpins[job->num_unpins].bo = g->bo;
187 		job->unpins[job->num_unpins].sgt = sgt;
188 		job->num_unpins++;
189 	}
190 
191 	return 0;
192 
193 unpin:
194 	host1x_job_unpin(job);
195 	return err;
196 }
197 
198 static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
199 {
200 	u32 last_page = ~0;
201 	void *cmdbuf_page_addr = NULL;
202 	struct host1x_bo *cmdbuf = g->bo;
203 	unsigned int i;
204 
205 	/* pin & patch the relocs for one gather */
206 	for (i = 0; i < job->num_relocs; i++) {
207 		struct host1x_reloc *reloc = &job->relocs[i];
208 		u32 reloc_addr = (job->reloc_addr_phys[i] +
209 				  reloc->target.offset) >> reloc->shift;
210 		u32 *target;
211 
212 		/* skip all other gathers */
213 		if (cmdbuf != reloc->cmdbuf.bo)
214 			continue;
215 
216 		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
217 			target = (u32 *)job->gather_copy_mapped +
218 					reloc->cmdbuf.offset / sizeof(u32) +
219 						g->offset / sizeof(u32);
220 			goto patch_reloc;
221 		}
222 
223 		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
224 			if (cmdbuf_page_addr)
225 				host1x_bo_kunmap(cmdbuf, last_page,
226 						 cmdbuf_page_addr);
227 
228 			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
229 					reloc->cmdbuf.offset >> PAGE_SHIFT);
230 			last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
231 
232 			if (unlikely(!cmdbuf_page_addr)) {
233 				pr_err("Could not map cmdbuf for relocation\n");
234 				return -ENOMEM;
235 			}
236 		}
237 
238 		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
239 patch_reloc:
240 		*target = reloc_addr;
241 	}
242 
243 	if (cmdbuf_page_addr)
244 		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
245 
246 	return 0;
247 }
248 
249 static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
250 			unsigned int offset)
251 {
252 	offset *= sizeof(u32);
253 
254 	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
255 		return false;
256 
257 	/* relocation shift value validation isn't implemented yet */
258 	if (reloc->shift)
259 		return false;
260 
261 	return true;
262 }
263 
264 struct host1x_firewall {
265 	struct host1x_job *job;
266 	struct device *dev;
267 
268 	unsigned int num_relocs;
269 	struct host1x_reloc *reloc;
270 
271 	struct host1x_bo *cmdbuf;
272 	unsigned int offset;
273 
274 	u32 words;
275 	u32 class;
276 	u32 reg;
277 	u32 mask;
278 	u32 count;
279 };
280 
281 static int check_register(struct host1x_firewall *fw, unsigned long offset)
282 {
283 	if (!fw->job->is_addr_reg)
284 		return 0;
285 
286 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
287 		if (!fw->num_relocs)
288 			return -EINVAL;
289 
290 		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
291 			return -EINVAL;
292 
293 		fw->num_relocs--;
294 		fw->reloc++;
295 	}
296 
297 	return 0;
298 }
299 
300 static int check_class(struct host1x_firewall *fw, u32 class)
301 {
302 	if (!fw->job->is_valid_class) {
303 		if (fw->class != class)
304 			return -EINVAL;
305 	} else {
306 		if (!fw->job->is_valid_class(fw->class))
307 			return -EINVAL;
308 	}
309 
310 	return 0;
311 }
312 
313 static int check_mask(struct host1x_firewall *fw)
314 {
315 	u32 mask = fw->mask;
316 	u32 reg = fw->reg;
317 	int ret;
318 
319 	while (mask) {
320 		if (fw->words == 0)
321 			return -EINVAL;
322 
323 		if (mask & 1) {
324 			ret = check_register(fw, reg);
325 			if (ret < 0)
326 				return ret;
327 
328 			fw->words--;
329 			fw->offset++;
330 		}
331 		mask >>= 1;
332 		reg++;
333 	}
334 
335 	return 0;
336 }
337 
338 static int check_incr(struct host1x_firewall *fw)
339 {
340 	u32 count = fw->count;
341 	u32 reg = fw->reg;
342 	int ret;
343 
344 	while (count) {
345 		if (fw->words == 0)
346 			return -EINVAL;
347 
348 		ret = check_register(fw, reg);
349 		if (ret < 0)
350 			return ret;
351 
352 		reg++;
353 		fw->words--;
354 		fw->offset++;
355 		count--;
356 	}
357 
358 	return 0;
359 }
360 
361 static int check_nonincr(struct host1x_firewall *fw)
362 {
363 	u32 count = fw->count;
364 	int ret;
365 
366 	while (count) {
367 		if (fw->words == 0)
368 			return -EINVAL;
369 
370 		ret = check_register(fw, fw->reg);
371 		if (ret < 0)
372 			return ret;
373 
374 		fw->words--;
375 		fw->offset++;
376 		count--;
377 	}
378 
379 	return 0;
380 }
381 
382 static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
383 {
384 	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
385 		(g->offset / sizeof(u32));
386 	u32 job_class = fw->class;
387 	int err = 0;
388 
389 	fw->words = g->words;
390 	fw->cmdbuf = g->bo;
391 	fw->offset = 0;
392 
393 	while (fw->words && !err) {
394 		u32 word = cmdbuf_base[fw->offset];
395 		u32 opcode = (word & 0xf0000000) >> 28;
396 
397 		fw->mask = 0;
398 		fw->reg = 0;
399 		fw->count = 0;
400 		fw->words--;
401 		fw->offset++;
402 
403 		switch (opcode) {
404 		case 0:
405 			fw->class = word >> 6 & 0x3ff;
406 			fw->mask = word & 0x3f;
407 			fw->reg = word >> 16 & 0xfff;
408 			err = check_class(fw, job_class);
409 			if (!err)
410 				err = check_mask(fw);
411 			if (err)
412 				goto out;
413 			break;
414 		case 1:
415 			fw->reg = word >> 16 & 0xfff;
416 			fw->count = word & 0xffff;
417 			err = check_incr(fw);
418 			if (err)
419 				goto out;
420 			break;
421 
422 		case 2:
423 			fw->reg = word >> 16 & 0xfff;
424 			fw->count = word & 0xffff;
425 			err = check_nonincr(fw);
426 			if (err)
427 				goto out;
428 			break;
429 
430 		case 3:
431 			fw->mask = word & 0xffff;
432 			fw->reg = word >> 16 & 0xfff;
433 			err = check_mask(fw);
434 			if (err)
435 				goto out;
436 			break;
437 		case 4:
438 		case 14:
439 			break;
440 		default:
441 			err = -EINVAL;
442 			break;
443 		}
444 	}
445 
446 out:
447 	return err;
448 }
449 
450 static inline int copy_gathers(struct host1x_job *job, struct device *dev)
451 {
452 	struct host1x_firewall fw;
453 	size_t size = 0;
454 	size_t offset = 0;
455 	unsigned int i;
456 
457 	fw.job = job;
458 	fw.dev = dev;
459 	fw.reloc = job->relocs;
460 	fw.num_relocs = job->num_relocs;
461 	fw.class = job->class;
462 
463 	for (i = 0; i < job->num_gathers; i++) {
464 		struct host1x_job_gather *g = &job->gathers[i];
465 
466 		size += g->words * sizeof(u32);
467 	}
468 
469 	/*
470 	 * Try a non-blocking allocation from a higher priority pools first,
471 	 * as awaiting for the allocation here is a major performance hit.
472 	 */
473 	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
474 					       GFP_NOWAIT);
475 
476 	/* the higher priority allocation failed, try the generic-blocking */
477 	if (!job->gather_copy_mapped)
478 		job->gather_copy_mapped = dma_alloc_wc(dev, size,
479 						       &job->gather_copy,
480 						       GFP_KERNEL);
481 	if (!job->gather_copy_mapped)
482 		return -ENOMEM;
483 
484 	job->gather_copy_size = size;
485 
486 	for (i = 0; i < job->num_gathers; i++) {
487 		struct host1x_job_gather *g = &job->gathers[i];
488 		void *gather;
489 
490 		/* Copy the gather */
491 		gather = host1x_bo_mmap(g->bo);
492 		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
493 		       g->words * sizeof(u32));
494 		host1x_bo_munmap(g->bo, gather);
495 
496 		/* Store the location in the buffer */
497 		g->base = job->gather_copy;
498 		g->offset = offset;
499 
500 		/* Validate the job */
501 		if (validate(&fw, g))
502 			return -EINVAL;
503 
504 		offset += g->words * sizeof(u32);
505 	}
506 
507 	/* No relocs should remain at this point */
508 	if (fw.num_relocs)
509 		return -EINVAL;
510 
511 	return 0;
512 }
513 
514 int host1x_job_pin(struct host1x_job *job, struct device *dev)
515 {
516 	int err;
517 	unsigned int i, j;
518 	struct host1x *host = dev_get_drvdata(dev->parent);
519 
520 	/* pin memory */
521 	err = pin_job(host, job);
522 	if (err)
523 		goto out;
524 
525 	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
526 		err = copy_gathers(job, dev);
527 		if (err)
528 			goto out;
529 	}
530 
531 	/* patch gathers */
532 	for (i = 0; i < job->num_gathers; i++) {
533 		struct host1x_job_gather *g = &job->gathers[i];
534 
535 		/* process each gather mem only once */
536 		if (g->handled)
537 			continue;
538 
539 		/* copy_gathers() sets gathers base if firewall is enabled */
540 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
541 			g->base = job->gather_addr_phys[i];
542 
543 		for (j = i + 1; j < job->num_gathers; j++) {
544 			if (job->gathers[j].bo == g->bo) {
545 				job->gathers[j].handled = true;
546 				job->gathers[j].base = g->base;
547 			}
548 		}
549 
550 		err = do_relocs(job, g);
551 		if (err)
552 			break;
553 	}
554 
555 out:
556 	if (err)
557 		host1x_job_unpin(job);
558 	wmb();
559 
560 	return err;
561 }
562 EXPORT_SYMBOL(host1x_job_pin);
563 
564 void host1x_job_unpin(struct host1x_job *job)
565 {
566 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
567 	unsigned int i;
568 
569 	for (i = 0; i < job->num_unpins; i++) {
570 		struct host1x_job_unpin_data *unpin = &job->unpins[i];
571 
572 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
573 			iommu_unmap(host->domain, job->addr_phys[i],
574 				    unpin->size);
575 			free_iova(&host->iova,
576 				iova_pfn(&host->iova, job->addr_phys[i]));
577 		}
578 
579 		host1x_bo_unpin(unpin->bo, unpin->sgt);
580 		host1x_bo_put(unpin->bo);
581 	}
582 
583 	job->num_unpins = 0;
584 
585 	if (job->gather_copy_size)
586 		dma_free_wc(job->channel->dev, job->gather_copy_size,
587 			    job->gather_copy_mapped, job->gather_copy);
588 }
589 EXPORT_SYMBOL(host1x_job_unpin);
590 
591 /*
592  * Debug routine used to dump job entries
593  */
594 void host1x_job_dump(struct device *dev, struct host1x_job *job)
595 {
596 	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
597 	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
598 	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
599 	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
600 	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
601 	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
602 }
603