// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

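/*
 * host1x_job_alloc() - allocate a job and its backing arrays
 *
 * The job, its relocation table, unpin table, command list and physical
 * address table are carved out of a single allocation; the sizes are summed
 * in 64 bits first so that huge num_cmdbufs/num_relocs values from userspace
 * cannot overflow the allocation size. When the command firewall is enabled,
 * gather buffers are copied rather than pinned, so no unpin slots are
 * reserved for them.
 */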
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
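
/*
 * A typical (simplified) job lifetime, as driven by a host1x client such as
 * the Tegra DRM driver; error handling and buffer/syncpoint setup are
 * omitted, and "client" and "bo" are placeholders for the caller's state:
 *
 *	job = host1x_job_alloc(channel, 1, 0, false);
 *	host1x_job_add_gather(job, bo, num_words, 0);
 *	err = host1x_job_pin(job, client->dev);
 *	err = host1x_job_submit(job);
 *	host1x_job_put(job);
 *
 * host1x_job_unpin() is invoked by the host1x core once the job completes
 * (or by host1x_job_pin() itself on failure), not directly by the client.
 */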

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->waiter)
		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
				    job->waiter, false);

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

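/*
 * host1x_job_add_gather() - append a GATHER command to the job
 *
 * Note that there is no bounds checking here: the caller must not add more
 * commands than the num_cmdbufs it passed to host1x_job_alloc().
 */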
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

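/*
 * host1x_job_add_wait() - append a syncpoint wait command to the job
 *
 * The same no-bounds-check caveat as for host1x_job_add_gather() applies;
 * wait commands share the command slots reserved at allocation time.
 */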
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

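/*
 * pin_job() - resolve and pin every buffer the job references
 *
 * Pass one pins each relocation target with a DMA direction derived from
 * its READ/WRITE flags and requires the mapping to be contiguous in I/O
 * virtual address space. Pass two pins the gather buffers for the host1x
 * device itself, unless the firewall is enabled, in which case the gathers
 * will be copied (and validated) later and need no pinning. When host1x has
 * its own IOMMU domain, each gather is additionally mapped read-only into
 * that domain.
 */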
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		enum dma_data_direction direction;
		struct host1x_bo_mapping *map;
		struct host1x_bo *bo;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		bo = reloc->target.bo;

		switch (reloc->flags & mask) {
		case HOST1X_RELOC_READ:
			direction = DMA_TO_DEVICE;
			break;

		case HOST1X_RELOC_WRITE:
			direction = DMA_FROM_DEVICE;
			break;

		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
			direction = DMA_BIDIRECTIONAL;
			break;

		default:
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(dev, bo, direction, &client->cache);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		/*
		 * host1x clients are generally not able to do scatter-gather themselves, so fail
		 * if the buffer is discontiguous and we fail to map its SG table to a single
		 * contiguous chunk of I/O virtual memory.
		 */
		if (map->chunks > 1) {
			err = -EINVAL;
			goto unpin;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;
	}

	/*
	 * We will copy the contents of the gather BOs later, so there is no
	 * need to hold and pin them.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_bo_mapping *map;
		size_t gather_size = 0;
		struct scatterlist *sg;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		if (host->domain) {
			for_each_sgtable_sg(map->sgt, sg, j)
				gather_size += sg->length;

			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

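			/*
			 * Note: iommu_map_sgtable() returns the number of
			 * bytes it mapped on success, so a return value of
			 * zero here means the mapping failed.
			 */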
			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
						map->sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			map->phys = iova_dma_addr(&host->iova, alloc);
			map->size = gather_size;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;

		job->gather_addr_phys[i] = map->phys;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

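/*
 * do_relocs() - patch the relocations that target one gather
 *
 * Each relocation whose cmdbuf matches this gather has the pinned DMA
 * address of its target written into the command stream. With the firewall
 * enabled the patch is applied to the gather copy; otherwise the original
 * command buffer is mapped and patched in place.
 */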
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
						g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

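/*
 * check_reloc() - check that a relocation matches the register write the
 * firewall is currently looking at
 *
 * The firewall consumes relocations in submission order, so the relocation
 * must point at exactly this cmdbuf and word offset.
 */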
static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

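/*
 * Parser state for the command firewall: the current position within the
 * gather (cmdbuf/offset/words), the opcode being decoded (class, reg, mask,
 * count) and the relocations that remain to be consumed.
 */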
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

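/*
 * check_register() - validate one register write
 *
 * Writes to registers that the client driver flags as address registers
 * must be backed by the next pending relocation, so that userspace cannot
 * smuggle raw I/O virtual addresses into the command stream. Writes to
 * other registers always pass.
 */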
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

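/*
 * check_class() - validate a class switch
 *
 * If the client driver provides an is_valid_class() callback it decides;
 * otherwise only the class the job was submitted for is allowed.
 */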
static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

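/*
 * check_mask() - validate a MASK opcode, which writes the registers
 * selected by the bits set in fw->mask starting at fw->reg, one payload
 * word per set bit.
 */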
static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}

		mask >>= 1;
		reg++;
	}

	return 0;
}

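/*
 * check_incr() - validate an INCR opcode, which writes fw->count payload
 * words to consecutive registers starting at fw->reg.
 */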
static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

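/*
 * check_nonincr() - validate a NONINCR opcode, which writes all fw->count
 * payload words to the single register fw->reg.
 */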
static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

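/*
 * validate() - run the firewall over one gather in the gather copy
 *
 * Decodes the host1x opcode stream word by word: opcode 0 (SETCLASS)
 * switches class and may carry a mask write, opcodes 1 (INCR), 2 (NONINCR)
 * and 3 (MASK) carry register writes that are checked above, and opcodes 4
 * and 14 have no payload words to validate. Any other opcode fails the job.
 */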
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 4:
		case 14:
			break;

		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

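/*
 * copy_gathers() - consolidate all gathers into one firewall-checked copy
 *
 * All gather contents are copied into a single write-combined DMA
 * allocation, each gather's base/offset is redirected into that copy, and
 * validate() is run over every gather. A job that leaves relocations
 * unconsumed is rejected, since each address register write must consume
 * exactly one relocation.
 */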
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher-priority pool first,
	 * since waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the non-blocking allocation failed, fall back to a blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

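/*
 * host1x_job_pin() - pin and patch a job so the hardware can execute it
 *
 * Pins all referenced buffers via pin_job(), runs the firewall copy when
 * enabled, then resolves each gather's base address and applies its
 * relocations. Gathers sharing a BO are patched only once. On failure,
 * everything pinned so far is released again.
 */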
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		/* process each gather buffer only once */
		if (g->handled)
			continue;

		/* copy_gathers() already set the gather's base if the firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

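/*
 * host1x_job_unpin() - release every mapping pin_job() established
 *
 * Unmaps gathers from the host1x IOMMU domain where one was used, drops the
 * buffer pins and references, and frees the firewall's gather copy if one
 * was allocated.
 */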
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_bo_mapping *map = job->unpins[i].map;
		struct host1x_bo *bo = map->bo;

		if (!job->enable_firewall && map->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i], map->size);
			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(map);
		host1x_bo_put(bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}