// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_cmdbufs + num_relocs;
	u64 total;
	void *mem;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct iommu_domain *domain;
	unsigned int i;
	int err;

	domain = iommu_get_domain_for_dev(dev);
	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		dma_addr_t phys_addr, *phys;
		struct sg_table *sgt;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the client device is not attached to an IOMMU, the
		 * physical address of the buffer object can be used.
		 *
		 * Similarly, when an IOMMU domain is shared between all
		 * host1x clients, the IOVA is already available, so no
		 * need to map the buffer object again.
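		 *
		 * In either of those cases &phys_addr is passed to
		 * host1x_bo_pin() below so that the address is returned
		 * directly; otherwise phys is left NULL and the resulting
		 * sg_table is mapped through the DMA API further down.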
		 *
		 * XXX Note that this isn't always safe to do because it
		 * relies on an assumption that no cache maintenance is
		 * needed on the buffer objects.
		 */
		if (!domain || client->group)
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto unpin;
		}

		if (sgt) {
			unsigned long mask = HOST1X_RELOC_READ |
					     HOST1X_RELOC_WRITE;
			enum dma_data_direction dir;

			switch (reloc->flags & mask) {
			case HOST1X_RELOC_READ:
				dir = DMA_TO_DEVICE;
				break;

			case HOST1X_RELOC_WRITE:
				dir = DMA_FROM_DEVICE;
				break;

			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
				dir = DMA_BIDIRECTIONAL;
				break;

			default:
				err = -EINVAL;
				goto unpin;
			}

			err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
			if (!err) {
				err = -ENOMEM;
				goto unpin;
			}

			job->unpins[job->num_unpins].dev = dev;
			job->unpins[job->num_unpins].dir = dir;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		dma_addr_t *phys;
		unsigned int j;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the host1x is not attached to an IOMMU, there is no need
		 * to map the buffer object for the host1x, since the physical
		 * address can simply be used.
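		 *
		 * Otherwise phys is left NULL and the gather is mapped below,
		 * either directly into the host1x IOMMU domain or through the
		 * DMA API.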
		 */
		if (!iommu_get_domain_for_dev(host->dev))
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(host->dev, g->bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto unpin;
		}

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			for_each_sg(sgt->sgl, sg, sgt->nents, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto unpin;
			}

			err = iommu_map_sg(host->domain,
					   iova_dma_addr(&host->iova, alloc),
					   sgt->sgl, sgt->nents, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto unpin;
			}

			job->unpins[job->num_unpins].size = gather_size;
			phys_addr = iova_dma_addr(&host->iova, alloc);
		} else if (sgt) {
			err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
					 DMA_TO_DEVICE);
			if (!err) {
				err = -ENOMEM;
				goto unpin;
			}

			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
			job->unpins[job->num_unpins].dev = host->dev;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->gather_addr_phys[i] = phys_addr;

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

unpin:
	host1x_job_unpin(job);
	return err;
}

static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			target = (u32 *)job->gather_copy_mapped +
				 reloc->cmdbuf.offset / sizeof(u32) +
				 g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 4:
		case 14:
			break;

		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pools first,
	 * since waiting for the allocation here is a major performance hit.
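	 *
	 * The buffer receives a contiguous copy of all gathers so that the
	 * firewall can validate (and later patch) them without touching the
	 * original buffer objects.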
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the non-blocking allocation failed, fall back to a blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];
		struct device *dev = unpin->dev ?: host->dev;
		struct sg_table *sgt = unpin->sgt;

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
		    unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				  iova_pfn(&host->iova, job->addr_phys[i]));
		}

		if (unpin->dev && sgt)
			dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
				     unpin->dir);

		host1x_bo_unpin(dev, unpin->bo, sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins); 676 } 677