// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

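/*
 * Illustrative sketch only (not part of the driver): a typical caller
 * allocates a job for one command buffer and no relocations, queues a
 * gather, and drops its reference when done. "channel", "bo" and
 * "num_words" stand in for objects owned by the calling driver; submission
 * itself goes through host1x_job_submit() in the channel code.
 *
 *	struct host1x_job *job;
 *
 *	job = host1x_job_alloc(channel, 1, 0, false);
 *	if (!job)
 *		return -ENOMEM;
 *
 *	host1x_job_add_gather(job, bo, num_words, 0);
 *	...
 *	host1x_job_put(job);
 */
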
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->waiter)
		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
				    job->waiter, false);

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

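/*
 * Resolve and map every relocation target (and, when the firewall is
 * disabled, every gather buffer) for DMA, recording each mapping in
 * job->unpins so host1x_job_unpin() can release it later.
 */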
static int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		enum dma_data_direction direction;
		struct host1x_bo_mapping *map;
		struct host1x_bo *bo;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		bo = reloc->target.bo;

		switch (reloc->flags & mask) {
		case HOST1X_RELOC_READ:
			direction = DMA_TO_DEVICE;
			break;

		case HOST1X_RELOC_WRITE:
			direction = DMA_FROM_DEVICE;
			break;

		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
			direction = DMA_BIDIRECTIONAL;
			break;

		default:
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(dev, bo, direction, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		/*
		 * host1x clients are generally not able to do scatter-gather
		 * themselves, so fail if the buffer is discontiguous and we
		 * fail to map its SG table to a single contiguous chunk of
		 * I/O virtual memory.
		 */
		if (map->chunks > 1) {
			err = -EINVAL;
			goto unpin;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;
	}

	/*
	 * We will copy the gathers' BO contents later, so there is no need
	 * to hold and pin them.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_bo_mapping *map;
		size_t gather_size = 0;
		struct scatterlist *sg;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		if (host->domain) {
			for_each_sgtable_sg(map->sgt, sg, j)
				gather_size += sg->length;

			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
						map->sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			map->phys = iova_dma_addr(&host->iova, alloc);
			map->size = gather_size;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;

		job->gather_addr_phys[i] = map->phys;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
					g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

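/*
 * Note on the command-word layout assumed by validate() below, derived from
 * the field extraction in its switch statement (the hardware headers remain
 * the authoritative opcode definitions): every word carries its opcode in
 * bits 31:28. Opcode 0 (setclass) encodes a register offset in bits 27:16,
 * a class ID in bits 15:6 and a write mask in bits 5:0; opcodes 1 (incr) and
 * 2 (nonincr) encode a register offset in bits 27:16 and a word count in
 * bits 15:0; opcode 3 (mask) encodes a register offset in bits 27:16 and a
 * mask in bits 15:0. Opcodes 4 and 14 are accepted without further register
 * checks.
 */
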
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 4:
		case 14:
			break;

		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pools first,
	 * since waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher-priority allocation failed, try the generic blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

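/*
 * Rough lifecycle note (the channel submission code remains the
 * authoritative flow): host1x_job_pin() is expected to be called once per
 * job before submission, and host1x_job_unpin() once the job has completed
 * or been torn down. On failure host1x_job_pin() already unpins whatever it
 * managed to pin, so the caller typically only needs to drop its job
 * reference.
 */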
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		/* process each gather memory only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets the gather's base if the firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_bo_mapping *map = job->unpins[i].map;
		struct host1x_bo *bo = map->bo;

		if (!job->enable_firewall && map->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i], map->size);
			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(map);
		host1x_bo_put(bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}