1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Tegra host1x Command DMA 4 * 5 * Copyright (c) 2010-2013, NVIDIA Corporation. 6 */ 7 8 9 #include <asm/cacheflush.h> 10 #include <linux/device.h> 11 #include <linux/dma-mapping.h> 12 #include <linux/host1x.h> 13 #include <linux/interrupt.h> 14 #include <linux/kernel.h> 15 #include <linux/kfifo.h> 16 #include <linux/slab.h> 17 #include <trace/events/host1x.h> 18 19 #include "cdma.h" 20 #include "channel.h" 21 #include "dev.h" 22 #include "debug.h" 23 #include "job.h" 24 25 /* 26 * push_buffer 27 * 28 * The push buffer is a circular array of words to be fetched by command DMA. 29 * Note that it works slightly differently to the sync queue; fence == pos 30 * means that the push buffer is full, not empty. 31 */ 32 33 /* 34 * Typically the commands written into the push buffer are a pair of words. We 35 * use slots to represent each of these pairs and to simplify things. Note the 36 * strange number of slots allocated here. 512 slots will fit exactly within a 37 * single memory page. We also need one additional word at the end of the push 38 * buffer for the RESTART opcode that will instruct the CDMA to jump back to 39 * the beginning of the push buffer. With 512 slots, this means that we'll use 40 * 2 memory pages and waste 4092 bytes of the second page that will never be 41 * used. 42 */ 43 #define HOST1X_PUSHBUFFER_SLOTS 511 44 45 /* 46 * Clean up push buffer resources 47 */ 48 static void host1x_pushbuffer_destroy(struct push_buffer *pb) 49 { 50 struct host1x_cdma *cdma = pb_to_cdma(pb); 51 struct host1x *host1x = cdma_to_host1x(cdma); 52 53 if (!pb->mapped) 54 return; 55 56 if (host1x->domain) { 57 iommu_unmap(host1x->domain, pb->dma, pb->alloc_size); 58 free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); 59 } 60 61 dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); 62 63 pb->mapped = NULL; 64 pb->phys = 0; 65 } 66 67 /* 68 * Init push buffer resources 69 */ 70 static int host1x_pushbuffer_init(struct push_buffer *pb) 71 { 72 struct host1x_cdma *cdma = pb_to_cdma(pb); 73 struct host1x *host1x = cdma_to_host1x(cdma); 74 struct iova *alloc; 75 u32 size; 76 int err; 77 78 pb->mapped = NULL; 79 pb->phys = 0; 80 pb->size = HOST1X_PUSHBUFFER_SLOTS * 8; 81 82 size = pb->size + 4; 83 84 /* initialize buffer pointers */ 85 pb->fence = pb->size - 8; 86 pb->pos = 0; 87 88 if (host1x->domain) { 89 unsigned long shift; 90 91 size = iova_align(&host1x->iova, size); 92 93 pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 94 GFP_KERNEL); 95 if (!pb->mapped) 96 return -ENOMEM; 97 98 shift = iova_shift(&host1x->iova); 99 alloc = alloc_iova(&host1x->iova, size >> shift, 100 host1x->iova_end >> shift, true); 101 if (!alloc) { 102 err = -ENOMEM; 103 goto iommu_free_mem; 104 } 105 106 pb->dma = iova_dma_addr(&host1x->iova, alloc); 107 err = iommu_map(host1x->domain, pb->dma, pb->phys, size, 108 IOMMU_READ, GFP_KERNEL); 109 if (err) 110 goto iommu_free_iova; 111 } else { 112 pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 113 GFP_KERNEL); 114 if (!pb->mapped) 115 return -ENOMEM; 116 117 pb->dma = pb->phys; 118 } 119 120 pb->alloc_size = size; 121 122 host1x_hw_pushbuffer_init(host1x, pb); 123 124 return 0; 125 126 iommu_free_iova: 127 __free_iova(&host1x->iova, alloc); 128 iommu_free_mem: 129 dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); 130 131 return err; 132 } 133 134 /* 135 * Push two words to the push buffer 136 * Caller must ensure push buffer is not full 137 */ 138 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) 139 { 140 u32 *p = (u32 *)((void *)pb->mapped + pb->pos); 141 142 WARN_ON(pb->pos == pb->fence); 143 *(p++) = op1; 144 *(p++) = op2; 145 pb->pos += 8; 146 147 if (pb->pos >= pb->size) 148 pb->pos -= pb->size; 149 } 150 151 /* 152 * Pop a number of two word slots from the push buffer 153 * Caller must ensure push buffer is not empty 154 */ 155 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) 156 { 157 /* Advance the next write position */ 158 pb->fence += slots * 8; 159 160 if (pb->fence >= pb->size) 161 pb->fence -= pb->size; 162 } 163 164 /* 165 * Return the number of two word slots free in the push buffer 166 */ 167 static u32 host1x_pushbuffer_space(struct push_buffer *pb) 168 { 169 unsigned int fence = pb->fence; 170 171 if (pb->fence < pb->pos) 172 fence += pb->size; 173 174 return (fence - pb->pos) / 8; 175 } 176 177 /* 178 * Sleep (if necessary) until the requested event happens 179 * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty. 180 * - Returns 1 181 * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer 182 * - Return the amount of space (> 0) 183 * Must be called with the cdma lock held. 184 */ 185 unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, 186 enum cdma_event event) 187 { 188 for (;;) { 189 struct push_buffer *pb = &cdma->push_buffer; 190 unsigned int space; 191 192 switch (event) { 193 case CDMA_EVENT_SYNC_QUEUE_EMPTY: 194 space = list_empty(&cdma->sync_queue) ? 1 : 0; 195 break; 196 197 case CDMA_EVENT_PUSH_BUFFER_SPACE: 198 space = host1x_pushbuffer_space(pb); 199 break; 200 201 default: 202 WARN_ON(1); 203 return -EINVAL; 204 } 205 206 if (space) 207 return space; 208 209 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), 210 event); 211 212 /* If somebody has managed to already start waiting, yield */ 213 if (cdma->event != CDMA_EVENT_NONE) { 214 mutex_unlock(&cdma->lock); 215 schedule(); 216 mutex_lock(&cdma->lock); 217 continue; 218 } 219 220 cdma->event = event; 221 222 mutex_unlock(&cdma->lock); 223 wait_for_completion(&cdma->complete); 224 mutex_lock(&cdma->lock); 225 } 226 227 return 0; 228 } 229 230 /* 231 * Sleep (if necessary) until the push buffer has enough free space. 232 * 233 * Must be called with the cdma lock held. 234 */ 235 static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, 236 struct host1x_cdma *cdma, 237 unsigned int needed) 238 { 239 while (true) { 240 struct push_buffer *pb = &cdma->push_buffer; 241 unsigned int space; 242 243 space = host1x_pushbuffer_space(pb); 244 if (space >= needed) 245 break; 246 247 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), 248 CDMA_EVENT_PUSH_BUFFER_SPACE); 249 250 host1x_hw_cdma_flush(host1x, cdma); 251 252 /* If somebody has managed to already start waiting, yield */ 253 if (cdma->event != CDMA_EVENT_NONE) { 254 mutex_unlock(&cdma->lock); 255 schedule(); 256 mutex_lock(&cdma->lock); 257 continue; 258 } 259 260 cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; 261 262 mutex_unlock(&cdma->lock); 263 wait_for_completion(&cdma->complete); 264 mutex_lock(&cdma->lock); 265 } 266 267 return 0; 268 } 269 /* 270 * Start timer that tracks the time spent by the job. 271 * Must be called with the cdma lock held. 272 */ 273 static void cdma_start_timer_locked(struct host1x_cdma *cdma, 274 struct host1x_job *job) 275 { 276 if (cdma->timeout.client) { 277 /* timer already started */ 278 return; 279 } 280 281 cdma->timeout.client = job->client; 282 cdma->timeout.syncpt = job->syncpt; 283 cdma->timeout.syncpt_val = job->syncpt_end; 284 cdma->timeout.start_ktime = ktime_get(); 285 286 schedule_delayed_work(&cdma->timeout.wq, 287 msecs_to_jiffies(job->timeout)); 288 } 289 290 /* 291 * Stop timer when a buffer submission completes. 292 * Must be called with the cdma lock held. 293 */ 294 static void stop_cdma_timer_locked(struct host1x_cdma *cdma) 295 { 296 cancel_delayed_work(&cdma->timeout.wq); 297 cdma->timeout.client = NULL; 298 } 299 300 /* 301 * For all sync queue entries that have already finished according to the 302 * current sync point registers: 303 * - unpin & unref their mems 304 * - pop their push buffer slots 305 * - remove them from the sync queue 306 * This is normally called from the host code's worker thread, but can be 307 * called manually if necessary. 308 * Must be called with the cdma lock held. 309 */ 310 static void update_cdma_locked(struct host1x_cdma *cdma) 311 { 312 bool signal = false; 313 struct host1x_job *job, *n; 314 315 /* 316 * Walk the sync queue, reading the sync point registers as necessary, 317 * to consume as many sync queue entries as possible without blocking 318 */ 319 list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { 320 struct host1x_syncpt *sp = job->syncpt; 321 322 /* Check whether this syncpt has completed, and bail if not */ 323 if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && 324 !job->cancelled) { 325 /* Start timer on next pending syncpt */ 326 if (job->timeout) 327 cdma_start_timer_locked(cdma, job); 328 329 break; 330 } 331 332 /* Cancel timeout, when a buffer completes */ 333 if (cdma->timeout.client) 334 stop_cdma_timer_locked(cdma); 335 336 /* Unpin the memory */ 337 host1x_job_unpin(job); 338 339 /* Pop push buffer slots */ 340 if (job->num_slots) { 341 struct push_buffer *pb = &cdma->push_buffer; 342 343 host1x_pushbuffer_pop(pb, job->num_slots); 344 345 if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) 346 signal = true; 347 } 348 349 list_del(&job->list); 350 host1x_job_put(job); 351 } 352 353 if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY && 354 list_empty(&cdma->sync_queue)) 355 signal = true; 356 357 if (signal) { 358 cdma->event = CDMA_EVENT_NONE; 359 complete(&cdma->complete); 360 } 361 } 362 363 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, 364 struct device *dev) 365 { 366 struct host1x *host1x = cdma_to_host1x(cdma); 367 u32 restart_addr, syncpt_incrs, syncpt_val; 368 struct host1x_job *job, *next_job = NULL; 369 370 syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); 371 372 dev_dbg(dev, "%s: starting cleanup (thresh %d)\n", 373 __func__, syncpt_val); 374 375 /* 376 * Move the sync_queue read pointer to the first entry that hasn't 377 * completed based on the current HW syncpt value. It's likely there 378 * won't be any (i.e. we're still at the head), but covers the case 379 * where a syncpt incr happens just prior/during the teardown. 380 */ 381 382 dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n", 383 __func__); 384 385 list_for_each_entry(job, &cdma->sync_queue, list) { 386 if (syncpt_val < job->syncpt_end) { 387 388 if (!list_is_last(&job->list, &cdma->sync_queue)) 389 next_job = list_next_entry(job, list); 390 391 goto syncpt_incr; 392 } 393 394 host1x_job_dump(dev, job); 395 } 396 397 /* all jobs have been completed */ 398 job = NULL; 399 400 syncpt_incr: 401 402 /* 403 * Increment with CPU the remaining syncpts of a partially executed job. 404 * 405 * CDMA will continue execution starting with the next job or will get 406 * into idle state. 407 */ 408 if (next_job) 409 restart_addr = next_job->first_get; 410 else 411 restart_addr = cdma->last_pos; 412 413 if (!job) 414 goto resume; 415 416 /* do CPU increments for the remaining syncpts */ 417 if (job->syncpt_recovery) { 418 dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", 419 __func__); 420 421 /* won't need a timeout when replayed */ 422 job->timeout = 0; 423 424 syncpt_incrs = job->syncpt_end - syncpt_val; 425 dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs); 426 427 host1x_job_dump(dev, job); 428 429 /* safe to use CPU to incr syncpts */ 430 host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, 431 syncpt_incrs, job->syncpt_end, 432 job->num_slots); 433 434 dev_dbg(dev, "%s: finished sync_queue modification\n", 435 __func__); 436 } else { 437 struct host1x_job *failed_job = job; 438 439 host1x_job_dump(dev, job); 440 441 host1x_syncpt_set_locked(job->syncpt); 442 failed_job->cancelled = true; 443 444 list_for_each_entry_continue(job, &cdma->sync_queue, list) { 445 unsigned int i; 446 447 if (job->syncpt != failed_job->syncpt) 448 continue; 449 450 for (i = 0; i < job->num_slots; i++) { 451 unsigned int slot = (job->first_get/8 + i) % 452 HOST1X_PUSHBUFFER_SLOTS; 453 u32 *mapped = cdma->push_buffer.mapped; 454 455 /* 456 * Overwrite opcodes with 0 word writes 457 * to offset 0xbad. This does nothing but 458 * has a easily detected signature in debug 459 * traces. 460 * 461 * On systems with MLOCK enforcement enabled, 462 * the above 0 word writes would fall foul of 463 * the enforcement. As such, in the first slot 464 * put a RESTART_W opcode to the beginning 465 * of the next job. We don't use this for older 466 * chips since those only support the RESTART 467 * opcode with inconvenient alignment requirements. 468 */ 469 if (i == 0 && host1x->info->has_wide_gather) { 470 unsigned int next_job = (job->first_get/8 + job->num_slots) 471 % HOST1X_PUSHBUFFER_SLOTS; 472 mapped[2*slot+0] = (0xd << 28) | (next_job * 2); 473 mapped[2*slot+1] = 0x0; 474 } else { 475 mapped[2*slot+0] = 0x1bad0000; 476 mapped[2*slot+1] = 0x1bad0000; 477 } 478 } 479 480 job->cancelled = true; 481 } 482 483 wmb(); 484 485 update_cdma_locked(cdma); 486 } 487 488 resume: 489 /* roll back DMAGET and start up channel again */ 490 host1x_hw_cdma_resume(host1x, cdma, restart_addr); 491 } 492 493 static void cdma_update_work(struct work_struct *work) 494 { 495 struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); 496 497 mutex_lock(&cdma->lock); 498 update_cdma_locked(cdma); 499 mutex_unlock(&cdma->lock); 500 } 501 502 /* 503 * Create a cdma 504 */ 505 int host1x_cdma_init(struct host1x_cdma *cdma) 506 { 507 int err; 508 509 mutex_init(&cdma->lock); 510 init_completion(&cdma->complete); 511 INIT_WORK(&cdma->update_work, cdma_update_work); 512 513 INIT_LIST_HEAD(&cdma->sync_queue); 514 515 cdma->event = CDMA_EVENT_NONE; 516 cdma->running = false; 517 cdma->torndown = false; 518 519 err = host1x_pushbuffer_init(&cdma->push_buffer); 520 if (err) 521 return err; 522 523 return 0; 524 } 525 526 /* 527 * Destroy a cdma 528 */ 529 int host1x_cdma_deinit(struct host1x_cdma *cdma) 530 { 531 struct push_buffer *pb = &cdma->push_buffer; 532 struct host1x *host1x = cdma_to_host1x(cdma); 533 534 if (cdma->running) { 535 pr_warn("%s: CDMA still running\n", __func__); 536 return -EBUSY; 537 } 538 539 host1x_pushbuffer_destroy(pb); 540 host1x_hw_cdma_timeout_destroy(host1x, cdma); 541 542 return 0; 543 } 544 545 /* 546 * Begin a cdma submit 547 */ 548 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) 549 { 550 struct host1x *host1x = cdma_to_host1x(cdma); 551 552 mutex_lock(&cdma->lock); 553 554 /* 555 * Check if syncpoint was locked due to previous job timeout. 556 * This needs to be done within the cdma lock to avoid a race 557 * with the timeout handler. 558 */ 559 if (job->syncpt->locked) { 560 mutex_unlock(&cdma->lock); 561 return -EPERM; 562 } 563 564 if (job->timeout) { 565 /* init state on first submit with timeout value */ 566 if (!cdma->timeout.initialized) { 567 int err; 568 569 err = host1x_hw_cdma_timeout_init(host1x, cdma); 570 if (err) { 571 mutex_unlock(&cdma->lock); 572 return err; 573 } 574 } 575 } 576 577 if (!cdma->running) 578 host1x_hw_cdma_start(host1x, cdma); 579 580 cdma->slots_free = 0; 581 cdma->slots_used = 0; 582 cdma->first_get = cdma->push_buffer.pos; 583 584 trace_host1x_cdma_begin(dev_name(job->channel->dev)); 585 return 0; 586 } 587 588 /* 589 * Push two words into a push buffer slot 590 * Blocks as necessary if the push buffer is full. 591 */ 592 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) 593 { 594 struct host1x *host1x = cdma_to_host1x(cdma); 595 struct push_buffer *pb = &cdma->push_buffer; 596 u32 slots_free = cdma->slots_free; 597 598 if (host1x_debug_trace_cmdbuf) 599 trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), 600 op1, op2); 601 602 if (slots_free == 0) { 603 host1x_hw_cdma_flush(host1x, cdma); 604 slots_free = host1x_cdma_wait_locked(cdma, 605 CDMA_EVENT_PUSH_BUFFER_SPACE); 606 } 607 608 cdma->slots_free = slots_free - 1; 609 cdma->slots_used++; 610 host1x_pushbuffer_push(pb, op1, op2); 611 } 612 613 /* 614 * Push four words into two consecutive push buffer slots. Note that extra 615 * care needs to be taken not to split the two slots across the end of the 616 * push buffer. Otherwise the RESTART opcode at the end of the push buffer 617 * that ensures processing will restart at the beginning will break up the 618 * four words. 619 * 620 * Blocks as necessary if the push buffer is full. 621 */ 622 void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, 623 u32 op3, u32 op4) 624 { 625 struct host1x_channel *channel = cdma_to_channel(cdma); 626 struct host1x *host1x = cdma_to_host1x(cdma); 627 struct push_buffer *pb = &cdma->push_buffer; 628 unsigned int space = cdma->slots_free; 629 unsigned int needed = 2, extra = 0; 630 631 if (host1x_debug_trace_cmdbuf) 632 trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, 633 op3, op4); 634 635 /* compute number of extra slots needed for padding */ 636 if (pb->pos + 16 > pb->size) { 637 extra = (pb->size - pb->pos) / 8; 638 needed += extra; 639 } 640 641 host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); 642 space = host1x_pushbuffer_space(pb); 643 644 cdma->slots_free = space - needed; 645 cdma->slots_used += needed; 646 647 if (extra > 0) { 648 /* 649 * If there isn't enough space at the tail of the pushbuffer, 650 * insert a RESTART(0) here to go back to the beginning. 651 * The code above adjusted the indexes appropriately. 652 */ 653 host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); 654 } 655 656 host1x_pushbuffer_push(pb, op1, op2); 657 host1x_pushbuffer_push(pb, op3, op4); 658 } 659 660 /* 661 * End a cdma submit 662 * Kick off DMA, add job to the sync queue, and a number of slots to be freed 663 * from the pushbuffer. The handles for a submit must all be pinned at the same 664 * time, but they can be unpinned in smaller chunks. 665 */ 666 void host1x_cdma_end(struct host1x_cdma *cdma, 667 struct host1x_job *job) 668 { 669 struct host1x *host1x = cdma_to_host1x(cdma); 670 bool idle = list_empty(&cdma->sync_queue); 671 672 host1x_hw_cdma_flush(host1x, cdma); 673 674 job->first_get = cdma->first_get; 675 job->num_slots = cdma->slots_used; 676 host1x_job_get(job); 677 list_add_tail(&job->list, &cdma->sync_queue); 678 679 /* start timer on idle -> active transitions */ 680 if (job->timeout && idle) 681 cdma_start_timer_locked(cdma, job); 682 683 trace_host1x_cdma_end(dev_name(job->channel->dev)); 684 mutex_unlock(&cdma->lock); 685 } 686 687 /* 688 * Update cdma state according to current sync point values 689 */ 690 void host1x_cdma_update(struct host1x_cdma *cdma) 691 { 692 schedule_work(&cdma->update_work); 693 } 694