1 /* 2 * Tegra host1x Command DMA 3 * 4 * Copyright (c) 2010-2013, NVIDIA Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 20 #include <asm/cacheflush.h> 21 #include <linux/device.h> 22 #include <linux/dma-mapping.h> 23 #include <linux/host1x.h> 24 #include <linux/interrupt.h> 25 #include <linux/kernel.h> 26 #include <linux/kfifo.h> 27 #include <linux/slab.h> 28 #include <trace/events/host1x.h> 29 30 #include "cdma.h" 31 #include "channel.h" 32 #include "dev.h" 33 #include "debug.h" 34 #include "job.h" 35 36 /* 37 * push_buffer 38 * 39 * The push buffer is a circular array of words to be fetched by command DMA. 40 * Note that it works slightly differently to the sync queue; fence == pos 41 * means that the push buffer is full, not empty. 42 */ 43 44 /* 45 * Typically the commands written into the push buffer are a pair of words. We 46 * use slots to represent each of these pairs and to simplify things. Note the 47 * strange number of slots allocated here. 512 slots will fit exactly within a 48 * single memory page. We also need one additional word at the end of the push 49 * buffer for the RESTART opcode that will instruct the CDMA to jump back to 50 * the beginning of the push buffer. With 512 slots, this means that we'll use 51 * 2 memory pages and waste 4092 bytes of the second page that will never be 52 * used. 53 */ 54 #define HOST1X_PUSHBUFFER_SLOTS 511 55 56 /* 57 * Clean up push buffer resources 58 */ 59 static void host1x_pushbuffer_destroy(struct push_buffer *pb) 60 { 61 struct host1x_cdma *cdma = pb_to_cdma(pb); 62 struct host1x *host1x = cdma_to_host1x(cdma); 63 64 if (!pb->mapped) 65 return; 66 67 if (host1x->domain) { 68 iommu_unmap(host1x->domain, pb->dma, pb->alloc_size); 69 free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); 70 } 71 72 dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); 73 74 pb->mapped = NULL; 75 pb->phys = 0; 76 } 77 78 /* 79 * Init push buffer resources 80 */ 81 static int host1x_pushbuffer_init(struct push_buffer *pb) 82 { 83 struct host1x_cdma *cdma = pb_to_cdma(pb); 84 struct host1x *host1x = cdma_to_host1x(cdma); 85 struct iova *alloc; 86 u32 size; 87 int err; 88 89 pb->mapped = NULL; 90 pb->phys = 0; 91 pb->size = HOST1X_PUSHBUFFER_SLOTS * 8; 92 93 size = pb->size + 4; 94 95 /* initialize buffer pointers */ 96 pb->fence = pb->size - 8; 97 pb->pos = 0; 98 99 if (host1x->domain) { 100 unsigned long shift; 101 102 size = iova_align(&host1x->iova, size); 103 104 pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 105 GFP_KERNEL); 106 if (!pb->mapped) 107 return -ENOMEM; 108 109 shift = iova_shift(&host1x->iova); 110 alloc = alloc_iova(&host1x->iova, size >> shift, 111 host1x->iova_end >> shift, true); 112 if (!alloc) { 113 err = -ENOMEM; 114 goto iommu_free_mem; 115 } 116 117 pb->dma = iova_dma_addr(&host1x->iova, alloc); 118 err = iommu_map(host1x->domain, pb->dma, pb->phys, size, 119 IOMMU_READ); 120 if (err) 121 goto iommu_free_iova; 122 } else { 123 pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 124 GFP_KERNEL); 125 if (!pb->mapped) 126 return -ENOMEM; 127 128 pb->dma = pb->phys; 129 } 130 131 pb->alloc_size = size; 132 133 host1x_hw_pushbuffer_init(host1x, pb); 134 135 return 0; 136 137 iommu_free_iova: 138 __free_iova(&host1x->iova, alloc); 139 iommu_free_mem: 140 dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); 141 142 return err; 143 } 144 145 /* 146 * Push two words to the push buffer 147 * Caller must ensure push buffer is not full 148 */ 149 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) 150 { 151 u32 *p = (u32 *)((void *)pb->mapped + pb->pos); 152 153 WARN_ON(pb->pos == pb->fence); 154 *(p++) = op1; 155 *(p++) = op2; 156 pb->pos += 8; 157 158 if (pb->pos >= pb->size) 159 pb->pos -= pb->size; 160 } 161 162 /* 163 * Pop a number of two word slots from the push buffer 164 * Caller must ensure push buffer is not empty 165 */ 166 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) 167 { 168 /* Advance the next write position */ 169 pb->fence += slots * 8; 170 171 if (pb->fence >= pb->size) 172 pb->fence -= pb->size; 173 } 174 175 /* 176 * Return the number of two word slots free in the push buffer 177 */ 178 static u32 host1x_pushbuffer_space(struct push_buffer *pb) 179 { 180 unsigned int fence = pb->fence; 181 182 if (pb->fence < pb->pos) 183 fence += pb->size; 184 185 return (fence - pb->pos) / 8; 186 } 187 188 /* 189 * Sleep (if necessary) until the requested event happens 190 * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty. 191 * - Returns 1 192 * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer 193 * - Return the amount of space (> 0) 194 * Must be called with the cdma lock held. 195 */ 196 unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, 197 enum cdma_event event) 198 { 199 for (;;) { 200 struct push_buffer *pb = &cdma->push_buffer; 201 unsigned int space; 202 203 switch (event) { 204 case CDMA_EVENT_SYNC_QUEUE_EMPTY: 205 space = list_empty(&cdma->sync_queue) ? 1 : 0; 206 break; 207 208 case CDMA_EVENT_PUSH_BUFFER_SPACE: 209 space = host1x_pushbuffer_space(pb); 210 break; 211 212 default: 213 WARN_ON(1); 214 return -EINVAL; 215 } 216 217 if (space) 218 return space; 219 220 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), 221 event); 222 223 /* If somebody has managed to already start waiting, yield */ 224 if (cdma->event != CDMA_EVENT_NONE) { 225 mutex_unlock(&cdma->lock); 226 schedule(); 227 mutex_lock(&cdma->lock); 228 continue; 229 } 230 231 cdma->event = event; 232 233 mutex_unlock(&cdma->lock); 234 wait_for_completion(&cdma->complete); 235 mutex_lock(&cdma->lock); 236 } 237 238 return 0; 239 } 240 241 /* 242 * Sleep (if necessary) until the push buffer has enough free space. 243 * 244 * Must be called with the cdma lock held. 245 */ 246 int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, 247 struct host1x_cdma *cdma, 248 unsigned int needed) 249 { 250 while (true) { 251 struct push_buffer *pb = &cdma->push_buffer; 252 unsigned int space; 253 254 space = host1x_pushbuffer_space(pb); 255 if (space >= needed) 256 break; 257 258 trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), 259 CDMA_EVENT_PUSH_BUFFER_SPACE); 260 261 host1x_hw_cdma_flush(host1x, cdma); 262 263 /* If somebody has managed to already start waiting, yield */ 264 if (cdma->event != CDMA_EVENT_NONE) { 265 mutex_unlock(&cdma->lock); 266 schedule(); 267 mutex_lock(&cdma->lock); 268 continue; 269 } 270 271 cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; 272 273 mutex_unlock(&cdma->lock); 274 wait_for_completion(&cdma->complete); 275 mutex_lock(&cdma->lock); 276 } 277 278 return 0; 279 } 280 /* 281 * Start timer that tracks the time spent by the job. 282 * Must be called with the cdma lock held. 283 */ 284 static void cdma_start_timer_locked(struct host1x_cdma *cdma, 285 struct host1x_job *job) 286 { 287 struct host1x *host = cdma_to_host1x(cdma); 288 289 if (cdma->timeout.client) { 290 /* timer already started */ 291 return; 292 } 293 294 cdma->timeout.client = job->client; 295 cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); 296 cdma->timeout.syncpt_val = job->syncpt_end; 297 cdma->timeout.start_ktime = ktime_get(); 298 299 schedule_delayed_work(&cdma->timeout.wq, 300 msecs_to_jiffies(job->timeout)); 301 } 302 303 /* 304 * Stop timer when a buffer submission completes. 305 * Must be called with the cdma lock held. 306 */ 307 static void stop_cdma_timer_locked(struct host1x_cdma *cdma) 308 { 309 cancel_delayed_work(&cdma->timeout.wq); 310 cdma->timeout.client = NULL; 311 } 312 313 /* 314 * For all sync queue entries that have already finished according to the 315 * current sync point registers: 316 * - unpin & unref their mems 317 * - pop their push buffer slots 318 * - remove them from the sync queue 319 * This is normally called from the host code's worker thread, but can be 320 * called manually if necessary. 321 * Must be called with the cdma lock held. 322 */ 323 static void update_cdma_locked(struct host1x_cdma *cdma) 324 { 325 bool signal = false; 326 struct host1x *host1x = cdma_to_host1x(cdma); 327 struct host1x_job *job, *n; 328 329 /* If CDMA is stopped, queue is cleared and we can return */ 330 if (!cdma->running) 331 return; 332 333 /* 334 * Walk the sync queue, reading the sync point registers as necessary, 335 * to consume as many sync queue entries as possible without blocking 336 */ 337 list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { 338 struct host1x_syncpt *sp = 339 host1x_syncpt_get(host1x, job->syncpt_id); 340 341 /* Check whether this syncpt has completed, and bail if not */ 342 if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { 343 /* Start timer on next pending syncpt */ 344 if (job->timeout) 345 cdma_start_timer_locked(cdma, job); 346 347 break; 348 } 349 350 /* Cancel timeout, when a buffer completes */ 351 if (cdma->timeout.client) 352 stop_cdma_timer_locked(cdma); 353 354 /* Unpin the memory */ 355 host1x_job_unpin(job); 356 357 /* Pop push buffer slots */ 358 if (job->num_slots) { 359 struct push_buffer *pb = &cdma->push_buffer; 360 361 host1x_pushbuffer_pop(pb, job->num_slots); 362 363 if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) 364 signal = true; 365 } 366 367 list_del(&job->list); 368 host1x_job_put(job); 369 } 370 371 if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY && 372 list_empty(&cdma->sync_queue)) 373 signal = true; 374 375 if (signal) { 376 cdma->event = CDMA_EVENT_NONE; 377 complete(&cdma->complete); 378 } 379 } 380 381 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, 382 struct device *dev) 383 { 384 struct host1x *host1x = cdma_to_host1x(cdma); 385 u32 restart_addr, syncpt_incrs, syncpt_val; 386 struct host1x_job *job, *next_job = NULL; 387 388 syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); 389 390 dev_dbg(dev, "%s: starting cleanup (thresh %d)\n", 391 __func__, syncpt_val); 392 393 /* 394 * Move the sync_queue read pointer to the first entry that hasn't 395 * completed based on the current HW syncpt value. It's likely there 396 * won't be any (i.e. we're still at the head), but covers the case 397 * where a syncpt incr happens just prior/during the teardown. 398 */ 399 400 dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n", 401 __func__); 402 403 list_for_each_entry(job, &cdma->sync_queue, list) { 404 if (syncpt_val < job->syncpt_end) { 405 406 if (!list_is_last(&job->list, &cdma->sync_queue)) 407 next_job = list_next_entry(job, list); 408 409 goto syncpt_incr; 410 } 411 412 host1x_job_dump(dev, job); 413 } 414 415 /* all jobs have been completed */ 416 job = NULL; 417 418 syncpt_incr: 419 420 /* 421 * Increment with CPU the remaining syncpts of a partially executed job. 422 * 423 * CDMA will continue execution starting with the next job or will get 424 * into idle state. 425 */ 426 if (next_job) 427 restart_addr = next_job->first_get; 428 else 429 restart_addr = cdma->last_pos; 430 431 /* do CPU increments for the remaining syncpts */ 432 if (job) { 433 dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", 434 __func__); 435 436 /* won't need a timeout when replayed */ 437 job->timeout = 0; 438 439 syncpt_incrs = job->syncpt_end - syncpt_val; 440 dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs); 441 442 host1x_job_dump(dev, job); 443 444 /* safe to use CPU to incr syncpts */ 445 host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, 446 syncpt_incrs, job->syncpt_end, 447 job->num_slots); 448 449 dev_dbg(dev, "%s: finished sync_queue modification\n", 450 __func__); 451 } 452 453 /* roll back DMAGET and start up channel again */ 454 host1x_hw_cdma_resume(host1x, cdma, restart_addr); 455 } 456 457 /* 458 * Create a cdma 459 */ 460 int host1x_cdma_init(struct host1x_cdma *cdma) 461 { 462 int err; 463 464 mutex_init(&cdma->lock); 465 init_completion(&cdma->complete); 466 467 INIT_LIST_HEAD(&cdma->sync_queue); 468 469 cdma->event = CDMA_EVENT_NONE; 470 cdma->running = false; 471 cdma->torndown = false; 472 473 err = host1x_pushbuffer_init(&cdma->push_buffer); 474 if (err) 475 return err; 476 477 return 0; 478 } 479 480 /* 481 * Destroy a cdma 482 */ 483 int host1x_cdma_deinit(struct host1x_cdma *cdma) 484 { 485 struct push_buffer *pb = &cdma->push_buffer; 486 struct host1x *host1x = cdma_to_host1x(cdma); 487 488 if (cdma->running) { 489 pr_warn("%s: CDMA still running\n", __func__); 490 return -EBUSY; 491 } 492 493 host1x_pushbuffer_destroy(pb); 494 host1x_hw_cdma_timeout_destroy(host1x, cdma); 495 496 return 0; 497 } 498 499 /* 500 * Begin a cdma submit 501 */ 502 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) 503 { 504 struct host1x *host1x = cdma_to_host1x(cdma); 505 506 mutex_lock(&cdma->lock); 507 508 if (job->timeout) { 509 /* init state on first submit with timeout value */ 510 if (!cdma->timeout.initialized) { 511 int err; 512 513 err = host1x_hw_cdma_timeout_init(host1x, cdma, 514 job->syncpt_id); 515 if (err) { 516 mutex_unlock(&cdma->lock); 517 return err; 518 } 519 } 520 } 521 522 if (!cdma->running) 523 host1x_hw_cdma_start(host1x, cdma); 524 525 cdma->slots_free = 0; 526 cdma->slots_used = 0; 527 cdma->first_get = cdma->push_buffer.pos; 528 529 trace_host1x_cdma_begin(dev_name(job->channel->dev)); 530 return 0; 531 } 532 533 /* 534 * Push two words into a push buffer slot 535 * Blocks as necessary if the push buffer is full. 536 */ 537 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) 538 { 539 struct host1x *host1x = cdma_to_host1x(cdma); 540 struct push_buffer *pb = &cdma->push_buffer; 541 u32 slots_free = cdma->slots_free; 542 543 if (host1x_debug_trace_cmdbuf) 544 trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), 545 op1, op2); 546 547 if (slots_free == 0) { 548 host1x_hw_cdma_flush(host1x, cdma); 549 slots_free = host1x_cdma_wait_locked(cdma, 550 CDMA_EVENT_PUSH_BUFFER_SPACE); 551 } 552 553 cdma->slots_free = slots_free - 1; 554 cdma->slots_used++; 555 host1x_pushbuffer_push(pb, op1, op2); 556 } 557 558 /* 559 * Push four words into two consecutive push buffer slots. Note that extra 560 * care needs to be taken not to split the two slots across the end of the 561 * push buffer. Otherwise the RESTART opcode at the end of the push buffer 562 * that ensures processing will restart at the beginning will break up the 563 * four words. 564 * 565 * Blocks as necessary if the push buffer is full. 566 */ 567 void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, 568 u32 op3, u32 op4) 569 { 570 struct host1x_channel *channel = cdma_to_channel(cdma); 571 struct host1x *host1x = cdma_to_host1x(cdma); 572 struct push_buffer *pb = &cdma->push_buffer; 573 unsigned int needed = 2, extra = 0, i; 574 unsigned int space = cdma->slots_free; 575 576 if (host1x_debug_trace_cmdbuf) 577 trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, 578 op3, op4); 579 580 /* compute number of extra slots needed for padding */ 581 if (pb->pos + 16 > pb->size) { 582 extra = (pb->size - pb->pos) / 8; 583 needed += extra; 584 } 585 586 host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); 587 space = host1x_pushbuffer_space(pb); 588 589 cdma->slots_free = space - needed; 590 cdma->slots_used += needed; 591 592 /* 593 * Note that we rely on the fact that this is only used to submit wide 594 * gather opcodes, which consist of 3 words, and they are padded with 595 * a NOP to avoid having to deal with fractional slots (a slot always 596 * represents 2 words). The fourth opcode passed to this function will 597 * therefore always be a NOP. 598 * 599 * This works around a slight ambiguity when it comes to opcodes. For 600 * all current host1x incarnations the NOP opcode uses the exact same 601 * encoding (0x20000000), so we could hard-code the value here, but a 602 * new incarnation may change it and break that assumption. 603 */ 604 for (i = 0; i < extra; i++) 605 host1x_pushbuffer_push(pb, op4, op4); 606 607 host1x_pushbuffer_push(pb, op1, op2); 608 host1x_pushbuffer_push(pb, op3, op4); 609 } 610 611 /* 612 * End a cdma submit 613 * Kick off DMA, add job to the sync queue, and a number of slots to be freed 614 * from the pushbuffer. The handles for a submit must all be pinned at the same 615 * time, but they can be unpinned in smaller chunks. 616 */ 617 void host1x_cdma_end(struct host1x_cdma *cdma, 618 struct host1x_job *job) 619 { 620 struct host1x *host1x = cdma_to_host1x(cdma); 621 bool idle = list_empty(&cdma->sync_queue); 622 623 host1x_hw_cdma_flush(host1x, cdma); 624 625 job->first_get = cdma->first_get; 626 job->num_slots = cdma->slots_used; 627 host1x_job_get(job); 628 list_add_tail(&job->list, &cdma->sync_queue); 629 630 /* start timer on idle -> active transitions */ 631 if (job->timeout && idle) 632 cdma_start_timer_locked(cdma, job); 633 634 trace_host1x_cdma_end(dev_name(job->channel->dev)); 635 mutex_unlock(&cdma->lock); 636 } 637 638 /* 639 * Update cdma state according to current sync point values 640 */ 641 void host1x_cdma_update(struct host1x_cdma *cdma) 642 { 643 mutex_lock(&cdma->lock); 644 update_cdma_locked(cdma); 645 mutex_unlock(&cdma->lock); 646 } 647