#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "../dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB		(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Packet Descriptor */
#define PD2_ZERO_LENGTH		(1 << 19)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;

	struct list_head node;		/* Node for pending list */
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
	u32 n_chans;
	u8 platform;

	struct list_head pending;	/* Pending queued transfers */
	spinlock_t lock;		/* Lock for pending list */

	/* context for suspend/resume */
	unsigned int dma_tdfdq;

	bool is_suspended;
};

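/*
 * Queue maps for the USB DMA channels: descriptors are pushed to a
 * channel's "submit" queue and returned by the hardware on its
 * "complete" queue.
 */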
static struct chan_queues am335x_usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP 1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues am335x_usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

static const struct chan_queues da8xx_usb_queues_tx[] = {
	[0] = { .submit = 16, .complete = 24},
	[1] = { .submit = 18, .complete = 24},
	[2] = { .submit = 20, .complete = 24},
	[3] = { .submit = 22, .complete = 24},
};

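/*
 * On da8xx all TX channels share completion queue 24 and all RX channels
 * share completion queue 26.
 */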
static const struct chan_queues da8xx_usb_queues_rx[] = {
	[0] = { .submit = 1, .complete = 26},
	[1] = { .submit = 3, .complete = 26},
	[2] = { .submit = 5, .complete = 26},
	[3] = { .submit = 7, .complete = 26},
};

struct cppi_glue_infos {
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
	      (desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return c;
}

static void cppi_writel(u32 val, void __iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void __iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	/* The low 5 bits carry the descriptor size hint, not the address */
	desc &= ~0x1f;
	return desc;
}

static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	u16 first_completion_queue = cdd->first_completion_queue;
	u16 qmgr_num_pend = cdd->qmgr_num_pend;
	struct cppi41_channel *c;
	int i;

	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
	     i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
			u32 mask;
			/* set the bit for the first completion queue (Q 93) */
			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
			/* mask - 1 covers all queues below Q 93 */
			mask--;
			/* clear those so only Q 93 and above remain */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc, len;

			/*
			 * This should never trigger, see the comments in
			 * push_desc_queue()
			 */
			WARN_ON(cdd->is_suspended);

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
				       q_num, desc);
				continue;
			}

			if (c->desc->pd2 & PD2_ZERO_LENGTH)
				len = 0;
			else
				len = pd_trans_len(c->desc->pd0);

			c->residue = pd_trans_len(c->desc->pd6) - len;
			dma_cookie_complete(&c->txd);
			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

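/*
 * Wake the device via runtime PM, initialise the dmaengine cookie and the
 * software descriptor, and for RX channels program the submit queue into
 * the RXHPCRA0 register.
 */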
static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
			__func__, error);
		pm_runtime_put_noidle(cdd->ddev.dev);

		return error;
	}

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return;
	}

	WARN_ON(!list_empty(&cdd->pending));

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
					    dma_cookie_t cookie,
					    struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	ret = dma_cookie_status(chan, cookie, txstate);

	dma_set_residue(txstate, c->residue);

	return ret;
}

static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel() so we have to make sure
	 * that the DMA descriptor in coherent memory made it to the main
	 * memory before starting the dma engine.
	 */
	__iowmb();

	/*
	 * DMA transfers can take at least 200ms to complete with USB mass
	 * storage connected. To prevent autosuspend timeouts, we must use
	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
	 * outcome of the transfer.
	 */
	pm_runtime_get(cdd->ddev.dev);

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	/* Queue register D: descriptor address with size hint in low bits */
	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}

/*
 * Caller must hold cdd->lock to prevent push_desc_queue() getting called
 * out of order. Both cppi41_dma_issue_pending() and cppi41_runtime_resume()
 * call this function.
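 * Each pending channel is pushed to the hardware queue and then removed
 * from the pending list.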
 */
static void cppi41_run_queue(struct cppi41_dd *cdd)
{
	struct cppi41_channel *c, *_c;

	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
		push_desc_queue(c);
		list_del(&c->node);
	}
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	unsigned long flags;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if ((error != -EINPROGRESS) && error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);
		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
			error);

		return;
	}

	spin_lock_irqsave(&cdd->lock, flags);
	list_add_tail(&c->node, &cdd->pending);
	if (!cdd->is_suspended)
		cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;

	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct dmaengine_result abort_result;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
			    QMGR_QUEUE_D(cdd->td_queue.submit));

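		/*
		 * Request the teardown: re-enable the channel with the TD
		 * bit set; RX channels additionally route completions to
		 * the teardown completion queue.
		 */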
		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= cdd->td_queue.complete;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 500;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys && c->is_tx)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting does not
	 * seem to make any difference.
	 * RX seems to be thrown out right away. However, once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD we fetch it from the submit queue; it has
	 * to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry) {
		udelay(1);
		return -EAGAIN;
	}
	WARN_ON(!c->td_retry);

	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);

	/* Invoke the callback to do the necessary clean-up */
	abort_result.result = DMA_TRANS_ABORTED;
	dma_cookie_complete(&c->txd);
	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	if (!cdd->chan_busy[desc_num]) {
		struct cppi41_channel *cc, *_ct;

		/*
		 * channels might still be in the pending list if
		 * cppi41_dma_issue_pending() is called after
		 * cppi41_runtime_suspend() is called
		 */
		list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
			if (cc != c)
				continue;
			list_del(&cc->node);
			break;
		}
		return 0;
	}

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return 0;
}

static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan, *chans;
	int i;
	u32 n_chans = cdd->n_chans;

	/*
	 * A hardware channel can only be used as TX or as RX, so we register
	 * twice as many DMA channels: one TX and one RX channel per hardware
	 * channel.
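	 * Even indices become RX channels and odd indices become TX channels;
	 * both channels of a pair share the same port number (i >> 1).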
	 */
	n_chans *= 2;

	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
	if (!chans)
		return -ENOMEM;

	for (i = 0; i < n_chans; i++) {
		cchan = &chans[i];

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				  cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			  cdd->scratch_phys);
}

static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
		     (sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
					     &cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}

static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < cdd->n_chans; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = cdd->n_chans * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
					       &cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

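	/*
	 * Program the linking RAM: region 0 provides one 32-bit word per
	 * descriptor (QMGR_SCRATCH_SIZE is TOTAL_DESCS_NUM * 4); region 1
	 * is unused.
	 */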
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);

	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
		     ARRAY_SIZE(am335x_usb_queues_tx));
	if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
					 struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
				   &dma_spec->args[0]);
}

static const struct cppi_glue_infos am335x_usb_infos = {
	.queues_rx = am335x_usb_queues_rx,
	.queues_tx = am335x_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 93,
	.qmgr_num_pend = 5,
};

static const struct cppi_glue_infos da8xx_usb_infos = {
	.queues_rx = da8xx_usb_queues_rx,
	.queues_tx = da8xx_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 24,
	.qmgr_num_pend = 2,
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	struct resource *mem;
	int index;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

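	/*
	 * Allocate the driver data and describe the slave-only dmaengine
	 * device before mapping the controller, scheduler and queue manager
	 * register regions.
	 */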
	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_terminate_all = cppi41_stop_chan;
	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	index = of_property_match_string(dev->of_node,
					 "reg-names", "controller");
	if (index < 0)
		return index;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->ctrl_mem))
		return PTR_ERR(cdd->ctrl_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
	cdd->sched_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->sched_mem))
		return PTR_ERR(cdd->sched_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->qmgr_mem))
		return PTR_ERR(cdd->qmgr_mem);

	spin_lock_init(&cdd->lock);
	INIT_LIST_HEAD(&cdd->pending);

	platform_set_drvdata(pdev, cdd);

	pm_runtime_enable(dev);
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;
	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
	cdd->first_completion_queue = glue_info->first_completion_queue;

	ret = of_property_read_u32(dev->of_node,
				   "#dma-channels", &cdd->n_chans);
	if (ret)
		goto err_get_n_chans;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_chans;
	}

	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
			       dev_name(dev), cdd);
	if (ret)
		goto err_chans;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_chans;

	ret = of_dma_controller_register(dev->of_node,
					 cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_dont_use_autosuspend(dev);
err_get_n_chans:
err_get_sync:
	pm_runtime_put_sync(dev);
	pm_runtime_disable(dev);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
	int error;

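	/* Resume the device: deinit_cppi41() below needs register access */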
	error = pm_runtime_get_sync(&pdev->dev);
	if (error < 0)
		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
			__func__, error);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	devm_free_irq(&pdev->dev, cdd->irq, cdd);
	deinit_cppi41(&pdev->dev, cdd);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	return 0;
}

static int __maybe_unused cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	disable_sched(cdd);

	return 0;
}

static int __maybe_unused cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	return 0;
}

static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = true;
	WARN_ON(!list_empty(&cdd->pending));
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static int __maybe_unused cppi41_runtime_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = false;
	cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static const struct dev_pm_ops cppi41_pm_ops = {
	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
			   cppi41_runtime_resume,
			   NULL)
};

static struct platform_driver cpp41_dma_driver = {
	.probe  = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");