/*
 * core routines for the asynchronous memory transfer/transform api
 *
 * Copyright © 2006, Intel Corporation.
 *
 *	Dan Williams <dan.j.williams@intel.com>
 *
 *	with architecture considerations by:
 *	Neil Brown <neilb@suse.de>
 *	Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/async_tx.h>

#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
	struct dma_chan *chan, enum dma_state state);

static struct dma_client async_tx_dma = {
	.event_callback = dma_channel_add_remove,
	/* .cap_mask == 0 defaults to all channels */
};

/**
 * dma_cap_mask_all - enable iteration over all operation types
 */
static dma_cap_mask_t dma_cap_mask_all;

/**
 * chan_ref_percpu - tracks channel allocations per core/operation
 */
struct chan_ref_percpu {
	struct dma_chan_ref *ref;
};

static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

/**
 * async_tx_lock - protect modification of async_tx_master_list and serialize
 *	rebalance operations
 */
static spinlock_t async_tx_lock;

static LIST_HEAD(async_tx_master_list);

/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
	struct dma_chan_ref *ref;

	rcu_read_lock();
	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		ref->chan->device->device_issue_pending(ref->chan);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
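
/*
 * Illustrative usage, not code from this file: callers typically queue a
 * batch of asynchronous operations and then flush every registered channel
 * in one go.  The sketch below assumes the async_memcpy() wrapper from
 * crypto/async_tx/ and hypothetical dst/src page arrays; the argument list
 * is illustrative only.
 *
 *	struct dma_async_tx_descriptor *tx = NULL;
 *	int i;
 *
 *	for (i = 0; i < nr_pages; i++)
 *		tx = async_memcpy(dst[i], src[i], 0, 0, PAGE_SIZE,
 *				  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
 *
 *	async_tx_issue_pending_all();
 *
 * Each operation is chained on the previous one via its dependency, and the
 * final call kicks the hardware on all channels at once.
 */
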
/* dma_wait_for_async_tx - spin wait for a transaction to complete
 * @tx: transaction to wait on
 */
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
	enum dma_status status;
	struct dma_async_tx_descriptor *iter;
	struct dma_async_tx_descriptor *parent;

	if (!tx)
		return DMA_SUCCESS;

	/* poll through the dependency chain, return when tx is complete */
	do {
		iter = tx;

		/* find the root of the unsubmitted dependency chain */
		do {
			parent = iter->parent;
			if (!parent)
				break;
			else
				iter = parent;
		} while (parent);

		/* there is a small window for ->parent == NULL and
		 * ->cookie == -EBUSY
		 */
		while (iter->cookie == -EBUSY)
			cpu_relax();

		status = dma_sync_wait(iter->chan, iter->cookie);
	} while (status == DMA_IN_PROGRESS || (iter != tx));

	return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

/* async_tx_run_dependencies - helper routine for dma drivers to process
 *	(start) dependent operations on their target channel
 * @tx: transaction with dependencies
 */
void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
	struct dma_async_tx_descriptor *dep = tx->next;
	struct dma_async_tx_descriptor *dep_next;
	struct dma_chan *chan;

	if (!dep)
		return;

	chan = dep->chan;

	/* keep submitting up until a channel switch is detected
	 * in that case we will be called again as a result of
	 * processing the interrupt from async_tx_channel_switch
	 */
	for (; dep; dep = dep_next) {
		spin_lock_bh(&dep->lock);
		dep->parent = NULL;
		dep_next = dep->next;
		if (dep_next && dep_next->chan == chan)
			dep->next = NULL; /* ->next will be submitted */
		else
			dep_next = NULL; /* submit current dep and terminate */
		spin_unlock_bh(&dep->lock);

		dep->tx_submit(dep);
	}

	chan->device->device_issue_pending(chan);
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);

static void
free_dma_chan_ref(struct rcu_head *rcu)
{
	struct dma_chan_ref *ref;
	ref = container_of(rcu, struct dma_chan_ref, rcu);
	kfree(ref);
}

static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
	INIT_LIST_HEAD(&ref->node);
	INIT_RCU_HEAD(&ref->rcu);
	ref->chan = chan;
	atomic_set(&ref->count, 0);
}

/**
 * get_chan_ref_by_cap - returns the nth channel of the given capability
 *	defaults to returning the channel with the desired capability and the
 *	lowest reference count if the index cannot be satisfied
 * @cap: capability to match
 * @index: nth channel desired, passing -1 has the effect of forcing the
 *	default return value
 */
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
	struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;

	rcu_read_lock();
	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
			if (!min_ref)
				min_ref = ref;
			else if (atomic_read(&ref->count) <
				atomic_read(&min_ref->count))
				min_ref = ref;

			if (index-- == 0) {
				ret_ref = ref;
				break;
			}
		}
	rcu_read_unlock();

	if (!ret_ref)
		ret_ref = min_ref;

	if (ret_ref)
		atomic_inc(&ret_ref->count);

	return ret_ref;
}

/**
 * async_tx_rebalance - redistribute the available channels, optimize
 *	for cpu isolation in the SMP case, and operation isolation in the
 *	uniprocessor case
 */
static void async_tx_rebalance(void)
{
	int cpu, cap, cpu_idx = 0;
	unsigned long flags;

	if (!channel_table_initialized)
		return;

	spin_lock_irqsave(&async_tx_lock, flags);

	/* undo the last distribution */
	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_possible_cpu(cpu) {
			struct dma_chan_ref *ref =
				per_cpu_ptr(channel_table[cap], cpu)->ref;
			if (ref) {
				atomic_set(&ref->count, 0);
				per_cpu_ptr(channel_table[cap], cpu)->ref =
									NULL;
			}
		}

	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		for_each_online_cpu(cpu) {
			struct dma_chan_ref *new;
			if (NR_CPUS > 1)
				new = get_chan_ref_by_cap(cap, cpu_idx++);
			else
				new = get_chan_ref_by_cap(cap, -1);

			per_cpu_ptr(channel_table[cap], cpu)->ref = new;
		}

	spin_unlock_irqrestore(&async_tx_lock, flags);
}
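
/*
 * Worked example of the distribution above, for a hypothetical box with two
 * online cpus and two registered channels (chan0, chan1) that both advertise
 * DMA_MEMCPY and DMA_XOR.  The rebalance walks (cap, cpu) pairs with a single
 * running index (it is not reset per capability), so get_chan_ref_by_cap()
 * first returns the nth matching channel and, once the index runs past the
 * list, falls back to the least-referenced one:
 *
 *	(MEMCPY, cpu0) -> index 0 -> chan0
 *	(MEMCPY, cpu1) -> index 1 -> chan1
 *	(XOR,    cpu0) -> index 2 -> fallback -> chan0 (lowest count)
 *	(XOR,    cpu1) -> index 3 -> fallback -> chan1 (lowest count)
 *
 * Capabilities with no matching channel simply leave a NULL ref in
 * channel_table, and __async_tx_find_channel() then reports no channel.
 */
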
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
	struct dma_chan *chan, enum dma_state state)
{
	unsigned long found, flags;
	struct dma_chan_ref *master_ref, *ref;
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		found = 0;
		rcu_read_lock();
		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
			if (ref->chan == chan) {
				found = 1;
				break;
			}
		rcu_read_unlock();

		pr_debug("async_tx: dma resource available [%s]\n",
			found ? "old" : "new");

		if (!found)
			ack = DMA_ACK;
		else
			break;

		/* add the channel to the generic management list */
		master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
		if (master_ref) {
			/* keep a reference until async_tx is unloaded */
			dma_chan_get(chan);
			init_dma_chan_ref(master_ref, chan);
			spin_lock_irqsave(&async_tx_lock, flags);
			list_add_tail_rcu(&master_ref->node,
				&async_tx_master_list);
			spin_unlock_irqrestore(&async_tx_lock, flags);
		} else {
			printk(KERN_WARNING "async_tx: unable to create"
				" new master entry in response to"
				" a DMA_RESOURCE_AVAILABLE event"
				" (-ENOMEM)\n");
			return 0;
		}

		async_tx_rebalance();
		break;
	case DMA_RESOURCE_REMOVED:
		found = 0;
		spin_lock_irqsave(&async_tx_lock, flags);
		list_for_each_entry(ref, &async_tx_master_list, node)
			if (ref->chan == chan) {
				/* permit backing devices to go away */
				dma_chan_put(ref->chan);
				list_del_rcu(&ref->node);
				call_rcu(&ref->rcu, free_dma_chan_ref);
				found = 1;
				break;
			}
		spin_unlock_irqrestore(&async_tx_lock, flags);

		pr_debug("async_tx: dma resource removed [%s]\n",
			found ? "ours" : "not ours");

		if (found)
			ack = DMA_ACK;
		else
			break;

		async_tx_rebalance();
		break;
	case DMA_RESOURCE_SUSPEND:
	case DMA_RESOURCE_RESUME:
		printk(KERN_WARNING "async_tx: does not support dma channel"
			" suspend/resume\n");
		break;
	default:
		BUG();
	}

	return ack;
}
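
/*
 * How channels arrive at dma_channel_add_remove(): async_tx_init() below
 * registers async_tx_dma as a dma_client and requests channels, after which
 * the dmaengine core calls the event_callback once per channel event.
 * Returning DMA_ACK claims the channel for async_tx; DMA_DUP tells the core
 * the channel is already known (or not wanted) and no reference is taken.
 */
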
static int __init
async_tx_init(void)
{
	enum dma_transaction_type cap;

	spin_lock_init(&async_tx_lock);
	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

	/* an interrupt will never be an explicit operation type.
	 * clearing this bit prevents allocation to a slot in 'channel_table'
	 */
	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
		channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
		if (!channel_table[cap])
			goto err;
	}

	channel_table_initialized = 1;
	dma_async_client_register(&async_tx_dma);
	dma_async_client_chan_request(&async_tx_dma);

	printk(KERN_INFO "async_tx: api initialized (async)\n");

	return 0;
err:
	printk(KERN_ERR "async_tx: initialization failure\n");

	while (--cap >= 0)
		free_percpu(channel_table[cap]);

	return 1;
}

static void __exit async_tx_exit(void)
{
	enum dma_transaction_type cap;

	channel_table_initialized = 0;

	for_each_dma_cap_mask(cap, dma_cap_mask_all)
		if (channel_table[cap])
			free_percpu(channel_table[cap]);

	dma_async_client_unregister(&async_tx_dma);
}

/**
 * __async_tx_find_channel - find a channel to carry out the operation or let
 *	the transaction execute synchronously
 * @depend_tx: transaction dependency
 * @tx_type: transaction type
 */
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
	enum dma_transaction_type tx_type)
{
	/* see if we can keep the chain on one channel */
	if (depend_tx &&
	    dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
		return depend_tx->chan;
	else if (likely(channel_table_initialized)) {
		struct dma_chan_ref *ref;
		int cpu = get_cpu();
		ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
		put_cpu();
		return ref ? ref->chan : NULL;
	} else
		return NULL;
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
	printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
	return 0;
}

static void __exit async_tx_exit(void)
{
	do { } while (0);
}
#endif /* CONFIG_DMA_ENGINE */
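
/*
 * Note on channel selection: __async_tx_find_channel() above prefers to keep
 * a dependency chain on the channel of @depend_tx, which avoids the interrupt
 * descriptor that a channel switch costs; otherwise it consults the per-cpu
 * channel_table filled in by async_tx_rebalance().  A NULL return tells the
 * async_* wrappers to perform the operation synchronously.
 */
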
/**
 * async_tx_channel_switch - queue an interrupt descriptor with a dependency
 *	pre-attached.
 * @depend_tx: the operation that must finish before the new operation runs
 * @tx: the new operation
 */
static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
	struct dma_async_tx_descriptor *tx)
{
	struct dma_chan *chan;
	struct dma_device *device;
	struct dma_async_tx_descriptor *intr_tx = (void *) ~0;

	/* first check to see if we can still append to depend_tx */
	spin_lock_bh(&depend_tx->lock);
	if (depend_tx->parent && depend_tx->chan == tx->chan) {
		tx->parent = depend_tx;
		depend_tx->next = tx;
		intr_tx = NULL;
	}
	spin_unlock_bh(&depend_tx->lock);

	if (!intr_tx)
		return;

	chan = depend_tx->chan;
	device = chan->device;

	/* see if we can schedule an interrupt
	 * otherwise poll for completion
	 */
	if (dma_has_cap(DMA_INTERRUPT, device->cap_mask))
		intr_tx = device->device_prep_dma_interrupt(chan, 0);
	else
		intr_tx = NULL;

	if (intr_tx) {
		intr_tx->callback = NULL;
		intr_tx->callback_param = NULL;
		tx->parent = intr_tx;
		/* safe to set ->next outside the lock since we know we are
		 * not submitted yet
		 */
		intr_tx->next = tx;

		/* check if we need to append */
		spin_lock_bh(&depend_tx->lock);
		if (depend_tx->parent) {
			intr_tx->parent = depend_tx;
			depend_tx->next = intr_tx;
			async_tx_ack(intr_tx);
			intr_tx = NULL;
		}
		spin_unlock_bh(&depend_tx->lock);

		if (intr_tx) {
			intr_tx->parent = NULL;
			intr_tx->tx_submit(intr_tx);
			async_tx_ack(intr_tx);
		}
	} else {
		if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
			panic("%s: DMA_ERROR waiting for depend_tx\n",
				__func__);
		tx->tx_submit(tx);
	}
}
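
/*
 * The dependency chains built by async_tx_channel_switch(), for reference:
 *
 *	still appendable, same channel:	depend_tx -> tx
 *	channel switch, DMA_INTERRUPT:	depend_tx -> intr_tx -> tx
 *	channel switch, no interrupt:	spin in dma_wait_for_async_tx(),
 *					then submit tx directly
 *
 * In the interrupt case, completion of intr_tx on the old channel leads the
 * driver into async_tx_run_dependencies(), which submits tx on its new
 * channel.
 */
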
/**
 * submit_disposition - while holding depend_tx->lock we must avoid submitting
 *	new operations to prevent a circular locking dependency with
 *	drivers that already hold a channel lock when calling
 *	async_tx_run_dependencies.
 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
 */
enum submit_disposition {
	ASYNC_TX_SUBMITTED,
	ASYNC_TX_CHANNEL_SWITCH,
	ASYNC_TX_DIRECT_SUBMIT,
};

void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	tx->callback = cb_fn;
	tx->callback_param = cb_param;

	if (depend_tx) {
		enum submit_disposition s;

		/* sanity check the dependency chain:
		 * 1/ if ack is already set then we cannot be sure
		 * we are referring to the correct operation
		 * 2/ dependencies are 1:1 i.e. two transactions cannot
		 * depend on the same parent
		 */
		BUG_ON(async_tx_test_ack(depend_tx) || depend_tx->next ||
			tx->parent);

		/* the lock prevents async_tx_run_dependencies from missing
		 * the setting of ->next when ->parent != NULL
		 */
		spin_lock_bh(&depend_tx->lock);
		if (depend_tx->parent) {
			/* we have a parent so we cannot submit directly
			 * if we are staying on the same channel: append
			 * else: channel switch
			 */
			if (depend_tx->chan == chan) {
				tx->parent = depend_tx;
				depend_tx->next = tx;
				s = ASYNC_TX_SUBMITTED;
			} else
				s = ASYNC_TX_CHANNEL_SWITCH;
		} else {
			/* we do not have a parent so we may be able to submit
			 * directly if we are staying on the same channel
			 */
			if (depend_tx->chan == chan)
				s = ASYNC_TX_DIRECT_SUBMIT;
			else
				s = ASYNC_TX_CHANNEL_SWITCH;
		}
		spin_unlock_bh(&depend_tx->lock);

		switch (s) {
		case ASYNC_TX_SUBMITTED:
			break;
		case ASYNC_TX_CHANNEL_SWITCH:
			async_tx_channel_switch(depend_tx, tx);
			break;
		case ASYNC_TX_DIRECT_SUBMIT:
			tx->parent = NULL;
			tx->tx_submit(tx);
			break;
		}
	} else {
		tx->parent = NULL;
		tx->tx_submit(tx);
	}

	if (flags & ASYNC_TX_ACK)
		async_tx_ack(tx);

	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
		async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
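
/*
 * Typical caller pattern, a sketch under assumptions rather than code from
 * this file: the async_* offload wrappers pick a channel, ask the driver to
 * prepare a descriptor, and hand it to async_tx_submit(); with no channel or
 * no descriptor they fall back to doing the work on the cpu.  The prep call
 * below is a placeholder for a driver-specific device_prep_* method.
 *
 *	chan = __async_tx_find_channel(depend_tx, DMA_MEMCPY);
 *	tx = chan ? <driver prep call>(chan, ...) : NULL;
 *
 *	if (tx) {
 *		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
 *	} else {
 *		async_tx_quiesce(&depend_tx);
 *		... do the copy/xor on the cpu ...
 *		async_tx_sync_epilog(cb_fn, cb_param);
 *	}
 *
 * async_trigger_callback() below follows the same shape, with a
 * DMA_INTERRUPT descriptor standing in for a "real" operation.
 */
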
/**
 * async_trigger_callback - schedules the callback function to be run after
 *	any dependent operations have been completed.
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: 'callback' requires the completion of this transaction
 * @cb_fn: function to call after depend_tx completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan;
	struct dma_device *device;
	struct dma_async_tx_descriptor *tx;

	if (depend_tx) {
		chan = depend_tx->chan;
		device = chan->device;

		/* see if we can schedule an interrupt
		 * otherwise poll for completion
		 */
		if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
			device = NULL;

		tx = device ?
			device->device_prep_dma_interrupt(chan, 0) : NULL;
	} else
		tx = NULL;

	if (tx) {
		pr_debug("%s: (async)\n", __func__);

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		pr_debug("%s: (sync)\n", __func__);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&depend_tx);

		async_tx_sync_epilog(cb_fn, cb_param);
	}

	return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);

/**
 * async_tx_quiesce - ensure tx is complete and freeable upon return
 * @tx: transaction to quiesce
 */
void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
{
	if (*tx) {
		/* if ack is already set then we cannot be sure
		 * we are referring to the correct operation
		 */
		BUG_ON(async_tx_test_ack(*tx));
		if (dma_wait_for_async_tx(*tx) == DMA_ERROR)
			panic("DMA_ERROR waiting for transaction\n");
		async_tx_ack(*tx);
		*tx = NULL;
	}
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);

module_init(async_tx_init);
module_exit(async_tx_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");