/*
 * core routines for the asynchronous memory transfer/transform api
 *
 * Copyright © 2006, Intel Corporation.
 *
 *	Dan Williams <dan.j.williams@intel.com>
 *
 *	with architecture considerations by:
 *	Neil Brown <neilb@suse.de>
 *	Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/async_tx.h>

#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state);

static struct dma_client async_tx_dma = {
        .event_callback = dma_channel_add_remove,
        /* .cap_mask == 0 defaults to all channels */
};

/**
 * dma_cap_mask_all - enable iteration over all operation types
 */
static dma_cap_mask_t dma_cap_mask_all;

/**
 * chan_ref_percpu - tracks channel allocations per core/operation
 */
struct chan_ref_percpu {
        struct dma_chan_ref *ref;
};

static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

/**
 * async_tx_lock - protect modification of async_tx_master_list and serialize
 *	rebalance operations
 */
static spinlock_t async_tx_lock;

static LIST_HEAD(async_tx_master_list);

/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
        struct dma_chan_ref *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                ref->chan->device->device_issue_pending(ref->chan);
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);

/* dma_wait_for_async_tx - spin wait for a transaction to complete
 * @tx: transaction to wait on
 */
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
        enum dma_status status;
        struct dma_async_tx_descriptor *iter;
        struct dma_async_tx_descriptor *parent;

        if (!tx)
                return DMA_SUCCESS;

        /* poll through the dependency chain, return when tx is complete */
        do {
                iter = tx;

                /* find the root of the unsubmitted dependency chain */
                do {
                        parent = iter->parent;
                        if (!parent)
                                break;
                        else
                                iter = parent;
                } while (parent);

                /* there is a small window for ->parent == NULL and
                 * ->cookie == -EBUSY
                 */
                while (iter->cookie == -EBUSY)
                        cpu_relax();

                status = dma_sync_wait(iter->chan, iter->cookie);
        } while (status == DMA_IN_PROGRESS || (iter != tx));

        return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
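
/*
 * Example (illustrative sketch, not built): a typical client builds a
 * dependency chain with the async_* helpers (async_xor() and async_memcpy()
 * live in async_xor.c/async_memcpy.c; the flag usage follows the pattern in
 * Documentation/crypto/async-tx-api.txt) and then kicks all engines once
 * with async_tx_issue_pending_all().  "complete_cb" is a hypothetical
 * dma_async_tx_callback that simply calls complete(&cmp).
 *
 *	struct dma_async_tx_descriptor *tx;
 *	struct completion cmp;
 *
 *	init_completion(&cmp);
 *	tx = async_xor(xor_dest, srcs, 0, src_cnt, len,
 *		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
 *	tx = async_memcpy(copy_dest, copy_src, 0, 0, len,
 *			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
 *	tx = async_xor(xor_dest, srcs, 0, src_cnt, len,
 *		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
 *		       tx, complete_cb, &cmp);
 *
 *	async_tx_issue_pending_all();
 *	wait_for_completion(&cmp);
 *
 * dma_wait_for_async_tx() above is the polling fallback used internally when
 * a synchronous path must wait for in-flight hardware descriptors.
 */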

/* async_tx_run_dependencies - helper routine for dma drivers to process
 *	(start) dependent operations on their target channel
 * @tx: transaction with dependencies
 */
void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
        struct dma_async_tx_descriptor *next = tx->next;
        struct dma_chan *chan;

        if (!next)
                return;

        tx->next = NULL;
        chan = next->chan;

        /* keep submitting up until a channel switch is detected; in that
         * case we will be called again as a result of processing the
         * interrupt from async_tx_channel_switch
         */
        while (next && next->chan == chan) {
                struct dma_async_tx_descriptor *_next;

                spin_lock_bh(&next->lock);
                next->parent = NULL;
                _next = next->next;
                if (_next && _next->chan == chan)
                        next->next = NULL;
                spin_unlock_bh(&next->lock);

                next->tx_submit(next);
                next = _next;
        }

        chan->device->device_issue_pending(chan);
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);

static void
free_dma_chan_ref(struct rcu_head *rcu)
{
        struct dma_chan_ref *ref;
        ref = container_of(rcu, struct dma_chan_ref, rcu);
        kfree(ref);
}

static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
        INIT_LIST_HEAD(&ref->node);
        INIT_RCU_HEAD(&ref->rcu);
        ref->chan = chan;
        atomic_set(&ref->count, 0);
}

/**
 * get_chan_ref_by_cap - returns the nth channel of the given capability
 *	defaults to returning the channel with the desired capability and the
 *	lowest reference count if the index cannot be satisfied
 * @cap: capability to match
 * @index: nth channel desired, passing -1 has the effect of forcing the
 *	default return value
 */
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
        struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
                        if (!min_ref)
                                min_ref = ref;
                        else if (atomic_read(&ref->count) <
                                 atomic_read(&min_ref->count))
                                min_ref = ref;

                        if (index-- == 0) {
                                ret_ref = ref;
                                break;
                        }
                }
        rcu_read_unlock();

        if (!ret_ref)
                ret_ref = min_ref;

        if (ret_ref)
                atomic_inc(&ret_ref->count);

        return ret_ref;
}

/**
 * async_tx_rebalance - redistribute the available channels, optimize
 *	for cpu isolation in the SMP case, and operation isolation in the
 *	uniprocessor case
 */
static void async_tx_rebalance(void)
{
        int cpu, cap, cpu_idx = 0;
        unsigned long flags;

        if (!channel_table_initialized)
                return;

        spin_lock_irqsave(&async_tx_lock, flags);

        /* undo the last distribution */
        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_possible_cpu(cpu) {
                        struct dma_chan_ref *ref =
                                per_cpu_ptr(channel_table[cap], cpu)->ref;
                        if (ref) {
                                atomic_set(&ref->count, 0);
                                per_cpu_ptr(channel_table[cap], cpu)->ref =
                                        NULL;
                        }
                }

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_online_cpu(cpu) {
                        struct dma_chan_ref *new;
                        if (NR_CPUS > 1)
                                new = get_chan_ref_by_cap(cap, cpu_idx++);
                        else
                                new = get_chan_ref_by_cap(cap, -1);

                        per_cpu_ptr(channel_table[cap], cpu)->ref = new;
                }

        spin_unlock_irqrestore(&async_tx_lock, flags);
}
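
/*
 * Illustration (hypothetical configuration, results are approximate): with
 * two registered channels that both advertise DMA_MEMCPY and DMA_XOR on a
 * two-cpu system, async_tx_rebalance() walks a single running cpu_idx across
 * the capability/cpu grid and get_chan_ref_by_cap() falls back to the
 * least-referenced capable channel once the index runs past the end of the
 * master list, so the per-cpu table ends up roughly as:
 *
 *	channel_table[DMA_MEMCPY]: cpu0 -> chan0, cpu1 -> chan1
 *	channel_table[DMA_XOR]:    cpu0 -> chan0, cpu1 -> chan1
 *
 * i.e. each cpu tends to keep "its" channel across operation types (cpu
 * isolation), while on a uniprocessor the forced -1 index always selects the
 * least-referenced channel, spreading operation types over channels instead
 * (operation isolation).
 */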

static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state)
{
        unsigned long found, flags;
        struct dma_chan_ref *master_ref, *ref;
        enum dma_state_client ack = DMA_DUP; /* default: take no action */

        switch (state) {
        case DMA_RESOURCE_AVAILABLE:
                found = 0;
                rcu_read_lock();
                list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                found = 1;
                                break;
                        }
                rcu_read_unlock();

                pr_debug("async_tx: dma resource available [%s]\n",
                         found ? "old" : "new");

                if (!found)
                        ack = DMA_ACK;
                else
                        break;

                /* add the channel to the generic management list */
                master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
                if (master_ref) {
                        /* keep a reference until async_tx is unloaded */
                        dma_chan_get(chan);
                        init_dma_chan_ref(master_ref, chan);
                        spin_lock_irqsave(&async_tx_lock, flags);
                        list_add_tail_rcu(&master_ref->node,
                                          &async_tx_master_list);
                        spin_unlock_irqrestore(&async_tx_lock,
                                               flags);
                } else {
                        printk(KERN_WARNING "async_tx: unable to create"
                               " new master entry in response to"
                               " a DMA_RESOURCE_AVAILABLE event"
                               " (-ENOMEM)\n");
                        return 0;
                }

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_REMOVED:
                found = 0;
                spin_lock_irqsave(&async_tx_lock, flags);
                list_for_each_entry(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                /* permit backing devices to go away */
                                dma_chan_put(ref->chan);
                                list_del_rcu(&ref->node);
                                call_rcu(&ref->rcu, free_dma_chan_ref);
                                found = 1;
                                break;
                        }
                spin_unlock_irqrestore(&async_tx_lock, flags);

                pr_debug("async_tx: dma resource removed [%s]\n",
                         found ? "ours" : "not ours");

                if (found)
                        ack = DMA_ACK;
                else
                        break;

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_SUSPEND:
        case DMA_RESOURCE_RESUME:
                printk(KERN_WARNING "async_tx: does not support dma channel"
                       " suspend/resume\n");
                break;
        default:
                BUG();
        }

        return ack;
}

static int __init
async_tx_init(void)
{
        enum dma_transaction_type cap;

        spin_lock_init(&async_tx_lock);
        bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

        /* an interrupt will never be an explicit operation type.
         * clearing this bit prevents allocation to a slot in 'channel_table'
         */
        clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

        for_each_dma_cap_mask(cap, dma_cap_mask_all) {
                channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
                if (!channel_table[cap])
                        goto err;
        }

        channel_table_initialized = 1;
        dma_async_client_register(&async_tx_dma);
        dma_async_client_chan_request(&async_tx_dma);

        printk(KERN_INFO "async_tx: api initialized (async)\n");

        return 0;
err:
        printk(KERN_ERR "async_tx: initialization failure\n");

        while (--cap >= 0)
                free_percpu(channel_table[cap]);

        return 1;
}

static void __exit async_tx_exit(void)
{
        enum dma_transaction_type cap;

        channel_table_initialized = 0;

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                if (channel_table[cap])
                        free_percpu(channel_table[cap]);

        dma_async_client_unregister(&async_tx_dma);
}

/**
 * __async_tx_find_channel - find a channel to carry out the operation or let
 *	the transaction execute synchronously
 * @depend_tx: transaction dependency
 * @tx_type: transaction type
 */
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
        enum dma_transaction_type tx_type)
{
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
            dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
                return depend_tx->chan;
        else if (likely(channel_table_initialized)) {
                struct dma_chan_ref *ref;
                int cpu = get_cpu();
                ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
                put_cpu();
                return ref ? ref->chan : NULL;
        } else
                return NULL;
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
        printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
        return 0;
}

static void __exit async_tx_exit(void)
{
        do { } while (0);
}
#endif
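
/*
 * Illustration (hedged): callers do not use __async_tx_find_channel()
 * directly but go through the async_tx_find_channel() wrapper provided by
 * async_tx.h, passing their dependency so a chain can stay on one channel.
 * For a hypothetical xor request:
 *
 *	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
 *
 * the result is, in order of preference:
 *	1/ depend_tx->chan, when that channel also has the DMA_XOR capability
 *	   (no channel switch will be needed);
 *	2/ this cpu's entry in channel_table[DMA_XOR], as distributed by
 *	   async_tx_rebalance();
 *	3/ NULL, which tells the caller to run the operation synchronously;
 *	   a NULL return is the cue for the software fallback, not an error.
 */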

/**
 * async_tx_channel_switch - queue an interrupt descriptor with a dependency
 *	pre-attached.
 * @depend_tx: the operation that must finish before the new operation runs
 * @tx: the new operation
 */
static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
        struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *intr_tx = (void *) ~0;

        /* first check to see if we can still append to depend_tx */
        spin_lock_bh(&depend_tx->lock);
        if (depend_tx->parent && depend_tx->chan == tx->chan) {
                tx->parent = depend_tx;
                depend_tx->next = tx;
                intr_tx = NULL;
        }
        spin_unlock_bh(&depend_tx->lock);

        if (!intr_tx)
                return;

        chan = depend_tx->chan;
        device = chan->device;

        /* see if we can schedule an interrupt
         * otherwise poll for completion
         */
        if (dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                intr_tx = device->device_prep_dma_interrupt(chan, 0);
        else
                intr_tx = NULL;

        if (intr_tx) {
                intr_tx->callback = NULL;
                intr_tx->callback_param = NULL;
                tx->parent = intr_tx;
                /* safe to set ->next outside the lock since we know we are
                 * not submitted yet
                 */
                intr_tx->next = tx;

                /* check if we need to append */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        intr_tx->parent = depend_tx;
                        depend_tx->next = intr_tx;
                        async_tx_ack(intr_tx);
                        intr_tx = NULL;
                }
                spin_unlock_bh(&depend_tx->lock);

                if (intr_tx) {
                        intr_tx->parent = NULL;
                        intr_tx->tx_submit(intr_tx);
                        async_tx_ack(intr_tx);
                }
        } else {
                if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                        panic("%s: DMA_ERROR waiting for depend_tx\n",
                              __func__);
                tx->tx_submit(tx);
        }
}

/**
 * enum submit_disposition - how to hand off a new operation once
 *	depend_tx->lock is dropped; while holding depend_tx->lock we must
 *	avoid submitting new operations to prevent a circular locking
 *	dependency with drivers that already hold a channel lock when
 *	calling async_tx_run_dependencies.
 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
 */
enum submit_disposition {
        ASYNC_TX_SUBMITTED,
        ASYNC_TX_CHANNEL_SWITCH,
        ASYNC_TX_DIRECT_SUBMIT,
};

void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
        enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        tx->callback = cb_fn;
        tx->callback_param = cb_param;

        if (depend_tx) {
                enum submit_disposition s;

                /* sanity check the dependency chain:
                 * 1/ if ack is already set then we cannot be sure
                 * we are referring to the correct operation
                 * 2/ dependencies are 1:1 i.e. two transactions can
                 * not depend on the same parent
                 */
                BUG_ON(async_tx_test_ack(depend_tx) || depend_tx->next ||
                       tx->parent);

                /* the lock prevents async_tx_run_dependencies from missing
                 * the setting of ->next when ->parent != NULL
                 */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        /* we have a parent so we can not submit directly
                         * if we are staying on the same channel: append
                         * else: channel switch
                         */
                        if (depend_tx->chan == chan) {
                                tx->parent = depend_tx;
                                depend_tx->next = tx;
                                s = ASYNC_TX_SUBMITTED;
                        } else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                } else {
                        /* we do not have a parent so we may be able to submit
                         * directly if we are staying on the same channel
                         */
                        if (depend_tx->chan == chan)
                                s = ASYNC_TX_DIRECT_SUBMIT;
                        else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                }
                spin_unlock_bh(&depend_tx->lock);

                switch (s) {
                case ASYNC_TX_SUBMITTED:
                        break;
                case ASYNC_TX_CHANNEL_SWITCH:
                        async_tx_channel_switch(depend_tx, tx);
                        break;
                case ASYNC_TX_DIRECT_SUBMIT:
                        tx->parent = NULL;
                        tx->tx_submit(tx);
                        break;
                }
        } else {
                tx->parent = NULL;
                tx->tx_submit(tx);
        }

        if (flags & ASYNC_TX_ACK)
                async_tx_ack(tx);

        if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
                async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
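
/*
 * Sketch (hedged, modeled on the helpers in async_memcpy.c; dma-mapping and
 * descriptor-flag details are omitted and the identifiers are placeholders):
 * an async_<op> routine either hands a prepared descriptor to
 * async_tx_submit(), which resolves the dependency per the rules above, or
 * quiesces its dependency and completes synchronously:
 *
 *	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
 *	struct dma_async_tx_descriptor *tx = NULL;
 *
 *	if (chan)
 *		tx = chan->device->device_prep_dma_memcpy(chan, dma_dest,
 *							   dma_src, len, 0);
 *	if (tx) {
 *		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
 *	} else {
 *		async_tx_quiesce(&depend_tx);
 *		... perform the copy on the cpu ...
 *		async_tx_sync_epilog(cb_fn, cb_param);
 *	}
 *
 * async_trigger_callback() below follows the same shape for the
 * callback-only case.
 */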

/**
 * async_trigger_callback - schedules the callback function to be run after
 *	any dependent operations have been completed.
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: 'callback' requires the completion of this transaction
 * @cb_fn: function to call after depend_tx completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
        struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *tx;

        if (depend_tx) {
                chan = depend_tx->chan;
                device = chan->device;

                /* see if we can schedule an interrupt
                 * otherwise poll for completion
                 */
                if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                        device = NULL;

                tx = device ? device->device_prep_dma_interrupt(chan, 0) :
                        NULL;
        } else
                tx = NULL;

        if (tx) {
                pr_debug("%s: (async)\n", __func__);

                async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
        } else {
                pr_debug("%s: (sync)\n", __func__);

                /* wait for any prerequisite operations */
                async_tx_quiesce(&depend_tx);

                async_tx_sync_epilog(cb_fn, cb_param);
        }

        return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);

/**
 * async_tx_quiesce - ensure tx is complete and freeable upon return
 * @tx: transaction to quiesce
 */
void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
{
        if (*tx) {
                /* if ack is already set then we cannot be sure
                 * we are referring to the correct operation
                 */
                BUG_ON(async_tx_test_ack(*tx));
                if (dma_wait_for_async_tx(*tx) == DMA_ERROR)
                        panic("DMA_ERROR waiting for transaction\n");
                async_tx_ack(*tx);
                *tx = NULL;
        }
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);

module_init(async_tx_init);
module_exit(async_tx_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");
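
/*
 * Example (illustrative sketch; "chain_done" and "ctx" are hypothetical): a
 * client that only needs to be notified when an already-built chain retires
 * can append a callback-only descriptor and then kick the channels:
 *
 *	tx = async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
 *				    chain_done, ctx);
 *	async_tx_issue_pending_all();
 *
 * If no channel with DMA_INTERRUPT capability is available, the callback is
 * invoked synchronously once any dependency has been quiesced.
 */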