/*
 * core routines for the asynchronous memory transfer/transform api
 *
 * Copyright © 2006, Intel Corporation.
 *
 *      Dan Williams <dan.j.williams@intel.com>
 *
 *      with architecture considerations by:
 *      Neil Brown <neilb@suse.de>
 *      Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/async_tx.h>

#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state);

static struct dma_client async_tx_dma = {
        .event_callback = dma_channel_add_remove,
        /* .cap_mask == 0 defaults to all channels */
};

/**
 * dma_cap_mask_all - enable iteration over all operation types
 */
static dma_cap_mask_t dma_cap_mask_all;

/**
 * chan_ref_percpu - tracks channel allocations per core/operation
 */
struct chan_ref_percpu {
        struct dma_chan_ref *ref;
};

static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

/**
 * async_tx_lock - protect modification of async_tx_master_list and serialize
 *      rebalance operations
 */
static spinlock_t async_tx_lock;

static LIST_HEAD(async_tx_master_list);

/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
        struct dma_chan_ref *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                ref->chan->device->device_issue_pending(ref->chan);
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
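
/*
 * Illustrative usage sketch (not part of this file): a client that batches
 * several asynchronous operations, e.g. an md/raid5-style caller, normally
 * submits them with the async_* frontends and then kicks every registered
 * channel once per batch rather than once per descriptor.  The pages,
 * lengths and callback names below are hypothetical; the async_memcpy()
 * prototype shown is the one used by this generation of the api.
 *
 *      struct dma_async_tx_descriptor *tx = NULL;
 *
 *      tx = async_memcpy(dest_page, src_page, 0, 0, PAGE_SIZE, 0,
 *                        tx, NULL, NULL);
 *      tx = async_memcpy(dest_page2, src_page2, 0, 0, PAGE_SIZE,
 *                        ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
 *                        batch_done, batch_ctx);
 *
 *      async_tx_issue_pending_all();
 */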

/* dma_wait_for_async_tx - spin wait for a transaction to complete
 * @tx: transaction to wait on
 */
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
        enum dma_status status;
        struct dma_async_tx_descriptor *iter;
        struct dma_async_tx_descriptor *parent;

        if (!tx)
                return DMA_SUCCESS;

        /* poll through the dependency chain, return when tx is complete */
        do {
                iter = tx;

                /* find the root of the unsubmitted dependency chain */
                do {
                        parent = iter->parent;
                        if (!parent)
                                break;
                        else
                                iter = parent;
                } while (parent);

                /* there is a small window for ->parent == NULL and
                 * ->cookie == -EBUSY
                 */
                while (iter->cookie == -EBUSY)
                        cpu_relax();

                status = dma_sync_wait(iter->chan, iter->cookie);
        } while (status == DMA_IN_PROGRESS || (iter != tx));

        return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

/* async_tx_run_dependencies - helper routine for dma drivers to process
 *      (start) dependent operations on their target channel
 * @tx: transaction with dependencies
 */
void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
        struct dma_async_tx_descriptor *next = tx->next;
        struct dma_chan *chan;

        if (!next)
                return;

        tx->next = NULL;
        chan = next->chan;

        /* keep submitting up until a channel switch is detected
         * in that case we will be called again as a result of
         * processing the interrupt from async_tx_channel_switch
         */
        while (next && next->chan == chan) {
                struct dma_async_tx_descriptor *_next;

                spin_lock_bh(&next->lock);
                next->parent = NULL;
                _next = next->next;
                next->next = NULL;
                spin_unlock_bh(&next->lock);

                next->tx_submit(next);
                next = _next;
        }

        chan->device->device_issue_pending(chan);
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
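
/*
 * Illustrative usage sketch (not part of this file): a dma driver is expected
 * to call async_tx_run_dependencies() from its descriptor clean-up path once
 * a descriptor has completed, so that operations queued behind it are started
 * on the same channel (or handed off via a channel switch interrupt).  The
 * driver structure and helper names below are hypothetical.
 *
 *      static void example_adma_clean_completed_desc(struct example_adma_desc *desc)
 *      {
 *              async_tx_run_dependencies(&desc->async_tx);
 *              example_adma_recycle_desc(desc);
 *      }
 */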

static void
free_dma_chan_ref(struct rcu_head *rcu)
{
        struct dma_chan_ref *ref;
        ref = container_of(rcu, struct dma_chan_ref, rcu);
        kfree(ref);
}

static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
        INIT_LIST_HEAD(&ref->node);
        INIT_RCU_HEAD(&ref->rcu);
        ref->chan = chan;
        atomic_set(&ref->count, 0);
}

/**
 * get_chan_ref_by_cap - returns the nth channel of the given capability
 *      defaults to returning the channel with the desired capability and the
 *      lowest reference count if the index cannot be satisfied
 * @cap: capability to match
 * @index: nth channel desired, passing -1 has the effect of forcing the
 *      default return value
 */
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
        struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
                        if (!min_ref)
                                min_ref = ref;
                        else if (atomic_read(&ref->count) <
                                 atomic_read(&min_ref->count))
                                min_ref = ref;

                        if (index-- == 0) {
                                ret_ref = ref;
                                break;
                        }
                }
        rcu_read_unlock();

        if (!ret_ref)
                ret_ref = min_ref;

        if (ret_ref)
                atomic_inc(&ret_ref->count);

        return ret_ref;
}

/**
 * async_tx_rebalance - redistribute the available channels, optimize
 *      for cpu isolation in the SMP case, and operation isolation in the
 *      uniprocessor case
 */
static void async_tx_rebalance(void)
{
        int cpu, cap, cpu_idx = 0;
        unsigned long flags;

        if (!channel_table_initialized)
                return;

        spin_lock_irqsave(&async_tx_lock, flags);

        /* undo the last distribution */
        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_possible_cpu(cpu) {
                        struct dma_chan_ref *ref =
                                per_cpu_ptr(channel_table[cap], cpu)->ref;
                        if (ref) {
                                atomic_set(&ref->count, 0);
                                per_cpu_ptr(channel_table[cap], cpu)->ref =
                                        NULL;
                        }
                }

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_online_cpu(cpu) {
                        struct dma_chan_ref *new;
                        if (NR_CPUS > 1)
                                new = get_chan_ref_by_cap(cap, cpu_idx++);
                        else
                                new = get_chan_ref_by_cap(cap, -1);

                        per_cpu_ptr(channel_table[cap], cpu)->ref = new;
                }

        spin_unlock_irqrestore(&async_tx_lock, flags);
}

static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state)
{
        unsigned long found, flags;
        struct dma_chan_ref *master_ref, *ref;
        enum dma_state_client ack = DMA_DUP; /* default: take no action */

        switch (state) {
        case DMA_RESOURCE_AVAILABLE:
                found = 0;
                rcu_read_lock();
                list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                found = 1;
                                break;
                        }
                rcu_read_unlock();

                pr_debug("async_tx: dma resource available [%s]\n",
                         found ? "old" : "new");

                if (!found)
                        ack = DMA_ACK;
                else
                        break;

                /* add the channel to the generic management list */
                master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
                if (master_ref) {
                        /* keep a reference until async_tx is unloaded */
                        dma_chan_get(chan);
                        init_dma_chan_ref(master_ref, chan);
                        spin_lock_irqsave(&async_tx_lock, flags);
                        list_add_tail_rcu(&master_ref->node,
                                          &async_tx_master_list);
                        spin_unlock_irqrestore(&async_tx_lock,
                                               flags);
                } else {
                        printk(KERN_WARNING "async_tx: unable to create"
                               " new master entry in response to"
                               " a DMA_RESOURCE_AVAILABLE event"
                               " (-ENOMEM)\n");
                        return 0;
                }

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_REMOVED:
                found = 0;
                spin_lock_irqsave(&async_tx_lock, flags);
                list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                /* permit backing devices to go away */
                                dma_chan_put(ref->chan);
                                list_del_rcu(&ref->node);
                                call_rcu(&ref->rcu, free_dma_chan_ref);
                                found = 1;
                                break;
                        }
                spin_unlock_irqrestore(&async_tx_lock, flags);

                pr_debug("async_tx: dma resource removed [%s]\n",
                         found ? "ours" : "not ours");

                if (found)
                        ack = DMA_ACK;
                else
                        break;

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_SUSPEND:
        case DMA_RESOURCE_RESUME:
                printk(KERN_WARNING "async_tx: does not support dma channel"
                       " suspend/resume\n");
                break;
        default:
                BUG();
        }

        return ack;
}

static int __init
async_tx_init(void)
{
        enum dma_transaction_type cap;

        spin_lock_init(&async_tx_lock);
        bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

        /* an interrupt will never be an explicit operation type.
         * clearing this bit prevents allocation to a slot in 'channel_table'
         */
        clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

        for_each_dma_cap_mask(cap, dma_cap_mask_all) {
                channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
                if (!channel_table[cap])
                        goto err;
        }

        channel_table_initialized = 1;
        dma_async_client_register(&async_tx_dma);
        dma_async_client_chan_request(&async_tx_dma);

        printk(KERN_INFO "async_tx: api initialized (async)\n");

        return 0;
err:
        printk(KERN_ERR "async_tx: initialization failure\n");

        while (--cap >= 0)
                free_percpu(channel_table[cap]);

        return 1;
}

static void __exit async_tx_exit(void)
{
        enum dma_transaction_type cap;

        channel_table_initialized = 0;

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                if (channel_table[cap])
                        free_percpu(channel_table[cap]);

        dma_async_client_unregister(&async_tx_dma);
}

/**
 * __async_tx_find_channel - find a channel to carry out the operation or let
 *      the transaction execute synchronously
 * @depend_tx: transaction dependency
 * @tx_type: transaction type
 */
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
        enum dma_transaction_type tx_type)
{
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
            dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
                return depend_tx->chan;
        else if (likely(channel_table_initialized)) {
                struct dma_chan_ref *ref;
                int cpu = get_cpu();
                ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
                put_cpu();
                return ref ? ref->chan : NULL;
        } else
                return NULL;
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
        printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
        return 0;
}

static void __exit async_tx_exit(void)
{
        do { } while (0);
}
#endif
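
/*
 * Illustrative usage sketch (not part of this file): the async_* frontends
 * select a channel through the async_tx_find_channel() helper in async_tx.h,
 * which in this generation of the api resolves to __async_tx_find_channel()
 * when CONFIG_DMA_ENGINE is set.  A NULL return means "no suitable channel,
 * perform the operation synchronously".  Treat the call below as a sketch;
 * 'depend_tx' is assumed to be the caller's current dependency.
 *
 *      struct dma_chan *chan = __async_tx_find_channel(depend_tx, DMA_MEMCPY);
 *      struct dma_device *device = chan ? chan->device : NULL;
 *
 *      if (device)
 *              ... map the buffers and prepare a descriptor on 'chan' ...
 *      else
 *              ... fall back to a synchronous cpu copy ...
 */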

/**
 * async_tx_channel_switch - queue an interrupt descriptor with a dependency
 *      pre-attached.
 * @depend_tx: the operation that must finish before the new operation runs
 * @tx: the new operation
 */
static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *intr_tx = (void *) ~0;

        /* first check to see if we can still append to depend_tx */
        spin_lock_bh(&depend_tx->lock);
        if (depend_tx->parent && depend_tx->chan == tx->chan) {
                tx->parent = depend_tx;
                depend_tx->next = tx;
                intr_tx = NULL;
        }
        spin_unlock_bh(&depend_tx->lock);

        if (!intr_tx)
                return;

        chan = depend_tx->chan;
        device = chan->device;

        /* see if we can schedule an interrupt
         * otherwise poll for completion
         */
        if (dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                intr_tx = device->device_prep_dma_interrupt(chan, 0);
        else
                intr_tx = NULL;

        if (intr_tx) {
                intr_tx->callback = NULL;
                intr_tx->callback_param = NULL;
                tx->parent = intr_tx;
                /* safe to set ->next outside the lock since we know we are
                 * not submitted yet
                 */
                intr_tx->next = tx;

                /* check if we need to append */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        intr_tx->parent = depend_tx;
                        depend_tx->next = intr_tx;
                        async_tx_ack(intr_tx);
                        intr_tx = NULL;
                }
                spin_unlock_bh(&depend_tx->lock);

                if (intr_tx) {
                        intr_tx->parent = NULL;
                        intr_tx->tx_submit(intr_tx);
                        async_tx_ack(intr_tx);
                }
        } else {
                if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                        panic("%s: DMA_ERROR waiting for depend_tx\n",
                              __func__);
                tx->tx_submit(tx);
        }
}

/**
 * submit_disposition - while holding depend_tx->lock we must avoid submitting
 *      new operations to prevent a circular locking dependency with
 *      drivers that already hold a channel lock when calling
 *      async_tx_run_dependencies.
 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
 */
enum submit_disposition {
        ASYNC_TX_SUBMITTED,
        ASYNC_TX_CHANNEL_SWITCH,
        ASYNC_TX_DIRECT_SUBMIT,
};

void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
        enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        tx->callback = cb_fn;
        tx->callback_param = cb_param;

        if (depend_tx) {
                enum submit_disposition s;

                /* sanity check the dependency chain:
                 * 1/ if ack is already set then we cannot be sure
                 * we are referring to the correct operation
                 * 2/ dependencies are 1:1 i.e. two transactions can
                 * not depend on the same parent
                 */
                BUG_ON(async_tx_test_ack(depend_tx) || depend_tx->next ||
                       tx->parent);

                /* the lock prevents async_tx_run_dependencies from missing
                 * the setting of ->next when ->parent != NULL
                 */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        /* we have a parent so we can not submit directly
                         * if we are staying on the same channel: append
                         * else: channel switch
                         */
                        if (depend_tx->chan == chan) {
                                tx->parent = depend_tx;
                                depend_tx->next = tx;
                                s = ASYNC_TX_SUBMITTED;
                        } else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                } else {
                        /* we do not have a parent so we may be able to submit
                         * directly if we are staying on the same channel
                         */
                        if (depend_tx->chan == chan)
                                s = ASYNC_TX_DIRECT_SUBMIT;
                        else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                }
                spin_unlock_bh(&depend_tx->lock);

                switch (s) {
                case ASYNC_TX_SUBMITTED:
                        break;
                case ASYNC_TX_CHANNEL_SWITCH:
                        async_tx_channel_switch(depend_tx, tx);
                        break;
                case ASYNC_TX_DIRECT_SUBMIT:
                        tx->parent = NULL;
                        tx->tx_submit(tx);
                        break;
                }
        } else {
                tx->parent = NULL;
                tx->tx_submit(tx);
        }

        if (flags & ASYNC_TX_ACK)
                async_tx_ack(tx);

        if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
                async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);
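
/*
 * Illustrative usage sketch (not part of this file): an async_* frontend
 * pairs a driver-prepared descriptor with async_tx_submit() so that the
 * dependency, callback and ack handling above are applied uniformly.  DMA
 * mapping and error handling are omitted and the variable names are
 * hypothetical; the prep callback arguments follow this api generation.
 *
 *      struct dma_device *device = chan->device;
 *      struct dma_async_tx_descriptor *tx;
 *
 *      tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, 0);
 *      if (tx)
 *              async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
 *      else
 *              ... run the operation synchronously, as the sync path does ...
 */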

/**
 * async_trigger_callback - schedules the callback function to be run after
 *      any dependent operations have been completed.
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: 'callback' requires the completion of this transaction
 * @cb_fn: function to call after depend_tx completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
        struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *tx;

        if (depend_tx) {
                chan = depend_tx->chan;
                device = chan->device;

                /* see if we can schedule an interrupt
                 * otherwise poll for completion
                 */
                if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                        device = NULL;

                tx = device ? device->device_prep_dma_interrupt(chan, 0) : NULL;
        } else
                tx = NULL;

        if (tx) {
                pr_debug("%s: (async)\n", __func__);

                async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
        } else {
                pr_debug("%s: (sync)\n", __func__);

                /* wait for any prerequisite operations */
                if (depend_tx) {
                        /* if ack is already set then we cannot be sure
                         * we are referring to the correct operation
                         */
                        BUG_ON(async_tx_test_ack(depend_tx));
                        if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                                panic("%s: DMA_ERROR waiting for depend_tx\n",
                                      __func__);
                }

                async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
        }

        return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);

module_init(async_tx_init);
module_exit(async_tx_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");
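
/*
 * Illustrative usage sketch (not part of this file): async_trigger_callback()
 * is the usual way for a client to be notified once an entire dependency
 * chain has completed, e.g. after a sequence of async copies.  'chain_done'
 * and 'ctx' are hypothetical.
 *
 *      tx = async_memcpy(dest, src, 0, 0, len, 0, NULL, NULL, NULL);
 *      tx = async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
 *                                  chain_done, ctx);
 */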