/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copy
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	50
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	resource_size_t	xlat_align;
	resource_size_t	xlat_align_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
};

struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct work_struct	link_cleanup;
	struct delayed_work	link_work;
	struct dentry		*debugfs_node_dir;
	struct dentry		*debugfs_run;
	struct dentry		*debugfs_threads;
	u8			perf_threads;
	bool			run;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
};

enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	SPAD_MSG,
	SPAD_ACK,
	MAX_SPAD
};

static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work, 2*HZ);
	else
		schedule_work(&perf->link_cleanup);
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}
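
/*
 * Copy one buf_size segment from the local source buffer into the peer's
 * memory window.  Without use_dma this is a synchronous memcpy_toio();
 * with use_dma a memcpy descriptor is submitted on the thread's DMA
 * channel and completion is signalled via perf_copy_callback() above.
 */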
static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	u64 vbase, dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (size_t)src & ~PAGE_MASK;
	dst_off = (size_t)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	vbase = (u64)(u64 *)mw->vbase;
	dst_vaddr = (u64)(u64 *)dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}

static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0, result;
	char *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 4GB to prevent soft hang. */
		if (((copied % SZ_4G) == 0) && !use_dma) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}
	}

	if (use_dma) {
		pr_info("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0)
			msleep(20);
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_info("%s: copied %llu bytes\n", current->comm, copied);
	pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_info("%s: MBytes/s: %llu\n", current->comm, perf);

	return 0;
}
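
/*
 * dma_request_channel() filter: only accept a DMA channel that sits on
 * the same NUMA node as the NTB device, so the copy engine stays local
 * to the source buffers allocated in ntb_perf_thread().
 */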
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_info("kthread %s starting...\n", current->comm);

	node = dev_to_node(&pdev->dev);

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	return 0;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

	return rc;
}

static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		/* let perf_link_work() know the buffer could not be set up */
		return -ENOMEM;
	}

	return 0;
}
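
/*
 * Link-up handshake: publish the local memory-window size and driver
 * version through the peer scratchpads, then read back what the remote
 * side wrote and size the local receive buffer to match.  Reschedules
 * itself while the link is up but the handshake has not completed.
 */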
static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;
	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

static void perf_link_cleanup(struct work_struct *work)
{
	struct perf_ctx *perf = container_of(work,
					     struct perf_ctx,
					     link_cleanup);

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	if (!perf->link_is_up)
		cancel_delayed_work_sync(&perf->link_work);
}

static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
			      &mw->xlat_align, &mw->xlat_align_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}

static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	if (!perf)
		return 0;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%d\n", perf->run);
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}

static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;

	if (!perf->link_is_up)
		return 0;

	if (perf->perf_threads == 0)
		return 0;

	if (atomic_read(&perf->tsync) == 0)
		perf->run = false;

	if (perf->run) {
		/* let's stop the threads */
		perf->run = false;
		for (i = 0; i < MAX_THREADS; i++) {
			if (perf->pthr_ctx[i].thread) {
				kthread_stop(perf->pthr_ctx[i].thread);
				perf->pthr_ctx[i].thread = NULL;
			} else
				break;
		}
	} else {
		perf->run = true;

		if (perf->perf_threads > MAX_THREADS) {
			perf->perf_threads = MAX_THREADS;
			pr_info("Reset total threads to: %u\n", MAX_THREADS);
		}

		/* no greater than 1M */
		if (seg_order > MAX_SEG_ORDER) {
			seg_order = MAX_SEG_ORDER;
			pr_info("Fix seg_order to %u\n", seg_order);
		}

		if (run_order < seg_order) {
			run_order = seg_order;
			pr_info("Fix run_order to %u\n", run_order);
		}

		node = dev_to_node(&perf->ntb->pdev->dev);
		/* launch kernel threads */
		for (i = 0; i < perf->perf_threads; i++) {
			struct pthr_ctx *pctx;

			pctx = &perf->pthr_ctx[i];
			atomic_set(&pctx->dma_sync, 0);
			pctx->perf = perf;
			pctx->thread =
				kthread_create_on_node(ntb_perf_thread,
						       (void *)pctx,
						       node, "ntb_perf %d", i);
			if (IS_ERR(pctx->thread)) {
				/*
				 * kthread_create_on_node() returns an ERR_PTR
				 * on failure, never NULL: stop the threads
				 * already started and give up.
				 */
				pctx->thread = NULL;
				perf->run = false;
				for (i = 0; i < MAX_THREADS; i++) {
					if (perf->pthr_ctx[i].thread) {
						kthread_stop(perf->pthr_ctx[i].thread);
						perf->pthr_ctx[i].thread = NULL;
					}
				}
			} else {
				wake_up_process(pctx->thread);
			}

			if (!perf->run)
				return -ENXIO;
		}
	}

	return count;
}
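
/*
 * Writing to the "run" debugfs file toggles the measurement: it stops
 * any running threads, or starts perf_threads kthreads.  For example
 * (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *	echo 8 > /sys/kernel/debug/ntb_perf/<pci device>/threads
 *	echo 1 > /sys/kernel/debug/ntb_perf/<pci device>/run
 *
 * Results are reported via pr_info() from perf_move_data().
 */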
static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!debugfs_initialized())
		return -ENODEV;

	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
						    perf_debugfs_dir);
	if (!perf->debugfs_node_dir)
		return -ENODEV;

	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
						perf->debugfs_node_dir, perf,
						&ntb_perf_debugfs_run);
	if (!perf->debugfs_run)
		return -ENODEV;

	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
						  perf->debugfs_node_dir,
						  &perf->perf_threads);
	if (!perf->debugfs_threads)
		return -ENODEV;

	return 0;
}

static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	node = dev_to_node(&pdev->dev);

	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	perf->run = false;
	spin_lock_init(&perf->db_lock);
	perf_setup_mw(ntb, perf);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
	INIT_WORK(&perf->link_cleanup, perf_link_cleanup);

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	cancel_work_sync(&perf->link_cleanup);
	kfree(perf);
err_perf:
	return rc;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	cancel_delayed_work_sync(&perf->link_work);
	cancel_work_sync(&perf->link_cleanup);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}
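
/*
 * Register with the NTB core: perf_probe()/perf_remove() run for each
 * NTB device that appears or goes away.
 */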
static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);