/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

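/*
 * Usage sketch (illustrative only; the debugfs mount point and the PCI
 * device name depend on the system):
 *
 *	echo 8 > /sys/kernel/debug/ntb_perf/<pci-id>/threads
 *	echo 1 > /sys/kernel/debug/ntb_perf/<pci-id>/run
 *	cat /sys/kernel/debug/ntb_perf/<pci-id>/run
 *
 * Writing "run" blocks until all test threads have finished; reading it
 * back reports the per-thread throughput.
 */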

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
#define PIDX			NTB_DEF_PEER_IDX

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

static bool on_node = true; /* default to 1 */
module_param(on_node, bool, 0644);
MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");

struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
	wait_queue_head_t	*wq;
	int			status;
	u64			copied;
	u64			diff_us;
};

struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct delayed_work	link_work;
	wait_queue_head_t	link_wq;
	u8			perf_threads;
	/* mutex ensures only one set of threads run at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
	atomic_t		tdone;
};

enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	MAX_SPAD
};

static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
		schedule_delayed_work(&perf->link_work, 2*HZ);
	} else {
		dev_dbg(&perf->ntb->pdev->dev, "link down\n");

		if (!perf->link_is_up)
			cancel_delayed_work_sync(&perf->link_work);

		perf->link_is_up = false;
	}
}

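/*
 * Doorbell event handler: nothing is serviced here, the handler only
 * logs which doorbell bits were set for the interrupting vector.
 */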
static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}

static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	vbase = mw->vbase;
	dst_vaddr = dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}

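/*
 * Push 'total' bytes of the source buffer through the peer memory window
 * in 'buf_size' chunks, wrapping back to the start of the window once
 * 'win_size' worth of chunks has been written, then record the bytes
 * copied and the elapsed time for this thread.
 */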
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0, result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Reschedule roughly every 5 seconds so a long transfer
		 * does not trigger a soft lockup.
		 */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return 0;
}

static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	/* Is the channel required to be on the same node as the device? */
	if (!on_node)
		return true;

	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char __iomem *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_debug("kthread %s starting...\n", current->comm);

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char __iomem *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	atomic_inc(&perf->tdone);
	wake_up(pctx->wq);
	rc = 0;
	goto done;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

done:
	/* Wait until we are told to stop */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return rc;
}

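/*
 * Tear down the inbound memory window: clear the NTB translation and free
 * the coherent buffer that backed it.
 */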
static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	resource_size_t xlat_align;
	resource_size_t xlat_align_size;
	int rc;

	if (!size)
		return -EINVAL;

	rc = ntb_mw_get_align(perf->ntb, PIDX, 0, &xlat_align,
			      &xlat_align_size, NULL);
	if (rc)
		return rc;

	xlat_size = round_up(size, xlat_align_size);
	buf_size = round_up(size, xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}

static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;

	if (max_mw_size && size > max_mw_size)
		size = max_mw_size;

	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;
	wake_up(&perf->link_wq);

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}

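/*
 * debugfs "run" read: reports "running" while a test holds run_mutex,
 * otherwise prints per-thread results (bytes copied, elapsed usecs and
 * the derived MBytes/s rate).
 */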
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					     "%d: error %d\n", i,
					     pctx->status);
			continue;
		}

		rate = div64_u64(pctx->copied, pctx->diff_us);
		out_off += scnprintf(buf + out_off, 1024 - out_off,
				     "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
				     i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

static void threads_cleanup(struct perf_ctx *perf)
{
	struct pthr_ctx *pctx;
	int i;

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];
		if (pctx->thread) {
			pctx->status = kthread_stop(pctx->thread);
			pctx->thread = NULL;
		}
	}
}

static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int i;

	for (i = 0; i < MAX_THREADS; i++)
		perf->pthr_ctx[i].status = -ENODATA;
}

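/*
 * debugfs "run" write: wait for the link, clamp the thread count and the
 * seg_order/run_order parameters, then spawn one kthread per requested
 * thread and block until they all signal completion on the local wait
 * queue.
 */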
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
		       : NUMA_NO_NODE;
	atomic_set(&perf->tdone, 0);

	/* launch kernel thread */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}

static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;
	struct dentry *debugfs_node_dir;
	struct dentry *debugfs_run;
	struct dentry *debugfs_threads;
	struct dentry *debugfs_seg_order;
	struct dentry *debugfs_run_order;
	struct dentry *debugfs_use_dma;
	struct dentry *debugfs_on_node;

	if (!debugfs_initialized())
		return -ENODEV;

	/* Assumption: only one NTB device in the system */
	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
					      perf_debugfs_dir);
	if (!debugfs_node_dir)
		goto err;

	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
					  debugfs_node_dir, perf,
					  &ntb_perf_debugfs_run);
	if (!debugfs_run)
		goto err;

	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
					    debugfs_node_dir,
					    &perf->perf_threads);
	if (!debugfs_threads)
		goto err;

	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
					       debugfs_node_dir,
					       &seg_order);
	if (!debugfs_seg_order)
		goto err;

	debugfs_run_order = debugfs_create_u32("run_order", 0600,
					       debugfs_node_dir,
					       &run_order);
	if (!debugfs_run_order)
		goto err;

	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
					      debugfs_node_dir,
					      &use_dma);
	if (!debugfs_use_dma)
		goto err;

	debugfs_on_node = debugfs_create_bool("on_node", 0600,
					      debugfs_node_dir,
					      &on_node);
	if (!debugfs_on_node)
		goto err;

	return 0;

err:
	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;
	return -ENODEV;
}

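/*
 * Probe: check that the device exposes enough scratchpads and an inbound
 * MW API, map the peer memory window, register the context ops, enable
 * the link and publish the debugfs control files.
 */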
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	if (ntb_spad_count(ntb) < MAX_SPAD) {
		dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
			DRIVER_NAME);
		return -EIO;
	}

	if (!ntb->ops->mw_set_trans) {
		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
		return -EINVAL;
	}

	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	mutex_init(&perf->run_mutex);
	spin_lock_init(&perf->db_lock);
	perf_setup_mw(ntb, perf);
	init_waitqueue_head(&perf->link_wq);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	perf_clear_thread_status(perf);

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	kfree(perf);
err_perf:
	return rc;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	mutex_lock(&perf->run_mutex);

	cancel_delayed_work_sync(&perf->link_work);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);