/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 *   redistributing this file, you may do so under either license.
 *
 *   GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copy
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
#define PIDX			NTB_DEF_PEER_IDX

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

static bool on_node = true; /* default to 1 */
module_param(on_node, bool, 0644);
MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");

struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	resource_size_t	xlat_align;
	resource_size_t	xlat_align_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
	wait_queue_head_t	*wq;
	int			status;
	u64			copied;
	u64			diff_us;
};

struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct delayed_work	link_work;
	wait_queue_head_t	link_wq;
	u8			perf_threads;
	/* mutex ensures only one set of threads run at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
	atomic_t		tdone;
};

enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	MAX_SPAD
};
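
/*
 * NTB link event callback: when the link comes up, schedule the delayed
 * scratchpad handshake in perf_link_work(); when it goes down and the
 * handshake has not completed yet, cancel the pending work and mark the
 * link down.
 */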
static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
		schedule_delayed_work(&perf->link_work, 2*HZ);
	} else {
		dev_dbg(&perf->ntb->pdev->dev, "link down\n");

		if (!perf->link_is_up)
			cancel_delayed_work_sync(&perf->link_work);

		perf->link_is_up = false;
	}
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}

static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	vbase = mw->vbase;
	dst_vaddr = dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}
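
/*
 * Stream 'total' bytes through the peer memory window in 'buf_size' chunks,
 * wrapping back to the start of the window every 'win_size' bytes, then
 * record the byte count and elapsed time in the thread context.
 */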
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0, result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return 0;
}

static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	/* Is the channel required to be on the same node as the device? */
	if (!on_node)
		return true;

	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char __iomem *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_debug("kthread %s starting...\n", current->comm);

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char __iomem *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	atomic_inc(&perf->tdone);
	wake_up(pctx->wq);
	rc = 0;
	goto done;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

done:
	/* Wait until we are told to stop */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return rc;
}
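
/*
 * Tear down the inbound memory window: clear the translation on the NTB
 * and release the coherent buffer that backed it.
 */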
static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	int rc;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}

static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;

	if (max_mw_size && size > max_mw_size)
		size = max_mw_size;

	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;
	wake_up(&perf->link_wq);

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
			      &mw->xlat_align_size, NULL);
	if (rc)
		return rc;

	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}
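
/*
 * debugfs "run" read handler: report "running" while a measurement is in
 * progress, otherwise print the per-thread byte counts, elapsed times and
 * throughput from the last run.
 */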
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					     "%d: error %d\n", i,
					     pctx->status);
			continue;
		}

		rate = div64_u64(pctx->copied, pctx->diff_us);
		out_off += scnprintf(buf + out_off, 1024 - out_off,
				     "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
				     i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

static void threads_cleanup(struct perf_ctx *perf)
{
	struct pthr_ctx *pctx;
	int i;

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];
		if (pctx->thread) {
			pctx->status = kthread_stop(pctx->thread);
			pctx->thread = NULL;
		}
	}
}

static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int i;

	for (i = 0; i < MAX_THREADS; i++)
		perf->pthr_ctx[i].status = -ENODATA;
}
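
/*
 * debugfs "run" write handler: wait for the NTB link, clamp the requested
 * thread count and segment/run orders, spawn one kthread per requested
 * thread, and block until they all report completion before cleaning up.
 */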
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
		       : NUMA_NO_NODE;
	atomic_set(&perf->tdone, 0);

	/* launch kernel thread */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}

static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;
	struct dentry *debugfs_node_dir;
	struct dentry *debugfs_run;
	struct dentry *debugfs_threads;
	struct dentry *debugfs_seg_order;
	struct dentry *debugfs_run_order;
	struct dentry *debugfs_use_dma;
	struct dentry *debugfs_on_node;

	if (!debugfs_initialized())
		return -ENODEV;

	/* Assumption: only one NTB device in the system */
	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
					      perf_debugfs_dir);
	if (!debugfs_node_dir)
		goto err;

	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
					  debugfs_node_dir, perf,
					  &ntb_perf_debugfs_run);
	if (!debugfs_run)
		goto err;

	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
					    debugfs_node_dir,
					    &perf->perf_threads);
	if (!debugfs_threads)
		goto err;

	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
					       debugfs_node_dir,
					       &seg_order);
	if (!debugfs_seg_order)
		goto err;

	debugfs_run_order = debugfs_create_u32("run_order", 0600,
					       debugfs_node_dir,
					       &run_order);
	if (!debugfs_run_order)
		goto err;

	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
					      debugfs_node_dir,
					      &use_dma);
	if (!debugfs_use_dma)
		goto err;

	debugfs_on_node = debugfs_create_bool("on_node", 0600,
					      debugfs_node_dir,
					      &on_node);
	if (!debugfs_on_node)
		goto err;

	return 0;

err:
	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;
	return -ENODEV;
}
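
/*
 * NTB client probe: check that the device exposes enough scratchpads and an
 * inbound MW API, allocate the driver context, map the peer memory window,
 * register the context ops, bring the link up, and create the debugfs
 * interface.
 */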
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	if (ntb_spad_count(ntb) < MAX_SPAD) {
		dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
			DRIVER_NAME);
		return -EIO;
	}

	if (!ntb->ops->mw_set_trans) {
		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
		return -EINVAL;
	}

	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");

	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	mutex_init(&perf->run_mutex);
	spin_lock_init(&perf->db_lock);
	perf_setup_mw(ntb, perf);
	init_waitqueue_head(&perf->link_wq);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	perf_clear_thread_status(perf);

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	kfree(perf);
err_perf:
	return rc;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	mutex_lock(&perf->run_mutex);

	cancel_delayed_work_sync(&perf->link_work);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);