/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 *   redistributing this file, you may do so under either license.
 *
 *   GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2017 T-Platforms. All Rights Reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

/*
 * How to use this tool, by example.
 *
 * Assuming $DBG_DIR is something like:
 * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
 * Suppose that, aside from the local device, there is at least one remote
 * device connected to the NTB, with peer index 0.
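 *
 * Note: the number of parallel measurement threads spawned for a test can be
 * set via the 'threads_count' debugfs node (1 by default, up to 32).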
 *-----------------------------------------------------------------------------
 * Eg: install driver with specified chunk/total orders and dma-enabled flag
 *
 * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
 *-----------------------------------------------------------------------------
 * Eg: check NTB ports (index) and MW mapping information
 *
 * root@self# cat $DBG_DIR/info
 *-----------------------------------------------------------------------------
 * Eg: start performance test with peer (index 0) and get the test metrics
 *
 * root@self# echo 0 > $DBG_DIR/run
 * root@self# cat $DBG_DIR/run
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/pci.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/random.h>
#include <linux/ntb.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_VERSION		"2.0"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");

#define MAX_THREADS_CNT		32
#define DEF_THREADS_CNT		1
#define MAX_CHUNK_SIZE		SZ_1M
#define MAX_CHUNK_ORDER		20 /* no larger than 1M */

#define DMA_TRIES		100
#define DMA_MDELAY		10

#define MSG_TRIES		1000
#define MSG_UDELAY_LOW		1000000
#define MSG_UDELAY_HIGH		2000000

#define PERF_BUF_LEN 1024

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");

static unsigned char chunk_order = 19; /* 512K */
module_param(chunk_order, byte, 0644);
MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");

static unsigned char total_order = 30; /* 1G */
module_param(total_order, byte, 0644);
MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

/*==============================================================================
 *                         Perf driver data definition
 *==============================================================================
 */

enum perf_cmd {
	PERF_CMD_INVAL = -1, /* invalid spad command */
	PERF_CMD_SSIZE = 0,  /* send out buffer size */
	PERF_CMD_RSIZE = 1,  /* recv in  buffer size */
	PERF_CMD_SXLAT = 2,  /* send in  buffer xlat */
	PERF_CMD_RXLAT = 3,  /* recv out buffer xlat */
	PERF_CMD_CLEAR = 4,  /* clear allocated memory */
	PERF_STS_DONE  = 5,  /* init is done */
	PERF_STS_LNKUP = 6,  /* link up state flag */
};

struct perf_ctx;

struct perf_peer {
	struct perf_ctx	*perf;
	int pidx;
	int gidx;

	/* Outbound MW params */
	u64 outbuf_xlat;
	resource_size_t outbuf_size;
	void __iomem *outbuf;
	phys_addr_t out_phys_addr;
	dma_addr_t dma_dst_addr;
	/* Inbound MW params */
	dma_addr_t inbuf_xlat;
	resource_size_t inbuf_size;
	void *inbuf;

	/* NTB connection setup service */
	struct work_struct	service;
	unsigned long		sts;

	struct completion init_comp;
};
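/* Get the perf_peer descriptor back from its embedded service work item */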
#define to_peer_service(__work) \
	container_of(__work, struct perf_peer, service)

struct perf_thread {
	struct perf_ctx *perf;
	int tidx;

	/* DMA-based test sync parameters */
	atomic_t dma_sync;
	wait_queue_head_t dma_wait;
	struct dma_chan *dma_chan;

	/* Data source and measured statistics */
	void *src;
	u64 copied;
	ktime_t duration;
	int status;
	struct work_struct work;
};
#define to_thread_work(__work) \
	container_of(__work, struct perf_thread, work)

struct perf_ctx {
	struct ntb_dev *ntb;

	/* Global device index and peers descriptors */
	int gidx;
	int pcnt;
	struct perf_peer *peers;

	/* Performance measuring work-threads interface */
	unsigned long busy_flag;
	wait_queue_head_t twait;
	atomic_t tsync;
	u8 tcnt;
	struct perf_peer *test_peer;
	struct perf_thread threads[MAX_THREADS_CNT];

	/* Scratchpad/Message IO operations */
	int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
	int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
			u64 *data);

	struct dentry *dbgfs_dir;
};

/*
 * Scratchpad-based command interface
 */
#define PERF_SPAD_CNT(_pcnt) \
	(3*((_pcnt) + 1))
#define PERF_SPAD_CMD(_gidx) \
	(3*(_gidx))
#define PERF_SPAD_LDATA(_gidx) \
	(3*(_gidx) + 1)
#define PERF_SPAD_HDATA(_gidx) \
	(3*(_gidx) + 2)
#define PERF_SPAD_NOTIFY(_gidx) \
	(BIT_ULL(_gidx))

/*
 * Message-based command interface
 */
#define PERF_MSG_CNT		3
#define PERF_MSG_CMD		0
#define PERF_MSG_LDATA		1
#define PERF_MSG_HDATA		2

/*==============================================================================
 *                          Static data declarations
 *==============================================================================
 */

static struct dentry *perf_dbgfs_topdir;

static struct workqueue_struct *perf_wq __read_mostly;

/*==============================================================================
 *                  NTB cross-link commands execution service
 *==============================================================================
 */

static void perf_terminate_test(struct perf_ctx *perf);

static inline bool perf_link_is_up(struct perf_peer *peer)
{
	u64 link;

	link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
	return !!(link & BIT_ULL_MASK(peer->pidx));
}

static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
			      u64 data)
{
	struct perf_ctx *perf = peer->perf;
	int try;
	u32 sts;

	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);

	/*
	 * Perform a predefined number of attempts before giving up. The data
	 * is sent to the port-specific scratchpad, so as to prevent a
	 * multi-port access race condition. Additionally, there is no need
	 * for local locking, since only the thread-safe service work uses
	 * this method.
	 */
	for (try = 0; try < MSG_TRIES; try++) {
		if (!perf_link_is_up(peer))
			return -ENOLINK;

		sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
					 PERF_SPAD_CMD(perf->gidx));
		if (sts != PERF_CMD_INVAL) {
			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
			continue;
		}

		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_LDATA(perf->gidx),
				    lower_32_bits(data));
		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_HDATA(perf->gidx),
				    upper_32_bits(data));
		ntb_peer_spad_write(perf->ntb, peer->pidx,
				    PERF_SPAD_CMD(perf->gidx),
				    cmd);
		ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));

		dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
			PERF_SPAD_NOTIFY(peer->gidx));

		break;
	}

	return try < MSG_TRIES ? 0 : -EAGAIN;
}

static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
			      enum perf_cmd *cmd, u64 *data)
{
	struct perf_peer *peer;
	u32 val;

	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));

	/*
	 * We start scanning from the beginning, since the cleared DB may have
	 * been set by any peer. Yes, this means a peer with a smaller index
	 * is serviced with greater priority, but it's convenient for spad and
	 * message code unification and simplicity.
	 */
	for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
		peer = &perf->peers[*pidx];

		if (!perf_link_is_up(peer))
			continue;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
		if (val == PERF_CMD_INVAL)
			continue;

		*cmd = val;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
		*data = val;

		val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
		*data |= (u64)val << 32;

		/* The next command can be retrieved from now on */
		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
			       PERF_CMD_INVAL);

		dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);

		return 0;
	}

	return -ENODATA;
}

static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
			     u64 data)
{
	struct perf_ctx *perf = peer->perf;
	int try, ret;
	u64 outbits;

	dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);

	/*
	 * Perform a predefined number of attempts before giving up. Message
	 * registers are free of race-condition problems when accessed from
	 * different ports, so we don't need to split the registers by global
	 * device index. We also don't need local locking, since the method is
	 * used from the service work only.
	 */
	outbits = ntb_msg_outbits(perf->ntb);
	for (try = 0; try < MSG_TRIES; try++) {
		if (!perf_link_is_up(peer))
			return -ENOLINK;

		ret = ntb_msg_clear_sts(perf->ntb, outbits);
		if (ret)
			return ret;

		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
				   lower_32_bits(data));

		if (ntb_msg_read_sts(perf->ntb) & outbits) {
			usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
			continue;
		}

		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
				   upper_32_bits(data));

		/* This call shall trigger peer message event */
		ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);

		break;
	}

	return try < MSG_TRIES ? 0 : -EAGAIN;
}

static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
			     enum perf_cmd *cmd, u64 *data)
{
	u64 inbits;
	u32 val;

	inbits = ntb_msg_inbits(perf->ntb);

	if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
		return -ENODATA;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
	*cmd = val;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
	*data = val;

	val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
	*data |= (u64)val << 32;

	/* The next command can be retrieved from now on */
	ntb_msg_clear_sts(perf->ntb, inbits);

	dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);

	return 0;
}

static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
{
	struct perf_ctx *perf = peer->perf;

	if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
		return perf->cmd_send(peer, cmd, data);

	dev_err(&perf->ntb->dev, "Send invalid command\n");
	return -EINVAL;
}

static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
{
	switch (cmd) {
	case PERF_CMD_SSIZE:
	case PERF_CMD_RSIZE:
	case PERF_CMD_SXLAT:
	case PERF_CMD_RXLAT:
	case PERF_CMD_CLEAR:
		break;
	default:
		dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
		return -EINVAL;
	}

	/* No need for a memory barrier, since bit ops have an internal lock */
	set_bit(cmd, &peer->sts);

	dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);

	(void)queue_work(system_highpri_wq, &peer->service);

	return 0;
}

static int perf_cmd_recv(struct perf_ctx *perf)
{
	struct perf_peer *peer;
	int ret, pidx, cmd;
	u64 data;

	while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
		peer = &perf->peers[pidx];

		switch (cmd) {
		case PERF_CMD_SSIZE:
			peer->inbuf_size = data;
			return perf_cmd_exec(peer, PERF_CMD_RSIZE);
		case PERF_CMD_SXLAT:
			peer->outbuf_xlat = data;
			return perf_cmd_exec(peer, PERF_CMD_RXLAT);
		default:
			dev_err(&perf->ntb->dev, "Recv invalid command\n");
			return -EINVAL;
		}
	}

	/* Return 0 if no data left to process, otherwise an error */
	return ret == -ENODATA ? 0 : ret;
}

static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;
	struct perf_peer *peer;
	bool lnk_up;
	int pidx;

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		lnk_up = perf_link_is_up(peer);

		if (lnk_up &&
		    !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
			perf_cmd_exec(peer, PERF_CMD_SSIZE);
		} else if (!lnk_up &&
			   test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
			perf_cmd_exec(peer, PERF_CMD_CLEAR);
		}
	}
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;

	dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
		ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));

	/* Just receive all available commands */
	(void)perf_cmd_recv(perf);
}

static void perf_msg_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
		ntb_msg_read_sts(perf->ntb));

	/* Messages are only sent one-by-one */
	(void)perf_cmd_recv(perf);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
	.msg_event = perf_msg_event
};

static void perf_free_outbuf(struct perf_peer *peer)
{
	(void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
}

static int perf_setup_outbuf(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	int ret;

	/* Outbuf size can be unaligned due to custom max_mw_size */
	ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
				    peer->outbuf_xlat, peer->outbuf_size);
	if (ret) {
		dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
		return ret;
	}

	/* Initialization is finally done */
	set_bit(PERF_STS_DONE, &peer->sts);
	complete_all(&peer->init_comp);

	return 0;
}

static void perf_free_inbuf(struct perf_peer *peer)
{
	if (!peer->inbuf)
		return;

	(void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
	dma_free_coherent(&peer->perf->ntb->pdev->dev, peer->inbuf_size,
			  peer->inbuf, peer->inbuf_xlat);
	peer->inbuf = NULL;
}

static int perf_setup_inbuf(struct perf_peer *peer)
{
	resource_size_t xlat_align, size_align, size_max;
	struct perf_ctx *perf = peer->perf;
	int ret;

	/* Get inbound MW parameters */
	ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
			       &xlat_align, &size_align, &size_max);
	if (ret) {
		dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
		return ret;
	}

	if (peer->inbuf_size > size_max) {
		dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
			&peer->inbuf_size, &size_max);
		return -EINVAL;
	}

	peer->inbuf_size = round_up(peer->inbuf_size, size_align);

	perf_free_inbuf(peer);

	peer->inbuf = dma_alloc_coherent(&perf->ntb->pdev->dev,
					 peer->inbuf_size, &peer->inbuf_xlat,
					 GFP_KERNEL);
	if (!peer->inbuf) {
		dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
			&peer->inbuf_size);
		return -ENOMEM;
	}
	if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
		dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
		goto err_free_inbuf;
	}

	ret = ntb_mw_set_trans(perf->ntb, peer->pidx, perf->gidx,
			       peer->inbuf_xlat, peer->inbuf_size);
	if (ret) {
		dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
		goto err_free_inbuf;
	}

	/*
	 * We submit the inbuf xlat transmission command for execution here to
	 * follow the code architecture, even though this method is called
	 * from the service work itself, so the command will be executed right
	 * after this method returns.
	 */
	(void)perf_cmd_exec(peer, PERF_CMD_SXLAT);

	return 0;

err_free_inbuf:
	perf_free_inbuf(peer);

	return ret;
}

static void perf_service_work(struct work_struct *work)
{
	struct perf_peer *peer = to_peer_service(work);

	if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
		perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);

	if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
		perf_setup_inbuf(peer);

	if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
		perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);

	if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
		perf_setup_outbuf(peer);

	if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
		init_completion(&peer->init_comp);
		clear_bit(PERF_STS_DONE, &peer->sts);
		if (test_bit(0, &peer->perf->busy_flag) &&
		    peer == peer->perf->test_peer) {
			dev_warn(&peer->perf->ntb->dev,
				 "Freeing while test on-fly\n");
			perf_terminate_test(peer->perf);
		}
		perf_free_outbuf(peer);
		perf_free_inbuf(peer);
	}
}

static int perf_init_service(struct perf_ctx *perf)
{
	u64 mask;

	if (ntb_peer_mw_count(perf->ntb) < perf->pcnt) {
		dev_err(&perf->ntb->dev, "Not enough memory windows\n");
		return -EINVAL;
	}

	if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
		perf->cmd_send = perf_msg_cmd_send;
		perf->cmd_recv = perf_msg_cmd_recv;

		dev_dbg(&perf->ntb->dev, "Message service initialized\n");

		return 0;
	}

	dev_dbg(&perf->ntb->dev, "Message service unsupported\n");

	mask = GENMASK_ULL(perf->pcnt, 0);
	if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
	    (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
		perf->cmd_send = perf_spad_cmd_send;
		perf->cmd_recv = perf_spad_cmd_recv;

		dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");

		return 0;
	}

	dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");

	dev_err(&perf->ntb->dev, "Command services unsupported\n");

	return -EINVAL;
}

static int perf_enable_service(struct perf_ctx *perf)
{
	u64 mask, incmd_bit;
	int ret, sidx, scnt;

	mask = ntb_db_valid_mask(perf->ntb);
	(void)ntb_db_set_mask(perf->ntb, mask);

	ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
	if (ret)
		return ret;

	if (perf->cmd_send == perf_msg_cmd_send) {
		u64 inbits, outbits;

		inbits = ntb_msg_inbits(perf->ntb);
		outbits = ntb_msg_outbits(perf->ntb);
		(void)ntb_msg_set_mask(perf->ntb, inbits | outbits);

		incmd_bit = BIT_ULL(__ffs64(inbits));
		ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);

		dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
	} else {
		scnt = ntb_spad_count(perf->ntb);
		for (sidx = 0; sidx < scnt; sidx++)
			ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
		incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
		ret = ntb_db_clear_mask(perf->ntb, incmd_bit);

		dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
	}
	if (ret) {
		ntb_clear_ctx(perf->ntb);
		return ret;
	}

	ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	/* Might not be necessary */
	ntb_link_event(perf->ntb);

	return 0;
}

static void perf_disable_service(struct perf_ctx *perf)
{
	int pidx;

	if (perf->cmd_send == perf_msg_cmd_send) {
		u64 inbits;

		inbits = ntb_msg_inbits(perf->ntb);
		(void)ntb_msg_set_mask(perf->ntb, inbits);
	} else {
		(void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
	}

	ntb_clear_ctx(perf->ntb);

	for (pidx = 0; pidx < perf->pcnt; pidx++)
		perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);

	for (pidx = 0; pidx < perf->pcnt; pidx++)
		flush_work(&perf->peers[pidx].service);

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		struct perf_peer *peer = &perf->peers[pidx];

		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
	}

	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));

	ntb_link_disable(perf->ntb);
}

/*==============================================================================
 *                      Performance measuring work-thread
 *==============================================================================
 */

static void perf_dma_copy_callback(void *data)
{
	struct perf_thread *pthr = data;

	atomic_dec(&pthr->dma_sync);
	wake_up(&pthr->dma_wait);
}

static int perf_copy_chunk(struct perf_thread *pthr,
			   void __iomem *dst, void *src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct dmaengine_unmap_data *unmap;
	struct device *dma_dev;
	int try = 0, ret = 0;
	struct perf_peer *peer = pthr->perf->test_peer;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_dma_addr;

	if (!use_dma) {
		memcpy_toio(dst, src, len);
		goto ret_check_tsync;
	}

	dma_dev = pthr->dma_chan->device->dev;

	if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
				 offset_in_page(dst), len))
		return -EIO;

	vbase = peer->outbuf;
	dst_vaddr = dst;
	dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(dma_dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = len;
	unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
				      offset_in_page(src), len, DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev, unmap->addr[0])) {
		ret = -EIO;
		goto err_free_resource;
	}
	unmap->to_cnt = 1;

	do {
		tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, dst_dma_addr,
			unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
		if (!tx)
			msleep(DMA_MDELAY);
	} while (!tx && (try++ < DMA_TRIES));

	if (!tx) {
		ret = -EIO;
		goto err_free_resource;
	}

	tx->callback = perf_dma_copy_callback;
	tx->callback_param = pthr;
	dma_set_unmap(tx, unmap);

	ret = dma_submit_error(dmaengine_submit(tx));
	if (ret) {
		dmaengine_unmap_put(unmap);
		goto err_free_resource;
	}

	dmaengine_unmap_put(unmap);

	atomic_inc(&pthr->dma_sync);
	dma_async_issue_pending(pthr->dma_chan);

ret_check_tsync:
	return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;

err_free_resource:
	dmaengine_unmap_put(unmap);

	return ret;
}

static bool perf_dma_filter(struct dma_chan *chan, void *data)
{
	struct perf_ctx *perf = data;
	int node;

	node = dev_to_node(&perf->ntb->dev);

	return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
}

static int perf_init_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;
	dma_cap_mask_t dma_mask;
	struct perf_peer *peer = pthr->perf->test_peer;

	pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
				 dev_to_node(&perf->ntb->dev));
	if (!pthr->src)
		return -ENOMEM;

	get_random_bytes(pthr->src, perf->test_peer->outbuf_size);

	if (!use_dma)
		return 0;

	dma_cap_zero(dma_mask);
	dma_cap_set(DMA_MEMCPY, dma_mask);
	pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
	if (!pthr->dma_chan) {
		dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
			pthr->tidx);
		goto err_free;
	}
	peer->dma_dst_addr =
		dma_map_resource(pthr->dma_chan->device->dev,
				 peer->out_phys_addr, peer->outbuf_size,
				 DMA_FROM_DEVICE, 0);
	if (dma_mapping_error(pthr->dma_chan->device->dev,
			      peer->dma_dst_addr)) {
		dev_err(pthr->dma_chan->device->dev, "%d: Failed to map DMA addr\n",
			pthr->tidx);
		peer->dma_dst_addr = 0;
		dma_release_channel(pthr->dma_chan);
		goto err_free;
	}
	dev_dbg(pthr->dma_chan->device->dev, "%d: Map MMIO %pa to DMA addr %pad\n",
		pthr->tidx,
		&peer->out_phys_addr,
		&peer->dma_dst_addr);

	atomic_set(&pthr->dma_sync, 0);
	return 0;

err_free:
	atomic_dec(&perf->tsync);
	wake_up(&perf->twait);
	kfree(pthr->src);
	return -ENODEV;
}

static int perf_run_test(struct perf_thread *pthr)
{
	struct perf_peer *peer = pthr->perf->test_peer;
	struct perf_ctx *perf = pthr->perf;
	void __iomem *flt_dst, *bnd_dst;
	u64 total_size, chunk_size;
	void *flt_src;
	int ret = 0;

	total_size = 1ULL << total_order;
	chunk_size = 1ULL << chunk_order;
	chunk_size = min_t(u64, peer->outbuf_size, chunk_size);

	flt_src = pthr->src;
	bnd_dst = peer->outbuf + peer->outbuf_size;
	flt_dst = peer->outbuf;

	pthr->duration = ktime_get();

	/* The copied field is cleared at the test launch stage */
	while (pthr->copied < total_size) {
		ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
		if (ret) {
			dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
				pthr->tidx, ret);
			return ret;
		}

		pthr->copied += chunk_size;

		flt_dst += chunk_size;
		flt_src += chunk_size;
		if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
			flt_dst = peer->outbuf;
			flt_src = pthr->src;
		}

		/* Yield the CPU to give other threads a chance to use it */
		schedule();
	}

	return 0;
}

static int perf_sync_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;

	if (!use_dma)
		goto no_dma_ret;

	wait_event(pthr->dma_wait,
		   (atomic_read(&pthr->dma_sync) == 0 ||
		    atomic_read(&perf->tsync) < 0));

	if (atomic_read(&perf->tsync) < 0)
		return -EINTR;

no_dma_ret:
	pthr->duration = ktime_sub(ktime_get(), pthr->duration);

	dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
		pthr->tidx, pthr->copied);

	dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
		pthr->tidx, ktime_to_us(pthr->duration));

	dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
		div64_u64(pthr->copied, ktime_to_us(pthr->duration)));

	return 0;
}

static void perf_clear_test(struct perf_thread *pthr)
{
	struct perf_ctx *perf = pthr->perf;

	if (!use_dma)
		goto no_dma_notify;

	/*
	 * If the test finished without errors, termination isn't needed.
	 * We call it anyway just to be sure the transfers have completed.
	 */
	(void)dmaengine_terminate_sync(pthr->dma_chan);
	if (pthr->perf->test_peer->dma_dst_addr)
		dma_unmap_resource(pthr->dma_chan->device->dev,
				   pthr->perf->test_peer->dma_dst_addr,
				   pthr->perf->test_peer->outbuf_size,
				   DMA_FROM_DEVICE, 0);

	dma_release_channel(pthr->dma_chan);

no_dma_notify:
	atomic_dec(&perf->tsync);
	wake_up(&perf->twait);
	kfree(pthr->src);
}

static void perf_thread_work(struct work_struct *work)
{
	struct perf_thread *pthr = to_thread_work(work);
	int ret;

	/*
	 * Perform the stages in compliance with the use_dma flag value.
	 * The test status is changed only if an error happens; otherwise
	 * the -ENODATA status is kept while the test is on-fly. Results
	 * synchronization is performed only if the test finished without
	 * an error or interruption.
	 */
	ret = perf_init_test(pthr);
	if (ret) {
		pthr->status = ret;
		return;
	}

	ret = perf_run_test(pthr);
	if (ret) {
		pthr->status = ret;
		goto err_clear_test;
	}

	pthr->status = perf_sync_test(pthr);

err_clear_test:
	perf_clear_test(pthr);
}

static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
{
	if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
		return -EINVAL;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	perf->tcnt = tcnt;

	clear_bit_unlock(0, &perf->busy_flag);

	return 0;
}

static void perf_terminate_test(struct perf_ctx *perf)
{
	int tidx;

	atomic_set(&perf->tsync, -1);
	wake_up(&perf->twait);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		wake_up(&perf->threads[tidx].dma_wait);
		cancel_work_sync(&perf->threads[tidx].work);
	}
}

static int perf_submit_test(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	struct perf_thread *pthr;
	int tidx, ret;

	ret = wait_for_completion_interruptible(&peer->init_comp);
	if (ret < 0)
		return ret;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	perf->test_peer = peer;
	atomic_set(&perf->tsync, perf->tcnt);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		pthr->status = -ENODATA;
		pthr->copied = 0;
		pthr->duration = ktime_set(0, 0);
		if (tidx < perf->tcnt)
			(void)queue_work(perf_wq, &pthr->work);
	}

	ret = wait_event_interruptible(perf->twait,
				       atomic_read(&perf->tsync) <= 0);
	if (ret == -ERESTARTSYS) {
		perf_terminate_test(perf);
		ret = -EINTR;
	}

	clear_bit_unlock(0, &perf->busy_flag);

	return ret;
}

static int perf_read_stats(struct perf_ctx *perf, char *buf,
			   size_t size, ssize_t *pos)
{
	struct perf_thread *pthr;
	int tidx;

	if (test_and_set_bit_lock(0, &perf->busy_flag))
		return -EBUSY;

	(*pos) += scnprintf(buf + *pos, size - *pos,
			    " Peer %d test statistics:\n",
			    perf->test_peer->pidx);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		if (pthr->status == -ENODATA)
			continue;

		if (pthr->status) {
			(*pos) += scnprintf(buf + *pos, size - *pos,
				"%d: error status %d\n", tidx, pthr->status);
			continue;
		}

		(*pos) += scnprintf(buf + *pos, size - *pos,
			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
			tidx, pthr->copied, ktime_to_us(pthr->duration),
			div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
	}

	clear_bit_unlock(0, &perf->busy_flag);

	return 0;
}

static void perf_init_threads(struct perf_ctx *perf)
{
	struct perf_thread *pthr;
	int tidx;

	perf->tcnt = DEF_THREADS_CNT;
	perf->test_peer = &perf->peers[0];
	init_waitqueue_head(&perf->twait);

	for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
		pthr = &perf->threads[tidx];

		pthr->perf = perf;
		pthr->tidx = tidx;
		pthr->status = -ENODATA;
		init_waitqueue_head(&pthr->dma_wait);
		INIT_WORK(&pthr->work, perf_thread_work);
	}
}

static void perf_clear_threads(struct perf_ctx *perf)
{
	perf_terminate_test(perf);
}

/*==============================================================================
 *                               DebugFS nodes
 *==============================================================================
 */

static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	struct perf_peer *peer;
	size_t buf_size;
	ssize_t pos = 0;
	int ret, pidx;
	char *buf;

	buf_size = min_t(size_t, size, 0x1000U);

	buf = kmalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	pos += scnprintf(buf + pos, buf_size - pos,
		" Performance measuring tool info:\n\n");

	pos += scnprintf(buf + pos, buf_size - pos,
		"Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
		perf->gidx);
	pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
	if (test_bit(0, &perf->busy_flag)) {
		pos += scnprintf(buf + pos, buf_size - pos,
			"on-fly with port %d (%d)\n",
			ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
			perf->test_peer->pidx);
	} else {
		pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
	}

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		pos += scnprintf(buf + pos, buf_size - pos,
			"Port %d (%d), Global index %d:\n",
			ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
			peer->gidx);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tLink status: %s\n",
			test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer addr 0x%pK\n", peer->outbuf);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buff phys addr %pa[p]\n", &peer->out_phys_addr);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer size %pa\n", &peer->outbuf_size);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);

		if (!peer->inbuf) {
			pos += scnprintf(buf + pos, buf_size - pos,
				"\tIn buffer addr: unallocated\n");
			continue;
		}

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer addr 0x%pK\n", peer->inbuf);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer size %pa\n", &peer->inbuf_size);

		pos += scnprintf(buf + pos, buf_size - pos,
			"\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
	}

	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
	kfree(buf);

	return ret;
}

static const struct file_operations perf_dbgfs_info = {
	.open = simple_open,
	.read = perf_dbgfs_read_info
};

static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
				   size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	ssize_t ret, pos = 0;
	char *buf;

	buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
	if (ret)
		goto err_free;

	ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
err_free:
	kfree(buf);

	return ret;
}

static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	struct perf_peer *peer;
	int pidx, ret;

	ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
	if (ret)
		return ret;

	if (pidx < 0 || pidx >= perf->pcnt)
		return -EINVAL;

	peer = &perf->peers[pidx];

	ret = perf_submit_test(peer);
	if (ret)
		return ret;

	return size;
}

static const struct file_operations perf_dbgfs_run = {
	.open = simple_open,
	.read = perf_dbgfs_read_run,
	.write = perf_dbgfs_write_run
};

static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
				    size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	char buf[8];
	ssize_t pos;

	pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);

	return simple_read_from_buffer(ubuf, size, offp, buf, pos);
}

static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
				     const char __user *ubuf,
				     size_t size, loff_t *offp)
{
	struct perf_ctx *perf = filep->private_data;
	int ret;
	u8 val;

	ret = kstrtou8_from_user(ubuf, size, 0, &val);
	if (ret)
		return ret;

	ret = perf_set_tcnt(perf, val);
	if (ret)
		return ret;

	return size;
}

static const struct file_operations perf_dbgfs_tcnt = {
	.open = simple_open,
	.read = perf_dbgfs_read_tcnt,
	.write = perf_dbgfs_write_tcnt
};

static void perf_setup_dbgfs(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;

	perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
	if (!perf->dbgfs_dir) {
		dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
		return;
	}

	debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_info);

	debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_run);

	debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
			    &perf_dbgfs_tcnt);

	/* They are made read-only for test exec safety and integrity */
	debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);

	debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);

	debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
}

static void perf_clear_dbgfs(struct perf_ctx *perf)
{
	debugfs_remove_recursive(perf->dbgfs_dir);
}

/*==============================================================================
 *                        Basic driver initialization
 *==============================================================================
 */

static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
{
	struct perf_ctx *perf;

	perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
	if (!perf)
		return ERR_PTR(-ENOMEM);

	perf->pcnt = ntb_peer_port_count(ntb);
	perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
				   GFP_KERNEL);
	if (!perf->peers)
		return ERR_PTR(-ENOMEM);

	perf->ntb = ntb;

	return perf;
}

static int perf_setup_peer_mw(struct perf_peer *peer)
{
	struct perf_ctx *perf = peer->perf;
	phys_addr_t phys_addr;
	int ret;

	/* Get outbound MW parameters and map it */
	ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
				   &peer->outbuf_size);
	if (ret)
		return ret;

	peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
				       peer->outbuf_size);
	if (!peer->outbuf)
		return -ENOMEM;

	peer->out_phys_addr = phys_addr;

	if (max_mw_size && peer->outbuf_size > max_mw_size) {
		peer->outbuf_size = max_mw_size;
		dev_warn(&peer->perf->ntb->dev,
			 "Peer %d outbuf reduced to %pa\n", peer->pidx,
			 &peer->outbuf_size);
	}

	return 0;
}

static int perf_init_peers(struct perf_ctx *perf)
{
	struct perf_peer *peer;
	int pidx, lport, ret;

	lport = ntb_port_number(perf->ntb);
	perf->gidx = -1;
	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		peer = &perf->peers[pidx];

		peer->perf = perf;
		peer->pidx = pidx;
		if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
			if (perf->gidx == -1)
				perf->gidx = pidx;
			peer->gidx = pidx + 1;
		} else {
			peer->gidx = pidx;
		}
		INIT_WORK(&peer->service, perf_service_work);
		init_completion(&peer->init_comp);
	}
	if (perf->gidx == -1)
		perf->gidx = pidx;

	/*
	 * Hardware with only two ports may not have unique port
	 * numbers. In this case, the gidxs should all be zero.
	 */
	if (perf->pcnt == 1 && ntb_port_number(perf->ntb) == 0 &&
	    ntb_peer_port_number(perf->ntb, 0) == 0) {
		perf->gidx = 0;
		perf->peers[0].gidx = 0;
	}

	for (pidx = 0; pidx < perf->pcnt; pidx++) {
		ret = perf_setup_peer_mw(&perf->peers[pidx]);
		if (ret)
			return ret;
	}

	dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);

	return 0;
}

static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf;
	int ret;

	perf = perf_create_data(ntb);
	if (IS_ERR(perf))
		return PTR_ERR(perf);

	ret = perf_init_peers(perf);
	if (ret)
		return ret;

	perf_init_threads(perf);

	ret = perf_init_service(perf);
	if (ret)
		return ret;

	ret = perf_enable_service(perf);
	if (ret)
		return ret;

	perf_setup_dbgfs(perf);

	return 0;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;

	perf_clear_dbgfs(perf);

	perf_disable_service(perf);

	perf_clear_threads(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove
	}
};

static int __init perf_init(void)
{
	int ret;

	if (chunk_order > MAX_CHUNK_ORDER) {
		chunk_order = MAX_CHUNK_ORDER;
		pr_info("Chunk order reduced to %hhu\n", chunk_order);
	}

	if (total_order < chunk_order) {
		total_order = chunk_order;
		pr_info("Total data order reduced to %hhu\n", total_order);
	}

	perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
	if (!perf_wq)
		return -ENOMEM;

	if (debugfs_initialized())
		perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);

	ret = ntb_register_client(&perf_client);
	if (ret) {
		debugfs_remove_recursive(perf_dbgfs_topdir);
		destroy_workqueue(perf_wq);
	}

	return ret;
}
module_init(perf_init);

static void __exit perf_exit(void)
{
	ntb_unregister_client(&perf_client);
	debugfs_remove_recursive(perf_dbgfs_topdir);
	destroy_workqueue(perf_wq);
}
module_exit(perf_exit);