1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) 12 13 static int __counter_set_mode(struct rdma_counter_mode *curr, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask) 16 { 17 if ((new_mode == RDMA_COUNTER_MODE_AUTO) && 18 ((new_mask & (~ALL_AUTO_MODE_MASKS)) || 19 (curr->mode != RDMA_COUNTER_MODE_NONE))) 20 return -EINVAL; 21 22 curr->mode = new_mode; 23 curr->mask = new_mask; 24 return 0; 25 } 26 27 /** 28 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 29 * 30 * When @on is true, the @mask must be set; When @on is false, it goes 31 * into manual mode if there's any counter, so that the user is able to 32 * manually access them. 33 */ 34 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, 35 bool on, enum rdma_nl_counter_mask mask) 36 { 37 struct rdma_port_counter *port_counter; 38 int ret; 39 40 port_counter = &dev->port_data[port].port_counter; 41 if (!port_counter->hstats) 42 return -EOPNOTSUPP; 43 44 mutex_lock(&port_counter->lock); 45 if (on) { 46 ret = __counter_set_mode(&port_counter->mode, 47 RDMA_COUNTER_MODE_AUTO, mask); 48 } else { 49 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { 50 ret = -EINVAL; 51 goto out; 52 } 53 54 if (port_counter->num_counters) 55 ret = __counter_set_mode(&port_counter->mode, 56 RDMA_COUNTER_MODE_MANUAL, 0); 57 else 58 ret = __counter_set_mode(&port_counter->mode, 59 RDMA_COUNTER_MODE_NONE, 0); 60 } 61 62 out: 63 mutex_unlock(&port_counter->lock); 64 return ret; 65 } 66 67 static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, 68 enum rdma_nl_counter_mode mode) 69 { 70 struct rdma_port_counter *port_counter; 71 struct rdma_counter *counter; 72 int ret; 73 74 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 75 return NULL; 76 77 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 78 if (!counter) 79 return NULL; 80 81 counter->device = dev; 82 counter->port = port; 83 counter->res.type = RDMA_RESTRACK_COUNTER; 84 counter->stats = dev->ops.counter_alloc_stats(counter); 85 if (!counter->stats) 86 goto err_stats; 87 88 port_counter = &dev->port_data[port].port_counter; 89 mutex_lock(&port_counter->lock); 90 if (mode == RDMA_COUNTER_MODE_MANUAL) { 91 ret = __counter_set_mode(&port_counter->mode, 92 RDMA_COUNTER_MODE_MANUAL, 0); 93 if (ret) 94 goto err_mode; 95 } 96 97 port_counter->num_counters++; 98 mutex_unlock(&port_counter->lock); 99 100 counter->mode.mode = mode; 101 kref_init(&counter->kref); 102 mutex_init(&counter->lock); 103 104 return counter; 105 106 err_mode: 107 mutex_unlock(&port_counter->lock); 108 kfree(counter->stats); 109 err_stats: 110 kfree(counter); 111 return NULL; 112 } 113 114 static void rdma_counter_free(struct rdma_counter *counter) 115 { 116 struct rdma_port_counter *port_counter; 117 118 port_counter = &counter->device->port_data[counter->port].port_counter; 119 mutex_lock(&port_counter->lock); 120 port_counter->num_counters--; 121 if (!port_counter->num_counters && 122 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 123 __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, 124 0); 125 126 mutex_unlock(&port_counter->lock); 127 128 rdma_restrack_del(&counter->res); 129 kfree(counter->stats); 130 kfree(counter); 131 } 132 133 static void auto_mode_init_counter(struct rdma_counter *counter, 134 const struct ib_qp *qp, 135 enum rdma_nl_counter_mask new_mask) 136 { 137 struct auto_mode_param *param = &counter->mode.param; 138 139 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 140 counter->mode.mask = new_mask; 141 142 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 143 param->qp_type = qp->qp_type; 144 } 145 146 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 147 enum rdma_nl_counter_mask auto_mask) 148 { 149 struct auto_mode_param *param = &counter->mode.param; 150 bool match = true; 151 152 /* 153 * Ensure that counter belongs to the right PID. This operation can 154 * race with user space which kills the process and leaves QP and 155 * counters orphans. 156 * 157 * It is not a big deal because exitted task will leave both QP and 158 * counter in the same bucket of zombie process. Just ensure that 159 * process is still alive before procedding. 160 * 161 */ 162 if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || 163 !task_pid_nr(qp->res.task)) 164 return false; 165 166 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 167 match &= (param->qp_type == qp->qp_type); 168 169 return match; 170 } 171 172 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 173 struct ib_qp *qp) 174 { 175 int ret; 176 177 if (qp->counter) 178 return -EINVAL; 179 180 if (!qp->device->ops.counter_bind_qp) 181 return -EOPNOTSUPP; 182 183 mutex_lock(&counter->lock); 184 ret = qp->device->ops.counter_bind_qp(counter, qp); 185 mutex_unlock(&counter->lock); 186 187 return ret; 188 } 189 190 static int __rdma_counter_unbind_qp(struct ib_qp *qp) 191 { 192 struct rdma_counter *counter = qp->counter; 193 int ret; 194 195 if (!qp->device->ops.counter_unbind_qp) 196 return -EOPNOTSUPP; 197 198 mutex_lock(&counter->lock); 199 ret = qp->device->ops.counter_unbind_qp(qp); 200 mutex_unlock(&counter->lock); 201 202 return ret; 203 } 204 205 static void counter_history_stat_update(struct rdma_counter *counter) 206 { 207 struct ib_device *dev = counter->device; 208 struct rdma_port_counter *port_counter; 209 int i; 210 211 port_counter = &dev->port_data[counter->port].port_counter; 212 if (!port_counter->hstats) 213 return; 214 215 rdma_counter_query_stats(counter); 216 217 for (i = 0; i < counter->stats->num_counters; i++) 218 port_counter->hstats->value[i] += counter->stats->value[i]; 219 } 220 221 /** 222 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 223 * with in auto mode 224 * 225 * Return: The counter (with ref-count increased) if found 226 */ 227 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 228 u8 port) 229 { 230 struct rdma_port_counter *port_counter; 231 struct rdma_counter *counter = NULL; 232 struct ib_device *dev = qp->device; 233 struct rdma_restrack_entry *res; 234 struct rdma_restrack_root *rt; 235 unsigned long id = 0; 236 237 port_counter = &dev->port_data[port].port_counter; 238 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 239 xa_lock(&rt->xa); 240 xa_for_each(&rt->xa, id, res) { 241 counter = container_of(res, struct rdma_counter, res); 242 if ((counter->device != qp->device) || (counter->port != port)) 243 goto next; 244 245 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 246 break; 247 next: 248 counter = NULL; 249 } 250 251 if (counter && !kref_get_unless_zero(&counter->kref)) 252 counter = NULL; 253 254 xa_unlock(&rt->xa); 255 return counter; 256 } 257 258 static void rdma_counter_res_add(struct rdma_counter *counter, 259 struct ib_qp *qp) 260 { 261 if (rdma_is_kernel_res(&qp->res)) { 262 rdma_restrack_set_task(&counter->res, qp->res.kern_name); 263 rdma_restrack_kadd(&counter->res); 264 } else { 265 rdma_restrack_attach_task(&counter->res, qp->res.task); 266 rdma_restrack_uadd(&counter->res); 267 } 268 } 269 270 static void counter_release(struct kref *kref) 271 { 272 struct rdma_counter *counter; 273 274 counter = container_of(kref, struct rdma_counter, kref); 275 counter_history_stat_update(counter); 276 counter->device->ops.counter_dealloc(counter); 277 rdma_counter_free(counter); 278 } 279 280 /** 281 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 282 * the auto-mode rule 283 */ 284 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) 285 { 286 struct rdma_port_counter *port_counter; 287 struct ib_device *dev = qp->device; 288 struct rdma_counter *counter; 289 int ret; 290 291 if (!qp->res.valid) 292 return 0; 293 294 if (!rdma_is_port_valid(dev, port)) 295 return -EINVAL; 296 297 port_counter = &dev->port_data[port].port_counter; 298 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 299 return 0; 300 301 counter = rdma_get_counter_auto_mode(qp, port); 302 if (counter) { 303 ret = __rdma_counter_bind_qp(counter, qp); 304 if (ret) { 305 kref_put(&counter->kref, counter_release); 306 return ret; 307 } 308 } else { 309 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); 310 if (!counter) 311 return -ENOMEM; 312 313 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 314 315 ret = __rdma_counter_bind_qp(counter, qp); 316 if (ret) { 317 rdma_counter_free(counter); 318 return ret; 319 } 320 321 rdma_counter_res_add(counter, qp); 322 } 323 324 return 0; 325 } 326 327 /** 328 * rdma_counter_unbind_qp - Unbind a qp from a counter 329 * @force: 330 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 331 */ 332 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 333 { 334 struct rdma_counter *counter = qp->counter; 335 int ret; 336 337 if (!counter) 338 return -EINVAL; 339 340 ret = __rdma_counter_unbind_qp(qp); 341 if (ret && !force) 342 return ret; 343 344 kref_put(&counter->kref, counter_release); 345 return 0; 346 } 347 348 int rdma_counter_query_stats(struct rdma_counter *counter) 349 { 350 struct ib_device *dev = counter->device; 351 int ret; 352 353 if (!dev->ops.counter_update_stats) 354 return -EINVAL; 355 356 mutex_lock(&counter->lock); 357 ret = dev->ops.counter_update_stats(counter); 358 mutex_unlock(&counter->lock); 359 360 return ret; 361 } 362 363 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 364 u8 port, u32 index) 365 { 366 struct rdma_restrack_entry *res; 367 struct rdma_restrack_root *rt; 368 struct rdma_counter *counter; 369 unsigned long id = 0; 370 u64 sum = 0; 371 372 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 373 xa_lock(&rt->xa); 374 xa_for_each(&rt->xa, id, res) { 375 if (!rdma_restrack_get(res)) 376 continue; 377 378 xa_unlock(&rt->xa); 379 380 counter = container_of(res, struct rdma_counter, res); 381 if ((counter->device != dev) || (counter->port != port) || 382 rdma_counter_query_stats(counter)) 383 goto next; 384 385 sum += counter->stats->value[index]; 386 387 next: 388 xa_lock(&rt->xa); 389 rdma_restrack_put(res); 390 } 391 392 xa_unlock(&rt->xa); 393 return sum; 394 } 395 396 /** 397 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 398 * specific port, including the running ones and history data 399 */ 400 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) 401 { 402 struct rdma_port_counter *port_counter; 403 u64 sum; 404 405 port_counter = &dev->port_data[port].port_counter; 406 if (!port_counter->hstats) 407 return 0; 408 409 sum = get_running_counters_hwstat_sum(dev, port, index); 410 sum += port_counter->hstats->value[index]; 411 412 return sum; 413 } 414 415 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 416 { 417 struct rdma_restrack_entry *res = NULL; 418 struct ib_qp *qp = NULL; 419 420 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 421 if (IS_ERR(res)) 422 return NULL; 423 424 qp = container_of(res, struct ib_qp, res); 425 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 426 goto err; 427 428 return qp; 429 430 err: 431 rdma_restrack_put(res); 432 return NULL; 433 } 434 435 static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, 436 struct ib_qp *qp) 437 { 438 if ((counter->device != qp->device) || (counter->port != qp->port)) 439 return -EINVAL; 440 441 return __rdma_counter_bind_qp(counter, qp); 442 } 443 444 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 445 u32 counter_id) 446 { 447 struct rdma_restrack_entry *res; 448 struct rdma_counter *counter; 449 450 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 451 if (IS_ERR(res)) 452 return NULL; 453 454 counter = container_of(res, struct rdma_counter, res); 455 kref_get(&counter->kref); 456 rdma_restrack_put(res); 457 458 return counter; 459 } 460 461 /** 462 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 463 */ 464 int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, 465 u32 qp_num, u32 counter_id) 466 { 467 struct rdma_port_counter *port_counter; 468 struct rdma_counter *counter; 469 struct ib_qp *qp; 470 int ret; 471 472 port_counter = &dev->port_data[port].port_counter; 473 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 474 return -EINVAL; 475 476 qp = rdma_counter_get_qp(dev, qp_num); 477 if (!qp) 478 return -ENOENT; 479 480 counter = rdma_get_counter_by_id(dev, counter_id); 481 if (!counter) { 482 ret = -ENOENT; 483 goto err; 484 } 485 486 if (counter->res.task != qp->res.task) { 487 ret = -EINVAL; 488 goto err_task; 489 } 490 491 ret = rdma_counter_bind_qp_manual(counter, qp); 492 if (ret) 493 goto err_task; 494 495 rdma_restrack_put(&qp->res); 496 return 0; 497 498 err_task: 499 kref_put(&counter->kref, counter_release); 500 err: 501 rdma_restrack_put(&qp->res); 502 return ret; 503 } 504 505 /** 506 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 507 * The id of new counter is returned in @counter_id 508 */ 509 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, 510 u32 qp_num, u32 *counter_id) 511 { 512 struct rdma_port_counter *port_counter; 513 struct rdma_counter *counter; 514 struct ib_qp *qp; 515 int ret; 516 517 if (!rdma_is_port_valid(dev, port)) 518 return -EINVAL; 519 520 port_counter = &dev->port_data[port].port_counter; 521 if (!port_counter->hstats) 522 return -EOPNOTSUPP; 523 524 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 525 return -EINVAL; 526 527 qp = rdma_counter_get_qp(dev, qp_num); 528 if (!qp) 529 return -ENOENT; 530 531 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 532 ret = -EINVAL; 533 goto err; 534 } 535 536 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); 537 if (!counter) { 538 ret = -ENOMEM; 539 goto err; 540 } 541 542 ret = rdma_counter_bind_qp_manual(counter, qp); 543 if (ret) 544 goto err_bind; 545 546 if (counter_id) 547 *counter_id = counter->id; 548 549 rdma_counter_res_add(counter, qp); 550 551 rdma_restrack_put(&qp->res); 552 return ret; 553 554 err_bind: 555 rdma_counter_free(counter); 556 err: 557 rdma_restrack_put(&qp->res); 558 return ret; 559 } 560 561 /** 562 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 563 */ 564 int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, 565 u32 qp_num, u32 counter_id) 566 { 567 struct rdma_port_counter *port_counter; 568 struct ib_qp *qp; 569 int ret; 570 571 if (!rdma_is_port_valid(dev, port)) 572 return -EINVAL; 573 574 qp = rdma_counter_get_qp(dev, qp_num); 575 if (!qp) 576 return -ENOENT; 577 578 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 579 ret = -EINVAL; 580 goto out; 581 } 582 583 port_counter = &dev->port_data[port].port_counter; 584 if (!qp->counter || qp->counter->id != counter_id || 585 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 586 ret = -EINVAL; 587 goto out; 588 } 589 590 ret = rdma_counter_unbind_qp(qp, false); 591 592 out: 593 rdma_restrack_put(&qp->res); 594 return ret; 595 } 596 597 int rdma_counter_get_mode(struct ib_device *dev, u8 port, 598 enum rdma_nl_counter_mode *mode, 599 enum rdma_nl_counter_mask *mask) 600 { 601 struct rdma_port_counter *port_counter; 602 603 port_counter = &dev->port_data[port].port_counter; 604 *mode = port_counter->mode.mode; 605 *mask = port_counter->mode.mask; 606 607 return 0; 608 } 609 610 void rdma_counter_init(struct ib_device *dev) 611 { 612 struct rdma_port_counter *port_counter; 613 u32 port, i; 614 615 if (!dev->port_data) 616 return; 617 618 rdma_for_each_port(dev, port) { 619 port_counter = &dev->port_data[port].port_counter; 620 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 621 mutex_init(&port_counter->lock); 622 623 if (!dev->ops.alloc_hw_stats) 624 continue; 625 626 port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); 627 if (!port_counter->hstats) 628 goto fail; 629 } 630 631 return; 632 633 fail: 634 for (i = port; i >= rdma_start_port(dev); i--) { 635 port_counter = &dev->port_data[port].port_counter; 636 kfree(port_counter->hstats); 637 port_counter->hstats = NULL; 638 mutex_destroy(&port_counter->lock); 639 } 640 } 641 642 void rdma_counter_release(struct ib_device *dev) 643 { 644 struct rdma_port_counter *port_counter; 645 u32 port; 646 647 rdma_for_each_port(dev, port) { 648 port_counter = &dev->port_data[port].port_counter; 649 kfree(port_counter->hstats); 650 mutex_destroy(&port_counter->lock); 651 } 652 } 653