// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)

static int __counter_set_mode(struct rdma_counter_mode *curr,
			      enum rdma_nl_counter_mode new_mode,
			      enum rdma_nl_counter_mask new_mask)
{
	if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
	    ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
	     (curr->mode != RDMA_COUNTER_MODE_NONE)))
		return -EINVAL;

	curr->mode = new_mode;
	curr->mask = new_mask;
	return 0;
}

/**
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 *
 * When @on is true, @mask must be set; when @on is false, the port falls
 * back to manual mode if any counters are still allocated, so that the
 * user can keep accessing them manually.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
			       bool on, enum rdma_nl_counter_mask mask)
{
	struct rdma_port_counter *port_counter;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (on) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_AUTO, mask);
	} else {
		if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
			ret = -EINVAL;
			goto out;
		}

		if (port_counter->num_counters)
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_MANUAL, 0);
		else
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_NONE, 0);
	}

out:
	mutex_unlock(&port_counter->lock);
	return ret;
}

static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
					       enum rdma_nl_counter_mode mode)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	int ret;

	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
		return NULL;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return NULL;

	counter->device = dev;
	counter->port = port;

	rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
	counter->stats = dev->ops.counter_alloc_stats(counter);
	if (!counter->stats)
		goto err_stats;

	port_counter = &dev->port_data[port].port_counter;
	mutex_lock(&port_counter->lock);
	if (mode == RDMA_COUNTER_MODE_MANUAL) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_MANUAL, 0);
		if (ret)
			goto err_mode;
	}

	port_counter->num_counters++;
	mutex_unlock(&port_counter->lock);

	counter->mode.mode = mode;
	kref_init(&counter->kref);
	mutex_init(&counter->lock);

	return counter;

err_mode:
	mutex_unlock(&port_counter->lock);
	kfree(counter->stats);
err_stats:
	rdma_restrack_put(&counter->res);
	kfree(counter);
	return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
				   0);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	kfree(counter->stats);
	kfree(counter);
}
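
/*
 * Illustrative sketch, not part of the kernel build (excluded via #if 0;
 * lift it into its own file to compile and run): a standalone userspace
 * model of the __counter_set_mode() transition rules above, so the mode
 * state machine can be exercised in isolation.  All names here (set_mode,
 * MASK_*, struct counter_mode) are simplified stand-ins, not kernel APIs.
 */
#if 0
#include <assert.h>
#include <errno.h>
#include <stdio.h>

enum mode { MODE_NONE, MODE_AUTO, MODE_MANUAL };

#define MASK_QP_TYPE	0x1
#define MASK_PID	0x2
#define ALL_AUTO_MASKS	(MASK_QP_TYPE | MASK_PID)

struct counter_mode {
	enum mode mode;
	unsigned int mask;
};

/* Mirrors __counter_set_mode(): AUTO is reachable only from NONE, and only
 * with a mask limited to the supported auto-mode criteria. */
static int set_mode(struct counter_mode *curr, enum mode new_mode,
		    unsigned int new_mask)
{
	if (new_mode == MODE_AUTO &&
	    ((new_mask & ~ALL_AUTO_MASKS) || curr->mode != MODE_NONE))
		return -EINVAL;

	curr->mode = new_mode;
	curr->mask = new_mask;
	return 0;
}

int main(void)
{
	struct counter_mode m = { MODE_NONE, 0 };

	assert(set_mode(&m, MODE_AUTO, MASK_PID) == 0);		/* NONE -> AUTO */
	assert(set_mode(&m, MODE_AUTO, MASK_PID) == -EINVAL);	/* AUTO -> AUTO rejected */
	assert(set_mode(&m, MODE_MANUAL, 0) == 0);		/* leaving AUTO is unrestricted */
	m.mode = MODE_NONE;
	assert(set_mode(&m, MODE_AUTO, 0x8) == -EINVAL);	/* unknown mask bit */
	printf("mode transitions match __counter_set_mode()\n");
	return 0;
}
#endif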

static void auto_mode_init_counter(struct rdma_counter *counter,
				   const struct ib_qp *qp,
				   enum rdma_nl_counter_mask new_mask)
{
	struct auto_mode_param *param = &counter->mode.param;

	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
	counter->mode.mask = new_mask;

	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
		param->qp_type = qp->qp_type;
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
			    enum rdma_nl_counter_mask auto_mask)
{
	struct auto_mode_param *param = &counter->mode.param;
	bool match = true;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (param->qp_type == qp->qp_type);

	if (auto_mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
				  struct ib_qp *qp)
{
	int ret;

	if (qp->counter)
		return -EINVAL;

	if (!qp->device->ops.counter_bind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_bind_qp(counter, qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!qp->device->ops.counter_unbind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_unbind_qp(qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static void counter_history_stat_update(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	rdma_counter_query_stats(counter);

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}

/**
 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
 *   to in auto mode
 *
 * Return: The counter (with ref-count increased) if found
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u8 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}

static void rdma_counter_res_add(struct rdma_counter *counter,
				 struct ib_qp *qp)
{
	rdma_restrack_parent_name(&counter->res, &qp->res);
	rdma_restrack_add(&counter->res);
}

static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}
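
/*
 * Illustrative sketch, not part of the kernel build (excluded via #if 0):
 * a userspace model of auto_mode_match() above.  It shows how the auto-mode
 * mask selects which attributes (QP type, owning PID) a QP and a counter
 * must share before the QP may reuse that counter.  struct model_counter,
 * struct model_qp and model_match() are simplified stand-ins for the kernel
 * structures, not real APIs.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define MASK_QP_TYPE	0x1
#define MASK_PID	0x2

struct model_counter { int qp_type; int pid; };
struct model_qp      { int qp_type; int pid; };

/* Mirrors auto_mode_match(): every criterion enabled in the mask must
 * match; criteria outside the mask are ignored. */
static bool model_match(const struct model_qp *qp,
			const struct model_counter *counter,
			unsigned int auto_mask)
{
	bool match = true;

	if (auto_mask & MASK_QP_TYPE)
		match &= (counter->qp_type == qp->qp_type);
	if (auto_mask & MASK_PID)
		match &= (counter->pid == qp->pid);

	return match;
}

int main(void)
{
	struct model_counter c = { .qp_type = 2, .pid = 1000 };
	struct model_qp same = { .qp_type = 2, .pid = 1000 };
	struct model_qp other_pid = { .qp_type = 2, .pid = 2000 };

	printf("type+pid, same owner:   %d\n",
	       model_match(&same, &c, MASK_QP_TYPE | MASK_PID));	/* 1 */
	printf("type+pid, other owner:  %d\n",
	       model_match(&other_pid, &c, MASK_QP_TYPE | MASK_PID));	/* 0 */
	printf("type only, other owner: %d\n",
	       model_match(&other_pid, &c, MASK_QP_TYPE));		/* 1 */
	return 0;
}
#endif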

/**
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 *   the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!qp->res.valid || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
		if (!counter)
			return -ENOMEM;

		auto_mode_init_counter(counter, qp, port_counter->mode.mask);

		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			rdma_counter_free(counter);
			return ret;
		}

		rdma_counter_res_add(counter, qp);
	}

	return 0;
}

/**
 * rdma_counter_unbind_qp - Unbind a qp from a counter
 * @force:
 *   true - Decrease the counter ref-count anyway (e.g., qp destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!counter)
		return -EINVAL;

	ret = __rdma_counter_unbind_qp(qp);
	if (ret && !force)
		return ret;

	kref_put(&counter->kref, counter_release);
	return 0;
}

int rdma_counter_query_stats(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	int ret;

	if (!dev->ops.counter_update_stats)
		return -EINVAL;

	mutex_lock(&counter->lock);
	ret = dev->ops.counter_update_stats(counter);
	mutex_unlock(&counter->lock);

	return ret;
}

static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u8 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}

/**
 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
 *   specific port, including the running ones and history data
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
{
	struct rdma_port_counter *port_counter;
	u64 sum;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return 0;

	sum = get_running_counters_hwstat_sum(dev, port, index);
	sum += port_counter->hstats->value[index];

	return sum;
}

static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
	struct rdma_restrack_entry *res = NULL;
	struct ib_qp *qp = NULL;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
	if (IS_ERR(res))
		return NULL;

	qp = container_of(res, struct ib_qp, res);
	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
		goto err;

	return qp;

err:
	rdma_restrack_put(res);
	return NULL;
}
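
/*
 * Illustrative sketch, not part of the kernel build (excluded via #if 0):
 * the accounting model behind rdma_counter_get_hwstat_value() above.  A
 * per-port history array absorbs the final values of released counters
 * (see counter_history_stat_update()), so the reported value is the history
 * plus the sum of all still-running counters.  struct model_port and the
 * array sizes/values are made up for illustration.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define NUM_STATS 2

struct model_port {
	uint64_t history[NUM_STATS];	/* accumulated from freed counters */
	uint64_t running[4][NUM_STATS];	/* live per-counter stats */
	int num_running;
};

/* Mirrors rdma_counter_get_hwstat_value(): history + running counters. */
static uint64_t port_hwstat_value(const struct model_port *p, int index)
{
	uint64_t sum = p->history[index];
	int i;

	for (i = 0; i < p->num_running; i++)
		sum += p->running[i][index];
	return sum;
}

/* Mirrors counter_history_stat_update(): fold a dying counter's totals
 * into the port history before the counter disappears. */
static void retire_counter(struct model_port *p, int slot)
{
	int i;

	for (i = 0; i < NUM_STATS; i++) {
		p->history[i] += p->running[slot][i];
		p->running[slot][i] = 0;
	}
}

int main(void)
{
	struct model_port port = {
		.running = { { 10, 1 }, { 5, 2 } },
		.num_running = 2,
	};

	printf("stat[0] = %llu\n",
	       (unsigned long long)port_hwstat_value(&port, 0));	/* 15 */
	retire_counter(&port, 0);
	printf("stat[0] after retire = %llu\n",
	       (unsigned long long)port_hwstat_value(&port, 0));	/* still 15 */
	return 0;
}
#endif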

static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
				       struct ib_qp *qp)
{
	if ((counter->device != qp->device) || (counter->port != qp->port))
		return -EINVAL;

	return __rdma_counter_bind_qp(counter, qp);
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
						   u32 counter_id)
{
	struct rdma_restrack_entry *res;
	struct rdma_counter *counter;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
	if (IS_ERR(res))
		return NULL;

	counter = container_of(res, struct rdma_counter, res);
	kref_get(&counter->kref);
	rdma_restrack_put(res);

	return counter;
}

/**
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

/**
 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
 *   The id of the new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_bind;

	if (counter_id)
		*counter_id = counter->id;

	rdma_counter_res_add(counter, qp);

	rdma_restrack_put(&qp->res);
	return ret;

err_bind:
	rdma_counter_free(counter);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}
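
/*
 * Illustrative sketch, not part of the kernel build (excluded via #if 0):
 * the validation sequence of rdma_counter_bind_qpn() above, modeled in
 * plain C.  A manual bind is refused while the port is in auto mode, when
 * counter and QP live on different devices/ports, or when one resource is
 * kernel-owned and the other user-owned.  struct model_res and
 * model_bind_qpn() are hypothetical stand-ins, not kernel APIs.
 */
#if 0
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum mode { MODE_NONE, MODE_AUTO, MODE_MANUAL };

struct model_res { int device; int port; bool kernel_owned; };

static int model_bind_qpn(enum mode port_mode, const struct model_res *counter,
			  const struct model_res *qp)
{
	if (port_mode == MODE_AUTO)		/* no manual bind in auto mode */
		return -EINVAL;
	if (counter->device != qp->device ||	/* rdma_counter_bind_qp_manual() */
	    counter->port != qp->port)
		return -EINVAL;
	if (counter->kernel_owned != qp->kernel_owned)
		return -EINVAL;			/* no mixing kernel/user resources */
	return 0;
}

int main(void)
{
	struct model_res counter = { .device = 0, .port = 1 };
	struct model_res qp = { .device = 0, .port = 1 };

	printf("manual mode bind: %d\n",
	       model_bind_qpn(MODE_MANUAL, &counter, &qp));	/* 0 */
	printf("auto mode bind:   %d\n",
	       model_bind_qpn(MODE_AUTO, &counter, &qp));	/* -EINVAL */
	qp.port = 2;
	printf("port mismatch:    %d\n",
	       model_bind_qpn(MODE_MANUAL, &counter, &qp));	/* -EINVAL */
	return 0;
}
#endif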

/**
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}

int rdma_counter_get_mode(struct ib_device *dev, u8 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask)
{
	struct rdma_port_counter *port_counter;

	port_counter = &dev->port_data[port].port_counter;
	*mode = port_counter->mode.mode;
	*mask = port_counter->mode.mask;

	return 0;
}

void rdma_counter_init(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port, i;

	if (!dev->port_data)
		return;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
		mutex_init(&port_counter->lock);

		if (!dev->ops.alloc_hw_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	for (i = port; i >= rdma_start_port(dev); i--) {
		port_counter = &dev->port_data[i].port_counter;
		kfree(port_counter->hstats);
		port_counter->hstats = NULL;
		mutex_destroy(&port_counter->lock);
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		kfree(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}
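
/*
 * Illustrative sketch, not part of the kernel build (excluded via #if 0):
 * the reference-count lifecycle that ties the functions in this file
 * together.  A counter starts with one reference (kref_init() in
 * rdma_counter_alloc()), which covers the first bound QP; each auto-mode
 * reuse takes another (kref_get_unless_zero()); every unbind drops one
 * (kref_put()), and the final put runs counter_release(), which folds the
 * stats into the port history and frees the counter.  struct model_counter
 * here replaces struct kref with a plain int for illustration only.
 */
#if 0
#include <stdio.h>

struct model_counter { int refs; int released; };

static void model_get(struct model_counter *c)
{
	c->refs++;
}

static void model_put(struct model_counter *c)
{
	if (--c->refs == 0)
		c->released = 1;	/* counter_release(): history + free */
}

int main(void)
{
	struct model_counter c = { .refs = 1 };	/* alloc covers the first bind */

	model_get(&c);	/* second QP matched in auto mode */
	model_put(&c);	/* first QP destroyed */
	printf("after one unbind:  refs=%d released=%d\n", c.refs, c.released);
	model_put(&c);	/* last QP destroyed */
	printf("after last unbind: refs=%d released=%d\n", c.refs, c.released);
	return 0;
}
#endif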