// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)

static int __counter_set_mode(struct rdma_counter_mode *curr,
			      enum rdma_nl_counter_mode new_mode,
			      enum rdma_nl_counter_mask new_mask)
{
	if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
	    ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
	     (curr->mode != RDMA_COUNTER_MODE_NONE)))
		return -EINVAL;

	curr->mode = new_mode;
	curr->mask = new_mask;
	return 0;
}

/**
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 *
 * When @on is true, the @mask must be set; when @on is false, it goes
 * into manual mode if there's any counter, so that the user is able to
 * manually access them.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
			       bool on, enum rdma_nl_counter_mask mask)
{
	struct rdma_port_counter *port_counter;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (on) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_AUTO, mask);
	} else {
		if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
			ret = -EINVAL;
			goto out;
		}

		if (port_counter->num_counters)
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_MANUAL, 0);
		else
			ret = __counter_set_mode(&port_counter->mode,
						 RDMA_COUNTER_MODE_NONE, 0);
	}

out:
	mutex_unlock(&port_counter->lock);
	return ret;
}

static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
					       enum rdma_nl_counter_mode mode)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	int ret;

	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
		return NULL;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return NULL;

	counter->device = dev;
	counter->port = port;
	counter->res.type = RDMA_RESTRACK_COUNTER;
	counter->stats = dev->ops.counter_alloc_stats(counter);
	if (!counter->stats)
		goto err_stats;

	port_counter = &dev->port_data[port].port_counter;
	mutex_lock(&port_counter->lock);
	if (mode == RDMA_COUNTER_MODE_MANUAL) {
		ret = __counter_set_mode(&port_counter->mode,
					 RDMA_COUNTER_MODE_MANUAL, 0);
		if (ret)
			goto err_mode;
	}

	port_counter->num_counters++;
	mutex_unlock(&port_counter->lock);

	counter->mode.mode = mode;
	kref_init(&counter->kref);
	mutex_init(&counter->lock);

	return counter;

err_mode:
	mutex_unlock(&port_counter->lock);
	kfree(counter->stats);
err_stats:
	kfree(counter);
	return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
				   0);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	kfree(counter->stats);
	kfree(counter);
}
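
/*
 * Illustrative sketch only (not part of the original file): how a provider
 * driver is expected to plug into the counter machinery above.  The
 * .alloc_hw_stats and .counter_* members are the real ib_device_ops hooks
 * consumed by this file (see rdma_counter_alloc(), __rdma_counter_bind_qp()
 * and rdma_counter_query_stats()); everything prefixed with "foo_" is a
 * hypothetical driver implementation, shown only to make the contract
 * explicit.
 */
#if 0	/* example only, never compiled */
static const struct ib_device_ops foo_counter_ops = {
	/* per-port HW stats buffer, backs port_counter->hstats */
	.alloc_hw_stats = foo_alloc_hw_stats,
	/* per-counter stats buffer, consumed by rdma_counter_alloc() */
	.counter_alloc_stats = foo_counter_alloc_stats,
	.counter_dealloc = foo_counter_dealloc,
	/* QP <-> counter association, consumed by __rdma_counter_bind_qp() */
	.counter_bind_qp = foo_counter_bind_qp,
	.counter_unbind_qp = foo_counter_unbind_qp,
	/* refresh counter->stats, consumed by rdma_counter_query_stats() */
	.counter_update_stats = foo_counter_update_stats,
};

/* registered once at device setup time, e.g. from the driver's probe path */
ib_set_device_ops(&foo_dev->ib_dev, &foo_counter_ops);
#endif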

static void auto_mode_init_counter(struct rdma_counter *counter,
				   const struct ib_qp *qp,
				   enum rdma_nl_counter_mask new_mask)
{
	struct auto_mode_param *param = &counter->mode.param;

	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
	counter->mode.mask = new_mask;

	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
		param->qp_type = qp->qp_type;
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
			    enum rdma_nl_counter_mask auto_mask)
{
	struct auto_mode_param *param = &counter->mode.param;
	bool match = true;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (param->qp_type == qp->qp_type);

	if (auto_mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
				  struct ib_qp *qp)
{
	int ret;

	if (qp->counter)
		return -EINVAL;

	if (!qp->device->ops.counter_bind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_bind_qp(counter, qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!qp->device->ops.counter_unbind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_unbind_qp(qp);
	mutex_unlock(&counter->lock);

	return ret;
}

static void counter_history_stat_update(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	rdma_counter_query_stats(counter);

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}

/**
 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
 *   with in auto mode
 *
 * Return: The counter (with ref-count increased) if found
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u8 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}

static void rdma_counter_res_add(struct rdma_counter *counter,
				 struct ib_qp *qp)
{
	if (rdma_is_kernel_res(&qp->res)) {
		rdma_restrack_set_task(&counter->res, qp->res.kern_name);
		rdma_restrack_kadd(&counter->res);
	} else {
		rdma_restrack_attach_task(&counter->res, qp->res.task);
		rdma_restrack_uadd(&counter->res);
	}
}

static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}

/**
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 *   the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!qp->res.valid || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
		if (!counter)
			return -ENOMEM;

		auto_mode_init_counter(counter, qp, port_counter->mode.mask);

		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			rdma_counter_free(counter);
			return ret;
		}

		rdma_counter_res_add(counter, qp);
	}

	return 0;
}

/**
 * rdma_counter_unbind_qp - Unbind a qp from a counter
 * @force:
 *   true - Decrease the counter ref-count anyway (e.g., qp destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!counter)
		return -EINVAL;

	ret = __rdma_counter_unbind_qp(qp);
	if (ret && !force)
		return ret;

	kref_put(&counter->kref, counter_release);
	return 0;
}

int rdma_counter_query_stats(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	int ret;

	if (!dev->ops.counter_update_stats)
		return -EINVAL;

	mutex_lock(&counter->lock);
	ret = dev->ops.counter_update_stats(counter);
	mutex_unlock(&counter->lock);

	return ret;
}

static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u8 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}

/**
 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
 *   specific port, including the running ones and history data
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
{
	struct rdma_port_counter *port_counter;
	u64 sum;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return 0;

	sum = get_running_counters_hwstat_sum(dev, port, index);
	sum += port_counter->hstats->value[index];

	return sum;
}

static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
	struct rdma_restrack_entry *res = NULL;
	struct ib_qp *qp = NULL;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
	if (IS_ERR(res))
		return NULL;

	qp = container_of(res, struct ib_qp, res);
	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
		goto err;

	return qp;

err:
	rdma_restrack_put(res);
	return NULL;
}

static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
				       struct ib_qp *qp)
{
	if ((counter->device != qp->device) || (counter->port != qp->port))
		return -EINVAL;

	return __rdma_counter_bind_qp(counter, qp);
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
						   u32 counter_id)
{
	struct rdma_restrack_entry *res;
	struct rdma_counter *counter;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
	if (IS_ERR(res))
		return NULL;

	counter = container_of(res, struct rdma_counter, res);
	kref_get(&counter->kref);
	rdma_restrack_put(res);

	return counter;
}

/**
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

/**
 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
 *   The id of the new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	ret = rdma_counter_bind_qp_manual(counter, qp);
	if (ret)
		goto err_bind;

	if (counter_id)
		*counter_id = counter->id;

	rdma_counter_res_add(counter, qp);

	rdma_restrack_put(&qp->res);
	return ret;

err_bind:
	rdma_counter_free(counter);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

/**
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}

int rdma_counter_get_mode(struct ib_device *dev, u8 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask)
{
	struct rdma_port_counter *port_counter;

	port_counter = &dev->port_data[port].port_counter;
	*mode = port_counter->mode.mode;
	*mask = port_counter->mode.mask;

	return 0;
}

void rdma_counter_init(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port, i;

	if (!dev->port_data)
		return;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
		mutex_init(&port_counter->lock);

		if (!dev->ops.alloc_hw_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	for (i = port; i >= rdma_start_port(dev); i--) {
		port_counter = &dev->port_data[i].port_counter;
		kfree(port_counter->hstats);
		port_counter->hstats = NULL;
		mutex_destroy(&port_counter->lock);
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		kfree(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}
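
/*
 * Illustrative sketch only (not part of the original file): a possible shape
 * for the provider-side stats allocation hooks consumed above.  The "foo_"
 * names and the counter name table are hypothetical; rdma_alloc_hw_stats_struct()
 * and RDMA_HW_STATS_DEFAULT_LIFESPAN are the core helpers a driver would
 * typically use for this.
 */
#if 0	/* example only, never compiled */
static const char * const foo_counter_names[] = {
	"rx_write_requests",
	"rx_read_requests",
	"out_of_sequence",
};

/* Backs port_counter->hstats, summed by rdma_counter_get_hwstat_value() */
static struct rdma_hw_stats *foo_alloc_hw_stats(struct ib_device *ibdev,
						u8 port_num)
{
	return rdma_alloc_hw_stats_struct(foo_counter_names,
					  ARRAY_SIZE(foo_counter_names),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

/* Backs counter->stats, refreshed via rdma_counter_query_stats() */
static struct rdma_hw_stats *
foo_counter_alloc_stats(struct rdma_counter *counter)
{
	return rdma_alloc_hw_stats_struct(foo_counter_names,
					  ARRAY_SIZE(foo_counter_names),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
#endif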