1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) 12 13 static int __counter_set_mode(struct rdma_counter_mode *curr, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask) 16 { 17 if ((new_mode == RDMA_COUNTER_MODE_AUTO) && 18 ((new_mask & (~ALL_AUTO_MODE_MASKS)) || 19 (curr->mode != RDMA_COUNTER_MODE_NONE))) 20 return -EINVAL; 21 22 curr->mode = new_mode; 23 curr->mask = new_mask; 24 return 0; 25 } 26 27 /** 28 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 29 * 30 * When @on is true, the @mask must be set; When @on is false, it goes 31 * into manual mode if there's any counter, so that the user is able to 32 * manually access them. 33 */ 34 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, 35 bool on, enum rdma_nl_counter_mask mask) 36 { 37 struct rdma_port_counter *port_counter; 38 int ret; 39 40 port_counter = &dev->port_data[port].port_counter; 41 if (!port_counter->hstats) 42 return -EOPNOTSUPP; 43 44 mutex_lock(&port_counter->lock); 45 if (on) { 46 ret = __counter_set_mode(&port_counter->mode, 47 RDMA_COUNTER_MODE_AUTO, mask); 48 } else { 49 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { 50 ret = -EINVAL; 51 goto out; 52 } 53 54 if (port_counter->num_counters) 55 ret = __counter_set_mode(&port_counter->mode, 56 RDMA_COUNTER_MODE_MANUAL, 0); 57 else 58 ret = __counter_set_mode(&port_counter->mode, 59 RDMA_COUNTER_MODE_NONE, 0); 60 } 61 62 out: 63 mutex_unlock(&port_counter->lock); 64 return ret; 65 } 66 67 static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, 68 enum rdma_nl_counter_mode mode) 69 { 70 struct rdma_port_counter *port_counter; 71 struct rdma_counter *counter; 72 int ret; 73 74 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 75 return NULL; 76 77 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 78 if (!counter) 79 return NULL; 80 81 counter->device = dev; 82 counter->port = port; 83 counter->res.type = RDMA_RESTRACK_COUNTER; 84 counter->stats = dev->ops.counter_alloc_stats(counter); 85 if (!counter->stats) 86 goto err_stats; 87 88 port_counter = &dev->port_data[port].port_counter; 89 mutex_lock(&port_counter->lock); 90 if (mode == RDMA_COUNTER_MODE_MANUAL) { 91 ret = __counter_set_mode(&port_counter->mode, 92 RDMA_COUNTER_MODE_MANUAL, 0); 93 if (ret) 94 goto err_mode; 95 } 96 97 port_counter->num_counters++; 98 mutex_unlock(&port_counter->lock); 99 100 counter->mode.mode = mode; 101 kref_init(&counter->kref); 102 mutex_init(&counter->lock); 103 104 return counter; 105 106 err_mode: 107 mutex_unlock(&port_counter->lock); 108 kfree(counter->stats); 109 err_stats: 110 kfree(counter); 111 return NULL; 112 } 113 114 static void rdma_counter_free(struct rdma_counter *counter) 115 { 116 struct rdma_port_counter *port_counter; 117 118 port_counter = &counter->device->port_data[counter->port].port_counter; 119 mutex_lock(&port_counter->lock); 120 port_counter->num_counters--; 121 if (!port_counter->num_counters && 122 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 123 __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, 124 0); 125 126 mutex_unlock(&port_counter->lock); 127 128 rdma_restrack_del(&counter->res); 129 kfree(counter->stats); 130 kfree(counter); 131 } 132 133 static void auto_mode_init_counter(struct rdma_counter *counter, 134 const struct ib_qp *qp, 135 enum rdma_nl_counter_mask new_mask) 136 { 137 struct auto_mode_param *param = &counter->mode.param; 138 139 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 140 counter->mode.mask = new_mask; 141 142 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 143 param->qp_type = qp->qp_type; 144 } 145 146 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 147 enum rdma_nl_counter_mask auto_mask) 148 { 149 struct auto_mode_param *param = &counter->mode.param; 150 bool match = true; 151 152 if (!rdma_is_visible_in_pid_ns(&qp->res)) 153 return false; 154 155 /* Ensure that counter belongs to the right PID */ 156 if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) 157 return false; 158 159 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 160 match &= (param->qp_type == qp->qp_type); 161 162 return match; 163 } 164 165 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 166 struct ib_qp *qp) 167 { 168 int ret; 169 170 if (qp->counter) 171 return -EINVAL; 172 173 if (!qp->device->ops.counter_bind_qp) 174 return -EOPNOTSUPP; 175 176 mutex_lock(&counter->lock); 177 ret = qp->device->ops.counter_bind_qp(counter, qp); 178 mutex_unlock(&counter->lock); 179 180 return ret; 181 } 182 183 static int __rdma_counter_unbind_qp(struct ib_qp *qp) 184 { 185 struct rdma_counter *counter = qp->counter; 186 int ret; 187 188 if (!qp->device->ops.counter_unbind_qp) 189 return -EOPNOTSUPP; 190 191 mutex_lock(&counter->lock); 192 ret = qp->device->ops.counter_unbind_qp(qp); 193 mutex_unlock(&counter->lock); 194 195 return ret; 196 } 197 198 static void counter_history_stat_update(const struct rdma_counter *counter) 199 { 200 struct ib_device *dev = counter->device; 201 struct rdma_port_counter *port_counter; 202 int i; 203 204 port_counter = &dev->port_data[counter->port].port_counter; 205 if (!port_counter->hstats) 206 return; 207 208 for (i = 0; i < counter->stats->num_counters; i++) 209 port_counter->hstats->value[i] += counter->stats->value[i]; 210 } 211 212 /** 213 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 214 * with in auto mode 215 * 216 * Return: The counter (with ref-count increased) if found 217 */ 218 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 219 u8 port) 220 { 221 struct rdma_port_counter *port_counter; 222 struct rdma_counter *counter = NULL; 223 struct ib_device *dev = qp->device; 224 struct rdma_restrack_entry *res; 225 struct rdma_restrack_root *rt; 226 unsigned long id = 0; 227 228 port_counter = &dev->port_data[port].port_counter; 229 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 230 xa_lock(&rt->xa); 231 xa_for_each(&rt->xa, id, res) { 232 if (!rdma_is_visible_in_pid_ns(res)) 233 continue; 234 235 counter = container_of(res, struct rdma_counter, res); 236 if ((counter->device != qp->device) || (counter->port != port)) 237 goto next; 238 239 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 240 break; 241 next: 242 counter = NULL; 243 } 244 245 if (counter && !kref_get_unless_zero(&counter->kref)) 246 counter = NULL; 247 248 xa_unlock(&rt->xa); 249 return counter; 250 } 251 252 static void rdma_counter_res_add(struct rdma_counter *counter, 253 struct ib_qp *qp) 254 { 255 if (rdma_is_kernel_res(&qp->res)) { 256 rdma_restrack_set_task(&counter->res, qp->res.kern_name); 257 rdma_restrack_kadd(&counter->res); 258 } else { 259 rdma_restrack_attach_task(&counter->res, qp->res.task); 260 rdma_restrack_uadd(&counter->res); 261 } 262 } 263 264 static void counter_release(struct kref *kref) 265 { 266 struct rdma_counter *counter; 267 268 counter = container_of(kref, struct rdma_counter, kref); 269 counter_history_stat_update(counter); 270 counter->device->ops.counter_dealloc(counter); 271 rdma_counter_free(counter); 272 } 273 274 /** 275 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 276 * the auto-mode rule 277 */ 278 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) 279 { 280 struct rdma_port_counter *port_counter; 281 struct ib_device *dev = qp->device; 282 struct rdma_counter *counter; 283 int ret; 284 285 if (!rdma_is_port_valid(dev, port)) 286 return -EINVAL; 287 288 port_counter = &dev->port_data[port].port_counter; 289 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 290 return 0; 291 292 counter = rdma_get_counter_auto_mode(qp, port); 293 if (counter) { 294 ret = __rdma_counter_bind_qp(counter, qp); 295 if (ret) { 296 kref_put(&counter->kref, counter_release); 297 return ret; 298 } 299 } else { 300 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); 301 if (!counter) 302 return -ENOMEM; 303 304 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 305 306 ret = __rdma_counter_bind_qp(counter, qp); 307 if (ret) { 308 rdma_counter_free(counter); 309 return ret; 310 } 311 312 rdma_counter_res_add(counter, qp); 313 } 314 315 return 0; 316 } 317 318 /** 319 * rdma_counter_unbind_qp - Unbind a qp from a counter 320 * @force: 321 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 322 */ 323 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 324 { 325 struct rdma_counter *counter = qp->counter; 326 int ret; 327 328 if (!counter) 329 return -EINVAL; 330 331 ret = __rdma_counter_unbind_qp(qp); 332 if (ret && !force) 333 return ret; 334 335 kref_put(&counter->kref, counter_release); 336 return 0; 337 } 338 339 int rdma_counter_query_stats(struct rdma_counter *counter) 340 { 341 struct ib_device *dev = counter->device; 342 int ret; 343 344 if (!dev->ops.counter_update_stats) 345 return -EINVAL; 346 347 mutex_lock(&counter->lock); 348 ret = dev->ops.counter_update_stats(counter); 349 mutex_unlock(&counter->lock); 350 351 return ret; 352 } 353 354 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 355 u8 port, u32 index) 356 { 357 struct rdma_restrack_entry *res; 358 struct rdma_restrack_root *rt; 359 struct rdma_counter *counter; 360 unsigned long id = 0; 361 u64 sum = 0; 362 363 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 364 xa_lock(&rt->xa); 365 xa_for_each(&rt->xa, id, res) { 366 if (!rdma_restrack_get(res)) 367 continue; 368 369 xa_unlock(&rt->xa); 370 371 counter = container_of(res, struct rdma_counter, res); 372 if ((counter->device != dev) || (counter->port != port) || 373 rdma_counter_query_stats(counter)) 374 goto next; 375 376 sum += counter->stats->value[index]; 377 378 next: 379 xa_lock(&rt->xa); 380 rdma_restrack_put(res); 381 } 382 383 xa_unlock(&rt->xa); 384 return sum; 385 } 386 387 /** 388 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 389 * specific port, including the running ones and history data 390 */ 391 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) 392 { 393 struct rdma_port_counter *port_counter; 394 u64 sum; 395 396 port_counter = &dev->port_data[port].port_counter; 397 if (!port_counter->hstats) 398 return 0; 399 400 sum = get_running_counters_hwstat_sum(dev, port, index); 401 sum += port_counter->hstats->value[index]; 402 403 return sum; 404 } 405 406 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 407 { 408 struct rdma_restrack_entry *res = NULL; 409 struct ib_qp *qp = NULL; 410 411 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 412 if (IS_ERR(res)) 413 return NULL; 414 415 if (!rdma_is_visible_in_pid_ns(res)) 416 goto err; 417 418 qp = container_of(res, struct ib_qp, res); 419 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 420 goto err; 421 422 return qp; 423 424 err: 425 rdma_restrack_put(res); 426 return NULL; 427 } 428 429 static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, 430 struct ib_qp *qp) 431 { 432 if ((counter->device != qp->device) || (counter->port != qp->port)) 433 return -EINVAL; 434 435 return __rdma_counter_bind_qp(counter, qp); 436 } 437 438 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 439 u32 counter_id) 440 { 441 struct rdma_restrack_entry *res; 442 struct rdma_counter *counter; 443 444 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 445 if (IS_ERR(res)) 446 return NULL; 447 448 if (!rdma_is_visible_in_pid_ns(res)) { 449 rdma_restrack_put(res); 450 return NULL; 451 } 452 453 counter = container_of(res, struct rdma_counter, res); 454 kref_get(&counter->kref); 455 rdma_restrack_put(res); 456 457 return counter; 458 } 459 460 /** 461 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 462 */ 463 int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, 464 u32 qp_num, u32 counter_id) 465 { 466 struct rdma_counter *counter; 467 struct ib_qp *qp; 468 int ret; 469 470 qp = rdma_counter_get_qp(dev, qp_num); 471 if (!qp) 472 return -ENOENT; 473 474 counter = rdma_get_counter_by_id(dev, counter_id); 475 if (!counter) { 476 ret = -ENOENT; 477 goto err; 478 } 479 480 if (counter->res.task != qp->res.task) { 481 ret = -EINVAL; 482 goto err_task; 483 } 484 485 ret = rdma_counter_bind_qp_manual(counter, qp); 486 if (ret) 487 goto err_task; 488 489 rdma_restrack_put(&qp->res); 490 return 0; 491 492 err_task: 493 kref_put(&counter->kref, counter_release); 494 err: 495 rdma_restrack_put(&qp->res); 496 return ret; 497 } 498 499 /** 500 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 501 * The id of new counter is returned in @counter_id 502 */ 503 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, 504 u32 qp_num, u32 *counter_id) 505 { 506 struct rdma_counter *counter; 507 struct ib_qp *qp; 508 int ret; 509 510 if (!rdma_is_port_valid(dev, port)) 511 return -EINVAL; 512 513 if (!dev->port_data[port].port_counter.hstats) 514 return -EOPNOTSUPP; 515 516 qp = rdma_counter_get_qp(dev, qp_num); 517 if (!qp) 518 return -ENOENT; 519 520 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 521 ret = -EINVAL; 522 goto err; 523 } 524 525 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); 526 if (!counter) { 527 ret = -ENOMEM; 528 goto err; 529 } 530 531 ret = rdma_counter_bind_qp_manual(counter, qp); 532 if (ret) 533 goto err_bind; 534 535 if (counter_id) 536 *counter_id = counter->id; 537 538 rdma_counter_res_add(counter, qp); 539 540 rdma_restrack_put(&qp->res); 541 return ret; 542 543 err_bind: 544 rdma_counter_free(counter); 545 err: 546 rdma_restrack_put(&qp->res); 547 return ret; 548 } 549 550 /** 551 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 552 */ 553 int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, 554 u32 qp_num, u32 counter_id) 555 { 556 struct rdma_port_counter *port_counter; 557 struct ib_qp *qp; 558 int ret; 559 560 if (!rdma_is_port_valid(dev, port)) 561 return -EINVAL; 562 563 qp = rdma_counter_get_qp(dev, qp_num); 564 if (!qp) 565 return -ENOENT; 566 567 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 568 ret = -EINVAL; 569 goto out; 570 } 571 572 port_counter = &dev->port_data[port].port_counter; 573 if (!qp->counter || qp->counter->id != counter_id || 574 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 575 ret = -EINVAL; 576 goto out; 577 } 578 579 ret = rdma_counter_unbind_qp(qp, false); 580 581 out: 582 rdma_restrack_put(&qp->res); 583 return ret; 584 } 585 586 int rdma_counter_get_mode(struct ib_device *dev, u8 port, 587 enum rdma_nl_counter_mode *mode, 588 enum rdma_nl_counter_mask *mask) 589 { 590 struct rdma_port_counter *port_counter; 591 592 port_counter = &dev->port_data[port].port_counter; 593 *mode = port_counter->mode.mode; 594 *mask = port_counter->mode.mask; 595 596 return 0; 597 } 598 599 void rdma_counter_init(struct ib_device *dev) 600 { 601 struct rdma_port_counter *port_counter; 602 u32 port, i; 603 604 if (!dev->port_data) 605 return; 606 607 rdma_for_each_port(dev, port) { 608 port_counter = &dev->port_data[port].port_counter; 609 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 610 mutex_init(&port_counter->lock); 611 612 if (!dev->ops.alloc_hw_stats) 613 continue; 614 615 port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); 616 if (!port_counter->hstats) 617 goto fail; 618 } 619 620 return; 621 622 fail: 623 for (i = port; i >= rdma_start_port(dev); i--) { 624 port_counter = &dev->port_data[port].port_counter; 625 kfree(port_counter->hstats); 626 port_counter->hstats = NULL; 627 mutex_destroy(&port_counter->lock); 628 } 629 } 630 631 void rdma_counter_release(struct ib_device *dev) 632 { 633 struct rdma_port_counter *port_counter; 634 u32 port; 635 636 rdma_for_each_port(dev, port) { 637 port_counter = &dev->port_data[port].port_counter; 638 kfree(port_counter->hstats); 639 mutex_destroy(&port_counter->lock); 640 } 641 } 642