1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) 12 13 static int __counter_set_mode(struct rdma_counter_mode *curr, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask) 16 { 17 if ((new_mode == RDMA_COUNTER_MODE_AUTO) && 18 ((new_mask & (~ALL_AUTO_MODE_MASKS)) || 19 (curr->mode != RDMA_COUNTER_MODE_NONE))) 20 return -EINVAL; 21 22 curr->mode = new_mode; 23 curr->mask = new_mask; 24 return 0; 25 } 26 27 /** 28 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 29 * 30 * When @on is true, the @mask must be set; When @on is false, it goes 31 * into manual mode if there's any counter, so that the user is able to 32 * manually access them. 33 */ 34 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, 35 bool on, enum rdma_nl_counter_mask mask) 36 { 37 struct rdma_port_counter *port_counter; 38 int ret; 39 40 port_counter = &dev->port_data[port].port_counter; 41 mutex_lock(&port_counter->lock); 42 if (on) { 43 ret = __counter_set_mode(&port_counter->mode, 44 RDMA_COUNTER_MODE_AUTO, mask); 45 } else { 46 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { 47 ret = -EINVAL; 48 goto out; 49 } 50 51 if (port_counter->num_counters) 52 ret = __counter_set_mode(&port_counter->mode, 53 RDMA_COUNTER_MODE_MANUAL, 0); 54 else 55 ret = __counter_set_mode(&port_counter->mode, 56 RDMA_COUNTER_MODE_NONE, 0); 57 } 58 59 out: 60 mutex_unlock(&port_counter->lock); 61 return ret; 62 } 63 64 static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, 65 enum rdma_nl_counter_mode mode) 66 { 67 struct rdma_port_counter *port_counter; 68 struct rdma_counter *counter; 69 int ret; 70 71 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 72 return NULL; 73 74 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 75 if (!counter) 76 return NULL; 77 78 counter->device = dev; 79 counter->port = port; 80 counter->res.type = RDMA_RESTRACK_COUNTER; 81 counter->stats = dev->ops.counter_alloc_stats(counter); 82 if (!counter->stats) 83 goto err_stats; 84 85 port_counter = &dev->port_data[port].port_counter; 86 mutex_lock(&port_counter->lock); 87 if (mode == RDMA_COUNTER_MODE_MANUAL) { 88 ret = __counter_set_mode(&port_counter->mode, 89 RDMA_COUNTER_MODE_MANUAL, 0); 90 if (ret) 91 goto err_mode; 92 } 93 94 port_counter->num_counters++; 95 mutex_unlock(&port_counter->lock); 96 97 counter->mode.mode = mode; 98 kref_init(&counter->kref); 99 mutex_init(&counter->lock); 100 101 return counter; 102 103 err_mode: 104 mutex_unlock(&port_counter->lock); 105 kfree(counter->stats); 106 err_stats: 107 kfree(counter); 108 return NULL; 109 } 110 111 static void rdma_counter_free(struct rdma_counter *counter) 112 { 113 struct rdma_port_counter *port_counter; 114 115 port_counter = &counter->device->port_data[counter->port].port_counter; 116 mutex_lock(&port_counter->lock); 117 port_counter->num_counters--; 118 if (!port_counter->num_counters && 119 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 120 __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, 121 0); 122 123 mutex_unlock(&port_counter->lock); 124 125 rdma_restrack_del(&counter->res); 126 kfree(counter->stats); 127 kfree(counter); 128 } 129 130 static void auto_mode_init_counter(struct rdma_counter *counter, 131 const struct ib_qp *qp, 132 enum rdma_nl_counter_mask new_mask) 133 { 134 struct auto_mode_param *param = &counter->mode.param; 135 136 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 137 counter->mode.mask = new_mask; 138 139 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 140 param->qp_type = qp->qp_type; 141 } 142 143 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 144 enum rdma_nl_counter_mask auto_mask) 145 { 146 struct auto_mode_param *param = &counter->mode.param; 147 bool match = true; 148 149 if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) 150 return false; 151 152 /* Ensure that counter belong to right PID */ 153 if (!rdma_is_kernel_res(&counter->res) && 154 !rdma_is_kernel_res(&qp->res) && 155 (task_pid_vnr(counter->res.task) != current->pid)) 156 return false; 157 158 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 159 match &= (param->qp_type == qp->qp_type); 160 161 return match; 162 } 163 164 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 165 struct ib_qp *qp) 166 { 167 int ret; 168 169 if (qp->counter) 170 return -EINVAL; 171 172 if (!qp->device->ops.counter_bind_qp) 173 return -EOPNOTSUPP; 174 175 mutex_lock(&counter->lock); 176 ret = qp->device->ops.counter_bind_qp(counter, qp); 177 mutex_unlock(&counter->lock); 178 179 return ret; 180 } 181 182 static int __rdma_counter_unbind_qp(struct ib_qp *qp) 183 { 184 struct rdma_counter *counter = qp->counter; 185 int ret; 186 187 if (!qp->device->ops.counter_unbind_qp) 188 return -EOPNOTSUPP; 189 190 mutex_lock(&counter->lock); 191 ret = qp->device->ops.counter_unbind_qp(qp); 192 mutex_unlock(&counter->lock); 193 194 return ret; 195 } 196 197 static void counter_history_stat_update(const struct rdma_counter *counter) 198 { 199 struct ib_device *dev = counter->device; 200 struct rdma_port_counter *port_counter; 201 int i; 202 203 port_counter = &dev->port_data[counter->port].port_counter; 204 if (!port_counter->hstats) 205 return; 206 207 for (i = 0; i < counter->stats->num_counters; i++) 208 port_counter->hstats->value[i] += counter->stats->value[i]; 209 } 210 211 /** 212 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 213 * with in auto mode 214 * 215 * Return: The counter (with ref-count increased) if found 216 */ 217 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 218 u8 port) 219 { 220 struct rdma_port_counter *port_counter; 221 struct rdma_counter *counter = NULL; 222 struct ib_device *dev = qp->device; 223 struct rdma_restrack_entry *res; 224 struct rdma_restrack_root *rt; 225 unsigned long id = 0; 226 227 port_counter = &dev->port_data[port].port_counter; 228 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 229 xa_lock(&rt->xa); 230 xa_for_each(&rt->xa, id, res) { 231 if (!rdma_is_visible_in_pid_ns(res)) 232 continue; 233 234 counter = container_of(res, struct rdma_counter, res); 235 if ((counter->device != qp->device) || (counter->port != port)) 236 goto next; 237 238 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 239 break; 240 next: 241 counter = NULL; 242 } 243 244 if (counter && !kref_get_unless_zero(&counter->kref)) 245 counter = NULL; 246 247 xa_unlock(&rt->xa); 248 return counter; 249 } 250 251 static void rdma_counter_res_add(struct rdma_counter *counter, 252 struct ib_qp *qp) 253 { 254 if (rdma_is_kernel_res(&qp->res)) { 255 rdma_restrack_set_task(&counter->res, qp->res.kern_name); 256 rdma_restrack_kadd(&counter->res); 257 } else { 258 rdma_restrack_attach_task(&counter->res, qp->res.task); 259 rdma_restrack_uadd(&counter->res); 260 } 261 } 262 263 static void counter_release(struct kref *kref) 264 { 265 struct rdma_counter *counter; 266 267 counter = container_of(kref, struct rdma_counter, kref); 268 counter_history_stat_update(counter); 269 counter->device->ops.counter_dealloc(counter); 270 rdma_counter_free(counter); 271 } 272 273 /** 274 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 275 * the auto-mode rule 276 */ 277 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) 278 { 279 struct rdma_port_counter *port_counter; 280 struct ib_device *dev = qp->device; 281 struct rdma_counter *counter; 282 int ret; 283 284 if (!rdma_is_port_valid(dev, port)) 285 return -EINVAL; 286 287 port_counter = &dev->port_data[port].port_counter; 288 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 289 return 0; 290 291 counter = rdma_get_counter_auto_mode(qp, port); 292 if (counter) { 293 ret = __rdma_counter_bind_qp(counter, qp); 294 if (ret) { 295 kref_put(&counter->kref, counter_release); 296 return ret; 297 } 298 } else { 299 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); 300 if (!counter) 301 return -ENOMEM; 302 303 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 304 305 ret = __rdma_counter_bind_qp(counter, qp); 306 if (ret) { 307 rdma_counter_free(counter); 308 return ret; 309 } 310 311 rdma_counter_res_add(counter, qp); 312 } 313 314 return 0; 315 } 316 317 /** 318 * rdma_counter_unbind_qp - Unbind a qp from a counter 319 * @force: 320 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 321 */ 322 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 323 { 324 struct rdma_counter *counter = qp->counter; 325 int ret; 326 327 if (!counter) 328 return -EINVAL; 329 330 ret = __rdma_counter_unbind_qp(qp); 331 if (ret && !force) 332 return ret; 333 334 kref_put(&counter->kref, counter_release); 335 return 0; 336 } 337 338 int rdma_counter_query_stats(struct rdma_counter *counter) 339 { 340 struct ib_device *dev = counter->device; 341 int ret; 342 343 if (!dev->ops.counter_update_stats) 344 return -EINVAL; 345 346 mutex_lock(&counter->lock); 347 ret = dev->ops.counter_update_stats(counter); 348 mutex_unlock(&counter->lock); 349 350 return ret; 351 } 352 353 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 354 u8 port, u32 index) 355 { 356 struct rdma_restrack_entry *res; 357 struct rdma_restrack_root *rt; 358 struct rdma_counter *counter; 359 unsigned long id = 0; 360 u64 sum = 0; 361 362 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 363 xa_lock(&rt->xa); 364 xa_for_each(&rt->xa, id, res) { 365 if (!rdma_restrack_get(res)) 366 continue; 367 368 xa_unlock(&rt->xa); 369 370 counter = container_of(res, struct rdma_counter, res); 371 if ((counter->device != dev) || (counter->port != port) || 372 rdma_counter_query_stats(counter)) 373 goto next; 374 375 sum += counter->stats->value[index]; 376 377 next: 378 xa_lock(&rt->xa); 379 rdma_restrack_put(res); 380 } 381 382 xa_unlock(&rt->xa); 383 return sum; 384 } 385 386 /** 387 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 388 * specific port, including the running ones and history data 389 */ 390 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) 391 { 392 struct rdma_port_counter *port_counter; 393 u64 sum; 394 395 port_counter = &dev->port_data[port].port_counter; 396 if (!port_counter->hstats) 397 return 0; 398 399 sum = get_running_counters_hwstat_sum(dev, port, index); 400 sum += port_counter->hstats->value[index]; 401 402 return sum; 403 } 404 405 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 406 { 407 struct rdma_restrack_entry *res = NULL; 408 struct ib_qp *qp = NULL; 409 410 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 411 if (IS_ERR(res)) 412 return NULL; 413 414 if (!rdma_is_visible_in_pid_ns(res)) 415 goto err; 416 417 qp = container_of(res, struct ib_qp, res); 418 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 419 goto err; 420 421 return qp; 422 423 err: 424 rdma_restrack_put(&qp->res); 425 return NULL; 426 } 427 428 static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, 429 struct ib_qp *qp) 430 { 431 if ((counter->device != qp->device) || (counter->port != qp->port)) 432 return -EINVAL; 433 434 return __rdma_counter_bind_qp(counter, qp); 435 } 436 437 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 438 u32 counter_id) 439 { 440 struct rdma_restrack_entry *res; 441 struct rdma_counter *counter; 442 443 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 444 if (IS_ERR(res)) 445 return NULL; 446 447 if (!rdma_is_visible_in_pid_ns(res)) { 448 rdma_restrack_put(res); 449 return NULL; 450 } 451 452 counter = container_of(res, struct rdma_counter, res); 453 kref_get(&counter->kref); 454 rdma_restrack_put(res); 455 456 return counter; 457 } 458 459 /** 460 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 461 */ 462 int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, 463 u32 qp_num, u32 counter_id) 464 { 465 struct rdma_counter *counter; 466 struct ib_qp *qp; 467 int ret; 468 469 qp = rdma_counter_get_qp(dev, qp_num); 470 if (!qp) 471 return -ENOENT; 472 473 counter = rdma_get_counter_by_id(dev, counter_id); 474 if (!counter) { 475 ret = -ENOENT; 476 goto err; 477 } 478 479 if (counter->res.task != qp->res.task) { 480 ret = -EINVAL; 481 goto err_task; 482 } 483 484 ret = rdma_counter_bind_qp_manual(counter, qp); 485 if (ret) 486 goto err_task; 487 488 rdma_restrack_put(&qp->res); 489 return 0; 490 491 err_task: 492 kref_put(&counter->kref, counter_release); 493 err: 494 rdma_restrack_put(&qp->res); 495 return ret; 496 } 497 498 /** 499 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 500 * The id of new counter is returned in @counter_id 501 */ 502 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, 503 u32 qp_num, u32 *counter_id) 504 { 505 struct rdma_counter *counter; 506 struct ib_qp *qp; 507 int ret; 508 509 if (!rdma_is_port_valid(dev, port)) 510 return -EINVAL; 511 512 qp = rdma_counter_get_qp(dev, qp_num); 513 if (!qp) 514 return -ENOENT; 515 516 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 517 ret = -EINVAL; 518 goto err; 519 } 520 521 counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); 522 if (!counter) { 523 ret = -ENOMEM; 524 goto err; 525 } 526 527 ret = rdma_counter_bind_qp_manual(counter, qp); 528 if (ret) 529 goto err_bind; 530 531 if (counter_id) 532 *counter_id = counter->id; 533 534 rdma_counter_res_add(counter, qp); 535 536 rdma_restrack_put(&qp->res); 537 return ret; 538 539 err_bind: 540 rdma_counter_free(counter); 541 err: 542 rdma_restrack_put(&qp->res); 543 return ret; 544 } 545 546 /** 547 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 548 */ 549 int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, 550 u32 qp_num, u32 counter_id) 551 { 552 struct rdma_port_counter *port_counter; 553 struct ib_qp *qp; 554 int ret; 555 556 if (!rdma_is_port_valid(dev, port)) 557 return -EINVAL; 558 559 qp = rdma_counter_get_qp(dev, qp_num); 560 if (!qp) 561 return -ENOENT; 562 563 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 564 ret = -EINVAL; 565 goto out; 566 } 567 568 port_counter = &dev->port_data[port].port_counter; 569 if (!qp->counter || qp->counter->id != counter_id || 570 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 571 ret = -EINVAL; 572 goto out; 573 } 574 575 ret = rdma_counter_unbind_qp(qp, false); 576 577 out: 578 rdma_restrack_put(&qp->res); 579 return ret; 580 } 581 582 int rdma_counter_get_mode(struct ib_device *dev, u8 port, 583 enum rdma_nl_counter_mode *mode, 584 enum rdma_nl_counter_mask *mask) 585 { 586 struct rdma_port_counter *port_counter; 587 588 port_counter = &dev->port_data[port].port_counter; 589 *mode = port_counter->mode.mode; 590 *mask = port_counter->mode.mask; 591 592 return 0; 593 } 594 595 void rdma_counter_init(struct ib_device *dev) 596 { 597 struct rdma_port_counter *port_counter; 598 u32 port; 599 600 if (!dev->port_data) 601 return; 602 603 rdma_for_each_port(dev, port) { 604 port_counter = &dev->port_data[port].port_counter; 605 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 606 mutex_init(&port_counter->lock); 607 608 if (!dev->ops.alloc_hw_stats) 609 continue; 610 611 port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); 612 if (!port_counter->hstats) 613 goto fail; 614 } 615 616 return; 617 618 fail: 619 rdma_for_each_port(dev, port) { 620 port_counter = &dev->port_data[port].port_counter; 621 kfree(port_counter->hstats); 622 port_counter->hstats = NULL; 623 } 624 625 return; 626 } 627 628 void rdma_counter_release(struct ib_device *dev) 629 { 630 struct rdma_port_counter *port_counter; 631 u32 port; 632 633 rdma_for_each_port(dev, port) { 634 port_counter = &dev->port_data[port].port_counter; 635 kfree(port_counter->hstats); 636 } 637 } 638