1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved. 4 */ 5 #include <rdma/ib_verbs.h> 6 #include <rdma/rdma_counter.h> 7 8 #include "core_priv.h" 9 #include "restrack.h" 10 11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) 12 13 static int __counter_set_mode(struct rdma_port_counter *port_counter, 14 enum rdma_nl_counter_mode new_mode, 15 enum rdma_nl_counter_mask new_mask) 16 { 17 if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters) 18 if (new_mask & ~ALL_AUTO_MODE_MASKS || 19 port_counter->mode.mode != RDMA_COUNTER_MODE_NONE) 20 return -EINVAL; 21 22 port_counter->mode.mode = new_mode; 23 port_counter->mode.mask = new_mask; 24 return 0; 25 } 26 27 /* 28 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode 29 * 30 * @dev: Device to operate 31 * @port: Port to use 32 * @mask: Mask to configure 33 * @extack: Message to the user 34 * 35 * Return 0 on success. 36 */ 37 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, 38 enum rdma_nl_counter_mask mask, 39 struct netlink_ext_ack *extack) 40 { 41 enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO; 42 struct rdma_port_counter *port_counter; 43 int ret; 44 45 port_counter = &dev->port_data[port].port_counter; 46 if (!port_counter->hstats) 47 return -EOPNOTSUPP; 48 49 mutex_lock(&port_counter->lock); 50 if (mask) { 51 ret = __counter_set_mode(port_counter, mode, mask); 52 if (ret) 53 NL_SET_ERR_MSG( 54 extack, 55 "Turning on auto mode is not allowed when there is bound QP"); 56 goto out; 57 } 58 59 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { 60 ret = -EINVAL; 61 goto out; 62 } 63 64 mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : 65 RDMA_COUNTER_MODE_NONE; 66 ret = __counter_set_mode(port_counter, mode, 0); 67 out: 68 mutex_unlock(&port_counter->lock); 69 return ret; 70 } 71 72 static void auto_mode_init_counter(struct rdma_counter *counter, 73 const struct ib_qp *qp, 74 enum rdma_nl_counter_mask new_mask) 75 { 76 struct auto_mode_param *param = &counter->mode.param; 77 78 counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 79 counter->mode.mask = new_mask; 80 81 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 82 param->qp_type = qp->qp_type; 83 } 84 85 static int __rdma_counter_bind_qp(struct rdma_counter *counter, 86 struct ib_qp *qp) 87 { 88 int ret; 89 90 if (qp->counter) 91 return -EINVAL; 92 93 if (!qp->device->ops.counter_bind_qp) 94 return -EOPNOTSUPP; 95 96 mutex_lock(&counter->lock); 97 ret = qp->device->ops.counter_bind_qp(counter, qp); 98 mutex_unlock(&counter->lock); 99 100 return ret; 101 } 102 103 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, 104 struct ib_qp *qp, 105 enum rdma_nl_counter_mode mode) 106 { 107 struct rdma_port_counter *port_counter; 108 struct rdma_counter *counter; 109 int ret; 110 111 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) 112 return NULL; 113 114 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 115 if (!counter) 116 return NULL; 117 118 counter->device = dev; 119 counter->port = port; 120 121 rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); 122 counter->stats = dev->ops.counter_alloc_stats(counter); 123 if (!counter->stats) 124 goto err_stats; 125 126 port_counter = &dev->port_data[port].port_counter; 127 mutex_lock(&port_counter->lock); 128 switch (mode) { 129 case RDMA_COUNTER_MODE_MANUAL: 130 ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, 131 0); 132 if (ret) { 133 mutex_unlock(&port_counter->lock); 134 goto err_mode; 135 } 136 break; 137 case RDMA_COUNTER_MODE_AUTO: 138 auto_mode_init_counter(counter, qp, port_counter->mode.mask); 139 break; 140 default: 141 ret = -EOPNOTSUPP; 142 mutex_unlock(&port_counter->lock); 143 goto err_mode; 144 } 145 146 port_counter->num_counters++; 147 mutex_unlock(&port_counter->lock); 148 149 counter->mode.mode = mode; 150 kref_init(&counter->kref); 151 mutex_init(&counter->lock); 152 153 ret = __rdma_counter_bind_qp(counter, qp); 154 if (ret) 155 goto err_mode; 156 157 rdma_restrack_parent_name(&counter->res, &qp->res); 158 rdma_restrack_add(&counter->res); 159 return counter; 160 161 err_mode: 162 kfree(counter->stats); 163 err_stats: 164 rdma_restrack_put(&counter->res); 165 kfree(counter); 166 return NULL; 167 } 168 169 static void rdma_counter_free(struct rdma_counter *counter) 170 { 171 struct rdma_port_counter *port_counter; 172 173 port_counter = &counter->device->port_data[counter->port].port_counter; 174 mutex_lock(&port_counter->lock); 175 port_counter->num_counters--; 176 if (!port_counter->num_counters && 177 (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) 178 __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0); 179 180 mutex_unlock(&port_counter->lock); 181 182 rdma_restrack_del(&counter->res); 183 kfree(counter->stats); 184 kfree(counter); 185 } 186 187 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 188 enum rdma_nl_counter_mask auto_mask) 189 { 190 struct auto_mode_param *param = &counter->mode.param; 191 bool match = true; 192 193 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 194 match &= (param->qp_type == qp->qp_type); 195 196 if (auto_mask & RDMA_COUNTER_MASK_PID) 197 match &= (task_pid_nr(counter->res.task) == 198 task_pid_nr(qp->res.task)); 199 200 return match; 201 } 202 203 static int __rdma_counter_unbind_qp(struct ib_qp *qp) 204 { 205 struct rdma_counter *counter = qp->counter; 206 int ret; 207 208 if (!qp->device->ops.counter_unbind_qp) 209 return -EOPNOTSUPP; 210 211 mutex_lock(&counter->lock); 212 ret = qp->device->ops.counter_unbind_qp(qp); 213 mutex_unlock(&counter->lock); 214 215 return ret; 216 } 217 218 static void counter_history_stat_update(struct rdma_counter *counter) 219 { 220 struct ib_device *dev = counter->device; 221 struct rdma_port_counter *port_counter; 222 int i; 223 224 port_counter = &dev->port_data[counter->port].port_counter; 225 if (!port_counter->hstats) 226 return; 227 228 rdma_counter_query_stats(counter); 229 230 for (i = 0; i < counter->stats->num_counters; i++) 231 port_counter->hstats->value[i] += counter->stats->value[i]; 232 } 233 234 /* 235 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 236 * with in auto mode 237 * 238 * Return: The counter (with ref-count increased) if found 239 */ 240 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 241 u8 port) 242 { 243 struct rdma_port_counter *port_counter; 244 struct rdma_counter *counter = NULL; 245 struct ib_device *dev = qp->device; 246 struct rdma_restrack_entry *res; 247 struct rdma_restrack_root *rt; 248 unsigned long id = 0; 249 250 port_counter = &dev->port_data[port].port_counter; 251 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 252 xa_lock(&rt->xa); 253 xa_for_each(&rt->xa, id, res) { 254 counter = container_of(res, struct rdma_counter, res); 255 if ((counter->device != qp->device) || (counter->port != port)) 256 goto next; 257 258 if (auto_mode_match(qp, counter, port_counter->mode.mask)) 259 break; 260 next: 261 counter = NULL; 262 } 263 264 if (counter && !kref_get_unless_zero(&counter->kref)) 265 counter = NULL; 266 267 xa_unlock(&rt->xa); 268 return counter; 269 } 270 271 static void counter_release(struct kref *kref) 272 { 273 struct rdma_counter *counter; 274 275 counter = container_of(kref, struct rdma_counter, kref); 276 counter_history_stat_update(counter); 277 counter->device->ops.counter_dealloc(counter); 278 rdma_counter_free(counter); 279 } 280 281 /* 282 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 283 * the auto-mode rule 284 */ 285 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) 286 { 287 struct rdma_port_counter *port_counter; 288 struct ib_device *dev = qp->device; 289 struct rdma_counter *counter; 290 int ret; 291 292 if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) 293 return 0; 294 295 if (!rdma_is_port_valid(dev, port)) 296 return -EINVAL; 297 298 port_counter = &dev->port_data[port].port_counter; 299 if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 300 return 0; 301 302 counter = rdma_get_counter_auto_mode(qp, port); 303 if (counter) { 304 ret = __rdma_counter_bind_qp(counter, qp); 305 if (ret) { 306 kref_put(&counter->kref, counter_release); 307 return ret; 308 } 309 } else { 310 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); 311 if (!counter) 312 return -ENOMEM; 313 } 314 315 return 0; 316 } 317 318 /* 319 * rdma_counter_unbind_qp - Unbind a qp from a counter 320 * @force: 321 * true - Decrease the counter ref-count anyway (e.g., qp destroy) 322 */ 323 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 324 { 325 struct rdma_counter *counter = qp->counter; 326 int ret; 327 328 if (!counter) 329 return -EINVAL; 330 331 ret = __rdma_counter_unbind_qp(qp); 332 if (ret && !force) 333 return ret; 334 335 kref_put(&counter->kref, counter_release); 336 return 0; 337 } 338 339 int rdma_counter_query_stats(struct rdma_counter *counter) 340 { 341 struct ib_device *dev = counter->device; 342 int ret; 343 344 if (!dev->ops.counter_update_stats) 345 return -EINVAL; 346 347 mutex_lock(&counter->lock); 348 ret = dev->ops.counter_update_stats(counter); 349 mutex_unlock(&counter->lock); 350 351 return ret; 352 } 353 354 static u64 get_running_counters_hwstat_sum(struct ib_device *dev, 355 u8 port, u32 index) 356 { 357 struct rdma_restrack_entry *res; 358 struct rdma_restrack_root *rt; 359 struct rdma_counter *counter; 360 unsigned long id = 0; 361 u64 sum = 0; 362 363 rt = &dev->res[RDMA_RESTRACK_COUNTER]; 364 xa_lock(&rt->xa); 365 xa_for_each(&rt->xa, id, res) { 366 if (!rdma_restrack_get(res)) 367 continue; 368 369 xa_unlock(&rt->xa); 370 371 counter = container_of(res, struct rdma_counter, res); 372 if ((counter->device != dev) || (counter->port != port) || 373 rdma_counter_query_stats(counter)) 374 goto next; 375 376 sum += counter->stats->value[index]; 377 378 next: 379 xa_lock(&rt->xa); 380 rdma_restrack_put(res); 381 } 382 383 xa_unlock(&rt->xa); 384 return sum; 385 } 386 387 /* 388 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a 389 * specific port, including the running ones and history data 390 */ 391 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) 392 { 393 struct rdma_port_counter *port_counter; 394 u64 sum; 395 396 port_counter = &dev->port_data[port].port_counter; 397 if (!port_counter->hstats) 398 return 0; 399 400 sum = get_running_counters_hwstat_sum(dev, port, index); 401 sum += port_counter->hstats->value[index]; 402 403 return sum; 404 } 405 406 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) 407 { 408 struct rdma_restrack_entry *res = NULL; 409 struct ib_qp *qp = NULL; 410 411 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); 412 if (IS_ERR(res)) 413 return NULL; 414 415 qp = container_of(res, struct ib_qp, res); 416 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 417 goto err; 418 419 return qp; 420 421 err: 422 rdma_restrack_put(res); 423 return NULL; 424 } 425 426 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 427 u32 counter_id) 428 { 429 struct rdma_restrack_entry *res; 430 struct rdma_counter *counter; 431 432 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); 433 if (IS_ERR(res)) 434 return NULL; 435 436 counter = container_of(res, struct rdma_counter, res); 437 kref_get(&counter->kref); 438 rdma_restrack_put(res); 439 440 return counter; 441 } 442 443 /* 444 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id 445 */ 446 int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, 447 u32 qp_num, u32 counter_id) 448 { 449 struct rdma_port_counter *port_counter; 450 struct rdma_counter *counter; 451 struct ib_qp *qp; 452 int ret; 453 454 port_counter = &dev->port_data[port].port_counter; 455 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 456 return -EINVAL; 457 458 qp = rdma_counter_get_qp(dev, qp_num); 459 if (!qp) 460 return -ENOENT; 461 462 counter = rdma_get_counter_by_id(dev, counter_id); 463 if (!counter) { 464 ret = -ENOENT; 465 goto err; 466 } 467 468 if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { 469 ret = -EINVAL; 470 goto err_task; 471 } 472 473 if ((counter->device != qp->device) || (counter->port != qp->port)) { 474 ret = -EINVAL; 475 goto err_task; 476 } 477 478 ret = __rdma_counter_bind_qp(counter, qp); 479 if (ret) 480 goto err_task; 481 482 rdma_restrack_put(&qp->res); 483 return 0; 484 485 err_task: 486 kref_put(&counter->kref, counter_release); 487 err: 488 rdma_restrack_put(&qp->res); 489 return ret; 490 } 491 492 /* 493 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it 494 * The id of new counter is returned in @counter_id 495 */ 496 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, 497 u32 qp_num, u32 *counter_id) 498 { 499 struct rdma_port_counter *port_counter; 500 struct rdma_counter *counter; 501 struct ib_qp *qp; 502 int ret; 503 504 if (!rdma_is_port_valid(dev, port)) 505 return -EINVAL; 506 507 port_counter = &dev->port_data[port].port_counter; 508 if (!port_counter->hstats) 509 return -EOPNOTSUPP; 510 511 if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) 512 return -EINVAL; 513 514 qp = rdma_counter_get_qp(dev, qp_num); 515 if (!qp) 516 return -ENOENT; 517 518 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 519 ret = -EINVAL; 520 goto err; 521 } 522 523 counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); 524 if (!counter) { 525 ret = -ENOMEM; 526 goto err; 527 } 528 529 if (counter_id) 530 *counter_id = counter->id; 531 532 rdma_restrack_put(&qp->res); 533 return 0; 534 535 err: 536 rdma_restrack_put(&qp->res); 537 return ret; 538 } 539 540 /* 541 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter 542 */ 543 int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, 544 u32 qp_num, u32 counter_id) 545 { 546 struct rdma_port_counter *port_counter; 547 struct ib_qp *qp; 548 int ret; 549 550 if (!rdma_is_port_valid(dev, port)) 551 return -EINVAL; 552 553 qp = rdma_counter_get_qp(dev, qp_num); 554 if (!qp) 555 return -ENOENT; 556 557 if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { 558 ret = -EINVAL; 559 goto out; 560 } 561 562 port_counter = &dev->port_data[port].port_counter; 563 if (!qp->counter || qp->counter->id != counter_id || 564 port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { 565 ret = -EINVAL; 566 goto out; 567 } 568 569 ret = rdma_counter_unbind_qp(qp, false); 570 571 out: 572 rdma_restrack_put(&qp->res); 573 return ret; 574 } 575 576 int rdma_counter_get_mode(struct ib_device *dev, u8 port, 577 enum rdma_nl_counter_mode *mode, 578 enum rdma_nl_counter_mask *mask) 579 { 580 struct rdma_port_counter *port_counter; 581 582 port_counter = &dev->port_data[port].port_counter; 583 *mode = port_counter->mode.mode; 584 *mask = port_counter->mode.mask; 585 586 return 0; 587 } 588 589 void rdma_counter_init(struct ib_device *dev) 590 { 591 struct rdma_port_counter *port_counter; 592 u32 port, i; 593 594 if (!dev->port_data) 595 return; 596 597 rdma_for_each_port(dev, port) { 598 port_counter = &dev->port_data[port].port_counter; 599 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; 600 mutex_init(&port_counter->lock); 601 602 if (!dev->ops.alloc_hw_stats) 603 continue; 604 605 port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); 606 if (!port_counter->hstats) 607 goto fail; 608 } 609 610 return; 611 612 fail: 613 for (i = port; i >= rdma_start_port(dev); i--) { 614 port_counter = &dev->port_data[port].port_counter; 615 kfree(port_counter->hstats); 616 port_counter->hstats = NULL; 617 mutex_destroy(&port_counter->lock); 618 } 619 } 620 621 void rdma_counter_release(struct ib_device *dev) 622 { 623 struct rdma_port_counter *port_counter; 624 u32 port; 625 626 rdma_for_each_port(dev, port) { 627 port_counter = &dev->port_data[port].port_counter; 628 kfree(port_counter->hstats); 629 mutex_destroy(&port_counter->lock); 630 } 631 } 632