/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");

static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION "\n";

struct mlx5_ib_event_work {
	struct work_struct work;
	struct mlx5_core_dev *dev;
	void *context;
	enum mlx5_dev_event event;
	unsigned long param;
};

enum {
	MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};

static struct workqueue_struct *mlx5_ib_event_wq;
static LIST_HEAD(mlx5_ib_unaffiliated_port_list);
static LIST_HEAD(mlx5_ib_dev_list);
/*
 * This mutex should be held when accessing either of the above lists
 */
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);

struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
	struct mlx5_ib_dev *dev;

	mutex_lock(&mlx5_ib_multiport_mutex);
	dev = mpi->ibdev;
	mutex_unlock(&mlx5_ib_multiport_mutex);
	return dev;
}

static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
	switch (port_type_cap) {
	case MLX5_CAP_PORT_TYPE_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case MLX5_CAP_PORT_TYPE_ETH:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);

	return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}

static int get_port_state(struct ib_device *ibdev,
			  u8 port_num,
			  enum ib_port_state *state)
{
	struct ib_port_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	ret = mlx5_ib_query_port(ibdev, port_num, &attr);
	if (!ret)
		*state = attr.state;
	return ret;
}

static int mlx5_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	u8 port_num = roce->native_port_num;
	struct mlx5_core_dev *mdev;
	struct mlx5_ib_dev *ibdev;

	ibdev = roce->dev;
	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
	if (!mdev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
		write_lock(&roce->netdev_lock);
		if (ibdev->rep) {
			struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
			struct net_device *rep_ndev;

			rep_ndev = mlx5_ib_get_rep_netdev(esw,
							  ibdev->rep->vport);
			if (rep_ndev == ndev)
				roce->netdev = (event == NETDEV_UNREGISTER) ?
					NULL : ndev;
		} else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
			roce->netdev = (event == NETDEV_UNREGISTER) ?
				NULL : ndev;
		}
		write_unlock(&roce->netdev_lock);
		break;

	case NETDEV_CHANGE:
	case NETDEV_UP:
	case NETDEV_DOWN: {
		struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
		struct net_device *upper = NULL;

		if (lag_ndev) {
			upper = netdev_master_upper_dev_get(lag_ndev);
			dev_put(lag_ndev);
		}

		if ((upper == ndev || (!upper && ndev == roce->netdev))
		    && ibdev->ib_active) {
			struct ib_event ibev = { };
			enum ib_port_state port_state;

			if (get_port_state(&ibdev->ib_dev, port_num,
					   &port_state))
				goto done;

			if (roce->last_port_state == port_state)
				goto done;

			roce->last_port_state = port_state;
			ibev.device = &ibdev->ib_dev;
			if (port_state == IB_PORT_DOWN)
				ibev.event = IB_EVENT_PORT_ERR;
			else if (port_state == IB_PORT_ACTIVE)
				ibev.event = IB_EVENT_PORT_ACTIVE;
			else
				goto done;

			ibev.element.port_num = port_num;
			ib_dispatch_event(&ibev);
		}
		break;
	}

	default:
		break;
	}
done:
	mlx5_ib_put_native_port_mdev(ibdev, port_num);
	return NOTIFY_DONE;
}

static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
					     u8 port_num)
{
	struct mlx5_ib_dev *ibdev = to_mdev(device);
	struct net_device *ndev;
	struct mlx5_core_dev *mdev;

	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
	if (!mdev)
		return NULL;

	ndev = mlx5_lag_get_roce_netdev(mdev);
	if (ndev)
		goto out;

	/* Ensure ndev does not disappear before we invoke dev_hold()
	 */
	read_lock(&ibdev->roce[port_num - 1].netdev_lock);
	ndev = ibdev->roce[port_num - 1].netdev;
	if (ndev)
		dev_hold(ndev);
	read_unlock(&ibdev->roce[port_num - 1].netdev_lock);

out:
	mlx5_ib_put_native_port_mdev(ibdev, port_num);
	return ndev;
}
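
/*
 * Return the mlx5_core_dev that owns the native port backing IB port
 * ib_port_num. When multiport is not enabled, or the port is not RoCE,
 * this is simply ibdev->mdev. A reference is taken on affiliated slave
 * devices; callers must release it with mlx5_ib_put_native_port_mdev()
 * whenever a non-NULL device is returned.
 */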
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
						   u8 ib_port_num,
						   u8 *native_port_num)
{
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
							  ib_port_num);
	struct mlx5_core_dev *mdev = NULL;
	struct mlx5_ib_multiport_info *mpi;
	struct mlx5_ib_port *port;

	if (native_port_num)
		*native_port_num = 1;

	if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
		return ibdev->mdev;

	port = &ibdev->port[ib_port_num - 1];
	if (!port)
		return NULL;

	spin_lock(&port->mp.mpi_lock);
	mpi = ibdev->port[ib_port_num - 1].mp.mpi;
	if (mpi && !mpi->unaffiliate) {
		mdev = mpi->mdev;
		/* If it's the master no need to refcount, it'll exist
		 * as long as the ib_dev exists.
		 */
		if (!mpi->is_master)
			mpi->mdev_refcnt++;
	}
	spin_unlock(&port->mp.mpi_lock);

	return mdev;
}

void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
{
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
							  port_num);
	struct mlx5_ib_multiport_info *mpi;
	struct mlx5_ib_port *port;

	if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
		return;

	port = &ibdev->port[port_num - 1];

	spin_lock(&port->mp.mpi_lock);
	mpi = ibdev->port[port_num - 1].mp.mpi;
	if (mpi->is_master)
		goto out;

	mpi->mdev_refcnt--;
	if (mpi->unaffiliate)
		complete(&mpi->unref_comp);
out:
	spin_unlock(&port->mp.mpi_lock);
}

static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
				    u8 *active_width)
{
	switch (eth_proto_oper) {
	case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
	case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_EDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_HDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
				struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct mlx5_core_dev *mdev;
	struct net_device *ndev, *upper;
	enum ib_mtu ndev_ib_mtu;
	bool put_mdev = true;
	u16 qkey_viol_cntr;
	u32 eth_prot_oper;
	u8 mdev_port_num;
	int err;

	mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
	if (!mdev) {
		/* This means the port isn't affiliated yet. Get the
		 * info for the master port instead.
		 */
		put_mdev = false;
		mdev = dev->mdev;
		mdev_port_num = 1;
		port_num = 1;
	}

	/* Possible bad flows are checked before filling out props so in case
	 * of an error it will still be zeroed out.
	 */
	err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
					     mdev_port_num);
	if (err)
		goto out;

	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
				 &props->active_width);

	props->port_cap_flags |= IB_PORT_CM_SUP;
	props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;

	props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
					   roce_address_table_size);
	props->max_mtu = IB_MTU_4096;
	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
	props->pkey_tbl_len = 1;
	props->state = IB_PORT_DOWN;
	props->phys_state = 3;

	mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
	props->qkey_viol_cntr = qkey_viol_cntr;

	/* If this is a stub query for an unaffiliated port stop here */
	if (!put_mdev)
		goto out;

	ndev = mlx5_ib_get_netdev(device, port_num);
	if (!ndev)
		goto out;

	if (mlx5_lag_is_active(dev->mdev)) {
		rcu_read_lock();
		upper = netdev_master_upper_dev_get_rcu(ndev);
		if (upper) {
			dev_put(ndev);
			ndev = upper;
			dev_hold(ndev);
		}
		rcu_read_unlock();
	}

	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
		props->state = IB_PORT_ACTIVE;
		props->phys_state = 5;
	}

	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);

	dev_put(ndev);

	props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
out:
	if (put_mdev)
		mlx5_ib_put_native_port_mdev(dev, port_num);
	return err;
}

static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
			 unsigned int index, const union ib_gid *gid,
			 const struct ib_gid_attr *attr)
{
	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
	u8 roce_version = 0;
	u8 roce_l3_type = 0;
	bool vlan = false;
	u8 mac[ETH_ALEN];
	u16 vlan_id = 0;

	if (gid) {
		gid_type = attr->gid_type;
		ether_addr_copy(mac, attr->ndev->dev_addr);

		if (is_vlan_dev(attr->ndev)) {
			vlan = true;
			vlan_id = vlan_dev_vlan_id(attr->ndev);
		}
	}

	switch (gid_type) {
	case IB_GID_TYPE_IB:
		roce_version = MLX5_ROCE_VERSION_1;
		break;
	case IB_GID_TYPE_ROCE_UDP_ENCAP:
		roce_version = MLX5_ROCE_VERSION_2;
		if (ipv6_addr_v4mapped((void *)gid))
			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
		else
			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
		break;

	default:
		mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
	}

	return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
				      roce_l3_type, gid->raw, mac, vlan,
				      vlan_id, port_num);
}

static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, const union ib_gid *gid,
			   const struct ib_gid_attr *attr,
			   __always_unused void **context)
{
	return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
}

static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, __always_unused void **context)
{
	return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
}

__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
			       int index)
{
	struct ib_gid_attr attr;
	union ib_gid gid;

	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
		return 0;

	if (!attr.ndev)
		return 0;

	dev_put(attr.ndev);

	if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
		return 0;

	return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}

int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
			   int index, enum ib_gid_type *gid_type)
{
	struct ib_gid_attr attr;
	union ib_gid gid;
	int ret;

	ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
	if (ret)
		return ret;

	if (!attr.ndev)
		return -ENODEV;

	dev_put(attr.ndev);

	*gid_type = attr.gid_type;

	return 0;
}

static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
		return !MLX5_CAP_GEN(dev->mdev, ib_virt);
	return 0;
}

enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,
	MLX5_VPORT_ACCESS_METHOD_HCA,
	MLX5_VPORT_ACCESS_METHOD_NIC,
};

static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
		return MLX5_VPORT_ACCESS_METHOD_MAD;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		return MLX5_VPORT_ACCESS_METHOD_NIC;

	return MLX5_VPORT_ACCESS_METHOD_HCA;
}

static void get_atomic_caps(struct mlx5_ib_dev *dev,
			    u8 atomic_size_qp,
			    struct ib_device_attr *props)
{
	u8 tmp;
	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
	u8 atomic_req_8B_endianness_mode =
		MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);

	/* Check if HW supports 8 byte standard atomic operations and is
	 * capable of responding in host endianness.
	 */
	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
	    (atomic_req_8B_endianness_mode)) {
		props->atomic_cap = IB_ATOMIC_HCA;
	} else {
		props->atomic_cap = IB_ATOMIC_NONE;
	}
}

static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
			       struct ib_device_attr *props)
{
	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);

	get_atomic_caps(dev, atomic_size_qp, props);
}

static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
			       struct ib_device_attr *props)
{
	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);

	get_atomic_caps(dev, atomic_size_qp, props);
}

bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
{
	struct ib_device_attr props = {};

	get_atomic_caps_dc(dev, &props);
	return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
}

static int mlx5_query_system_image_guid(struct ib_device *ibdev,
					__be64 *sys_image_guid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_system_image_guid(ibdev,
							    sys_image_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
		break;

	default:
		return -EINVAL;
	}

	if (!err)
		*sys_image_guid = cpu_to_be64(tmp);

	return err;

}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
				u16 *max_pkeys)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
						pkey_table_size));
		return 0;

	default:
		return -EINVAL;
	}
}

static int mlx5_query_vendor_id(struct ib_device *ibdev,
				u32 *vendor_id)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

	default:
		return -EINVAL;
	}
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
				__be64 *node_guid)
{
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_node_guid(dev, node_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
		break;

	default:
		return -EINVAL;
	}

	if (!err)
		*node_guid = cpu_to_be64(tmp);

	return err;
}

struct mlx5_reg_node_desc {
	u8 desc[IB_DEVICE_NODE_DESC_MAX];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
	struct mlx5_reg_node_desc in;

	if (mlx5_use_mad_ifc(dev))
		return mlx5_query_mad_ifc_node_desc(dev, node_desc);

	memset(&in, 0, sizeof(in));

	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
				    sizeof(struct mlx5_reg_node_desc),
				    MLX5_REG_NODE_DESC, 0, 0);
}

static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props,
				struct ib_udata *uhw)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int err = -ENOMEM;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
	bool raw_support = !mlx5_core_mp_enabled(mdev);
	struct mlx5_ib_query_device_resp resp = {};
	size_t resp_len;
	u64 max_tso;

	resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
	if (uhw->outlen && uhw->outlen < resp_len)
		return -EINVAL;
	else
		resp.response_length = resp_len;

	if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
		return -EINVAL;

	memset(props, 0, sizeof(*props));
	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		(fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT |
		IB_DEVICE_SYS_IMAGE_GUID |
		IB_DEVICE_RC_RNR_NAK_GEN;

	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	if (MLX5_CAP_GEN(mdev, imaicl)) {
		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
					   IB_DEVICE_MEM_WINDOW_TYPE_2B;
		props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
		/* We support 'Gappy' memory registration too */
		props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
	}
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, sho)) {
		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
		/* At this stage no support for signature handover */
		props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
				      IB_PROT_T10DIF_TYPE_2 |
				      IB_PROT_T10DIF_TYPE_3;
		props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
				       IB_GUARD_T10DIF_CSUM;
	}
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
		if (MLX5_CAP_ETH(mdev, csum_cap)) {
			/* Legacy bit to support old userspace libraries */
			props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
			props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
		}

		if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
			props->raw_packet_caps |=
				IB_RAW_PACKET_CAP_CVLAN_STRIPPING;

		if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
			max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
			if (max_tso) {
				resp.tso_caps.max_tso = 1 << max_tso;
				resp.tso_caps.supported_qpts |=
					1 << IB_QPT_RAW_PACKET;
				resp.response_length += sizeof(resp.tso_caps);
			}
		}

		if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
			resp.rss_caps.rx_hash_function =
				MLX5_RX_HASH_FUNC_TOEPLITZ;
			resp.rss_caps.rx_hash_fields_mask =
				MLX5_RX_HASH_SRC_IPV4 |
				MLX5_RX_HASH_DST_IPV4 |
				MLX5_RX_HASH_SRC_IPV6 |
				MLX5_RX_HASH_DST_IPV6 |
				MLX5_RX_HASH_SRC_PORT_TCP |
				MLX5_RX_HASH_DST_PORT_TCP |
				MLX5_RX_HASH_SRC_PORT_UDP |
				MLX5_RX_HASH_DST_PORT_UDP |
				MLX5_RX_HASH_INNER;
			resp.response_length += sizeof(resp.rss_caps);
		}
	} else {
		if (field_avail(typeof(resp), tso_caps, uhw->outlen))
			resp.response_length += sizeof(resp.tso_caps);
		if (field_avail(typeof(resp), rss_caps, uhw->outlen))
			resp.response_length += sizeof(resp.rss_caps);
	}

	if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
		props->device_cap_flags |= IB_DEVICE_UD_TSO;
	}

	if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
	    MLX5_CAP_GEN(dev->mdev, general_notification_event) &&
	    raw_support)
		props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;

	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
	    MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;

	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
	    MLX5_CAP_ETH(dev->mdev, scatter_fcs) &&
	    raw_support) {
		/* Legacy bit to support old userspace libraries */
		props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
		props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
	}

	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;

	if (MLX5_CAP_GEN(mdev, end_pad))
		props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING;

	props->vendor_part_id = mdev->pdev->device;
	props->hw_ver = mdev->pdev->revision;

	props->max_mr_size = ~0ull;
	props->page_size_cap = ~(min_page_size - 1);
	props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
	max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		    sizeof(struct mlx5_wqe_data_seg);
	max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) /
		    sizeof(struct mlx5_wqe_data_seg);
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_sge_rd = MLX5_MAX_SGE_RD;
	props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge = max_rq_sg - 1;
	props->max_fast_reg_page_list_len =
		1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
	get_atomic_caps_qp(dev, props);
	props->masked_atomic_cap = IB_ATOMIC_NONE;
	props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah = INT_MAX;
	props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
	props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (MLX5_CAP_GEN(mdev, pg))
		props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
	props->odp_caps = dev->odp_caps;
#endif

	if (MLX5_CAP_GEN(mdev, cd))
		props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;

	if (!mlx5_core_is_pf(mdev))
		props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET && raw_support) {
		props->rss_caps.max_rwq_indirection_tables =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
		props->rss_caps.max_rwq_indirection_table_size =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
		props->rss_caps.supported_qpts =
			1 << IB_QPT_RAW_PACKET;
		props->max_wq_type_rq =
			1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
	}

	if (MLX5_CAP_GEN(mdev, tag_matching)) {
		props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
		props->tm_caps.max_num_tags =
			(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
		props->tm_caps.flags = IB_TM_CAP_RC;
		props->tm_caps.max_ops =
			1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
		props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
	}

	if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
		props->cq_caps.max_cq_moderation_count =
			MLX5_MAX_CQ_COUNT;
		props->cq_caps.max_cq_moderation_period =
			MLX5_MAX_CQ_PERIOD;
	}

	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
		resp.cqe_comp_caps.max_num =
			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
			MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
		resp.cqe_comp_caps.supported_format =
			MLX5_IB_CQE_RES_FORMAT_HASH |
			MLX5_IB_CQE_RES_FORMAT_CSUM;
		resp.response_length += sizeof(resp.cqe_comp_caps);
	}

	if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
	    raw_support) {
		if (MLX5_CAP_QOS(mdev, packet_pacing) &&
		    MLX5_CAP_GEN(mdev, qos)) {
			resp.packet_pacing_caps.qp_rate_limit_max =
				MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
			resp.packet_pacing_caps.qp_rate_limit_min =
				MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
			resp.packet_pacing_caps.supported_qpts |=
				1 << IB_QPT_RAW_PACKET;
		}
		resp.response_length += sizeof(resp.packet_pacing_caps);
	}

	if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
			uhw->outlen)) {
		if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
			resp.mlx5_ib_support_multi_pkt_send_wqes =
				MLX5_IB_ALLOW_MPW;

		if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
			resp.mlx5_ib_support_multi_pkt_send_wqes |=
				MLX5_IB_SUPPORT_EMPW;

		resp.response_length +=
			sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
	}

	if (field_avail(typeof(resp), flags, uhw->outlen)) {
		resp.response_length += sizeof(resp.flags);

		if (MLX5_CAP_GEN(mdev, cqe_compression_128))
			resp.flags |=
				MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP;

		if (MLX5_CAP_GEN(mdev, cqe_128_always))
			resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
	}

	if (field_avail(typeof(resp), sw_parsing_caps,
			uhw->outlen)) {
		resp.response_length += sizeof(resp.sw_parsing_caps);
		if (MLX5_CAP_ETH(mdev, swp)) {
			resp.sw_parsing_caps.sw_parsing_offloads |=
				MLX5_IB_SW_PARSING;

			if (MLX5_CAP_ETH(mdev, swp_csum))
				resp.sw_parsing_caps.sw_parsing_offloads |=
					MLX5_IB_SW_PARSING_CSUM;

			if (MLX5_CAP_ETH(mdev, swp_lso))
				resp.sw_parsing_caps.sw_parsing_offloads |=
					MLX5_IB_SW_PARSING_LSO;

			if (resp.sw_parsing_caps.sw_parsing_offloads)
				resp.sw_parsing_caps.supported_qpts =
					BIT(IB_QPT_RAW_PACKET);
		}
	}

	if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
	    raw_support) {
		resp.response_length += sizeof(resp.striding_rq_caps);
		if (MLX5_CAP_GEN(mdev, striding_rq)) {
			resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
				MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
			resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
				MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
			resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
				MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
			resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
				MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
			resp.striding_rq_caps.supported_qpts =
				BIT(IB_QPT_RAW_PACKET);
		}
	}

	if (field_avail(typeof(resp), tunnel_offloads_caps,
			uhw->outlen)) {
		resp.response_length += sizeof(resp.tunnel_offloads_caps);
		if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
			resp.tunnel_offloads_caps |=
				MLX5_IB_TUNNELED_OFFLOADS_VXLAN;
		if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx))
			resp.tunnel_offloads_caps |=
				MLX5_IB_TUNNELED_OFFLOADS_GENEVE;
		if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
			resp.tunnel_offloads_caps |=
				MLX5_IB_TUNNELED_OFFLOADS_GRE;
	}

	if (uhw->outlen) {
		err = ib_copy_to_udata(uhw, &resp, resp.response_length);

		if (err)
			return err;
	}

	return 0;
}

enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X = 1 << 0,
	MLX5_IB_WIDTH_2X = 1 << 1,
	MLX5_IB_WIDTH_4X = 1 << 2,
	MLX5_IB_WIDTH_8X = 1 << 3,
	MLX5_IB_WIDTH_12X = 1 << 4
};

static int translate_active_width(struct ib_device *ibdev, u8 active_width,
				  u8 *ib_width)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int err = 0;

	if (active_width & MLX5_IB_WIDTH_1X) {
		*ib_width = IB_WIDTH_1X;
	} else if (active_width & MLX5_IB_WIDTH_2X) {
		mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
			    (int)active_width);
		err = -EINVAL;
	} else if (active_width & MLX5_IB_WIDTH_4X) {
		*ib_width = IB_WIDTH_4X;
	} else if (active_width & MLX5_IB_WIDTH_8X) {
		*ib_width = IB_WIDTH_8X;
	} else if (active_width & MLX5_IB_WIDTH_12X) {
		*ib_width = IB_WIDTH_12X;
	} else {
		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
			    (int)active_width);
		err = -EINVAL;
	}

	return err;
}

static int mlx5_mtu_to_ib_mtu(int mtu)
{
	switch (mtu) {
	case 256: return 1;
	case 512: return 2;
	case 1024: return 3;
	case 2048: return 4;
	case 4096: return 5;
	default:
		pr_warn("invalid mtu\n");
		return -1;
	}
}

enum ib_max_vl_num {
	__IB_MAX_VL_0 = 1,
	__IB_MAX_VL_0_1 = 2,
	__IB_MAX_VL_0_3 = 3,
	__IB_MAX_VL_0_7 = 4,
	__IB_MAX_VL_0_14 = 5,
};

enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0 = 1,
	MLX5_VL_HW_0_1 = 2,
	MLX5_VL_HW_0_2 = 3,
	MLX5_VL_HW_0_3 = 4,
	MLX5_VL_HW_0_4 = 5,
	MLX5_VL_HW_0_5 = 6,
	MLX5_VL_HW_0_6 = 7,
	MLX5_VL_HW_0_7 = 8,
	MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
				u8 *max_vl_num)
{
	switch (vl_hw_cap) {
	case MLX5_VL_HW_0:
		*max_vl_num = __IB_MAX_VL_0;
		break;
	case MLX5_VL_HW_0_1:
		*max_vl_num = __IB_MAX_VL_0_1;
		break;
	case MLX5_VL_HW_0_3:
		*max_vl_num = __IB_MAX_VL_0_3;
		break;
	case MLX5_VL_HW_0_7:
		*max_vl_num = __IB_MAX_VL_0_7;
		break;
	case MLX5_VL_HW_0_14:
		*max_vl_num = __IB_MAX_VL_0_14;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
			       struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_hca_vport_context *rep;
	u16 max_mtu;
	u16 oper_mtu;
	int err;
	u8 ib_link_width_oper;
	u8 vl_hw_cap;

	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	/* props being zeroed by the caller, avoid zeroing it here */

	err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
	if (err)
		goto out;

	props->lid = rep->lid;
	props->lmc = rep->lmc;
	props->sm_lid = rep->sm_lid;
	props->sm_sl = rep->sm_sl;
	props->state = rep->vport_state;
	props->phys_state = rep->port_physical_state;
	props->port_cap_flags = rep->cap_mask1;
	props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr = rep->pkey_violation_counter;
	props->qkey_viol_cntr = rep->qkey_violation_counter;
	props->subnet_timeout = rep->subnet_timeout;
	props->init_type_reply = rep->init_type_reply;
	props->grh_required = rep->grh_required;

	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;
	err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
	if (err)
		goto out;

	mlx5_query_port_max_mtu(mdev, &max_mtu, port);

	props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);

	mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);

	props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);

	err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, vl_hw_cap,
				   &props->max_vl_num);
out:
	kfree(rep);
	return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
		       struct ib_port_attr *props)
{
	unsigned int count;
	int ret;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		ret = mlx5_query_mad_ifc_port(ibdev, port, props);
		break;

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		ret = mlx5_query_hca_port(ibdev, port, props);
		break;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		ret = mlx5_query_port_roce(ibdev, port, props);
		break;

	default:
		ret = -EINVAL;
	}

	if (!ret && props) {
		struct mlx5_ib_dev *dev = to_mdev(ibdev);
		struct mlx5_core_dev *mdev;
		bool put_mdev = true;

		mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL);
		if (!mdev) {
			/* If the port isn't affiliated yet query the master.
			 * The master and slave will have the same values.
			 */
			mdev = dev->mdev;
			port = 1;
			put_mdev = false;
		}
		count = mlx5_core_reserved_gids_count(mdev);
		if (put_mdev)
			mlx5_ib_put_native_port_mdev(dev, port);
		props->gid_tbl_len -= count;
	}
	return ret;
}

static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);

	default:
		return -EINVAL;
	}

}

static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
				   u16 index, u16 *pkey)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev;
	bool put_mdev = true;
	u8 mdev_port_num;
	int err;

	mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
	if (!mdev) {
		/* The port isn't affiliated yet, get the PKey from the master
		 * port. For RoCE the PKey tables will be the same.
		 */
		put_mdev = false;
		mdev = dev->mdev;
		mdev_port_num = 1;
	}

	err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0,
					index, pkey);
	if (put_mdev)
		mlx5_ib_put_native_port_mdev(dev, port);

	return err;
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey);
	default:
		return -EINVAL;
	}
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_reg_node_desc in;
	struct mlx5_reg_node_desc out;
	int err;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
		return 0;

	/*
	 * If possible, pass node desc to FW, so it can generate
	 * a 144 trap. If cmd fails, just ignore.
	 */
	memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
	if (err)
		return err;

	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);

	return err;
}

static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
				u32 value)
{
	struct mlx5_hca_vport_context ctx = {};
	struct mlx5_core_dev *mdev;
	u8 mdev_port_num;
	int err;

	mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
	if (!mdev)
		return -ENODEV;

	err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx);
	if (err)
		goto out;

	if (~ctx.cap_mask1_perm & mask) {
		mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
			     mask, ctx.cap_mask1_perm);
		err = -EINVAL;
		goto out;
	}

	ctx.cap_mask1 = value;
	ctx.cap_mask1_perm = mask;
	err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num,
						 0, &ctx);

out:
	mlx5_ib_put_native_port_mdev(dev, port_num);

	return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 tmp;
	int err;
	u32 change_mask;
	u32 value;
	bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
		      IB_LINK_LAYER_INFINIBAND);

	/* CM layer calls ib_modify_port() regardless of the link layer. For
	 * Ethernet ports, qkey violation and Port capabilities are meaningless.
	 */
	if (!is_ib)
		return 0;

	if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
		change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
		value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
		return set_port_caps_atomic(dev, port, change_mask, value);
	}

	mutex_lock(&dev->cap_mask_mutex);

	err = ib_query_port(ibdev, port, &attr);
	if (err)
		goto out;

	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
		~props->clr_port_cap_mask;

	err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
	mutex_unlock(&dev->cap_mask_mutex);
	return err;
}

static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
{
	mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
		    caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
}

static u16 calc_dynamic_bfregs(int uars_per_sys_page)
{
	/* Large page with non 4k uar support might limit the dynamic size */
	if (uars_per_sys_page == 1 && PAGE_SIZE > 4096)
		return MLX5_MIN_DYN_BFREGS;

	return MLX5_MAX_DYN_BFREGS;
}

static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
			     struct mlx5_ib_alloc_ucontext_req_v2 *req,
			     struct mlx5_bfreg_info *bfregi)
{
	int uars_per_sys_page;
	int bfregs_per_sys_page;
	int ref_bfregs = req->total_num_bfregs;

	if (req->total_num_bfregs == 0)
		return -EINVAL;

	BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
	BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);

	if (req->total_num_bfregs > MLX5_MAX_BFREGS)
		return -ENOMEM;

	uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
	/* This holds the required static allocation asked by the user */
	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
		return -EINVAL;

	bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
	bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
	bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
	bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;

	mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
		    MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
		    lib_uar_4k ? "yes" : "no", ref_bfregs,
		    req->total_num_bfregs, bfregi->total_num_bfregs,
		    bfregi->num_sys_pages);

	return 0;
}

static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
{
	struct mlx5_bfreg_info *bfregi;
	int err;
	int i;

	bfregi = &context->bfregi;
	for (i = 0; i < bfregi->num_static_sys_pages; i++) {
		err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
		if (err)
			goto error;

		mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
	}

	for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
		bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;

	return 0;

error:
	for (--i; i >= 0; i--)
		if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
			mlx5_ib_warn(dev, "failed to free uar %d\n", i);

	return err;
}

static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
{
	struct mlx5_bfreg_info *bfregi;
	int err;
	int i;

	bfregi = &context->bfregi;
	for (i = 0; i < bfregi->num_sys_pages; i++) {
		if (i < bfregi->num_static_sys_pages ||
		    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
			err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
			if (err) {
				mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
				return err;
			}
		}
	}

	return 0;
}

static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
{
	int err;

	err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
	if (err)
		return err;

	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
		return err;

	mutex_lock(&dev->lb_mutex);
	dev->user_td++;

	if (dev->user_td == 2)
		err = mlx5_nic_vport_update_local_lb(dev->mdev, true);

	mutex_unlock(&dev->lb_mutex);
	return err;
}

static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
{
	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);

	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
		return;

	mutex_lock(&dev->lb_mutex);
	dev->user_td--;

	if (dev->user_td < 2)
		mlx5_nic_vport_update_local_lb(dev->mdev, false);

	mutex_unlock(&dev->lb_mutex);
}

static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
	struct mlx5_ib_alloc_ucontext_resp resp = {};
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_ucontext *context;
	struct mlx5_bfreg_info *bfregi;
	int ver;
	int err;
	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
				     max_cqe_version);
	bool lib_uar_4k;

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
		ver = 0;
	else if (udata->inlen >= min_req_v2)
		ver = 2;
	else
		return ERR_PTR(-EINVAL);

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.flags)
		return ERR_PTR(-EINVAL);

	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
				     MLX5_NON_FP_BFREGS_PER_UAR);
	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
		return ERR_PTR(-EINVAL);

	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
	resp.cache_line_size = cache_line_size();
	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
	resp.cqe_version = min_t(__u8,
				 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
				 req.max_cqe_version);
	resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
				MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
	resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
				MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
	bfregi = &context->bfregi;

	/* updates req->total_num_bfregs */
	err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
	if (err)
		goto out_ctx;

	mutex_init(&bfregi->lock);
	bfregi->lib_uar_4k = lib_uar_4k;
	bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
				GFP_KERNEL);
	if (!bfregi->count) {
		err = -ENOMEM;
		goto out_ctx;
	}

	bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
				    sizeof(*bfregi->sys_pages),
				    GFP_KERNEL);
	if (!bfregi->sys_pages) {
		err = -ENOMEM;
		goto out_count;
	}

	err = allocate_uars(dev, context);
	if (err)
		goto out_sys_pages;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif

	context->upd_xlt_page = __get_free_page(GFP_KERNEL);
	if (!context->upd_xlt_page) {
		err = -ENOMEM;
		goto out_uars;
	}
	mutex_init(&context->upd_xlt_page_mutex);

	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
		err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
		if (err)
			goto out_page;
	}

	INIT_LIST_HEAD(&context->vma_private_list);
	mutex_init(&context->vma_private_list_mutex);
	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	resp.tot_bfregs = req.total_num_bfregs;
	resp.num_ports = dev->num_ports;

	if (field_avail(typeof(resp), cqe_version, udata->outlen))
		resp.response_length += sizeof(resp.cqe_version);

	if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
		resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
				      MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
		resp.response_length += sizeof(resp.cmds_supp_uhw);
	}

	if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
		if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
			mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
			resp.eth_min_inline++;
		}
		resp.response_length += sizeof(resp.eth_min_inline);
	}

	if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
		if (mdev->clock_info)
			resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
		resp.response_length += sizeof(resp.clock_info_versions);
	}

	/*
	 * We don't want to expose information from the PCI bar that is located
	 * after 4096 bytes, so if the arch only supports larger pages, let's
	 * pretend we don't support reading the HCA's core clock. This is also
	 * forced by mmap function.
	 */
	if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
		if (PAGE_SIZE <= 4096) {
			resp.comp_mask |=
				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
			resp.hca_core_clock_offset =
				offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
		}
		resp.response_length += sizeof(resp.hca_core_clock_offset);
	}

	if (field_avail(typeof(resp), log_uar_size, udata->outlen))
		resp.response_length += sizeof(resp.log_uar_size);

	if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
		resp.response_length += sizeof(resp.num_uars_per_page);

	if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
		resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
		resp.response_length += sizeof(resp.num_dyn_bfregs);
	}

	err = ib_copy_to_udata(udata, &resp, resp.response_length);
	if (err)
		goto out_td;

	bfregi->ver = ver;
	bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
	context->cqe_version = resp.cqe_version;
	context->lib_caps = req.lib_caps;
	print_lib_caps(dev, context->lib_caps);

	return &context->ibucontext;

out_td:
	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
		mlx5_ib_dealloc_transport_domain(dev, context->tdn);

out_page:
	free_page(context->upd_xlt_page);

out_uars:
	deallocate_uars(dev, context);

out_sys_pages:
	kfree(bfregi->sys_pages);

out_count:
	kfree(bfregi->count);

out_ctx:
	kfree(context);

	return ERR_PTR(err);
}

static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_bfreg_info *bfregi;

	bfregi = &context->bfregi;
	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
		mlx5_ib_dealloc_transport_domain(dev, context->tdn);

	free_page(context->upd_xlt_page);
	deallocate_uars(dev, context);
	kfree(bfregi->sys_pages);
	kfree(bfregi->count);
	kfree(context);

	return 0;
}
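
/*
 * The helpers below translate userspace mmap requests: uar_index2pfn()
 * maps a UAR index to a page frame within BAR 0 (with uar_4k, several
 * firmware UARs share one system page), while get_command()/get_arg()
 * split the mmap offset into a command and an argument, and
 * get_extended_index() adds the extra offset byte used for indices
 * larger than 255.
 */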
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
				 int uar_idx)
{
	int fw_uars_per_page;

	fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
				MLX5_UARS_IN_PAGE : 1;

	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}

static int get_command(unsigned long offset)
{
	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
	return get_arg(offset);
}

/* Index resides in an extra byte to enable larger values than 255 */
static int get_extended_index(unsigned long offset)
{
	return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}

static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
	/* vma_open is called when a new VMA is created on top of our VMA. This
	 * is done through either mremap flow or split_vma (usually due to
	 * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
	 * as this VMA is strongly hardware related. Therefore we set the
	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
	 * calling us again and trying to do incorrect actions. We assume that
	 * the original VMA size is exactly a single page, and therefore all
	 * "splitting" operation will not happen to it.
	 */
	area->vm_ops = NULL;
}

static void mlx5_ib_vma_close(struct vm_area_struct *area)
{
	struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;

	/* It's guaranteed that all VMAs opened on a FD are closed before the
	 * file itself is closed, therefore no sync is needed with the regular
	 * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
	 * However need a sync with accessing the vma as part of
	 * mlx5_ib_disassociate_ucontext.
	 * The close operation is usually called under mm->mmap_sem except when
	 * process is exiting.
	 * The exiting case is handled explicitly as part of
	 * mlx5_ib_disassociate_ucontext.
	 */
	mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;

	/* setting the vma context pointer to null in the mlx5_ib driver's
	 * private data, to protect a race condition in
	 * mlx5_ib_disassociate_ucontext().
1872 */ 1873 mlx5_ib_vma_priv_data->vma = NULL; 1874 mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); 1875 list_del(&mlx5_ib_vma_priv_data->list); 1876 mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); 1877 kfree(mlx5_ib_vma_priv_data); 1878 } 1879 1880 static const struct vm_operations_struct mlx5_ib_vm_ops = { 1881 .open = mlx5_ib_vma_open, 1882 .close = mlx5_ib_vma_close 1883 }; 1884 1885 static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, 1886 struct mlx5_ib_ucontext *ctx) 1887 { 1888 struct mlx5_ib_vma_private_data *vma_prv; 1889 struct list_head *vma_head = &ctx->vma_private_list; 1890 1891 vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); 1892 if (!vma_prv) 1893 return -ENOMEM; 1894 1895 vma_prv->vma = vma; 1896 vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; 1897 vma->vm_private_data = vma_prv; 1898 vma->vm_ops = &mlx5_ib_vm_ops; 1899 1900 mutex_lock(&ctx->vma_private_list_mutex); 1901 list_add(&vma_prv->list, vma_head); 1902 mutex_unlock(&ctx->vma_private_list_mutex); 1903 1904 return 0; 1905 } 1906 1907 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 1908 { 1909 int ret; 1910 struct vm_area_struct *vma; 1911 struct mlx5_ib_vma_private_data *vma_private, *n; 1912 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1913 struct task_struct *owning_process = NULL; 1914 struct mm_struct *owning_mm = NULL; 1915 1916 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); 1917 if (!owning_process) 1918 return; 1919 1920 owning_mm = get_task_mm(owning_process); 1921 if (!owning_mm) { 1922 pr_info("no mm, disassociate ucontext is pending task termination\n"); 1923 while (1) { 1924 put_task_struct(owning_process); 1925 usleep_range(1000, 2000); 1926 owning_process = get_pid_task(ibcontext->tgid, 1927 PIDTYPE_PID); 1928 if (!owning_process || 1929 owning_process->state == TASK_DEAD) { 1930 pr_info("disassociate ucontext done, task was terminated\n"); 1931 /* in case task was dead need to release the 1932 * task struct. 1933 */ 1934 if (owning_process) 1935 put_task_struct(owning_process); 1936 return; 1937 } 1938 } 1939 } 1940 1941 /* need to protect from a race on closing the vma as part of 1942 * mlx5_ib_vma_close. 1943 */ 1944 down_write(&owning_mm->mmap_sem); 1945 mutex_lock(&context->vma_private_list_mutex); 1946 list_for_each_entry_safe(vma_private, n, &context->vma_private_list, 1947 list) { 1948 vma = vma_private->vma; 1949 ret = zap_vma_ptes(vma, vma->vm_start, 1950 PAGE_SIZE); 1951 WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); 1952 /* context going to be destroyed, should 1953 * not access ops any more. 
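		 * (The PTEs were zapped above; dropping VM_SHARED/VM_MAYSHARE and
		 * vm_ops below is meant to keep the now-orphaned VMA from calling
		 * back into the driver after the ucontext is torn down.)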
1954 */ 1955 vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); 1956 vma->vm_ops = NULL; 1957 list_del(&vma_private->list); 1958 kfree(vma_private); 1959 } 1960 mutex_unlock(&context->vma_private_list_mutex); 1961 up_write(&owning_mm->mmap_sem); 1962 mmput(owning_mm); 1963 put_task_struct(owning_process); 1964 } 1965 1966 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 1967 { 1968 switch (cmd) { 1969 case MLX5_IB_MMAP_WC_PAGE: 1970 return "WC"; 1971 case MLX5_IB_MMAP_REGULAR_PAGE: 1972 return "best effort WC"; 1973 case MLX5_IB_MMAP_NC_PAGE: 1974 return "NC"; 1975 default: 1976 return NULL; 1977 } 1978 } 1979 1980 static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, 1981 struct vm_area_struct *vma, 1982 struct mlx5_ib_ucontext *context) 1983 { 1984 phys_addr_t pfn; 1985 int err; 1986 1987 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1988 return -EINVAL; 1989 1990 if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) 1991 return -EOPNOTSUPP; 1992 1993 if (vma->vm_flags & VM_WRITE) 1994 return -EPERM; 1995 1996 if (!dev->mdev->clock_info_page) 1997 return -EOPNOTSUPP; 1998 1999 pfn = page_to_pfn(dev->mdev->clock_info_page); 2000 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, 2001 vma->vm_page_prot); 2002 if (err) 2003 return err; 2004 2005 mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n", 2006 vma->vm_start, 2007 (unsigned long long)pfn << PAGE_SHIFT); 2008 2009 return mlx5_ib_set_vma_data(vma, context); 2010 } 2011 2012 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 2013 struct vm_area_struct *vma, 2014 struct mlx5_ib_ucontext *context) 2015 { 2016 struct mlx5_bfreg_info *bfregi = &context->bfregi; 2017 int err; 2018 unsigned long idx; 2019 phys_addr_t pfn, pa; 2020 pgprot_t prot; 2021 u32 bfreg_dyn_idx = 0; 2022 u32 uar_index; 2023 int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC); 2024 int max_valid_idx = dyn_uar ? bfregi->num_sys_pages : 2025 bfregi->num_static_sys_pages; 2026 2027 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 2028 return -EINVAL; 2029 2030 if (dyn_uar) 2031 idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages; 2032 else 2033 idx = get_index(vma->vm_pgoff); 2034 2035 if (idx >= max_valid_idx) { 2036 mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n", 2037 idx, max_valid_idx); 2038 return -EINVAL; 2039 } 2040 2041 switch (cmd) { 2042 case MLX5_IB_MMAP_WC_PAGE: 2043 case MLX5_IB_MMAP_ALLOC_WC: 2044 /* Some architectures don't support WC memory */ 2045 #if defined(CONFIG_X86) 2046 if (!pat_enabled()) 2047 return -EPERM; 2048 #elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) 2049 return -EPERM; 2050 #endif 2051 /* fall through */ 2052 case MLX5_IB_MMAP_REGULAR_PAGE: 2053 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ 2054 prot = pgprot_writecombine(vma->vm_page_prot); 2055 break; 2056 case MLX5_IB_MMAP_NC_PAGE: 2057 prot = pgprot_noncached(vma->vm_page_prot); 2058 break; 2059 default: 2060 return -EINVAL; 2061 } 2062 2063 if (dyn_uar) { 2064 int uars_per_page; 2065 2066 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k); 2067 bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR); 2068 if (bfreg_dyn_idx >= bfregi->total_num_bfregs) { 2069 mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n", 2070 bfreg_dyn_idx, bfregi->total_num_bfregs); 2071 return -EINVAL; 2072 } 2073 2074 mutex_lock(&bfregi->lock); 2075 /* Fail if uar already allocated, first bfreg index of each 2076 * page holds its count. 
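		 * bfreg_dyn_idx, computed above as
		 * idx * uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR, is the first
		 * bfreg of the idx'th dynamic UAR page, so its count doubles as
		 * the "this page is already mapped" marker.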
2077 */ 2078 if (bfregi->count[bfreg_dyn_idx]) { 2079 mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx); 2080 mutex_unlock(&bfregi->lock); 2081 return -EINVAL; 2082 } 2083 2084 bfregi->count[bfreg_dyn_idx]++; 2085 mutex_unlock(&bfregi->lock); 2086 2087 err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); 2088 if (err) { 2089 mlx5_ib_warn(dev, "UAR alloc failed\n"); 2090 goto free_bfreg; 2091 } 2092 } else { 2093 uar_index = bfregi->sys_pages[idx]; 2094 } 2095 2096 pfn = uar_index2pfn(dev, uar_index); 2097 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 2098 2099 vma->vm_page_prot = prot; 2100 err = io_remap_pfn_range(vma, vma->vm_start, pfn, 2101 PAGE_SIZE, vma->vm_page_prot); 2102 if (err) { 2103 mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", 2104 err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); 2105 err = -EAGAIN; 2106 goto err; 2107 } 2108 2109 pa = pfn << PAGE_SHIFT; 2110 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), 2111 vma->vm_start, &pa); 2112 2113 err = mlx5_ib_set_vma_data(vma, context); 2114 if (err) 2115 goto err; 2116 2117 if (dyn_uar) 2118 bfregi->sys_pages[idx] = uar_index; 2119 return 0; 2120 2121 err: 2122 if (!dyn_uar) 2123 return err; 2124 2125 mlx5_cmd_free_uar(dev->mdev, idx); 2126 2127 free_bfreg: 2128 mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx); 2129 2130 return err; 2131 } 2132 2133 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 2134 { 2135 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 2136 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 2137 unsigned long command; 2138 phys_addr_t pfn; 2139 2140 command = get_command(vma->vm_pgoff); 2141 switch (command) { 2142 case MLX5_IB_MMAP_WC_PAGE: 2143 case MLX5_IB_MMAP_NC_PAGE: 2144 case MLX5_IB_MMAP_REGULAR_PAGE: 2145 case MLX5_IB_MMAP_ALLOC_WC: 2146 return uar_mmap(dev, command, vma, context); 2147 2148 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 2149 return -ENOSYS; 2150 2151 case MLX5_IB_MMAP_CORE_CLOCK: 2152 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 2153 return -EINVAL; 2154 2155 if (vma->vm_flags & VM_WRITE) 2156 return -EPERM; 2157 2158 /* Don't expose to user-space information it shouldn't have */ 2159 if (PAGE_SIZE > 4096) 2160 return -EOPNOTSUPP; 2161 2162 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 2163 pfn = (dev->mdev->iseg_base + 2164 offsetof(struct mlx5_init_seg, internal_timer_h)) >> 2165 PAGE_SHIFT; 2166 if (io_remap_pfn_range(vma, vma->vm_start, pfn, 2167 PAGE_SIZE, vma->vm_page_prot)) 2168 return -EAGAIN; 2169 2170 mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", 2171 vma->vm_start, 2172 (unsigned long long)pfn << PAGE_SHIFT); 2173 break; 2174 case MLX5_IB_MMAP_CLOCK_INFO: 2175 return mlx5_ib_mmap_clock_info_page(dev, vma, context); 2176 2177 default: 2178 return -EINVAL; 2179 } 2180 2181 return 0; 2182 } 2183 2184 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, 2185 struct ib_ucontext *context, 2186 struct ib_udata *udata) 2187 { 2188 struct mlx5_ib_alloc_pd_resp resp; 2189 struct mlx5_ib_pd *pd; 2190 int err; 2191 2192 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 2193 if (!pd) 2194 return ERR_PTR(-ENOMEM); 2195 2196 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 2197 if (err) { 2198 kfree(pd); 2199 return ERR_PTR(err); 2200 } 2201 2202 if (context) { 2203 resp.pdn = pd->pdn; 2204 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 2205 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 
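			/*
			 * ib_copy_to_udata() failed: the firmware PD was
			 * released just above, so only the host object is
			 * left to free before reporting -EFAULT.
			 */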
2206 kfree(pd); 2207 return ERR_PTR(-EFAULT); 2208 } 2209 } 2210 2211 return &pd->ibpd; 2212 } 2213 2214 static int mlx5_ib_dealloc_pd(struct ib_pd *pd) 2215 { 2216 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 2217 struct mlx5_ib_pd *mpd = to_mpd(pd); 2218 2219 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 2220 kfree(mpd); 2221 2222 return 0; 2223 } 2224 2225 enum { 2226 MATCH_CRITERIA_ENABLE_OUTER_BIT, 2227 MATCH_CRITERIA_ENABLE_MISC_BIT, 2228 MATCH_CRITERIA_ENABLE_INNER_BIT 2229 }; 2230 2231 #define HEADER_IS_ZERO(match_criteria, headers) \ 2232 !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 2233 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ 2234 2235 static u8 get_match_criteria_enable(u32 *match_criteria) 2236 { 2237 u8 match_criteria_enable; 2238 2239 match_criteria_enable = 2240 (!HEADER_IS_ZERO(match_criteria, outer_headers)) << 2241 MATCH_CRITERIA_ENABLE_OUTER_BIT; 2242 match_criteria_enable |= 2243 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << 2244 MATCH_CRITERIA_ENABLE_MISC_BIT; 2245 match_criteria_enable |= 2246 (!HEADER_IS_ZERO(match_criteria, inner_headers)) << 2247 MATCH_CRITERIA_ENABLE_INNER_BIT; 2248 2249 return match_criteria_enable; 2250 } 2251 2252 static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) 2253 { 2254 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); 2255 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); 2256 } 2257 2258 static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, 2259 bool inner) 2260 { 2261 if (inner) { 2262 MLX5_SET(fte_match_set_misc, 2263 misc_c, inner_ipv6_flow_label, mask); 2264 MLX5_SET(fte_match_set_misc, 2265 misc_v, inner_ipv6_flow_label, val); 2266 } else { 2267 MLX5_SET(fte_match_set_misc, 2268 misc_c, outer_ipv6_flow_label, mask); 2269 MLX5_SET(fte_match_set_misc, 2270 misc_v, outer_ipv6_flow_label, val); 2271 } 2272 } 2273 2274 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) 2275 { 2276 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); 2277 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); 2278 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); 2279 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); 2280 } 2281 2282 #define LAST_ETH_FIELD vlan_tag 2283 #define LAST_IB_FIELD sl 2284 #define LAST_IPV4_FIELD tos 2285 #define LAST_IPV6_FIELD traffic_class 2286 #define LAST_TCP_UDP_FIELD src_port 2287 #define LAST_TUNNEL_FIELD tunnel_id 2288 #define LAST_FLOW_TAG_FIELD tag_id 2289 #define LAST_DROP_FIELD size 2290 2291 /* Field is the last supported field */ 2292 #define FIELDS_NOT_SUPPORTED(filter, field)\ 2293 memchr_inv((void *)&filter.field +\ 2294 sizeof(filter.field), 0,\ 2295 sizeof(filter) -\ 2296 offsetof(typeof(filter), field) -\ 2297 sizeof(filter.field)) 2298 2299 #define IPV4_VERSION 4 2300 #define IPV6_VERSION 6 2301 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, 2302 u32 *match_v, const union ib_flow_spec *ib_spec, 2303 u32 *tag_id, bool *is_drop) 2304 { 2305 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, 2306 misc_parameters); 2307 void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, 2308 misc_parameters); 2309 void *headers_c; 2310 void *headers_v; 2311 int match_ipv; 2312 2313 if (ib_spec->type & IB_FLOW_SPEC_INNER) { 2314 headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 2315 inner_headers); 2316 headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 2317 inner_headers); 2318 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2319 
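		/*
		 * Whether ip_version can be matched directly is a
		 * per-direction (inner vs. outer headers) flow table
		 * capability; when it is absent the parser falls back to an
		 * ethertype match below.
		 */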
ft_field_support.inner_ip_version); 2320 } else { 2321 headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 2322 outer_headers); 2323 headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 2324 outer_headers); 2325 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2326 ft_field_support.outer_ip_version); 2327 } 2328 2329 switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { 2330 case IB_FLOW_SPEC_ETH: 2331 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) 2332 return -EOPNOTSUPP; 2333 2334 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2335 dmac_47_16), 2336 ib_spec->eth.mask.dst_mac); 2337 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2338 dmac_47_16), 2339 ib_spec->eth.val.dst_mac); 2340 2341 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2342 smac_47_16), 2343 ib_spec->eth.mask.src_mac); 2344 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2345 smac_47_16), 2346 ib_spec->eth.val.src_mac); 2347 2348 if (ib_spec->eth.mask.vlan_tag) { 2349 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2350 cvlan_tag, 1); 2351 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2352 cvlan_tag, 1); 2353 2354 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2355 first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); 2356 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2357 first_vid, ntohs(ib_spec->eth.val.vlan_tag)); 2358 2359 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2360 first_cfi, 2361 ntohs(ib_spec->eth.mask.vlan_tag) >> 12); 2362 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2363 first_cfi, 2364 ntohs(ib_spec->eth.val.vlan_tag) >> 12); 2365 2366 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2367 first_prio, 2368 ntohs(ib_spec->eth.mask.vlan_tag) >> 13); 2369 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2370 first_prio, 2371 ntohs(ib_spec->eth.val.vlan_tag) >> 13); 2372 } 2373 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2374 ethertype, ntohs(ib_spec->eth.mask.ether_type)); 2375 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2376 ethertype, ntohs(ib_spec->eth.val.ether_type)); 2377 break; 2378 case IB_FLOW_SPEC_IPV4: 2379 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) 2380 return -EOPNOTSUPP; 2381 2382 if (match_ipv) { 2383 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2384 ip_version, 0xf); 2385 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2386 ip_version, IPV4_VERSION); 2387 } else { 2388 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2389 ethertype, 0xffff); 2390 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2391 ethertype, ETH_P_IP); 2392 } 2393 2394 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2395 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2396 &ib_spec->ipv4.mask.src_ip, 2397 sizeof(ib_spec->ipv4.mask.src_ip)); 2398 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2399 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2400 &ib_spec->ipv4.val.src_ip, 2401 sizeof(ib_spec->ipv4.val.src_ip)); 2402 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2403 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2404 &ib_spec->ipv4.mask.dst_ip, 2405 sizeof(ib_spec->ipv4.mask.dst_ip)); 2406 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2407 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2408 &ib_spec->ipv4.val.dst_ip, 2409 sizeof(ib_spec->ipv4.val.dst_ip)); 2410 2411 set_tos(headers_c, headers_v, 2412 ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); 2413 2414 set_proto(headers_c, headers_v, 2415 ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); 2416 break; 2417 case IB_FLOW_SPEC_IPV6: 2418 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) 2419 return -EOPNOTSUPP; 2420 2421 if (match_ipv) { 2422 
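			/*
			 * Same scheme as the IPv4 case: constrain the IP
			 * version either through the 4-bit ip_version field
			 * when the device can match on it, or through an
			 * exact ethertype match (ETH_P_IPV6) otherwise.
			 */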
MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2423 ip_version, 0xf); 2424 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2425 ip_version, IPV6_VERSION); 2426 } else { 2427 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2428 ethertype, 0xffff); 2429 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2430 ethertype, ETH_P_IPV6); 2431 } 2432 2433 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2434 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2435 &ib_spec->ipv6.mask.src_ip, 2436 sizeof(ib_spec->ipv6.mask.src_ip)); 2437 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2438 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2439 &ib_spec->ipv6.val.src_ip, 2440 sizeof(ib_spec->ipv6.val.src_ip)); 2441 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2442 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2443 &ib_spec->ipv6.mask.dst_ip, 2444 sizeof(ib_spec->ipv6.mask.dst_ip)); 2445 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2446 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2447 &ib_spec->ipv6.val.dst_ip, 2448 sizeof(ib_spec->ipv6.val.dst_ip)); 2449 2450 set_tos(headers_c, headers_v, 2451 ib_spec->ipv6.mask.traffic_class, 2452 ib_spec->ipv6.val.traffic_class); 2453 2454 set_proto(headers_c, headers_v, 2455 ib_spec->ipv6.mask.next_hdr, 2456 ib_spec->ipv6.val.next_hdr); 2457 2458 set_flow_label(misc_params_c, misc_params_v, 2459 ntohl(ib_spec->ipv6.mask.flow_label), 2460 ntohl(ib_spec->ipv6.val.flow_label), 2461 ib_spec->type & IB_FLOW_SPEC_INNER); 2462 2463 break; 2464 case IB_FLOW_SPEC_TCP: 2465 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 2466 LAST_TCP_UDP_FIELD)) 2467 return -EOPNOTSUPP; 2468 2469 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2470 0xff); 2471 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2472 IPPROTO_TCP); 2473 2474 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, 2475 ntohs(ib_spec->tcp_udp.mask.src_port)); 2476 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, 2477 ntohs(ib_spec->tcp_udp.val.src_port)); 2478 2479 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, 2480 ntohs(ib_spec->tcp_udp.mask.dst_port)); 2481 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, 2482 ntohs(ib_spec->tcp_udp.val.dst_port)); 2483 break; 2484 case IB_FLOW_SPEC_UDP: 2485 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 2486 LAST_TCP_UDP_FIELD)) 2487 return -EOPNOTSUPP; 2488 2489 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2490 0xff); 2491 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2492 IPPROTO_UDP); 2493 2494 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, 2495 ntohs(ib_spec->tcp_udp.mask.src_port)); 2496 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, 2497 ntohs(ib_spec->tcp_udp.val.src_port)); 2498 2499 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, 2500 ntohs(ib_spec->tcp_udp.mask.dst_port)); 2501 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, 2502 ntohs(ib_spec->tcp_udp.val.dst_port)); 2503 break; 2504 case IB_FLOW_SPEC_VXLAN_TUNNEL: 2505 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, 2506 LAST_TUNNEL_FIELD)) 2507 return -EOPNOTSUPP; 2508 2509 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, 2510 ntohl(ib_spec->tunnel.mask.tunnel_id)); 2511 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, 2512 ntohl(ib_spec->tunnel.val.tunnel_id)); 2513 break; 2514 case IB_FLOW_SPEC_ACTION_TAG: 2515 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, 2516 LAST_FLOW_TAG_FIELD)) 2517 return -EOPNOTSUPP; 2518 if (ib_spec->flow_tag.tag_id >= BIT(24)) 2519 return -EINVAL; 2520 2521 *tag_id = ib_spec->flow_tag.tag_id; 2522 break; 2523 case 
IB_FLOW_SPEC_ACTION_DROP: 2524 if (FIELDS_NOT_SUPPORTED(ib_spec->drop, 2525 LAST_DROP_FIELD)) 2526 return -EOPNOTSUPP; 2527 *is_drop = true; 2528 break; 2529 default: 2530 return -EINVAL; 2531 } 2532 2533 return 0; 2534 } 2535 2536 /* If a flow could catch both multicast and unicast packets, 2537 * it won't fall into the multicast flow steering table and this rule 2538 * could steal other multicast packets. 2539 */ 2540 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) 2541 { 2542 union ib_flow_spec *flow_spec; 2543 2544 if (ib_attr->type != IB_FLOW_ATTR_NORMAL || 2545 ib_attr->num_of_specs < 1) 2546 return false; 2547 2548 flow_spec = (union ib_flow_spec *)(ib_attr + 1); 2549 if (flow_spec->type == IB_FLOW_SPEC_IPV4) { 2550 struct ib_flow_spec_ipv4 *ipv4_spec; 2551 2552 ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; 2553 if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) 2554 return true; 2555 2556 return false; 2557 } 2558 2559 if (flow_spec->type == IB_FLOW_SPEC_ETH) { 2560 struct ib_flow_spec_eth *eth_spec; 2561 2562 eth_spec = (struct ib_flow_spec_eth *)flow_spec; 2563 return is_multicast_ether_addr(eth_spec->mask.dst_mac) && 2564 is_multicast_ether_addr(eth_spec->val.dst_mac); 2565 } 2566 2567 return false; 2568 } 2569 2570 static bool is_valid_ethertype(struct mlx5_core_dev *mdev, 2571 const struct ib_flow_attr *flow_attr, 2572 bool check_inner) 2573 { 2574 union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); 2575 int match_ipv = check_inner ? 2576 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2577 ft_field_support.inner_ip_version) : 2578 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2579 ft_field_support.outer_ip_version); 2580 int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; 2581 bool ipv4_spec_valid, ipv6_spec_valid; 2582 unsigned int ip_spec_type = 0; 2583 bool has_ethertype = false; 2584 unsigned int spec_index; 2585 bool mask_valid = true; 2586 u16 eth_type = 0; 2587 bool type_valid; 2588 2589 /* Validate that ethertype is correct */ 2590 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 2591 if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && 2592 ib_spec->eth.mask.ether_type) { 2593 mask_valid = (ib_spec->eth.mask.ether_type == 2594 htons(0xffff)); 2595 has_ethertype = true; 2596 eth_type = ntohs(ib_spec->eth.val.ether_type); 2597 } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || 2598 (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { 2599 ip_spec_type = ib_spec->type; 2600 } 2601 ib_spec = (void *)ib_spec + ib_spec->size; 2602 } 2603 2604 type_valid = (!has_ethertype) || (!ip_spec_type); 2605 if (!type_valid && mask_valid) { 2606 ipv4_spec_valid = (eth_type == ETH_P_IP) && 2607 (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); 2608 ipv6_spec_valid = (eth_type == ETH_P_IPV6) && 2609 (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); 2610 2611 type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || 2612 (((eth_type == ETH_P_MPLS_UC) || 2613 (eth_type == ETH_P_MPLS_MC)) && match_ipv); 2614 } 2615 2616 return type_valid; 2617 } 2618 2619 static bool is_valid_attr(struct mlx5_core_dev *mdev, 2620 const struct ib_flow_attr *flow_attr) 2621 { 2622 return is_valid_ethertype(mdev, flow_attr, false) && 2623 is_valid_ethertype(mdev, flow_attr, true); 2624 } 2625 2626 static void put_flow_table(struct mlx5_ib_dev *dev, 2627 struct mlx5_ib_flow_prio *prio, bool ft_added) 2628 { 2629 prio->refcount -= !!ft_added; 2630 if (!prio->refcount) { 2631 mlx5_destroy_flow_table(prio->flow_table); 2632 prio->flow_table = NULL; 2633 } 
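	/*
	 * The priority's flow table is created lazily in get_flow_table()
	 * and torn down here once the last rule referencing it is gone;
	 * _create_flow_rule() takes a reference for every rule it installs.
	 */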
2634 } 2635 2636 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 2637 { 2638 struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); 2639 struct mlx5_ib_flow_handler *handler = container_of(flow_id, 2640 struct mlx5_ib_flow_handler, 2641 ibflow); 2642 struct mlx5_ib_flow_handler *iter, *tmp; 2643 2644 mutex_lock(&dev->flow_db->lock); 2645 2646 list_for_each_entry_safe(iter, tmp, &handler->list, list) { 2647 mlx5_del_flow_rules(iter->rule); 2648 put_flow_table(dev, iter->prio, true); 2649 list_del(&iter->list); 2650 kfree(iter); 2651 } 2652 2653 mlx5_del_flow_rules(handler->rule); 2654 put_flow_table(dev, handler->prio, true); 2655 mutex_unlock(&dev->flow_db->lock); 2656 2657 kfree(handler); 2658 2659 return 0; 2660 } 2661 2662 static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 2663 { 2664 priority *= 2; 2665 if (!dont_trap) 2666 priority++; 2667 return priority; 2668 } 2669 2670 enum flow_table_type { 2671 MLX5_IB_FT_RX, 2672 MLX5_IB_FT_TX 2673 }; 2674 2675 #define MLX5_FS_MAX_TYPES 6 2676 #define MLX5_FS_MAX_ENTRIES BIT(16) 2677 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 2678 struct ib_flow_attr *flow_attr, 2679 enum flow_table_type ft_type) 2680 { 2681 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 2682 struct mlx5_flow_namespace *ns = NULL; 2683 struct mlx5_ib_flow_prio *prio; 2684 struct mlx5_flow_table *ft; 2685 int max_table_size; 2686 int num_entries; 2687 int num_groups; 2688 int priority; 2689 int err = 0; 2690 2691 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 2692 log_max_ft_size)); 2693 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 2694 if (flow_is_multicast_only(flow_attr) && 2695 !dont_trap) 2696 priority = MLX5_IB_FLOW_MCAST_PRIO; 2697 else 2698 priority = ib_prio_to_core_prio(flow_attr->priority, 2699 dont_trap); 2700 ns = mlx5_get_flow_namespace(dev->mdev, 2701 MLX5_FLOW_NAMESPACE_BYPASS); 2702 num_entries = MLX5_FS_MAX_ENTRIES; 2703 num_groups = MLX5_FS_MAX_TYPES; 2704 prio = &dev->flow_db->prios[priority]; 2705 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 2706 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 2707 ns = mlx5_get_flow_namespace(dev->mdev, 2708 MLX5_FLOW_NAMESPACE_LEFTOVERS); 2709 build_leftovers_ft_param(&priority, 2710 &num_entries, 2711 &num_groups); 2712 prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; 2713 } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 2714 if (!MLX5_CAP_FLOWTABLE(dev->mdev, 2715 allow_sniffer_and_nic_rx_shared_tir)) 2716 return ERR_PTR(-ENOTSUPP); 2717 2718 ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 2719 MLX5_FLOW_NAMESPACE_SNIFFER_RX : 2720 MLX5_FLOW_NAMESPACE_SNIFFER_TX); 2721 2722 prio = &dev->flow_db->sniffer[ft_type]; 2723 priority = 0; 2724 num_entries = 1; 2725 num_groups = 1; 2726 } 2727 2728 if (!ns) 2729 return ERR_PTR(-ENOTSUPP); 2730 2731 if (num_entries > max_table_size) 2732 return ERR_PTR(-ENOMEM); 2733 2734 ft = prio->flow_table; 2735 if (!ft) { 2736 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 2737 num_entries, 2738 num_groups, 2739 0, 0); 2740 2741 if (!IS_ERR(ft)) { 2742 prio->refcount = 0; 2743 prio->flow_table = ft; 2744 } else { 2745 err = PTR_ERR(ft); 2746 } 2747 } 2748 2749 return err ? 
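	/*
	 * On success the (possibly freshly created) auto-grouped table is
	 * cached in prio->flow_table, so later flows of the same priority
	 * reuse it rather than creating another table.
	 */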
ERR_PTR(err) : prio; 2750 } 2751 2752 static void set_underlay_qp(struct mlx5_ib_dev *dev, 2753 struct mlx5_flow_spec *spec, 2754 u32 underlay_qpn) 2755 { 2756 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, 2757 spec->match_criteria, 2758 misc_parameters); 2759 void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2760 misc_parameters); 2761 2762 if (underlay_qpn && 2763 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 2764 ft_field_support.bth_dst_qp)) { 2765 MLX5_SET(fte_match_set_misc, 2766 misc_params_v, bth_dst_qp, underlay_qpn); 2767 MLX5_SET(fte_match_set_misc, 2768 misc_params_c, bth_dst_qp, 0xffffff); 2769 } 2770 } 2771 2772 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, 2773 struct mlx5_ib_flow_prio *ft_prio, 2774 const struct ib_flow_attr *flow_attr, 2775 struct mlx5_flow_destination *dst, 2776 u32 underlay_qpn) 2777 { 2778 struct mlx5_flow_table *ft = ft_prio->flow_table; 2779 struct mlx5_ib_flow_handler *handler; 2780 struct mlx5_flow_act flow_act = {0}; 2781 struct mlx5_flow_spec *spec; 2782 struct mlx5_flow_destination *rule_dst = dst; 2783 const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); 2784 unsigned int spec_index; 2785 u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 2786 bool is_drop = false; 2787 int err = 0; 2788 int dest_num = 1; 2789 2790 if (!is_valid_attr(dev->mdev, flow_attr)) 2791 return ERR_PTR(-EINVAL); 2792 2793 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 2794 handler = kzalloc(sizeof(*handler), GFP_KERNEL); 2795 if (!handler || !spec) { 2796 err = -ENOMEM; 2797 goto free; 2798 } 2799 2800 INIT_LIST_HEAD(&handler->list); 2801 2802 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 2803 err = parse_flow_attr(dev->mdev, spec->match_criteria, 2804 spec->match_value, 2805 ib_flow, &flow_tag, &is_drop); 2806 if (err < 0) 2807 goto free; 2808 2809 ib_flow += ((union ib_flow_spec *)ib_flow)->size; 2810 } 2811 2812 if (!flow_is_multicast_only(flow_attr)) 2813 set_underlay_qp(dev, spec, underlay_qpn); 2814 2815 if (dev->rep) { 2816 void *misc; 2817 2818 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2819 misc_parameters); 2820 MLX5_SET(fte_match_set_misc, misc, source_port, 2821 dev->rep->vport); 2822 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2823 misc_parameters); 2824 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); 2825 } 2826 2827 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); 2828 if (is_drop) { 2829 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; 2830 rule_dst = NULL; 2831 dest_num = 0; 2832 } else { 2833 flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : 2834 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 2835 } 2836 2837 if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG && 2838 (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 2839 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { 2840 mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", 2841 flow_tag, flow_attr->type); 2842 err = -EINVAL; 2843 goto free; 2844 } 2845 flow_act.flow_tag = flow_tag; 2846 handler->rule = mlx5_add_flow_rules(ft, spec, 2847 &flow_act, 2848 rule_dst, dest_num); 2849 2850 if (IS_ERR(handler->rule)) { 2851 err = PTR_ERR(handler->rule); 2852 goto free; 2853 } 2854 2855 ft_prio->refcount++; 2856 handler->prio = ft_prio; 2857 2858 ft_prio->flow_table = ft; 2859 free: 2860 if (err) 2861 kfree(handler); 2862 kvfree(spec); 2863 return err ? 
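	/*
	 * spec is only needed while installing the rule, so it is freed on
	 * both paths; handler is freed on error and returned to the caller
	 * on success.
	 */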
ERR_PTR(err) : handler; 2864 } 2865 2866 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, 2867 struct mlx5_ib_flow_prio *ft_prio, 2868 const struct ib_flow_attr *flow_attr, 2869 struct mlx5_flow_destination *dst) 2870 { 2871 return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); 2872 } 2873 2874 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, 2875 struct mlx5_ib_flow_prio *ft_prio, 2876 struct ib_flow_attr *flow_attr, 2877 struct mlx5_flow_destination *dst) 2878 { 2879 struct mlx5_ib_flow_handler *handler_dst = NULL; 2880 struct mlx5_ib_flow_handler *handler = NULL; 2881 2882 handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); 2883 if (!IS_ERR(handler)) { 2884 handler_dst = create_flow_rule(dev, ft_prio, 2885 flow_attr, dst); 2886 if (IS_ERR(handler_dst)) { 2887 mlx5_del_flow_rules(handler->rule); 2888 ft_prio->refcount--; 2889 kfree(handler); 2890 handler = handler_dst; 2891 } else { 2892 list_add(&handler_dst->list, &handler->list); 2893 } 2894 } 2895 2896 return handler; 2897 } 2898 enum { 2899 LEFTOVERS_MC, 2900 LEFTOVERS_UC, 2901 }; 2902 2903 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, 2904 struct mlx5_ib_flow_prio *ft_prio, 2905 struct ib_flow_attr *flow_attr, 2906 struct mlx5_flow_destination *dst) 2907 { 2908 struct mlx5_ib_flow_handler *handler_ucast = NULL; 2909 struct mlx5_ib_flow_handler *handler = NULL; 2910 2911 static struct { 2912 struct ib_flow_attr flow_attr; 2913 struct ib_flow_spec_eth eth_flow; 2914 } leftovers_specs[] = { 2915 [LEFTOVERS_MC] = { 2916 .flow_attr = { 2917 .num_of_specs = 1, 2918 .size = sizeof(leftovers_specs[0]) 2919 }, 2920 .eth_flow = { 2921 .type = IB_FLOW_SPEC_ETH, 2922 .size = sizeof(struct ib_flow_spec_eth), 2923 .mask = {.dst_mac = {0x1} }, 2924 .val = {.dst_mac = {0x1} } 2925 } 2926 }, 2927 [LEFTOVERS_UC] = { 2928 .flow_attr = { 2929 .num_of_specs = 1, 2930 .size = sizeof(leftovers_specs[0]) 2931 }, 2932 .eth_flow = { 2933 .type = IB_FLOW_SPEC_ETH, 2934 .size = sizeof(struct ib_flow_spec_eth), 2935 .mask = {.dst_mac = {0x1} }, 2936 .val = {.dst_mac = {} } 2937 } 2938 } 2939 }; 2940 2941 handler = create_flow_rule(dev, ft_prio, 2942 &leftovers_specs[LEFTOVERS_MC].flow_attr, 2943 dst); 2944 if (!IS_ERR(handler) && 2945 flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { 2946 handler_ucast = create_flow_rule(dev, ft_prio, 2947 &leftovers_specs[LEFTOVERS_UC].flow_attr, 2948 dst); 2949 if (IS_ERR(handler_ucast)) { 2950 mlx5_del_flow_rules(handler->rule); 2951 ft_prio->refcount--; 2952 kfree(handler); 2953 handler = handler_ucast; 2954 } else { 2955 list_add(&handler_ucast->list, &handler->list); 2956 } 2957 } 2958 2959 return handler; 2960 } 2961 2962 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, 2963 struct mlx5_ib_flow_prio *ft_rx, 2964 struct mlx5_ib_flow_prio *ft_tx, 2965 struct mlx5_flow_destination *dst) 2966 { 2967 struct mlx5_ib_flow_handler *handler_rx; 2968 struct mlx5_ib_flow_handler *handler_tx; 2969 int err; 2970 static const struct ib_flow_attr flow_attr = { 2971 .num_of_specs = 0, 2972 .size = sizeof(flow_attr) 2973 }; 2974 2975 handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); 2976 if (IS_ERR(handler_rx)) { 2977 err = PTR_ERR(handler_rx); 2978 goto err; 2979 } 2980 2981 handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); 2982 if (IS_ERR(handler_tx)) { 2983 err = PTR_ERR(handler_tx); 2984 goto err_tx; 2985 } 2986 2987 list_add(&handler_tx->list, &handler_rx->list); 2988 2989 
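	/*
	 * The TX handler is chained onto the RX handler's list, so the
	 * single ib_flow returned here lets mlx5_ib_destroy_flow() tear
	 * down both sniffer rules by walking that list.
	 */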
return handler_rx; 2990 2991 err_tx: 2992 mlx5_del_flow_rules(handler_rx->rule); 2993 ft_rx->refcount--; 2994 kfree(handler_rx); 2995 err: 2996 return ERR_PTR(err); 2997 } 2998 2999 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, 3000 struct ib_flow_attr *flow_attr, 3001 int domain) 3002 { 3003 struct mlx5_ib_dev *dev = to_mdev(qp->device); 3004 struct mlx5_ib_qp *mqp = to_mqp(qp); 3005 struct mlx5_ib_flow_handler *handler = NULL; 3006 struct mlx5_flow_destination *dst = NULL; 3007 struct mlx5_ib_flow_prio *ft_prio_tx = NULL; 3008 struct mlx5_ib_flow_prio *ft_prio; 3009 int err; 3010 int underlay_qpn; 3011 3012 if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) 3013 return ERR_PTR(-ENOMEM); 3014 3015 if (domain != IB_FLOW_DOMAIN_USER || 3016 flow_attr->port > dev->num_ports || 3017 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) 3018 return ERR_PTR(-EINVAL); 3019 3020 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 3021 if (!dst) 3022 return ERR_PTR(-ENOMEM); 3023 3024 mutex_lock(&dev->flow_db->lock); 3025 3026 ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); 3027 if (IS_ERR(ft_prio)) { 3028 err = PTR_ERR(ft_prio); 3029 goto unlock; 3030 } 3031 if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 3032 ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); 3033 if (IS_ERR(ft_prio_tx)) { 3034 err = PTR_ERR(ft_prio_tx); 3035 ft_prio_tx = NULL; 3036 goto destroy_ft; 3037 } 3038 } 3039 3040 dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; 3041 if (mqp->flags & MLX5_IB_QP_RSS) 3042 dst->tir_num = mqp->rss_qp.tirn; 3043 else 3044 dst->tir_num = mqp->raw_packet_qp.rq.tirn; 3045 3046 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 3047 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { 3048 handler = create_dont_trap_rule(dev, ft_prio, 3049 flow_attr, dst); 3050 } else { 3051 underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? 
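			/*
			 * QPs flagged MLX5_IB_QP_UNDERLAY (enhanced IPoIB)
			 * install the rule against their underlay QPN;
			 * set_underlay_qp() then adds a bth_dst_qp match when
			 * the device supports it.
			 */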
3052 mqp->underlay_qpn : 0; 3053 handler = _create_flow_rule(dev, ft_prio, flow_attr, 3054 dst, underlay_qpn); 3055 } 3056 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3057 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 3058 handler = create_leftovers_rule(dev, ft_prio, flow_attr, 3059 dst); 3060 } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 3061 handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); 3062 } else { 3063 err = -EINVAL; 3064 goto destroy_ft; 3065 } 3066 3067 if (IS_ERR(handler)) { 3068 err = PTR_ERR(handler); 3069 handler = NULL; 3070 goto destroy_ft; 3071 } 3072 3073 mutex_unlock(&dev->flow_db->lock); 3074 kfree(dst); 3075 3076 return &handler->ibflow; 3077 3078 destroy_ft: 3079 put_flow_table(dev, ft_prio, false); 3080 if (ft_prio_tx) 3081 put_flow_table(dev, ft_prio_tx, false); 3082 unlock: 3083 mutex_unlock(&dev->flow_db->lock); 3084 kfree(dst); 3085 kfree(handler); 3086 return ERR_PTR(err); 3087 } 3088 3089 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 3090 { 3091 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 3092 struct mlx5_ib_qp *mqp = to_mqp(ibqp); 3093 int err; 3094 3095 if (mqp->flags & MLX5_IB_QP_UNDERLAY) { 3096 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); 3097 return -EOPNOTSUPP; 3098 } 3099 3100 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 3101 if (err) 3102 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 3103 ibqp->qp_num, gid->raw); 3104 3105 return err; 3106 } 3107 3108 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 3109 { 3110 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 3111 int err; 3112 3113 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 3114 if (err) 3115 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 3116 ibqp->qp_num, gid->raw); 3117 3118 return err; 3119 } 3120 3121 static int init_node_data(struct mlx5_ib_dev *dev) 3122 { 3123 int err; 3124 3125 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); 3126 if (err) 3127 return err; 3128 3129 dev->mdev->rev_id = dev->mdev->pdev->revision; 3130 3131 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 3132 } 3133 3134 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, 3135 char *buf) 3136 { 3137 struct mlx5_ib_dev *dev = 3138 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 3139 3140 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 3141 } 3142 3143 static ssize_t show_reg_pages(struct device *device, 3144 struct device_attribute *attr, char *buf) 3145 { 3146 struct mlx5_ib_dev *dev = 3147 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 3148 3149 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 3150 } 3151 3152 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 3153 char *buf) 3154 { 3155 struct mlx5_ib_dev *dev = 3156 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 3157 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 3158 } 3159 3160 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 3161 char *buf) 3162 { 3163 struct mlx5_ib_dev *dev = 3164 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 3165 return sprintf(buf, "%x\n", dev->mdev->rev_id); 3166 } 3167 3168 static ssize_t show_board(struct device *device, struct device_attribute *attr, 3169 char *buf) 3170 { 3171 struct mlx5_ib_dev *dev = 3172 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 3173 return sprintf(buf, 
"%.*s\n", MLX5_BOARD_ID_LEN, 3174 dev->mdev->board_id); 3175 } 3176 3177 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 3178 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 3179 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 3180 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 3181 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 3182 3183 static struct device_attribute *mlx5_class_attributes[] = { 3184 &dev_attr_hw_rev, 3185 &dev_attr_hca_type, 3186 &dev_attr_board_id, 3187 &dev_attr_fw_pages, 3188 &dev_attr_reg_pages, 3189 }; 3190 3191 static void pkey_change_handler(struct work_struct *work) 3192 { 3193 struct mlx5_ib_port_resources *ports = 3194 container_of(work, struct mlx5_ib_port_resources, 3195 pkey_change_work); 3196 3197 mutex_lock(&ports->devr->mutex); 3198 mlx5_ib_gsi_pkey_change(ports->gsi); 3199 mutex_unlock(&ports->devr->mutex); 3200 } 3201 3202 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) 3203 { 3204 struct mlx5_ib_qp *mqp; 3205 struct mlx5_ib_cq *send_mcq, *recv_mcq; 3206 struct mlx5_core_cq *mcq; 3207 struct list_head cq_armed_list; 3208 unsigned long flags_qp; 3209 unsigned long flags_cq; 3210 unsigned long flags; 3211 3212 INIT_LIST_HEAD(&cq_armed_list); 3213 3214 /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ 3215 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); 3216 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { 3217 spin_lock_irqsave(&mqp->sq.lock, flags_qp); 3218 if (mqp->sq.tail != mqp->sq.head) { 3219 send_mcq = to_mcq(mqp->ibqp.send_cq); 3220 spin_lock_irqsave(&send_mcq->lock, flags_cq); 3221 if (send_mcq->mcq.comp && 3222 mqp->ibqp.send_cq->comp_handler) { 3223 if (!send_mcq->mcq.reset_notify_added) { 3224 send_mcq->mcq.reset_notify_added = 1; 3225 list_add_tail(&send_mcq->mcq.reset_notify, 3226 &cq_armed_list); 3227 } 3228 } 3229 spin_unlock_irqrestore(&send_mcq->lock, flags_cq); 3230 } 3231 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); 3232 spin_lock_irqsave(&mqp->rq.lock, flags_qp); 3233 /* no handling is needed for SRQ */ 3234 if (!mqp->ibqp.srq) { 3235 if (mqp->rq.tail != mqp->rq.head) { 3236 recv_mcq = to_mcq(mqp->ibqp.recv_cq); 3237 spin_lock_irqsave(&recv_mcq->lock, flags_cq); 3238 if (recv_mcq->mcq.comp && 3239 mqp->ibqp.recv_cq->comp_handler) { 3240 if (!recv_mcq->mcq.reset_notify_added) { 3241 recv_mcq->mcq.reset_notify_added = 1; 3242 list_add_tail(&recv_mcq->mcq.reset_notify, 3243 &cq_armed_list); 3244 } 3245 } 3246 spin_unlock_irqrestore(&recv_mcq->lock, 3247 flags_cq); 3248 } 3249 } 3250 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); 3251 } 3252 /*At that point all inflight post send were put to be executed as of we 3253 * lock/unlock above locks Now need to arm all involved CQs. 
3254 */ 3255 list_for_each_entry(mcq, &cq_armed_list, reset_notify) { 3256 mcq->comp(mcq); 3257 } 3258 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); 3259 } 3260 3261 static void delay_drop_handler(struct work_struct *work) 3262 { 3263 int err; 3264 struct mlx5_ib_delay_drop *delay_drop = 3265 container_of(work, struct mlx5_ib_delay_drop, 3266 delay_drop_work); 3267 3268 atomic_inc(&delay_drop->events_cnt); 3269 3270 mutex_lock(&delay_drop->lock); 3271 err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, 3272 delay_drop->timeout); 3273 if (err) { 3274 mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", 3275 delay_drop->timeout); 3276 delay_drop->activate = false; 3277 } 3278 mutex_unlock(&delay_drop->lock); 3279 } 3280 3281 static void mlx5_ib_handle_event(struct work_struct *_work) 3282 { 3283 struct mlx5_ib_event_work *work = 3284 container_of(_work, struct mlx5_ib_event_work, work); 3285 struct mlx5_ib_dev *ibdev; 3286 struct ib_event ibev; 3287 bool fatal = false; 3288 u8 port = 0; 3289 3290 if (mlx5_core_is_mp_slave(work->dev)) { 3291 ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); 3292 if (!ibdev) 3293 goto out; 3294 } else { 3295 ibdev = work->context; 3296 } 3297 3298 switch (work->event) { 3299 case MLX5_DEV_EVENT_SYS_ERROR: 3300 ibev.event = IB_EVENT_DEVICE_FATAL; 3301 mlx5_ib_handle_internal_error(ibdev); 3302 fatal = true; 3303 break; 3304 3305 case MLX5_DEV_EVENT_PORT_UP: 3306 case MLX5_DEV_EVENT_PORT_DOWN: 3307 case MLX5_DEV_EVENT_PORT_INITIALIZED: 3308 port = (u8)work->param; 3309 3310 /* In RoCE, port up/down events are handled in 3311 * mlx5_netdev_event(). 3312 */ 3313 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == 3314 IB_LINK_LAYER_ETHERNET) 3315 goto out; 3316 3317 ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? 
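		/*
		 * Both MLX5_DEV_EVENT_PORT_DOWN and
		 * MLX5_DEV_EVENT_PORT_INITIALIZED are surfaced to the IB core
		 * as IB_EVENT_PORT_ERR; only PORT_UP becomes
		 * IB_EVENT_PORT_ACTIVE.
		 */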
3318 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 3319 break; 3320 3321 case MLX5_DEV_EVENT_LID_CHANGE: 3322 ibev.event = IB_EVENT_LID_CHANGE; 3323 port = (u8)work->param; 3324 break; 3325 3326 case MLX5_DEV_EVENT_PKEY_CHANGE: 3327 ibev.event = IB_EVENT_PKEY_CHANGE; 3328 port = (u8)work->param; 3329 3330 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 3331 break; 3332 3333 case MLX5_DEV_EVENT_GUID_CHANGE: 3334 ibev.event = IB_EVENT_GID_CHANGE; 3335 port = (u8)work->param; 3336 break; 3337 3338 case MLX5_DEV_EVENT_CLIENT_REREG: 3339 ibev.event = IB_EVENT_CLIENT_REREGISTER; 3340 port = (u8)work->param; 3341 break; 3342 case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: 3343 schedule_work(&ibdev->delay_drop.delay_drop_work); 3344 goto out; 3345 default: 3346 goto out; 3347 } 3348 3349 ibev.device = &ibdev->ib_dev; 3350 ibev.element.port_num = port; 3351 3352 if (port < 1 || port > ibdev->num_ports) { 3353 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); 3354 goto out; 3355 } 3356 3357 if (ibdev->ib_active) 3358 ib_dispatch_event(&ibev); 3359 3360 if (fatal) 3361 ibdev->ib_active = false; 3362 out: 3363 kfree(work); 3364 } 3365 3366 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 3367 enum mlx5_dev_event event, unsigned long param) 3368 { 3369 struct mlx5_ib_event_work *work; 3370 3371 work = kmalloc(sizeof(*work), GFP_ATOMIC); 3372 if (!work) 3373 return; 3374 3375 INIT_WORK(&work->work, mlx5_ib_handle_event); 3376 work->dev = dev; 3377 work->param = param; 3378 work->context = context; 3379 work->event = event; 3380 3381 queue_work(mlx5_ib_event_wq, &work->work); 3382 } 3383 3384 static int set_has_smi_cap(struct mlx5_ib_dev *dev) 3385 { 3386 struct mlx5_hca_vport_context vport_ctx; 3387 int err; 3388 int port; 3389 3390 for (port = 1; port <= dev->num_ports; port++) { 3391 dev->mdev->port_caps[port - 1].has_smi = false; 3392 if (MLX5_CAP_GEN(dev->mdev, port_type) == 3393 MLX5_CAP_PORT_TYPE_IB) { 3394 if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { 3395 err = mlx5_query_hca_vport_context(dev->mdev, 0, 3396 port, 0, 3397 &vport_ctx); 3398 if (err) { 3399 mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", 3400 port, err); 3401 return err; 3402 } 3403 dev->mdev->port_caps[port - 1].has_smi = 3404 vport_ctx.has_smi; 3405 } else { 3406 dev->mdev->port_caps[port - 1].has_smi = true; 3407 } 3408 } 3409 } 3410 return 0; 3411 } 3412 3413 static void get_ext_port_caps(struct mlx5_ib_dev *dev) 3414 { 3415 int port; 3416 3417 for (port = 1; port <= dev->num_ports; port++) 3418 mlx5_query_ext_port_caps(dev, port); 3419 } 3420 3421 static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) 3422 { 3423 struct ib_device_attr *dprops = NULL; 3424 struct ib_port_attr *pprops = NULL; 3425 int err = -ENOMEM; 3426 struct ib_udata uhw = {.inlen = 0, .outlen = 0}; 3427 3428 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); 3429 if (!pprops) 3430 goto out; 3431 3432 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); 3433 if (!dprops) 3434 goto out; 3435 3436 err = set_has_smi_cap(dev); 3437 if (err) 3438 goto out; 3439 3440 err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); 3441 if (err) { 3442 mlx5_ib_warn(dev, "query_device failed %d\n", err); 3443 goto out; 3444 } 3445 3446 memset(pprops, 0, sizeof(*pprops)); 3447 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 3448 if (err) { 3449 mlx5_ib_warn(dev, "query_port %d failed %d\n", 3450 port, err); 3451 goto out; 3452 } 3453 3454 dev->mdev->port_caps[port - 1].pkey_table_len = 3455 dprops->max_pkeys; 3456 
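	/*
	 * Cache the table sizes reported by query_device()/query_port() in
	 * the core device's per-port caps so they can be reused later
	 * without another firmware query.
	 */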
dev->mdev->port_caps[port - 1].gid_table_len = 3457 pprops->gid_tbl_len; 3458 mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", 3459 port, dprops->max_pkeys, pprops->gid_tbl_len); 3460 3461 out: 3462 kfree(pprops); 3463 kfree(dprops); 3464 3465 return err; 3466 } 3467 3468 static void destroy_umrc_res(struct mlx5_ib_dev *dev) 3469 { 3470 int err; 3471 3472 err = mlx5_mr_cache_cleanup(dev); 3473 if (err) 3474 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 3475 3476 mlx5_ib_destroy_qp(dev->umrc.qp); 3477 ib_free_cq(dev->umrc.cq); 3478 ib_dealloc_pd(dev->umrc.pd); 3479 } 3480 3481 enum { 3482 MAX_UMR_WR = 128, 3483 }; 3484 3485 static int create_umr_res(struct mlx5_ib_dev *dev) 3486 { 3487 struct ib_qp_init_attr *init_attr = NULL; 3488 struct ib_qp_attr *attr = NULL; 3489 struct ib_pd *pd; 3490 struct ib_cq *cq; 3491 struct ib_qp *qp; 3492 int ret; 3493 3494 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 3495 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 3496 if (!attr || !init_attr) { 3497 ret = -ENOMEM; 3498 goto error_0; 3499 } 3500 3501 pd = ib_alloc_pd(&dev->ib_dev, 0); 3502 if (IS_ERR(pd)) { 3503 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 3504 ret = PTR_ERR(pd); 3505 goto error_0; 3506 } 3507 3508 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 3509 if (IS_ERR(cq)) { 3510 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 3511 ret = PTR_ERR(cq); 3512 goto error_2; 3513 } 3514 3515 init_attr->send_cq = cq; 3516 init_attr->recv_cq = cq; 3517 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 3518 init_attr->cap.max_send_wr = MAX_UMR_WR; 3519 init_attr->cap.max_send_sge = 1; 3520 init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 3521 init_attr->port_num = 1; 3522 qp = mlx5_ib_create_qp(pd, init_attr, NULL); 3523 if (IS_ERR(qp)) { 3524 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 3525 ret = PTR_ERR(qp); 3526 goto error_3; 3527 } 3528 qp->device = &dev->ib_dev; 3529 qp->real_qp = qp; 3530 qp->uobject = NULL; 3531 qp->qp_type = MLX5_IB_QPT_REG_UMR; 3532 qp->send_cq = init_attr->send_cq; 3533 qp->recv_cq = init_attr->recv_cq; 3534 3535 attr->qp_state = IB_QPS_INIT; 3536 attr->port_num = 1; 3537 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 3538 IB_QP_PORT, NULL); 3539 if (ret) { 3540 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 3541 goto error_4; 3542 } 3543 3544 memset(attr, 0, sizeof(*attr)); 3545 attr->qp_state = IB_QPS_RTR; 3546 attr->path_mtu = IB_MTU_256; 3547 3548 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 3549 if (ret) { 3550 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 3551 goto error_4; 3552 } 3553 3554 memset(attr, 0, sizeof(*attr)); 3555 attr->qp_state = IB_QPS_RTS; 3556 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 3557 if (ret) { 3558 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 3559 goto error_4; 3560 } 3561 3562 dev->umrc.qp = qp; 3563 dev->umrc.cq = cq; 3564 dev->umrc.pd = pd; 3565 3566 sema_init(&dev->umrc.sem, MAX_UMR_WR); 3567 ret = mlx5_mr_cache_init(dev); 3568 if (ret) { 3569 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 3570 goto error_4; 3571 } 3572 3573 kfree(attr); 3574 kfree(init_attr); 3575 3576 return 0; 3577 3578 error_4: 3579 mlx5_ib_destroy_qp(qp); 3580 3581 error_3: 3582 ib_free_cq(cq); 3583 3584 error_2: 3585 ib_dealloc_pd(pd); 3586 3587 error_0: 3588 kfree(attr); 3589 kfree(init_attr); 3590 return ret; 3591 } 3592 3593 static u8 mlx5_get_umr_fence(u8 umr_fence_cap) 3594 { 3595 switch (umr_fence_cap) { 3596 case MLX5_CAP_UMR_FENCE_NONE: 3597 return 
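		/*
		 * Map the reported umr_fence capability onto the fence mode
		 * used on UMR WQEs; values this driver does not recognize
		 * fall back to the strongest ordering below.
		 */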
MLX5_FENCE_MODE_NONE; 3598 case MLX5_CAP_UMR_FENCE_SMALL: 3599 return MLX5_FENCE_MODE_INITIATOR_SMALL; 3600 default: 3601 return MLX5_FENCE_MODE_STRONG_ORDERING; 3602 } 3603 } 3604 3605 static int create_dev_resources(struct mlx5_ib_resources *devr) 3606 { 3607 struct ib_srq_init_attr attr; 3608 struct mlx5_ib_dev *dev; 3609 struct ib_cq_init_attr cq_attr = {.cqe = 1}; 3610 int port; 3611 int ret = 0; 3612 3613 dev = container_of(devr, struct mlx5_ib_dev, devr); 3614 3615 mutex_init(&devr->mutex); 3616 3617 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); 3618 if (IS_ERR(devr->p0)) { 3619 ret = PTR_ERR(devr->p0); 3620 goto error0; 3621 } 3622 devr->p0->device = &dev->ib_dev; 3623 devr->p0->uobject = NULL; 3624 atomic_set(&devr->p0->usecnt, 0); 3625 3626 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); 3627 if (IS_ERR(devr->c0)) { 3628 ret = PTR_ERR(devr->c0); 3629 goto error1; 3630 } 3631 devr->c0->device = &dev->ib_dev; 3632 devr->c0->uobject = NULL; 3633 devr->c0->comp_handler = NULL; 3634 devr->c0->event_handler = NULL; 3635 devr->c0->cq_context = NULL; 3636 atomic_set(&devr->c0->usecnt, 0); 3637 3638 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); 3639 if (IS_ERR(devr->x0)) { 3640 ret = PTR_ERR(devr->x0); 3641 goto error2; 3642 } 3643 devr->x0->device = &dev->ib_dev; 3644 devr->x0->inode = NULL; 3645 atomic_set(&devr->x0->usecnt, 0); 3646 mutex_init(&devr->x0->tgt_qp_mutex); 3647 INIT_LIST_HEAD(&devr->x0->tgt_qp_list); 3648 3649 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); 3650 if (IS_ERR(devr->x1)) { 3651 ret = PTR_ERR(devr->x1); 3652 goto error3; 3653 } 3654 devr->x1->device = &dev->ib_dev; 3655 devr->x1->inode = NULL; 3656 atomic_set(&devr->x1->usecnt, 0); 3657 mutex_init(&devr->x1->tgt_qp_mutex); 3658 INIT_LIST_HEAD(&devr->x1->tgt_qp_list); 3659 3660 memset(&attr, 0, sizeof(attr)); 3661 attr.attr.max_sge = 1; 3662 attr.attr.max_wr = 1; 3663 attr.srq_type = IB_SRQT_XRC; 3664 attr.ext.cq = devr->c0; 3665 attr.ext.xrc.xrcd = devr->x0; 3666 3667 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); 3668 if (IS_ERR(devr->s0)) { 3669 ret = PTR_ERR(devr->s0); 3670 goto error4; 3671 } 3672 devr->s0->device = &dev->ib_dev; 3673 devr->s0->pd = devr->p0; 3674 devr->s0->uobject = NULL; 3675 devr->s0->event_handler = NULL; 3676 devr->s0->srq_context = NULL; 3677 devr->s0->srq_type = IB_SRQT_XRC; 3678 devr->s0->ext.xrc.xrcd = devr->x0; 3679 devr->s0->ext.cq = devr->c0; 3680 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); 3681 atomic_inc(&devr->s0->ext.cq->usecnt); 3682 atomic_inc(&devr->p0->usecnt); 3683 atomic_set(&devr->s0->usecnt, 0); 3684 3685 memset(&attr, 0, sizeof(attr)); 3686 attr.attr.max_sge = 1; 3687 attr.attr.max_wr = 1; 3688 attr.srq_type = IB_SRQT_BASIC; 3689 devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); 3690 if (IS_ERR(devr->s1)) { 3691 ret = PTR_ERR(devr->s1); 3692 goto error5; 3693 } 3694 devr->s1->device = &dev->ib_dev; 3695 devr->s1->pd = devr->p0; 3696 devr->s1->uobject = NULL; 3697 devr->s1->event_handler = NULL; 3698 devr->s1->srq_context = NULL; 3699 devr->s1->srq_type = IB_SRQT_BASIC; 3700 devr->s1->ext.cq = devr->c0; 3701 atomic_inc(&devr->p0->usecnt); 3702 atomic_set(&devr->s1->usecnt, 0); 3703 3704 for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { 3705 INIT_WORK(&devr->ports[port].pkey_change_work, 3706 pkey_change_handler); 3707 devr->ports[port].devr = devr; 3708 } 3709 3710 return 0; 3711 3712 error5: 3713 mlx5_ib_destroy_srq(devr->s0); 3714 error4: 3715 mlx5_ib_dealloc_xrcd(devr->x1); 3716 error3: 3717 
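	/*
	 * The error labels unwind in reverse creation order:
	 * s0, then x1, x0, c0 and finally p0.
	 */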
mlx5_ib_dealloc_xrcd(devr->x0); 3718 error2: 3719 mlx5_ib_destroy_cq(devr->c0); 3720 error1: 3721 mlx5_ib_dealloc_pd(devr->p0); 3722 error0: 3723 return ret; 3724 } 3725 3726 static void destroy_dev_resources(struct mlx5_ib_resources *devr) 3727 { 3728 struct mlx5_ib_dev *dev = 3729 container_of(devr, struct mlx5_ib_dev, devr); 3730 int port; 3731 3732 mlx5_ib_destroy_srq(devr->s1); 3733 mlx5_ib_destroy_srq(devr->s0); 3734 mlx5_ib_dealloc_xrcd(devr->x0); 3735 mlx5_ib_dealloc_xrcd(devr->x1); 3736 mlx5_ib_destroy_cq(devr->c0); 3737 mlx5_ib_dealloc_pd(devr->p0); 3738 3739 /* Make sure no change P_Key work items are still executing */ 3740 for (port = 0; port < dev->num_ports; ++port) 3741 cancel_work_sync(&devr->ports[port].pkey_change_work); 3742 } 3743 3744 static u32 get_core_cap_flags(struct ib_device *ibdev) 3745 { 3746 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3747 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); 3748 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); 3749 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); 3750 bool raw_support = !mlx5_core_mp_enabled(dev->mdev); 3751 u32 ret = 0; 3752 3753 if (ll == IB_LINK_LAYER_INFINIBAND) 3754 return RDMA_CORE_PORT_IBA_IB; 3755 3756 if (raw_support) 3757 ret = RDMA_CORE_PORT_RAW_PACKET; 3758 3759 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 3760 return ret; 3761 3762 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) 3763 return ret; 3764 3765 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) 3766 ret |= RDMA_CORE_PORT_IBA_ROCE; 3767 3768 if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) 3769 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 3770 3771 return ret; 3772 } 3773 3774 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, 3775 struct ib_port_immutable *immutable) 3776 { 3777 struct ib_port_attr attr; 3778 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3779 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); 3780 int err; 3781 3782 immutable->core_cap_flags = get_core_cap_flags(ibdev); 3783 3784 err = ib_query_port(ibdev, port_num, &attr); 3785 if (err) 3786 return err; 3787 3788 immutable->pkey_tbl_len = attr.pkey_tbl_len; 3789 immutable->gid_tbl_len = attr.gid_tbl_len; 3790 immutable->core_cap_flags = get_core_cap_flags(ibdev); 3791 if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) 3792 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 3793 3794 return 0; 3795 } 3796 3797 static void get_dev_fw_str(struct ib_device *ibdev, char *str) 3798 { 3799 struct mlx5_ib_dev *dev = 3800 container_of(ibdev, struct mlx5_ib_dev, ib_dev); 3801 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", 3802 fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), 3803 fw_rev_sub(dev->mdev)); 3804 } 3805 3806 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) 3807 { 3808 struct mlx5_core_dev *mdev = dev->mdev; 3809 struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, 3810 MLX5_FLOW_NAMESPACE_LAG); 3811 struct mlx5_flow_table *ft; 3812 int err; 3813 3814 if (!ns || !mlx5_lag_is_active(mdev)) 3815 return 0; 3816 3817 err = mlx5_cmd_create_vport_lag(mdev); 3818 if (err) 3819 return err; 3820 3821 ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); 3822 if (IS_ERR(ft)) { 3823 err = PTR_ERR(ft); 3824 goto err_destroy_vport_lag; 3825 } 3826 3827 dev->flow_db->lag_demux_ft = ft; 3828 return 0; 3829 3830 err_destroy_vport_lag: 3831 mlx5_cmd_destroy_vport_lag(mdev); 3832 return err; 3833 } 3834 3835 static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) 3836 { 3837 struct mlx5_core_dev *mdev = 
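	/*
	 * The presence of the LAG demux flow table doubles as the record
	 * that mlx5_eth_lag_init() created the vport LAG, so both are torn
	 * down together below.
	 */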
dev->mdev; 3838 3839 if (dev->flow_db->lag_demux_ft) { 3840 mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); 3841 dev->flow_db->lag_demux_ft = NULL; 3842 3843 mlx5_cmd_destroy_vport_lag(mdev); 3844 } 3845 } 3846 3847 static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) 3848 { 3849 int err; 3850 3851 dev->roce[port_num].nb.notifier_call = mlx5_netdev_event; 3852 err = register_netdevice_notifier(&dev->roce[port_num].nb); 3853 if (err) { 3854 dev->roce[port_num].nb.notifier_call = NULL; 3855 return err; 3856 } 3857 3858 return 0; 3859 } 3860 3861 static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) 3862 { 3863 if (dev->roce[port_num].nb.notifier_call) { 3864 unregister_netdevice_notifier(&dev->roce[port_num].nb); 3865 dev->roce[port_num].nb.notifier_call = NULL; 3866 } 3867 } 3868 3869 static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) 3870 { 3871 int err; 3872 3873 err = mlx5_add_netdev_notifier(dev, port_num); 3874 if (err) 3875 return err; 3876 3877 if (MLX5_CAP_GEN(dev->mdev, roce)) { 3878 err = mlx5_nic_vport_enable_roce(dev->mdev); 3879 if (err) 3880 goto err_unregister_netdevice_notifier; 3881 } 3882 3883 err = mlx5_eth_lag_init(dev); 3884 if (err) 3885 goto err_disable_roce; 3886 3887 return 0; 3888 3889 err_disable_roce: 3890 if (MLX5_CAP_GEN(dev->mdev, roce)) 3891 mlx5_nic_vport_disable_roce(dev->mdev); 3892 3893 err_unregister_netdevice_notifier: 3894 mlx5_remove_netdev_notifier(dev, port_num); 3895 return err; 3896 } 3897 3898 static void mlx5_disable_eth(struct mlx5_ib_dev *dev) 3899 { 3900 mlx5_eth_lag_cleanup(dev); 3901 if (MLX5_CAP_GEN(dev->mdev, roce)) 3902 mlx5_nic_vport_disable_roce(dev->mdev); 3903 } 3904 3905 struct mlx5_ib_counter { 3906 const char *name; 3907 size_t offset; 3908 }; 3909 3910 #define INIT_Q_COUNTER(_name) \ 3911 { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} 3912 3913 static const struct mlx5_ib_counter basic_q_cnts[] = { 3914 INIT_Q_COUNTER(rx_write_requests), 3915 INIT_Q_COUNTER(rx_read_requests), 3916 INIT_Q_COUNTER(rx_atomic_requests), 3917 INIT_Q_COUNTER(out_of_buffer), 3918 }; 3919 3920 static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { 3921 INIT_Q_COUNTER(out_of_sequence), 3922 }; 3923 3924 static const struct mlx5_ib_counter retrans_q_cnts[] = { 3925 INIT_Q_COUNTER(duplicate_request), 3926 INIT_Q_COUNTER(rnr_nak_retry_err), 3927 INIT_Q_COUNTER(packet_seq_err), 3928 INIT_Q_COUNTER(implied_nak_seq_err), 3929 INIT_Q_COUNTER(local_ack_timeout_err), 3930 }; 3931 3932 #define INIT_CONG_COUNTER(_name) \ 3933 { .name = #_name, .offset = \ 3934 MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} 3935 3936 static const struct mlx5_ib_counter cong_cnts[] = { 3937 INIT_CONG_COUNTER(rp_cnp_ignored), 3938 INIT_CONG_COUNTER(rp_cnp_handled), 3939 INIT_CONG_COUNTER(np_ecn_marked_roce_packets), 3940 INIT_CONG_COUNTER(np_cnp_sent), 3941 }; 3942 3943 static const struct mlx5_ib_counter extended_err_cnts[] = { 3944 INIT_Q_COUNTER(resp_local_length_error), 3945 INIT_Q_COUNTER(resp_cqe_error), 3946 INIT_Q_COUNTER(req_cqe_error), 3947 INIT_Q_COUNTER(req_remote_invalid_request), 3948 INIT_Q_COUNTER(req_remote_access_errors), 3949 INIT_Q_COUNTER(resp_remote_access_errors), 3950 INIT_Q_COUNTER(resp_cqe_flush_error), 3951 INIT_Q_COUNTER(req_cqe_flush_error), 3952 }; 3953 3954 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 3955 { 3956 int i; 3957 3958 for (i = 0; i < dev->num_ports; i++) { 3959 if (dev->port[i].cnts.set_id) 3960 
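/* only release the HW queue counter if one was actually allocated for this port */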
mlx5_core_dealloc_q_counter(dev->mdev, 3961 dev->port[i].cnts.set_id); 3962 kfree(dev->port[i].cnts.names); 3963 kfree(dev->port[i].cnts.offsets); 3964 } 3965 } 3966 3967 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, 3968 struct mlx5_ib_counters *cnts) 3969 { 3970 u32 num_counters; 3971 3972 num_counters = ARRAY_SIZE(basic_q_cnts); 3973 3974 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) 3975 num_counters += ARRAY_SIZE(out_of_seq_q_cnts); 3976 3977 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) 3978 num_counters += ARRAY_SIZE(retrans_q_cnts); 3979 3980 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) 3981 num_counters += ARRAY_SIZE(extended_err_cnts); 3982 3983 cnts->num_q_counters = num_counters; 3984 3985 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 3986 cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); 3987 num_counters += ARRAY_SIZE(cong_cnts); 3988 } 3989 3990 cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); 3991 if (!cnts->names) 3992 return -ENOMEM; 3993 3994 cnts->offsets = kcalloc(num_counters, 3995 sizeof(cnts->offsets), GFP_KERNEL); 3996 if (!cnts->offsets) 3997 goto err_names; 3998 3999 return 0; 4000 4001 err_names: 4002 kfree(cnts->names); 4003 cnts->names = NULL; 4004 return -ENOMEM; 4005 } 4006 4007 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, 4008 const char **names, 4009 size_t *offsets) 4010 { 4011 int i; 4012 int j = 0; 4013 4014 for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { 4015 names[j] = basic_q_cnts[i].name; 4016 offsets[j] = basic_q_cnts[i].offset; 4017 } 4018 4019 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { 4020 for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { 4021 names[j] = out_of_seq_q_cnts[i].name; 4022 offsets[j] = out_of_seq_q_cnts[i].offset; 4023 } 4024 } 4025 4026 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 4027 for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { 4028 names[j] = retrans_q_cnts[i].name; 4029 offsets[j] = retrans_q_cnts[i].offset; 4030 } 4031 } 4032 4033 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { 4034 for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { 4035 names[j] = extended_err_cnts[i].name; 4036 offsets[j] = extended_err_cnts[i].offset; 4037 } 4038 } 4039 4040 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 4041 for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { 4042 names[j] = cong_cnts[i].name; 4043 offsets[j] = cong_cnts[i].offset; 4044 } 4045 } 4046 } 4047 4048 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 4049 { 4050 int err = 0; 4051 int i; 4052 4053 for (i = 0; i < dev->num_ports; i++) { 4054 err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); 4055 if (err) 4056 goto err_alloc; 4057 4058 mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, 4059 dev->port[i].cnts.offsets); 4060 4061 err = mlx5_core_alloc_q_counter(dev->mdev, 4062 &dev->port[i].cnts.set_id); 4063 if (err) { 4064 mlx5_ib_warn(dev, 4065 "couldn't allocate queue counter for port %d, err %d\n", 4066 i + 1, err); 4067 goto err_alloc; 4068 } 4069 dev->port[i].cnts.set_id_valid = true; 4070 } 4071 4072 return 0; 4073 4074 err_alloc: 4075 mlx5_ib_dealloc_counters(dev); 4076 return err; 4077 } 4078 4079 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 4080 u8 port_num) 4081 { 4082 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4083 struct mlx5_ib_port *port = &dev->port[port_num - 1]; 4084 4085 /* We support only per port stats */ 4086 if (port_num == 0) 4087 return NULL; 4088 4089 return 
rdma_alloc_hw_stats_struct(port->cnts.names, 4090 port->cnts.num_q_counters + 4091 port->cnts.num_cong_counters, 4092 RDMA_HW_STATS_DEFAULT_LIFESPAN); 4093 } 4094 4095 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, 4096 struct mlx5_ib_port *port, 4097 struct rdma_hw_stats *stats) 4098 { 4099 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); 4100 void *out; 4101 __be32 val; 4102 int ret, i; 4103 4104 out = kvzalloc(outlen, GFP_KERNEL); 4105 if (!out) 4106 return -ENOMEM; 4107 4108 ret = mlx5_core_query_q_counter(mdev, 4109 port->cnts.set_id, 0, 4110 out, outlen); 4111 if (ret) 4112 goto free; 4113 4114 for (i = 0; i < port->cnts.num_q_counters; i++) { 4115 val = *(__be32 *)(out + port->cnts.offsets[i]); 4116 stats->value[i] = (u64)be32_to_cpu(val); 4117 } 4118 4119 free: 4120 kvfree(out); 4121 return ret; 4122 } 4123 4124 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 4125 struct rdma_hw_stats *stats, 4126 u8 port_num, int index) 4127 { 4128 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4129 struct mlx5_ib_port *port = &dev->port[port_num - 1]; 4130 struct mlx5_core_dev *mdev; 4131 int ret, num_counters; 4132 u8 mdev_port_num; 4133 4134 if (!stats) 4135 return -EINVAL; 4136 4137 num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters; 4138 4139 /* q_counters are per IB device, query the master mdev */ 4140 ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); 4141 if (ret) 4142 return ret; 4143 4144 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 4145 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, 4146 &mdev_port_num); 4147 if (!mdev) { 4148 /* If port is not affiliated yet, its in down state 4149 * which doesn't have any counters yet, so it would be 4150 * zero. So no need to read from the HCA. 4151 */ 4152 goto done; 4153 } 4154 ret = mlx5_lag_query_cong_counters(dev->mdev, 4155 stats->value + 4156 port->cnts.num_q_counters, 4157 port->cnts.num_cong_counters, 4158 port->cnts.offsets + 4159 port->cnts.num_q_counters); 4160 4161 mlx5_ib_put_native_port_mdev(dev, port_num); 4162 if (ret) 4163 return ret; 4164 } 4165 4166 done: 4167 return num_counters; 4168 } 4169 4170 static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) 4171 { 4172 return mlx5_rdma_netdev_free(netdev); 4173 } 4174 4175 static struct net_device* 4176 mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, 4177 u8 port_num, 4178 enum rdma_netdev_t type, 4179 const char *name, 4180 unsigned char name_assign_type, 4181 void (*setup)(struct net_device *)) 4182 { 4183 struct net_device *netdev; 4184 struct rdma_netdev *rn; 4185 4186 if (type != RDMA_NETDEV_IPOIB) 4187 return ERR_PTR(-EOPNOTSUPP); 4188 4189 netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, 4190 name, setup); 4191 if (likely(!IS_ERR_OR_NULL(netdev))) { 4192 rn = netdev_priv(netdev); 4193 rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; 4194 } 4195 return netdev; 4196 } 4197 4198 static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) 4199 { 4200 if (!dev->delay_drop.dbg) 4201 return; 4202 debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs); 4203 kfree(dev->delay_drop.dbg); 4204 dev->delay_drop.dbg = NULL; 4205 } 4206 4207 static void cancel_delay_drop(struct mlx5_ib_dev *dev) 4208 { 4209 if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 4210 return; 4211 4212 cancel_work_sync(&dev->delay_drop.delay_drop_work); 4213 delay_drop_debugfs_cleanup(dev); 4214 } 4215 4216 static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, 4217 size_t count, loff_t *pos) 
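/* debugfs read handler: report the currently configured delay-drop timeout, in microseconds */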
4218 { 4219 struct mlx5_ib_delay_drop *delay_drop = filp->private_data; 4220 char lbuf[20]; 4221 int len; 4222 4223 len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout); 4224 return simple_read_from_buffer(buf, count, pos, lbuf, len); 4225 } 4226 4227 static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf, 4228 size_t count, loff_t *pos) 4229 { 4230 struct mlx5_ib_delay_drop *delay_drop = filp->private_data; 4231 u32 timeout; 4232 u32 var; 4233 4234 if (kstrtouint_from_user(buf, count, 0, &var)) 4235 return -EFAULT; 4236 4237 timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 4238 1000); 4239 if (timeout != var) 4240 mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n", 4241 timeout); 4242 4243 delay_drop->timeout = timeout; 4244 4245 return count; 4246 } 4247 4248 static const struct file_operations fops_delay_drop_timeout = { 4249 .owner = THIS_MODULE, 4250 .open = simple_open, 4251 .write = delay_drop_timeout_write, 4252 .read = delay_drop_timeout_read, 4253 }; 4254 4255 static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) 4256 { 4257 struct mlx5_ib_dbg_delay_drop *dbg; 4258 4259 if (!mlx5_debugfs_root) 4260 return 0; 4261 4262 dbg = kzalloc(sizeof(*dbg), GFP_KERNEL); 4263 if (!dbg) 4264 return -ENOMEM; 4265 4266 dev->delay_drop.dbg = dbg; 4267 4268 dbg->dir_debugfs = 4269 debugfs_create_dir("delay_drop", 4270 dev->mdev->priv.dbg_root); 4271 if (!dbg->dir_debugfs) 4272 goto out_debugfs; 4273 4274 dbg->events_cnt_debugfs = 4275 debugfs_create_atomic_t("num_timeout_events", 0400, 4276 dbg->dir_debugfs, 4277 &dev->delay_drop.events_cnt); 4278 if (!dbg->events_cnt_debugfs) 4279 goto out_debugfs; 4280 4281 dbg->rqs_cnt_debugfs = 4282 debugfs_create_atomic_t("num_rqs", 0400, 4283 dbg->dir_debugfs, 4284 &dev->delay_drop.rqs_cnt); 4285 if (!dbg->rqs_cnt_debugfs) 4286 goto out_debugfs; 4287 4288 dbg->timeout_debugfs = 4289 debugfs_create_file("timeout", 0600, 4290 dbg->dir_debugfs, 4291 &dev->delay_drop, 4292 &fops_delay_drop_timeout); 4293 if (!dbg->timeout_debugfs) 4294 goto out_debugfs; 4295 4296 return 0; 4297 4298 out_debugfs: 4299 delay_drop_debugfs_cleanup(dev); 4300 return -ENOMEM; 4301 } 4302 4303 static void init_delay_drop(struct mlx5_ib_dev *dev) 4304 { 4305 if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 4306 return; 4307 4308 mutex_init(&dev->delay_drop.lock); 4309 dev->delay_drop.dev = dev; 4310 dev->delay_drop.activate = false; 4311 dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; 4312 INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); 4313 atomic_set(&dev->delay_drop.rqs_cnt, 0); 4314 atomic_set(&dev->delay_drop.events_cnt, 0); 4315 4316 if (delay_drop_debugfs_init(dev)) 4317 mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); 4318 } 4319 4320 static const struct cpumask * 4321 mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) 4322 { 4323 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4324 4325 return mlx5_get_vector_affinity(dev->mdev, comp_vector); 4326 } 4327 4328 /* The mlx5_ib_multiport_mutex should be held when calling this function */ 4329 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, 4330 struct mlx5_ib_multiport_info *mpi) 4331 { 4332 u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; 4333 struct mlx5_ib_port *port = &ibdev->port[port_num]; 4334 int comps; 4335 int err; 4336 int i; 4337 4338 mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); 4339 4340 spin_lock(&port->mp.mpi_lock); 4341 if 
(!mpi->ibdev) { 4342 spin_unlock(&port->mp.mpi_lock); 4343 return; 4344 } 4345 mpi->ibdev = NULL; 4346 4347 spin_unlock(&port->mp.mpi_lock); 4348 mlx5_remove_netdev_notifier(ibdev, port_num); 4349 spin_lock(&port->mp.mpi_lock); 4350 4351 comps = mpi->mdev_refcnt; 4352 if (comps) { 4353 mpi->unaffiliate = true; 4354 init_completion(&mpi->unref_comp); 4355 spin_unlock(&port->mp.mpi_lock); 4356 4357 for (i = 0; i < comps; i++) 4358 wait_for_completion(&mpi->unref_comp); 4359 4360 spin_lock(&port->mp.mpi_lock); 4361 mpi->unaffiliate = false; 4362 } 4363 4364 port->mp.mpi = NULL; 4365 4366 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); 4367 4368 spin_unlock(&port->mp.mpi_lock); 4369 4370 err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev); 4371 4372 mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1); 4373 /* Log an error, still needed to cleanup the pointers and add 4374 * it back to the list. 4375 */ 4376 if (err) 4377 mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", 4378 port_num + 1); 4379 4380 ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; 4381 } 4382 4383 /* The mlx5_ib_multiport_mutex should be held when calling this function */ 4384 static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, 4385 struct mlx5_ib_multiport_info *mpi) 4386 { 4387 u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; 4388 int err; 4389 4390 spin_lock(&ibdev->port[port_num].mp.mpi_lock); 4391 if (ibdev->port[port_num].mp.mpi) { 4392 mlx5_ib_warn(ibdev, "port %d already affiliated.\n", 4393 port_num + 1); 4394 spin_unlock(&ibdev->port[port_num].mp.mpi_lock); 4395 return false; 4396 } 4397 4398 ibdev->port[port_num].mp.mpi = mpi; 4399 mpi->ibdev = ibdev; 4400 spin_unlock(&ibdev->port[port_num].mp.mpi_lock); 4401 4402 err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); 4403 if (err) 4404 goto unbind; 4405 4406 err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); 4407 if (err) 4408 goto unbind; 4409 4410 err = mlx5_add_netdev_notifier(ibdev, port_num); 4411 if (err) { 4412 mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", 4413 port_num + 1); 4414 goto unbind; 4415 } 4416 4417 err = mlx5_ib_init_cong_debugfs(ibdev, port_num); 4418 if (err) 4419 goto unbind; 4420 4421 return true; 4422 4423 unbind: 4424 mlx5_ib_unbind_slave_port(ibdev, mpi); 4425 return false; 4426 } 4427 4428 static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) 4429 { 4430 int port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4431 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 4432 port_num + 1); 4433 struct mlx5_ib_multiport_info *mpi; 4434 int err; 4435 int i; 4436 4437 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) 4438 return 0; 4439 4440 err = mlx5_query_nic_vport_system_image_guid(dev->mdev, 4441 &dev->sys_image_guid); 4442 if (err) 4443 return err; 4444 4445 err = mlx5_nic_vport_enable_roce(dev->mdev); 4446 if (err) 4447 return err; 4448 4449 mutex_lock(&mlx5_ib_multiport_mutex); 4450 for (i = 0; i < dev->num_ports; i++) { 4451 bool bound = false; 4452 4453 /* build a stub multiport info struct for the native port. 
*/ 4454 if (i == port_num) { 4455 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); 4456 if (!mpi) { 4457 mutex_unlock(&mlx5_ib_multiport_mutex); 4458 mlx5_nic_vport_disable_roce(dev->mdev); 4459 return -ENOMEM; 4460 } 4461 4462 mpi->is_master = true; 4463 mpi->mdev = dev->mdev; 4464 mpi->sys_image_guid = dev->sys_image_guid; 4465 dev->port[i].mp.mpi = mpi; 4466 mpi->ibdev = dev; 4467 mpi = NULL; 4468 continue; 4469 } 4470 4471 list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, 4472 list) { 4473 if (dev->sys_image_guid == mpi->sys_image_guid && 4474 (mlx5_core_native_port_num(mpi->mdev) - 1) == i) { 4475 bound = mlx5_ib_bind_slave_port(dev, mpi); 4476 } 4477 4478 if (bound) { 4479 dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n"); 4480 mlx5_ib_dbg(dev, "port %d bound\n", i + 1); 4481 list_del(&mpi->list); 4482 break; 4483 } 4484 } 4485 if (!bound) { 4486 get_port_caps(dev, i + 1); 4487 mlx5_ib_dbg(dev, "no free port found for port %d\n", 4488 i + 1); 4489 } 4490 } 4491 4492 list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); 4493 mutex_unlock(&mlx5_ib_multiport_mutex); 4494 return err; 4495 } 4496 4497 static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) 4498 { 4499 int port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4500 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 4501 port_num + 1); 4502 int i; 4503 4504 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) 4505 return; 4506 4507 mutex_lock(&mlx5_ib_multiport_mutex); 4508 for (i = 0; i < dev->num_ports; i++) { 4509 if (dev->port[i].mp.mpi) { 4510 /* Destroy the native port stub */ 4511 if (i == port_num) { 4512 kfree(dev->port[i].mp.mpi); 4513 dev->port[i].mp.mpi = NULL; 4514 } else { 4515 mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1); 4516 mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi); 4517 } 4518 } 4519 } 4520 4521 mlx5_ib_dbg(dev, "removing from devlist\n"); 4522 list_del(&dev->ib_dev_list); 4523 mutex_unlock(&mlx5_ib_multiport_mutex); 4524 4525 mlx5_nic_vport_disable_roce(dev->mdev); 4526 } 4527 4528 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) 4529 { 4530 mlx5_ib_cleanup_multiport_master(dev); 4531 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 4532 cleanup_srcu_struct(&dev->mr_srcu); 4533 #endif 4534 kfree(dev->port); 4535 } 4536 4537 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) 4538 { 4539 struct mlx5_core_dev *mdev = dev->mdev; 4540 const char *name; 4541 int err; 4542 int i; 4543 4544 dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), 4545 GFP_KERNEL); 4546 if (!dev->port) 4547 return -ENOMEM; 4548 4549 for (i = 0; i < dev->num_ports; i++) { 4550 spin_lock_init(&dev->port[i].mp.mpi_lock); 4551 rwlock_init(&dev->roce[i].netdev_lock); 4552 } 4553 4554 err = mlx5_ib_init_multiport_master(dev); 4555 if (err) 4556 goto err_free_port; 4557 4558 if (!mlx5_core_mp_enabled(mdev)) { 4559 int i; 4560 4561 for (i = 1; i <= dev->num_ports; i++) { 4562 err = get_port_caps(dev, i); 4563 if (err) 4564 break; 4565 } 4566 } else { 4567 err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); 4568 } 4569 if (err) 4570 goto err_mp; 4571 4572 if (mlx5_use_mad_ifc(dev)) 4573 get_ext_port_caps(dev); 4574 4575 if (!mlx5_lag_is_active(mdev)) 4576 name = "mlx5_%d"; 4577 else 4578 name = "mlx5_bond_%d"; 4579 4580 strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); 4581 dev->ib_dev.owner = THIS_MODULE; 4582 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 4583 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; 4584 
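/* the IB device exposes dev->num_ports ports and reuses the mlx5 core's completion EQ vectors */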
dev->ib_dev.phys_port_cnt = dev->num_ports; 4585 dev->ib_dev.num_comp_vectors = 4586 dev->mdev->priv.eq_table.num_comp_vectors; 4587 dev->ib_dev.dev.parent = &mdev->pdev->dev; 4588 4589 mutex_init(&dev->cap_mask_mutex); 4590 INIT_LIST_HEAD(&dev->qp_list); 4591 spin_lock_init(&dev->reset_flow_resource_lock); 4592 4593 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 4594 err = init_srcu_struct(&dev->mr_srcu); 4595 if (err) 4596 goto err_free_port; 4597 #endif 4598 4599 return 0; 4600 err_mp: 4601 mlx5_ib_cleanup_multiport_master(dev); 4602 4603 err_free_port: 4604 kfree(dev->port); 4605 4606 return -ENOMEM; 4607 } 4608 4609 static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) 4610 { 4611 dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); 4612 4613 if (!dev->flow_db) 4614 return -ENOMEM; 4615 4616 mutex_init(&dev->flow_db->lock); 4617 4618 return 0; 4619 } 4620 4621 static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) 4622 { 4623 kfree(dev->flow_db); 4624 } 4625 4626 static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) 4627 { 4628 struct mlx5_core_dev *mdev = dev->mdev; 4629 int err; 4630 4631 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; 4632 dev->ib_dev.uverbs_cmd_mask = 4633 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 4634 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 4635 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 4636 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 4637 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 4638 (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 4639 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | 4640 (1ull << IB_USER_VERBS_CMD_REG_MR) | 4641 (1ull << IB_USER_VERBS_CMD_REREG_MR) | 4642 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 4643 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 4644 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 4645 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 4646 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 4647 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 4648 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 4649 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 4650 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 4651 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 4652 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 4653 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 4654 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 4655 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 4656 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 4657 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | 4658 (1ull << IB_USER_VERBS_CMD_OPEN_QP); 4659 dev->ib_dev.uverbs_ex_cmd_mask = 4660 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 4661 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | 4662 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | 4663 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | 4664 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); 4665 4666 dev->ib_dev.query_device = mlx5_ib_query_device; 4667 dev->ib_dev.query_port = mlx5_ib_query_port; 4668 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; 4669 dev->ib_dev.query_gid = mlx5_ib_query_gid; 4670 dev->ib_dev.add_gid = mlx5_ib_add_gid; 4671 dev->ib_dev.del_gid = mlx5_ib_del_gid; 4672 dev->ib_dev.query_pkey = mlx5_ib_query_pkey; 4673 dev->ib_dev.modify_device = mlx5_ib_modify_device; 4674 dev->ib_dev.modify_port = mlx5_ib_modify_port; 4675 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; 4676 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; 4677 dev->ib_dev.mmap = mlx5_ib_mmap; 4678 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; 4679 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; 4680 dev->ib_dev.create_ah = mlx5_ib_create_ah; 4681 dev->ib_dev.query_ah = mlx5_ib_query_ah; 4682 dev->ib_dev.destroy_ah = 
mlx5_ib_destroy_ah; 4683 dev->ib_dev.create_srq = mlx5_ib_create_srq; 4684 dev->ib_dev.modify_srq = mlx5_ib_modify_srq; 4685 dev->ib_dev.query_srq = mlx5_ib_query_srq; 4686 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; 4687 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; 4688 dev->ib_dev.create_qp = mlx5_ib_create_qp; 4689 dev->ib_dev.modify_qp = mlx5_ib_modify_qp; 4690 dev->ib_dev.query_qp = mlx5_ib_query_qp; 4691 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; 4692 dev->ib_dev.post_send = mlx5_ib_post_send; 4693 dev->ib_dev.post_recv = mlx5_ib_post_recv; 4694 dev->ib_dev.create_cq = mlx5_ib_create_cq; 4695 dev->ib_dev.modify_cq = mlx5_ib_modify_cq; 4696 dev->ib_dev.resize_cq = mlx5_ib_resize_cq; 4697 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; 4698 dev->ib_dev.poll_cq = mlx5_ib_poll_cq; 4699 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; 4700 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; 4701 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; 4702 dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; 4703 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; 4704 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; 4705 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; 4706 dev->ib_dev.process_mad = mlx5_ib_process_mad; 4707 dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; 4708 dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; 4709 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; 4710 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 4711 dev->ib_dev.get_dev_fw_str = get_dev_fw_str; 4712 dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; 4713 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) 4714 dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; 4715 4716 if (mlx5_core_is_pf(mdev)) { 4717 dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; 4718 dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; 4719 dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; 4720 dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; 4721 } 4722 4723 dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; 4724 4725 dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); 4726 4727 if (MLX5_CAP_GEN(mdev, imaicl)) { 4728 dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; 4729 dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; 4730 dev->ib_dev.uverbs_cmd_mask |= 4731 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 4732 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 4733 } 4734 4735 if (MLX5_CAP_GEN(mdev, xrc)) { 4736 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; 4737 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; 4738 dev->ib_dev.uverbs_cmd_mask |= 4739 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | 4740 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 4741 } 4742 4743 dev->ib_dev.create_flow = mlx5_ib_create_flow; 4744 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; 4745 dev->ib_dev.uverbs_ex_cmd_mask |= 4746 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 4747 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 4748 4749 err = init_node_data(dev); 4750 if (err) 4751 return err; 4752 4753 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && 4754 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) || 4755 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 4756 mutex_init(&dev->lb_mutex); 4757 4758 return 0; 4759 } 4760 4761 static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) 4762 { 4763 struct mlx5_core_dev *mdev = dev->mdev; 4764 enum rdma_link_layer ll; 4765 int port_type_cap; 4766 u8 port_num; 4767 int err; 4768 int i; 4769 4770 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4771 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4772 ll = 
mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4773 4774 if (ll == IB_LINK_LAYER_ETHERNET) { 4775 for (i = 0; i < dev->num_ports; i++) { 4776 dev->roce[i].dev = dev; 4777 dev->roce[i].native_port_num = i + 1; 4778 dev->roce[i].last_port_state = IB_PORT_DOWN; 4779 } 4780 4781 dev->ib_dev.get_netdev = mlx5_ib_get_netdev; 4782 dev->ib_dev.create_wq = mlx5_ib_create_wq; 4783 dev->ib_dev.modify_wq = mlx5_ib_modify_wq; 4784 dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; 4785 dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; 4786 dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; 4787 dev->ib_dev.uverbs_ex_cmd_mask |= 4788 (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 4789 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 4790 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 4791 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 4792 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 4793 err = mlx5_enable_eth(dev, port_num); 4794 if (err) 4795 return err; 4796 } 4797 4798 return 0; 4799 } 4800 4801 static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) 4802 { 4803 struct mlx5_core_dev *mdev = dev->mdev; 4804 enum rdma_link_layer ll; 4805 int port_type_cap; 4806 u8 port_num; 4807 4808 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4809 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4810 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4811 4812 if (ll == IB_LINK_LAYER_ETHERNET) { 4813 mlx5_disable_eth(dev); 4814 mlx5_remove_netdev_notifier(dev, port_num); 4815 } 4816 } 4817 4818 static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) 4819 { 4820 return create_dev_resources(&dev->devr); 4821 } 4822 4823 static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) 4824 { 4825 destroy_dev_resources(&dev->devr); 4826 } 4827 4828 static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) 4829 { 4830 mlx5_ib_internal_fill_odp_caps(dev); 4831 4832 return mlx5_ib_odp_init_one(dev); 4833 } 4834 4835 static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) 4836 { 4837 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { 4838 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; 4839 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; 4840 4841 return mlx5_ib_alloc_counters(dev); 4842 } 4843 4844 return 0; 4845 } 4846 4847 static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) 4848 { 4849 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 4850 mlx5_ib_dealloc_counters(dev); 4851 } 4852 4853 static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) 4854 { 4855 return mlx5_ib_init_cong_debugfs(dev, 4856 mlx5_core_native_port_num(dev->mdev) - 1); 4857 } 4858 4859 static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) 4860 { 4861 mlx5_ib_cleanup_cong_debugfs(dev, 4862 mlx5_core_native_port_num(dev->mdev) - 1); 4863 } 4864 4865 static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev) 4866 { 4867 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); 4868 if (!dev->mdev->priv.uar) 4869 return -ENOMEM; 4870 return 0; 4871 } 4872 4873 static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) 4874 { 4875 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); 4876 } 4877 4878 static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) 4879 { 4880 int err; 4881 4882 err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); 4883 if (err) 4884 return err; 4885 4886 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); 4887 if (err) 4888 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 4889 4890 return err; 4891 } 4892 4893 static void 
mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) 4894 { 4895 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 4896 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 4897 } 4898 4899 static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) 4900 { 4901 return ib_register_device(&dev->ib_dev, NULL); 4902 } 4903 4904 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) 4905 { 4906 ib_unregister_device(&dev->ib_dev); 4907 } 4908 4909 static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev) 4910 { 4911 return create_umr_res(dev); 4912 } 4913 4914 static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev) 4915 { 4916 destroy_umrc_res(dev); 4917 } 4918 4919 static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) 4920 { 4921 init_delay_drop(dev); 4922 4923 return 0; 4924 } 4925 4926 static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) 4927 { 4928 cancel_delay_drop(dev); 4929 } 4930 4931 static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) 4932 { 4933 int err; 4934 int i; 4935 4936 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { 4937 err = device_create_file(&dev->ib_dev.dev, 4938 mlx5_class_attributes[i]); 4939 if (err) 4940 return err; 4941 } 4942 4943 return 0; 4944 } 4945 4946 static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) 4947 { 4948 mlx5_ib_register_vport_reps(dev); 4949 4950 return 0; 4951 } 4952 4953 static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) 4954 { 4955 mlx5_ib_unregister_vport_reps(dev); 4956 } 4957 4958 static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, 4959 const struct mlx5_ib_profile *profile, 4960 int stage) 4961 { 4962 /* Number of stages to cleanup */ 4963 while (stage) { 4964 stage--; 4965 if (profile->stage[stage].cleanup) 4966 profile->stage[stage].cleanup(dev); 4967 } 4968 4969 ib_dealloc_device((struct ib_device *)dev); 4970 } 4971 4972 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); 4973 4974 static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, 4975 const struct mlx5_ib_profile *profile) 4976 { 4977 struct mlx5_ib_dev *dev; 4978 int err; 4979 int i; 4980 4981 printk_once(KERN_INFO "%s", mlx5_version); 4982 4983 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); 4984 if (!dev) 4985 return NULL; 4986 4987 dev->mdev = mdev; 4988 dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), 4989 MLX5_CAP_GEN(mdev, num_vhca_ports)); 4990 4991 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { 4992 if (profile->stage[i].init) { 4993 err = profile->stage[i].init(dev); 4994 if (err) 4995 goto err_out; 4996 } 4997 } 4998 4999 dev->profile = profile; 5000 dev->ib_active = true; 5001 5002 return dev; 5003 5004 err_out: 5005 __mlx5_ib_remove(dev, profile, i); 5006 5007 return NULL; 5008 } 5009 5010 static const struct mlx5_ib_profile pf_profile = { 5011 STAGE_CREATE(MLX5_IB_STAGE_INIT, 5012 mlx5_ib_stage_init_init, 5013 mlx5_ib_stage_init_cleanup), 5014 STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, 5015 mlx5_ib_stage_flow_db_init, 5016 mlx5_ib_stage_flow_db_cleanup), 5017 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 5018 mlx5_ib_stage_caps_init, 5019 NULL), 5020 STAGE_CREATE(MLX5_IB_STAGE_ROCE, 5021 mlx5_ib_stage_roce_init, 5022 mlx5_ib_stage_roce_cleanup), 5023 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 5024 mlx5_ib_stage_dev_res_init, 5025 mlx5_ib_stage_dev_res_cleanup), 5026 STAGE_CREATE(MLX5_IB_STAGE_ODP, 5027 mlx5_ib_stage_odp_init, 5028 NULL), 5029 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 5030 mlx5_ib_stage_counters_init, 5031 mlx5_ib_stage_counters_cleanup), 5032 
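/* the remaining stages follow the same pattern: init handlers run in array order at load, and __mlx5_ib_remove() calls the non-NULL cleanup handlers in reverse order */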
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, 5033 mlx5_ib_stage_cong_debugfs_init, 5034 mlx5_ib_stage_cong_debugfs_cleanup), 5035 STAGE_CREATE(MLX5_IB_STAGE_UAR, 5036 mlx5_ib_stage_uar_init, 5037 mlx5_ib_stage_uar_cleanup), 5038 STAGE_CREATE(MLX5_IB_STAGE_BFREG, 5039 mlx5_ib_stage_bfrag_init, 5040 mlx5_ib_stage_bfrag_cleanup), 5041 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 5042 mlx5_ib_stage_ib_reg_init, 5043 mlx5_ib_stage_ib_reg_cleanup), 5044 STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES, 5045 mlx5_ib_stage_umr_res_init, 5046 mlx5_ib_stage_umr_res_cleanup), 5047 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 5048 mlx5_ib_stage_delay_drop_init, 5049 mlx5_ib_stage_delay_drop_cleanup), 5050 STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, 5051 mlx5_ib_stage_class_attr_init, 5052 NULL), 5053 }; 5054 5055 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) 5056 { 5057 struct mlx5_ib_multiport_info *mpi; 5058 struct mlx5_ib_dev *dev; 5059 bool bound = false; 5060 int err; 5061 5062 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); 5063 if (!mpi) 5064 return NULL; 5065 5066 mpi->mdev = mdev; 5067 5068 err = mlx5_query_nic_vport_system_image_guid(mdev, 5069 &mpi->sys_image_guid); 5070 if (err) { 5071 kfree(mpi); 5072 return NULL; 5073 } 5074 5075 mutex_lock(&mlx5_ib_multiport_mutex); 5076 list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { 5077 if (dev->sys_image_guid == mpi->sys_image_guid) 5078 bound = mlx5_ib_bind_slave_port(dev, mpi); 5079 5080 if (bound) { 5081 rdma_roce_rescan_device(&dev->ib_dev); 5082 break; 5083 } 5084 } 5085 5086 if (!bound) { 5087 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); 5088 dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); 5089 } else { 5090 mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); 5091 } 5092 mutex_unlock(&mlx5_ib_multiport_mutex); 5093 5094 return mpi; 5095 } 5096 5097 static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 5098 { 5099 enum rdma_link_layer ll; 5100 int port_type_cap; 5101 5102 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 5103 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 5104 5105 if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { 5106 u8 port_num = mlx5_core_native_port_num(mdev) - 1; 5107 5108 return mlx5_ib_add_slave_port(mdev, port_num); 5109 } 5110 5111 return __mlx5_ib_add(mdev, &pf_profile); 5112 } 5113 5114 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) 5115 { 5116 struct mlx5_ib_multiport_info *mpi; 5117 struct mlx5_ib_dev *dev; 5118 5119 if (mlx5_core_is_mp_slave(mdev)) { 5120 mpi = context; 5121 mutex_lock(&mlx5_ib_multiport_mutex); 5122 if (mpi->ibdev) 5123 mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); 5124 list_del(&mpi->list); 5125 mutex_unlock(&mlx5_ib_multiport_mutex); 5126 return; 5127 } 5128 5129 dev = context; 5130 __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); 5131 } 5132 5133 static struct mlx5_interface mlx5_ib_interface = { 5134 .add = mlx5_ib_add, 5135 .remove = mlx5_ib_remove, 5136 .event = mlx5_ib_event, 5137 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 5138 .pfault = mlx5_ib_pfault, 5139 #endif 5140 .protocol = MLX5_INTERFACE_PROTOCOL_IB, 5141 }; 5142 5143 static int __init mlx5_ib_init(void) 5144 { 5145 int err; 5146 5147 mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); 5148 if (!mlx5_ib_event_wq) 5149 return -ENOMEM; 5150 5151 mlx5_ib_odp_init(); 5152 5153 err = mlx5_register_interface(&mlx5_ib_interface); 5154 5155 return err; 5156 } 5157 5158 static void __exit mlx5_ib_cleanup(void) 
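/* module exit: unregister from the mlx5 core first so no further events are dispatched, then destroy the event workqueue */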
5159 { 5160 mlx5_unregister_interface(&mlx5_ib_interface); 5161 destroy_workqueue(mlx5_ib_event_wq); 5162 } 5163 5164 module_init(mlx5_ib_init); 5165 module_exit(mlx5_ib_cleanup); 5166
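/*
 * Illustrative sketch, not part of this driver: the staged init/teardown
 * pattern implemented by pf_profile, __mlx5_ib_add() and __mlx5_ib_remove()
 * above, reduced to a minimal standalone C program. Every name below
 * (demo_stage, demo_profile, demo_add, ...) is hypothetical and exists only
 * for this example. Keeping the init and cleanup handlers side by side in one
 * table is what guarantees teardown mirrors bring-up, and optional stages can
 * simply leave cleanup NULL.
 */
#include <stdio.h>

struct demo_dev {
	unsigned int flags;
};

struct demo_stage {
	int  (*init)(struct demo_dev *dev);
	void (*cleanup)(struct demo_dev *dev);
};

static int stage_a_init(struct demo_dev *dev)
{
	dev->flags |= 0x1;
	return 0;
}

static void stage_a_cleanup(struct demo_dev *dev)
{
	dev->flags &= ~0x1;
}

static int stage_b_init(struct demo_dev *dev)
{
	dev->flags |= 0x2;
	return 0;
}

/* Like the STAGE_CREATE(..., NULL) entries above, stage B needs no cleanup. */
static const struct demo_stage demo_profile[] = {
	{ stage_a_init, stage_a_cleanup },
	{ stage_b_init, NULL },
};

#define DEMO_NUM_STAGES (sizeof(demo_profile) / sizeof(demo_profile[0]))

/* Unwind the first @stage stages in reverse order, mirroring the stage loop
 * in __mlx5_ib_remove().
 */
static void demo_remove(struct demo_dev *dev, unsigned int stage)
{
	while (stage--)
		if (demo_profile[stage].cleanup)
			demo_profile[stage].cleanup(dev);
}

/* Run the init handlers in order; on failure unwind only the stages that
 * completed, as the error path of __mlx5_ib_add() does.
 */
static int demo_add(struct demo_dev *dev)
{
	unsigned int i;
	int err;

	for (i = 0; i < DEMO_NUM_STAGES; i++) {
		if (!demo_profile[i].init)
			continue;
		err = demo_profile[i].init(dev);
		if (err) {
			demo_remove(dev, i);
			return err;
		}
	}
	return 0;
}

int main(void)
{
	struct demo_dev dev = { 0 };

	if (!demo_add(&dev))
		printf("all stages initialized, flags=0x%x\n", dev.flags);
	demo_remove(&dev, DEMO_NUM_STAGES);
	return 0;
}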