1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/highmem.h> 34 #include <linux/module.h> 35 #include <linux/init.h> 36 #include <linux/errno.h> 37 #include <linux/pci.h> 38 #include <linux/dma-mapping.h> 39 #include <linux/slab.h> 40 #include <linux/io-mapping.h> 41 #include <linux/sched.h> 42 #include <rdma/ib_user_verbs.h> 43 #include <rdma/ib_addr.h> 44 #include <rdma/ib_cache.h> 45 #include <linux/mlx5/port.h> 46 #include <linux/mlx5/vport.h> 47 #include <rdma/ib_smi.h> 48 #include <rdma/ib_umem.h> 49 #include <linux/in.h> 50 #include <linux/etherdevice.h> 51 #include <linux/mlx5/fs.h> 52 #include "user.h" 53 #include "mlx5_ib.h" 54 55 #define DRIVER_NAME "mlx5_ib" 56 #define DRIVER_VERSION "2.2-1" 57 #define DRIVER_RELDATE "Feb 2014" 58 59 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 60 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); 61 MODULE_LICENSE("Dual BSD/GPL"); 62 MODULE_VERSION(DRIVER_VERSION); 63 64 static int deprecated_prof_sel = 2; 65 module_param_named(prof_sel, deprecated_prof_sel, int, 0444); 66 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. 
Moved to module mlx5_core"); 67 68 static char mlx5_version[] = 69 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" 70 DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; 71 72 enum { 73 MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, 74 }; 75 76 static enum rdma_link_layer 77 mlx5_port_type_cap_to_rdma_ll(int port_type_cap) 78 { 79 switch (port_type_cap) { 80 case MLX5_CAP_PORT_TYPE_IB: 81 return IB_LINK_LAYER_INFINIBAND; 82 case MLX5_CAP_PORT_TYPE_ETH: 83 return IB_LINK_LAYER_ETHERNET; 84 default: 85 return IB_LINK_LAYER_UNSPECIFIED; 86 } 87 } 88 89 static enum rdma_link_layer 90 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) 91 { 92 struct mlx5_ib_dev *dev = to_mdev(device); 93 int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); 94 95 return mlx5_port_type_cap_to_rdma_ll(port_type_cap); 96 } 97 98 static int mlx5_netdev_event(struct notifier_block *this, 99 unsigned long event, void *ptr) 100 { 101 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 102 struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, 103 roce.nb); 104 105 if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) 106 return NOTIFY_DONE; 107 108 write_lock(&ibdev->roce.netdev_lock); 109 if (ndev->dev.parent == &ibdev->mdev->pdev->dev) 110 ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; 111 write_unlock(&ibdev->roce.netdev_lock); 112 113 return NOTIFY_DONE; 114 } 115 116 static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, 117 u8 port_num) 118 { 119 struct mlx5_ib_dev *ibdev = to_mdev(device); 120 struct net_device *ndev; 121 122 /* Ensure ndev does not disappear before we invoke dev_hold() 123 */ 124 read_lock(&ibdev->roce.netdev_lock); 125 ndev = ibdev->roce.netdev; 126 if (ndev) 127 dev_hold(ndev); 128 read_unlock(&ibdev->roce.netdev_lock); 129 130 return ndev; 131 } 132 133 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, 134 struct ib_port_attr *props) 135 { 136 struct mlx5_ib_dev *dev = to_mdev(device); 137 struct net_device *ndev; 138 enum ib_mtu ndev_ib_mtu; 139 u16 qkey_viol_cntr; 140 141 memset(props, 0, sizeof(*props)); 142 143 props->port_cap_flags |= IB_PORT_CM_SUP; 144 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; 145 146 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, 147 roce_address_table_size); 148 props->max_mtu = IB_MTU_4096; 149 props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); 150 props->pkey_tbl_len = 1; 151 props->state = IB_PORT_DOWN; 152 props->phys_state = 3; 153 154 mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); 155 props->qkey_viol_cntr = qkey_viol_cntr; 156 157 ndev = mlx5_ib_get_netdev(device, port_num); 158 if (!ndev) 159 return 0; 160 161 if (netif_running(ndev) && netif_carrier_ok(ndev)) { 162 props->state = IB_PORT_ACTIVE; 163 props->phys_state = 5; 164 } 165 166 ndev_ib_mtu = iboe_get_mtu(ndev->mtu); 167 168 dev_put(ndev); 169 170 props->active_mtu = min(props->max_mtu, ndev_ib_mtu); 171 172 props->active_width = IB_WIDTH_4X; /* TODO */ 173 props->active_speed = IB_SPEED_QDR; /* TODO */ 174 175 return 0; 176 } 177 178 static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, 179 const struct ib_gid_attr *attr, 180 void *mlx5_addr) 181 { 182 #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) 183 char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, 184 source_l3_address); 185 void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, 186 source_mac_47_32); 187 188 if (!gid) 189 return; 190 191 ether_addr_copy(mlx5_addr_mac, 
attr->ndev->dev_addr); 192 193 if (is_vlan_dev(attr->ndev)) { 194 MLX5_SET_RA(mlx5_addr, vlan_valid, 1); 195 MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); 196 } 197 198 switch (attr->gid_type) { 199 case IB_GID_TYPE_IB: 200 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); 201 break; 202 case IB_GID_TYPE_ROCE_UDP_ENCAP: 203 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); 204 break; 205 206 default: 207 WARN_ON(true); 208 } 209 210 if (attr->gid_type != IB_GID_TYPE_IB) { 211 if (ipv6_addr_v4mapped((void *)gid)) 212 MLX5_SET_RA(mlx5_addr, roce_l3_type, 213 MLX5_ROCE_L3_TYPE_IPV4); 214 else 215 MLX5_SET_RA(mlx5_addr, roce_l3_type, 216 MLX5_ROCE_L3_TYPE_IPV6); 217 } 218 219 if ((attr->gid_type == IB_GID_TYPE_IB) || 220 !ipv6_addr_v4mapped((void *)gid)) 221 memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); 222 else 223 memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); 224 } 225 226 static int set_roce_addr(struct ib_device *device, u8 port_num, 227 unsigned int index, 228 const union ib_gid *gid, 229 const struct ib_gid_attr *attr) 230 { 231 struct mlx5_ib_dev *dev = to_mdev(device); 232 u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; 233 u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; 234 void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); 235 enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); 236 237 if (ll != IB_LINK_LAYER_ETHERNET) 238 return -EINVAL; 239 240 memset(in, 0, sizeof(in)); 241 242 ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); 243 244 MLX5_SET(set_roce_address_in, in, roce_address_index, index); 245 MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); 246 247 memset(out, 0, sizeof(out)); 248 return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); 249 } 250 251 static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, 252 unsigned int index, const union ib_gid *gid, 253 const struct ib_gid_attr *attr, 254 __always_unused void **context) 255 { 256 return set_roce_addr(device, port_num, index, gid, attr); 257 } 258 259 static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, 260 unsigned int index, __always_unused void **context) 261 { 262 return set_roce_addr(device, port_num, index, NULL, NULL); 263 } 264 265 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 266 int index) 267 { 268 struct ib_gid_attr attr; 269 union ib_gid gid; 270 271 if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) 272 return 0; 273 274 if (!attr.ndev) 275 return 0; 276 277 dev_put(attr.ndev); 278 279 if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) 280 return 0; 281 282 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); 283 } 284 285 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 286 { 287 return !MLX5_CAP_GEN(dev->mdev, ib_virt); 288 } 289 290 enum { 291 MLX5_VPORT_ACCESS_METHOD_MAD, 292 MLX5_VPORT_ACCESS_METHOD_HCA, 293 MLX5_VPORT_ACCESS_METHOD_NIC, 294 }; 295 296 static int mlx5_get_vport_access_method(struct ib_device *ibdev) 297 { 298 if (mlx5_use_mad_ifc(to_mdev(ibdev))) 299 return MLX5_VPORT_ACCESS_METHOD_MAD; 300 301 if (mlx5_ib_port_link_layer(ibdev, 1) == 302 IB_LINK_LAYER_ETHERNET) 303 return MLX5_VPORT_ACCESS_METHOD_NIC; 304 305 return MLX5_VPORT_ACCESS_METHOD_HCA; 306 } 307 308 static void get_atomic_caps(struct mlx5_ib_dev *dev, 309 struct ib_device_attr *props) 310 { 311 u8 tmp; 312 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 313 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); 314 u8 
atomic_req_8B_endianness_mode = 315 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); 316 317 /* Check if HW supports 8 bytes standard atomic operations and capable 318 * of host endianness respond 319 */ 320 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; 321 if (((atomic_operations & tmp) == tmp) && 322 (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && 323 (atomic_req_8B_endianness_mode)) { 324 props->atomic_cap = IB_ATOMIC_HCA; 325 } else { 326 props->atomic_cap = IB_ATOMIC_NONE; 327 } 328 } 329 330 static int mlx5_query_system_image_guid(struct ib_device *ibdev, 331 __be64 *sys_image_guid) 332 { 333 struct mlx5_ib_dev *dev = to_mdev(ibdev); 334 struct mlx5_core_dev *mdev = dev->mdev; 335 u64 tmp; 336 int err; 337 338 switch (mlx5_get_vport_access_method(ibdev)) { 339 case MLX5_VPORT_ACCESS_METHOD_MAD: 340 return mlx5_query_mad_ifc_system_image_guid(ibdev, 341 sys_image_guid); 342 343 case MLX5_VPORT_ACCESS_METHOD_HCA: 344 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); 345 break; 346 347 case MLX5_VPORT_ACCESS_METHOD_NIC: 348 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); 349 break; 350 351 default: 352 return -EINVAL; 353 } 354 355 if (!err) 356 *sys_image_guid = cpu_to_be64(tmp); 357 358 return err; 359 360 } 361 362 static int mlx5_query_max_pkeys(struct ib_device *ibdev, 363 u16 *max_pkeys) 364 { 365 struct mlx5_ib_dev *dev = to_mdev(ibdev); 366 struct mlx5_core_dev *mdev = dev->mdev; 367 368 switch (mlx5_get_vport_access_method(ibdev)) { 369 case MLX5_VPORT_ACCESS_METHOD_MAD: 370 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); 371 372 case MLX5_VPORT_ACCESS_METHOD_HCA: 373 case MLX5_VPORT_ACCESS_METHOD_NIC: 374 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, 375 pkey_table_size)); 376 return 0; 377 378 default: 379 return -EINVAL; 380 } 381 } 382 383 static int mlx5_query_vendor_id(struct ib_device *ibdev, 384 u32 *vendor_id) 385 { 386 struct mlx5_ib_dev *dev = to_mdev(ibdev); 387 388 switch (mlx5_get_vport_access_method(ibdev)) { 389 case MLX5_VPORT_ACCESS_METHOD_MAD: 390 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); 391 392 case MLX5_VPORT_ACCESS_METHOD_HCA: 393 case MLX5_VPORT_ACCESS_METHOD_NIC: 394 return mlx5_core_query_vendor_id(dev->mdev, vendor_id); 395 396 default: 397 return -EINVAL; 398 } 399 } 400 401 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, 402 __be64 *node_guid) 403 { 404 u64 tmp; 405 int err; 406 407 switch (mlx5_get_vport_access_method(&dev->ib_dev)) { 408 case MLX5_VPORT_ACCESS_METHOD_MAD: 409 return mlx5_query_mad_ifc_node_guid(dev, node_guid); 410 411 case MLX5_VPORT_ACCESS_METHOD_HCA: 412 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); 413 break; 414 415 case MLX5_VPORT_ACCESS_METHOD_NIC: 416 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); 417 break; 418 419 default: 420 return -EINVAL; 421 } 422 423 if (!err) 424 *node_guid = cpu_to_be64(tmp); 425 426 return err; 427 } 428 429 struct mlx5_reg_node_desc { 430 u8 desc[64]; 431 }; 432 433 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) 434 { 435 struct mlx5_reg_node_desc in; 436 437 if (mlx5_use_mad_ifc(dev)) 438 return mlx5_query_mad_ifc_node_desc(dev, node_desc); 439 440 memset(&in, 0, sizeof(in)); 441 442 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, 443 sizeof(struct mlx5_reg_node_desc), 444 MLX5_REG_NODE_DESC, 0, 0); 445 } 446 447 static int mlx5_ib_query_device(struct ib_device *ibdev, 448 struct ib_device_attr *props, 449 struct ib_udata *uhw) 450 { 451 struct mlx5_ib_dev 
*dev = to_mdev(ibdev); 452 struct mlx5_core_dev *mdev = dev->mdev; 453 int err = -ENOMEM; 454 int max_rq_sg; 455 int max_sq_sg; 456 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); 457 458 if (uhw->inlen || uhw->outlen) 459 return -EINVAL; 460 461 memset(props, 0, sizeof(*props)); 462 err = mlx5_query_system_image_guid(ibdev, 463 &props->sys_image_guid); 464 if (err) 465 return err; 466 467 err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); 468 if (err) 469 return err; 470 471 err = mlx5_query_vendor_id(ibdev, &props->vendor_id); 472 if (err) 473 return err; 474 475 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | 476 (fw_rev_min(dev->mdev) << 16) | 477 fw_rev_sub(dev->mdev); 478 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 479 IB_DEVICE_PORT_ACTIVE_EVENT | 480 IB_DEVICE_SYS_IMAGE_GUID | 481 IB_DEVICE_RC_RNR_NAK_GEN; 482 483 if (MLX5_CAP_GEN(mdev, pkv)) 484 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 485 if (MLX5_CAP_GEN(mdev, qkv)) 486 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; 487 if (MLX5_CAP_GEN(mdev, apm)) 488 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 489 if (MLX5_CAP_GEN(mdev, xrc)) 490 props->device_cap_flags |= IB_DEVICE_XRC; 491 if (MLX5_CAP_GEN(mdev, imaicl)) { 492 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | 493 IB_DEVICE_MEM_WINDOW_TYPE_2B; 494 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 495 /* We support 'Gappy' memory registration too */ 496 props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; 497 } 498 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 499 if (MLX5_CAP_GEN(mdev, sho)) { 500 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; 501 /* At this stage no support for signature handover */ 502 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | 503 IB_PROT_T10DIF_TYPE_2 | 504 IB_PROT_T10DIF_TYPE_3; 505 props->sig_guard_cap = IB_GUARD_T10DIF_CRC | 506 IB_GUARD_T10DIF_CSUM; 507 } 508 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 509 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 510 511 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 512 (MLX5_CAP_ETH(dev->mdev, csum_cap))) 513 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 514 515 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 516 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 517 props->device_cap_flags |= IB_DEVICE_UD_TSO; 518 } 519 520 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 521 MLX5_CAP_ETH(dev->mdev, scatter_fcs)) 522 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 523 524 props->vendor_part_id = mdev->pdev->device; 525 props->hw_ver = mdev->pdev->revision; 526 527 props->max_mr_size = ~0ull; 528 props->page_size_cap = ~(min_page_size - 1); 529 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); 530 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 531 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / 532 sizeof(struct mlx5_wqe_data_seg); 533 max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - 534 sizeof(struct mlx5_wqe_ctrl_seg)) / 535 sizeof(struct mlx5_wqe_data_seg); 536 props->max_sge = min(max_rq_sg, max_sq_sg); 537 props->max_sge_rd = MLX5_MAX_SGE_RD; 538 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 539 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 540 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 541 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); 542 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); 543 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); 544 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); 545 
props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; 546 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 547 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 548 props->max_srq_sge = max_rq_sg - 1; 549 props->max_fast_reg_page_list_len = 550 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); 551 get_atomic_caps(dev, props); 552 props->masked_atomic_cap = IB_ATOMIC_NONE; 553 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 554 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 555 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 556 props->max_mcast_grp; 557 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ 558 props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); 559 props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; 560 561 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 562 if (MLX5_CAP_GEN(mdev, pg)) 563 props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; 564 props->odp_caps = dev->odp_caps; 565 #endif 566 567 if (MLX5_CAP_GEN(mdev, cd)) 568 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; 569 570 if (!mlx5_core_is_pf(mdev)) 571 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; 572 573 return 0; 574 } 575 576 enum mlx5_ib_width { 577 MLX5_IB_WIDTH_1X = 1 << 0, 578 MLX5_IB_WIDTH_2X = 1 << 1, 579 MLX5_IB_WIDTH_4X = 1 << 2, 580 MLX5_IB_WIDTH_8X = 1 << 3, 581 MLX5_IB_WIDTH_12X = 1 << 4 582 }; 583 584 static int translate_active_width(struct ib_device *ibdev, u8 active_width, 585 u8 *ib_width) 586 { 587 struct mlx5_ib_dev *dev = to_mdev(ibdev); 588 int err = 0; 589 590 if (active_width & MLX5_IB_WIDTH_1X) { 591 *ib_width = IB_WIDTH_1X; 592 } else if (active_width & MLX5_IB_WIDTH_2X) { 593 mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", 594 (int)active_width); 595 err = -EINVAL; 596 } else if (active_width & MLX5_IB_WIDTH_4X) { 597 *ib_width = IB_WIDTH_4X; 598 } else if (active_width & MLX5_IB_WIDTH_8X) { 599 *ib_width = IB_WIDTH_8X; 600 } else if (active_width & MLX5_IB_WIDTH_12X) { 601 *ib_width = IB_WIDTH_12X; 602 } else { 603 mlx5_ib_dbg(dev, "Invalid active_width %d\n", 604 (int)active_width); 605 err = -EINVAL; 606 } 607 608 return err; 609 } 610 611 static int mlx5_mtu_to_ib_mtu(int mtu) 612 { 613 switch (mtu) { 614 case 256: return 1; 615 case 512: return 2; 616 case 1024: return 3; 617 case 2048: return 4; 618 case 4096: return 5; 619 default: 620 pr_warn("invalid mtu\n"); 621 return -1; 622 } 623 } 624 625 enum ib_max_vl_num { 626 __IB_MAX_VL_0 = 1, 627 __IB_MAX_VL_0_1 = 2, 628 __IB_MAX_VL_0_3 = 3, 629 __IB_MAX_VL_0_7 = 4, 630 __IB_MAX_VL_0_14 = 5, 631 }; 632 633 enum mlx5_vl_hw_cap { 634 MLX5_VL_HW_0 = 1, 635 MLX5_VL_HW_0_1 = 2, 636 MLX5_VL_HW_0_2 = 3, 637 MLX5_VL_HW_0_3 = 4, 638 MLX5_VL_HW_0_4 = 5, 639 MLX5_VL_HW_0_5 = 6, 640 MLX5_VL_HW_0_6 = 7, 641 MLX5_VL_HW_0_7 = 8, 642 MLX5_VL_HW_0_14 = 15 643 }; 644 645 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, 646 u8 *max_vl_num) 647 { 648 switch (vl_hw_cap) { 649 case MLX5_VL_HW_0: 650 *max_vl_num = __IB_MAX_VL_0; 651 break; 652 case MLX5_VL_HW_0_1: 653 *max_vl_num = __IB_MAX_VL_0_1; 654 break; 655 case MLX5_VL_HW_0_3: 656 *max_vl_num = __IB_MAX_VL_0_3; 657 break; 658 case MLX5_VL_HW_0_7: 659 *max_vl_num = __IB_MAX_VL_0_7; 660 break; 661 case MLX5_VL_HW_0_14: 662 *max_vl_num = __IB_MAX_VL_0_14; 663 break; 664 665 default: 666 return -EINVAL; 667 } 668 669 return 0; 670 } 671 672 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, 673 struct ib_port_attr *props) 674 { 675 
struct mlx5_ib_dev *dev = to_mdev(ibdev); 676 struct mlx5_core_dev *mdev = dev->mdev; 677 struct mlx5_hca_vport_context *rep; 678 u16 max_mtu; 679 u16 oper_mtu; 680 int err; 681 u8 ib_link_width_oper; 682 u8 vl_hw_cap; 683 684 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 685 if (!rep) { 686 err = -ENOMEM; 687 goto out; 688 } 689 690 memset(props, 0, sizeof(*props)); 691 692 err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); 693 if (err) 694 goto out; 695 696 props->lid = rep->lid; 697 props->lmc = rep->lmc; 698 props->sm_lid = rep->sm_lid; 699 props->sm_sl = rep->sm_sl; 700 props->state = rep->vport_state; 701 props->phys_state = rep->port_physical_state; 702 props->port_cap_flags = rep->cap_mask1; 703 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); 704 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); 705 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); 706 props->bad_pkey_cntr = rep->pkey_violation_counter; 707 props->qkey_viol_cntr = rep->qkey_violation_counter; 708 props->subnet_timeout = rep->subnet_timeout; 709 props->init_type_reply = rep->init_type_reply; 710 props->grh_required = rep->grh_required; 711 712 err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); 713 if (err) 714 goto out; 715 716 err = translate_active_width(ibdev, ib_link_width_oper, 717 &props->active_width); 718 if (err) 719 goto out; 720 err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, 721 port); 722 if (err) 723 goto out; 724 725 mlx5_query_port_max_mtu(mdev, &max_mtu, port); 726 727 props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); 728 729 mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); 730 731 props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); 732 733 err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port); 734 if (err) 735 goto out; 736 737 err = translate_max_vl_num(ibdev, vl_hw_cap, 738 &props->max_vl_num); 739 out: 740 kfree(rep); 741 return err; 742 } 743 744 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, 745 struct ib_port_attr *props) 746 { 747 switch (mlx5_get_vport_access_method(ibdev)) { 748 case MLX5_VPORT_ACCESS_METHOD_MAD: 749 return mlx5_query_mad_ifc_port(ibdev, port, props); 750 751 case MLX5_VPORT_ACCESS_METHOD_HCA: 752 return mlx5_query_hca_port(ibdev, port, props); 753 754 case MLX5_VPORT_ACCESS_METHOD_NIC: 755 return mlx5_query_port_roce(ibdev, port, props); 756 757 default: 758 return -EINVAL; 759 } 760 } 761 762 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 763 union ib_gid *gid) 764 { 765 struct mlx5_ib_dev *dev = to_mdev(ibdev); 766 struct mlx5_core_dev *mdev = dev->mdev; 767 768 switch (mlx5_get_vport_access_method(ibdev)) { 769 case MLX5_VPORT_ACCESS_METHOD_MAD: 770 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); 771 772 case MLX5_VPORT_ACCESS_METHOD_HCA: 773 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid); 774 775 default: 776 return -EINVAL; 777 } 778 779 } 780 781 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 782 u16 *pkey) 783 { 784 struct mlx5_ib_dev *dev = to_mdev(ibdev); 785 struct mlx5_core_dev *mdev = dev->mdev; 786 787 switch (mlx5_get_vport_access_method(ibdev)) { 788 case MLX5_VPORT_ACCESS_METHOD_MAD: 789 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); 790 791 case MLX5_VPORT_ACCESS_METHOD_HCA: 792 case MLX5_VPORT_ACCESS_METHOD_NIC: 793 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, 794 pkey); 795 default: 796 return -EINVAL; 797 } 798 } 799 800 
static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, 801 struct ib_device_modify *props) 802 { 803 struct mlx5_ib_dev *dev = to_mdev(ibdev); 804 struct mlx5_reg_node_desc in; 805 struct mlx5_reg_node_desc out; 806 int err; 807 808 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 809 return -EOPNOTSUPP; 810 811 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 812 return 0; 813 814 /* 815 * If possible, pass node desc to FW, so it can generate 816 * a 144 trap. If cmd fails, just ignore. 817 */ 818 memcpy(&in, props->node_desc, 64); 819 err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, 820 sizeof(out), MLX5_REG_NODE_DESC, 0, 1); 821 if (err) 822 return err; 823 824 memcpy(ibdev->node_desc, props->node_desc, 64); 825 826 return err; 827 } 828 829 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, 830 struct ib_port_modify *props) 831 { 832 struct mlx5_ib_dev *dev = to_mdev(ibdev); 833 struct ib_port_attr attr; 834 u32 tmp; 835 int err; 836 837 mutex_lock(&dev->cap_mask_mutex); 838 839 err = mlx5_ib_query_port(ibdev, port, &attr); 840 if (err) 841 goto out; 842 843 tmp = (attr.port_cap_flags | props->set_port_cap_mask) & 844 ~props->clr_port_cap_mask; 845 846 err = mlx5_set_port_caps(dev->mdev, port, tmp); 847 848 out: 849 mutex_unlock(&dev->cap_mask_mutex); 850 return err; 851 } 852 853 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 854 struct ib_udata *udata) 855 { 856 struct mlx5_ib_dev *dev = to_mdev(ibdev); 857 struct mlx5_ib_alloc_ucontext_req_v2 req = {}; 858 struct mlx5_ib_alloc_ucontext_resp resp = {}; 859 struct mlx5_ib_ucontext *context; 860 struct mlx5_uuar_info *uuari; 861 struct mlx5_uar *uars; 862 int gross_uuars; 863 int num_uars; 864 int ver; 865 int uuarn; 866 int err; 867 int i; 868 size_t reqlen; 869 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, 870 max_cqe_version); 871 872 if (!dev->ib_active) 873 return ERR_PTR(-EAGAIN); 874 875 if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) 876 return ERR_PTR(-EINVAL); 877 878 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); 879 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) 880 ver = 0; 881 else if (reqlen >= min_req_v2) 882 ver = 2; 883 else 884 return ERR_PTR(-EINVAL); 885 886 err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); 887 if (err) 888 return ERR_PTR(err); 889 890 if (req.flags) 891 return ERR_PTR(-EINVAL); 892 893 if (req.total_num_uuars > MLX5_MAX_UUARS) 894 return ERR_PTR(-ENOMEM); 895 896 if (req.total_num_uuars == 0) 897 return ERR_PTR(-EINVAL); 898 899 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) 900 return ERR_PTR(-EOPNOTSUPP); 901 902 if (reqlen > sizeof(req) && 903 !ib_is_udata_cleared(udata, sizeof(req), 904 reqlen - sizeof(req))) 905 return ERR_PTR(-EOPNOTSUPP); 906 907 req.total_num_uuars = ALIGN(req.total_num_uuars, 908 MLX5_NON_FP_BF_REGS_PER_PAGE); 909 if (req.num_low_latency_uuars > req.total_num_uuars - 1) 910 return ERR_PTR(-EINVAL); 911 912 num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; 913 gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; 914 resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 915 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); 916 resp.cache_line_size = L1_CACHE_BYTES; 917 resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 918 resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 919 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 920 resp.max_recv_wr = 1 << 
MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 921 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 922 resp.cqe_version = min_t(__u8, 923 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), 924 req.max_cqe_version); 925 resp.response_length = min(offsetof(typeof(resp), response_length) + 926 sizeof(resp.response_length), udata->outlen); 927 928 context = kzalloc(sizeof(*context), GFP_KERNEL); 929 if (!context) 930 return ERR_PTR(-ENOMEM); 931 932 uuari = &context->uuari; 933 mutex_init(&uuari->lock); 934 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); 935 if (!uars) { 936 err = -ENOMEM; 937 goto out_ctx; 938 } 939 940 uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), 941 sizeof(*uuari->bitmap), 942 GFP_KERNEL); 943 if (!uuari->bitmap) { 944 err = -ENOMEM; 945 goto out_uar_ctx; 946 } 947 /* 948 * clear all fast path uuars 949 */ 950 for (i = 0; i < gross_uuars; i++) { 951 uuarn = i & 3; 952 if (uuarn == 2 || uuarn == 3) 953 set_bit(i, uuari->bitmap); 954 } 955 956 uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); 957 if (!uuari->count) { 958 err = -ENOMEM; 959 goto out_bitmap; 960 } 961 962 for (i = 0; i < num_uars; i++) { 963 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index); 964 if (err) 965 goto out_count; 966 } 967 968 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 969 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 970 #endif 971 972 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { 973 err = mlx5_core_alloc_transport_domain(dev->mdev, 974 &context->tdn); 975 if (err) 976 goto out_uars; 977 } 978 979 INIT_LIST_HEAD(&context->db_page_list); 980 mutex_init(&context->db_page_mutex); 981 982 resp.tot_uuars = req.total_num_uuars; 983 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); 984 985 if (field_avail(typeof(resp), cqe_version, udata->outlen)) 986 resp.response_length += sizeof(resp.cqe_version); 987 988 if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { 989 resp.comp_mask |= 990 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; 991 resp.hca_core_clock_offset = 992 offsetof(struct mlx5_init_seg, internal_timer_h) % 993 PAGE_SIZE; 994 resp.response_length += sizeof(resp.hca_core_clock_offset) + 995 sizeof(resp.reserved2) + 996 sizeof(resp.reserved3); 997 } 998 999 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1000 if (err) 1001 goto out_td; 1002 1003 uuari->ver = ver; 1004 uuari->num_low_latency_uuars = req.num_low_latency_uuars; 1005 uuari->uars = uars; 1006 uuari->num_uars = num_uars; 1007 context->cqe_version = resp.cqe_version; 1008 1009 return &context->ibucontext; 1010 1011 out_td: 1012 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1013 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); 1014 1015 out_uars: 1016 for (i--; i >= 0; i--) 1017 mlx5_cmd_free_uar(dev->mdev, uars[i].index); 1018 out_count: 1019 kfree(uuari->count); 1020 1021 out_bitmap: 1022 kfree(uuari->bitmap); 1023 1024 out_uar_ctx: 1025 kfree(uars); 1026 1027 out_ctx: 1028 kfree(context); 1029 return ERR_PTR(err); 1030 } 1031 1032 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 1033 { 1034 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1035 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1036 struct mlx5_uuar_info *uuari = &context->uuari; 1037 int i; 1038 1039 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1040 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); 1041 1042 for (i = 0; i < uuari->num_uars; i++) { 1043 if (mlx5_cmd_free_uar(dev->mdev, 
uuari->uars[i].index)) 1044 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); 1045 } 1046 1047 kfree(uuari->count); 1048 kfree(uuari->bitmap); 1049 kfree(uuari->uars); 1050 kfree(context); 1051 1052 return 0; 1053 } 1054 1055 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) 1056 { 1057 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index; 1058 } 1059 1060 static int get_command(unsigned long offset) 1061 { 1062 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; 1063 } 1064 1065 static int get_arg(unsigned long offset) 1066 { 1067 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); 1068 } 1069 1070 static int get_index(unsigned long offset) 1071 { 1072 return get_arg(offset); 1073 } 1074 1075 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1076 { 1077 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1078 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1079 struct mlx5_uuar_info *uuari = &context->uuari; 1080 unsigned long command; 1081 unsigned long idx; 1082 phys_addr_t pfn; 1083 1084 command = get_command(vma->vm_pgoff); 1085 switch (command) { 1086 case MLX5_IB_MMAP_REGULAR_PAGE: 1087 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1088 return -EINVAL; 1089 1090 idx = get_index(vma->vm_pgoff); 1091 if (idx >= uuari->num_uars) 1092 return -EINVAL; 1093 1094 pfn = uar_index2pfn(dev, uuari->uars[idx].index); 1095 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, 1096 (unsigned long long)pfn); 1097 1098 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 1099 if (io_remap_pfn_range(vma, vma->vm_start, pfn, 1100 PAGE_SIZE, vma->vm_page_prot)) 1101 return -EAGAIN; 1102 1103 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", 1104 vma->vm_start, 1105 (unsigned long long)pfn << PAGE_SHIFT); 1106 break; 1107 1108 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1109 return -ENOSYS; 1110 1111 case MLX5_IB_MMAP_CORE_CLOCK: 1112 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1113 return -EINVAL; 1114 1115 if (vma->vm_flags & (VM_WRITE | VM_EXEC)) 1116 return -EPERM; 1117 1118 /* Don't expose to user-space information it shouldn't have */ 1119 if (PAGE_SIZE > 4096) 1120 return -EOPNOTSUPP; 1121 1122 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1123 pfn = (dev->mdev->iseg_base + 1124 offsetof(struct mlx5_init_seg, internal_timer_h)) >> 1125 PAGE_SHIFT; 1126 if (io_remap_pfn_range(vma, vma->vm_start, pfn, 1127 PAGE_SIZE, vma->vm_page_prot)) 1128 return -EAGAIN; 1129 1130 mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", 1131 vma->vm_start, 1132 (unsigned long long)pfn << PAGE_SHIFT); 1133 break; 1134 1135 default: 1136 return -EINVAL; 1137 } 1138 1139 return 0; 1140 } 1141 1142 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, 1143 struct ib_ucontext *context, 1144 struct ib_udata *udata) 1145 { 1146 struct mlx5_ib_alloc_pd_resp resp; 1147 struct mlx5_ib_pd *pd; 1148 int err; 1149 1150 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 1151 if (!pd) 1152 return ERR_PTR(-ENOMEM); 1153 1154 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 1155 if (err) { 1156 kfree(pd); 1157 return ERR_PTR(err); 1158 } 1159 1160 if (context) { 1161 resp.pdn = pd->pdn; 1162 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 1163 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 1164 kfree(pd); 1165 return ERR_PTR(-EFAULT); 1166 } 1167 } 1168 1169 return &pd->ibpd; 1170 } 1171 1172 static int mlx5_ib_dealloc_pd(struct ib_pd *pd) 1173 { 1174 struct mlx5_ib_dev 
*mdev = to_mdev(pd->device); 1175 struct mlx5_ib_pd *mpd = to_mpd(pd); 1176 1177 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 1178 kfree(mpd); 1179 1180 return 0; 1181 } 1182 1183 static bool outer_header_zero(u32 *match_criteria) 1184 { 1185 int size = MLX5_ST_SZ_BYTES(fte_match_param); 1186 char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, 1187 outer_headers); 1188 1189 return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, 1190 outer_headers_c + 1, 1191 size - 1); 1192 } 1193 1194 static int parse_flow_attr(u32 *match_c, u32 *match_v, 1195 union ib_flow_spec *ib_spec) 1196 { 1197 void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 1198 outer_headers); 1199 void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 1200 outer_headers); 1201 switch (ib_spec->type) { 1202 case IB_FLOW_SPEC_ETH: 1203 if (ib_spec->size != sizeof(ib_spec->eth)) 1204 return -EINVAL; 1205 1206 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, 1207 dmac_47_16), 1208 ib_spec->eth.mask.dst_mac); 1209 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, 1210 dmac_47_16), 1211 ib_spec->eth.val.dst_mac); 1212 1213 if (ib_spec->eth.mask.vlan_tag) { 1214 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1215 vlan_tag, 1); 1216 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1217 vlan_tag, 1); 1218 1219 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1220 first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); 1221 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1222 first_vid, ntohs(ib_spec->eth.val.vlan_tag)); 1223 1224 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1225 first_cfi, 1226 ntohs(ib_spec->eth.mask.vlan_tag) >> 12); 1227 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1228 first_cfi, 1229 ntohs(ib_spec->eth.val.vlan_tag) >> 12); 1230 1231 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1232 first_prio, 1233 ntohs(ib_spec->eth.mask.vlan_tag) >> 13); 1234 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1235 first_prio, 1236 ntohs(ib_spec->eth.val.vlan_tag) >> 13); 1237 } 1238 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1239 ethertype, ntohs(ib_spec->eth.mask.ether_type)); 1240 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1241 ethertype, ntohs(ib_spec->eth.val.ether_type)); 1242 break; 1243 case IB_FLOW_SPEC_IPV4: 1244 if (ib_spec->size != sizeof(ib_spec->ipv4)) 1245 return -EINVAL; 1246 1247 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, 1248 ethertype, 0xffff); 1249 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, 1250 ethertype, ETH_P_IP); 1251 1252 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, 1253 src_ipv4_src_ipv6.ipv4_layout.ipv4), 1254 &ib_spec->ipv4.mask.src_ip, 1255 sizeof(ib_spec->ipv4.mask.src_ip)); 1256 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, 1257 src_ipv4_src_ipv6.ipv4_layout.ipv4), 1258 &ib_spec->ipv4.val.src_ip, 1259 sizeof(ib_spec->ipv4.val.src_ip)); 1260 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, 1261 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 1262 &ib_spec->ipv4.mask.dst_ip, 1263 sizeof(ib_spec->ipv4.mask.dst_ip)); 1264 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, 1265 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 1266 &ib_spec->ipv4.val.dst_ip, 1267 sizeof(ib_spec->ipv4.val.dst_ip)); 1268 break; 1269 case IB_FLOW_SPEC_TCP: 1270 if (ib_spec->size != sizeof(ib_spec->tcp_udp)) 1271 return -EINVAL; 1272 1273 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 1274 0xff); 1275 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, 1276 
IPPROTO_TCP); 1277 1278 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, 1279 ntohs(ib_spec->tcp_udp.mask.src_port)); 1280 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, 1281 ntohs(ib_spec->tcp_udp.val.src_port)); 1282 1283 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, 1284 ntohs(ib_spec->tcp_udp.mask.dst_port)); 1285 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, 1286 ntohs(ib_spec->tcp_udp.val.dst_port)); 1287 break; 1288 case IB_FLOW_SPEC_UDP: 1289 if (ib_spec->size != sizeof(ib_spec->tcp_udp)) 1290 return -EINVAL; 1291 1292 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 1293 0xff); 1294 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, 1295 IPPROTO_UDP); 1296 1297 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, 1298 ntohs(ib_spec->tcp_udp.mask.src_port)); 1299 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, 1300 ntohs(ib_spec->tcp_udp.val.src_port)); 1301 1302 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, 1303 ntohs(ib_spec->tcp_udp.mask.dst_port)); 1304 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, 1305 ntohs(ib_spec->tcp_udp.val.dst_port)); 1306 break; 1307 default: 1308 return -EINVAL; 1309 } 1310 1311 return 0; 1312 } 1313 1314 /* If a flow could catch both multicast and unicast packets, 1315 * it won't fall into the multicast flow steering table and this rule 1316 * could steal other multicast packets. 1317 */ 1318 static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) 1319 { 1320 struct ib_flow_spec_eth *eth_spec; 1321 1322 if (ib_attr->type != IB_FLOW_ATTR_NORMAL || 1323 ib_attr->size < sizeof(struct ib_flow_attr) + 1324 sizeof(struct ib_flow_spec_eth) || 1325 ib_attr->num_of_specs < 1) 1326 return false; 1327 1328 eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); 1329 if (eth_spec->type != IB_FLOW_SPEC_ETH || 1330 eth_spec->size != sizeof(*eth_spec)) 1331 return false; 1332 1333 return is_multicast_ether_addr(eth_spec->mask.dst_mac) && 1334 is_multicast_ether_addr(eth_spec->val.dst_mac); 1335 } 1336 1337 static bool is_valid_attr(struct ib_flow_attr *flow_attr) 1338 { 1339 union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); 1340 bool has_ipv4_spec = false; 1341 bool eth_type_ipv4 = true; 1342 unsigned int spec_index; 1343 1344 /* Validate that ethertype is correct */ 1345 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 1346 if (ib_spec->type == IB_FLOW_SPEC_ETH && 1347 ib_spec->eth.mask.ether_type) { 1348 if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && 1349 ib_spec->eth.val.ether_type == htons(ETH_P_IP))) 1350 eth_type_ipv4 = false; 1351 } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { 1352 has_ipv4_spec = true; 1353 } 1354 ib_spec = (void *)ib_spec + ib_spec->size; 1355 } 1356 return !has_ipv4_spec || eth_type_ipv4; 1357 } 1358 1359 static void put_flow_table(struct mlx5_ib_dev *dev, 1360 struct mlx5_ib_flow_prio *prio, bool ft_added) 1361 { 1362 prio->refcount -= !!ft_added; 1363 if (!prio->refcount) { 1364 mlx5_destroy_flow_table(prio->flow_table); 1365 prio->flow_table = NULL; 1366 } 1367 } 1368 1369 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 1370 { 1371 struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); 1372 struct mlx5_ib_flow_handler *handler = container_of(flow_id, 1373 struct mlx5_ib_flow_handler, 1374 ibflow); 1375 struct mlx5_ib_flow_handler *iter, *tmp; 1376 1377 mutex_lock(&dev->flow_db.lock); 1378 1379 list_for_each_entry_safe(iter, tmp, &handler->list, 
list) { 1380 mlx5_del_flow_rule(iter->rule); 1381 list_del(&iter->list); 1382 kfree(iter); 1383 } 1384 1385 mlx5_del_flow_rule(handler->rule); 1386 put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); 1387 mutex_unlock(&dev->flow_db.lock); 1388 1389 kfree(handler); 1390 1391 return 0; 1392 } 1393 1394 static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 1395 { 1396 priority *= 2; 1397 if (!dont_trap) 1398 priority++; 1399 return priority; 1400 } 1401 1402 #define MLX5_FS_MAX_TYPES 10 1403 #define MLX5_FS_MAX_ENTRIES 32000UL 1404 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 1405 struct ib_flow_attr *flow_attr) 1406 { 1407 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 1408 struct mlx5_flow_namespace *ns = NULL; 1409 struct mlx5_ib_flow_prio *prio; 1410 struct mlx5_flow_table *ft; 1411 int num_entries; 1412 int num_groups; 1413 int priority; 1414 int err = 0; 1415 1416 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1417 if (flow_is_multicast_only(flow_attr) && 1418 !dont_trap) 1419 priority = MLX5_IB_FLOW_MCAST_PRIO; 1420 else 1421 priority = ib_prio_to_core_prio(flow_attr->priority, 1422 dont_trap); 1423 ns = mlx5_get_flow_namespace(dev->mdev, 1424 MLX5_FLOW_NAMESPACE_BYPASS); 1425 num_entries = MLX5_FS_MAX_ENTRIES; 1426 num_groups = MLX5_FS_MAX_TYPES; 1427 prio = &dev->flow_db.prios[priority]; 1428 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 1429 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 1430 ns = mlx5_get_flow_namespace(dev->mdev, 1431 MLX5_FLOW_NAMESPACE_LEFTOVERS); 1432 build_leftovers_ft_param(&priority, 1433 &num_entries, 1434 &num_groups); 1435 prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; 1436 } 1437 1438 if (!ns) 1439 return ERR_PTR(-ENOTSUPP); 1440 1441 ft = prio->flow_table; 1442 if (!ft) { 1443 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 1444 num_entries, 1445 num_groups); 1446 1447 if (!IS_ERR(ft)) { 1448 prio->refcount = 0; 1449 prio->flow_table = ft; 1450 } else { 1451 err = PTR_ERR(ft); 1452 } 1453 } 1454 1455 return err ? ERR_PTR(err) : prio; 1456 } 1457 1458 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, 1459 struct mlx5_ib_flow_prio *ft_prio, 1460 struct ib_flow_attr *flow_attr, 1461 struct mlx5_flow_destination *dst) 1462 { 1463 struct mlx5_flow_table *ft = ft_prio->flow_table; 1464 struct mlx5_ib_flow_handler *handler; 1465 void *ib_flow = flow_attr + 1; 1466 u8 match_criteria_enable = 0; 1467 unsigned int spec_index; 1468 u32 *match_c; 1469 u32 *match_v; 1470 u32 action; 1471 int err = 0; 1472 1473 if (!is_valid_attr(flow_attr)) 1474 return ERR_PTR(-EINVAL); 1475 1476 match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); 1477 match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); 1478 handler = kzalloc(sizeof(*handler), GFP_KERNEL); 1479 if (!handler || !match_c || !match_v) { 1480 err = -ENOMEM; 1481 goto free; 1482 } 1483 1484 INIT_LIST_HEAD(&handler->list); 1485 1486 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 1487 err = parse_flow_attr(match_c, match_v, ib_flow); 1488 if (err < 0) 1489 goto free; 1490 1491 ib_flow += ((union ib_flow_spec *)ib_flow)->size; 1492 } 1493 1494 /* Outer header support only */ 1495 match_criteria_enable = (!outer_header_zero(match_c)) << 0; 1496 action = dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : 1497 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 1498 handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, 1499 match_c, match_v, 1500 action, 1501 MLX5_FS_DEFAULT_FLOW_TAG, 1502 dst); 1503 1504 if (IS_ERR(handler->rule)) { 1505 err = PTR_ERR(handler->rule); 1506 goto free; 1507 } 1508 1509 handler->prio = ft_prio - dev->flow_db.prios; 1510 1511 ft_prio->flow_table = ft; 1512 free: 1513 if (err) 1514 kfree(handler); 1515 kfree(match_c); 1516 kfree(match_v); 1517 return err ? ERR_PTR(err) : handler; 1518 } 1519 1520 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, 1521 struct mlx5_ib_flow_prio *ft_prio, 1522 struct ib_flow_attr *flow_attr, 1523 struct mlx5_flow_destination *dst) 1524 { 1525 struct mlx5_ib_flow_handler *handler_dst = NULL; 1526 struct mlx5_ib_flow_handler *handler = NULL; 1527 1528 handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); 1529 if (!IS_ERR(handler)) { 1530 handler_dst = create_flow_rule(dev, ft_prio, 1531 flow_attr, dst); 1532 if (IS_ERR(handler_dst)) { 1533 mlx5_del_flow_rule(handler->rule); 1534 kfree(handler); 1535 handler = handler_dst; 1536 } else { 1537 list_add(&handler_dst->list, &handler->list); 1538 } 1539 } 1540 1541 return handler; 1542 } 1543 enum { 1544 LEFTOVERS_MC, 1545 LEFTOVERS_UC, 1546 }; 1547 1548 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, 1549 struct mlx5_ib_flow_prio *ft_prio, 1550 struct ib_flow_attr *flow_attr, 1551 struct mlx5_flow_destination *dst) 1552 { 1553 struct mlx5_ib_flow_handler *handler_ucast = NULL; 1554 struct mlx5_ib_flow_handler *handler = NULL; 1555 1556 static struct { 1557 struct ib_flow_attr flow_attr; 1558 struct ib_flow_spec_eth eth_flow; 1559 } leftovers_specs[] = { 1560 [LEFTOVERS_MC] = { 1561 .flow_attr = { 1562 .num_of_specs = 1, 1563 .size = sizeof(leftovers_specs[0]) 1564 }, 1565 .eth_flow = { 1566 .type = IB_FLOW_SPEC_ETH, 1567 .size = sizeof(struct ib_flow_spec_eth), 1568 .mask = {.dst_mac = {0x1} }, 1569 .val = {.dst_mac = {0x1} } 1570 } 1571 }, 1572 [LEFTOVERS_UC] = { 1573 .flow_attr = { 1574 .num_of_specs = 1, 1575 .size = sizeof(leftovers_specs[0]) 1576 }, 1577 .eth_flow = { 1578 .type = IB_FLOW_SPEC_ETH, 1579 .size = sizeof(struct ib_flow_spec_eth), 1580 .mask = {.dst_mac = {0x1} }, 1581 .val = {.dst_mac = {} } 1582 } 1583 } 1584 }; 1585 1586 handler = create_flow_rule(dev, ft_prio, 1587 &leftovers_specs[LEFTOVERS_MC].flow_attr, 1588 dst); 1589 if (!IS_ERR(handler) && 1590 flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { 1591 handler_ucast = create_flow_rule(dev, ft_prio, 1592 &leftovers_specs[LEFTOVERS_UC].flow_attr, 1593 dst); 1594 if (IS_ERR(handler_ucast)) { 1595 kfree(handler); 1596 handler = handler_ucast; 1597 } else { 1598 list_add(&handler_ucast->list, &handler->list); 1599 } 1600 } 1601 1602 return handler; 1603 } 1604 1605 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, 1606 struct ib_flow_attr *flow_attr, 1607 int domain) 1608 { 1609 struct mlx5_ib_dev *dev = to_mdev(qp->device); 1610 struct mlx5_ib_flow_handler *handler = NULL; 1611 struct mlx5_flow_destination *dst = NULL; 1612 struct mlx5_ib_flow_prio *ft_prio; 1613 int err; 1614 1615 if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) 1616 return ERR_PTR(-ENOSPC); 1617 1618 if (domain != IB_FLOW_DOMAIN_USER || 1619 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || 1620 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) 1621 return ERR_PTR(-EINVAL); 1622 1623 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 1624 if 
(!dst) 1625 return ERR_PTR(-ENOMEM); 1626 1627 mutex_lock(&dev->flow_db.lock); 1628 1629 ft_prio = get_flow_table(dev, flow_attr); 1630 if (IS_ERR(ft_prio)) { 1631 err = PTR_ERR(ft_prio); 1632 goto unlock; 1633 } 1634 1635 dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1636 dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; 1637 1638 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1639 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { 1640 handler = create_dont_trap_rule(dev, ft_prio, 1641 flow_attr, dst); 1642 } else { 1643 handler = create_flow_rule(dev, ft_prio, flow_attr, 1644 dst); 1645 } 1646 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 1647 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 1648 handler = create_leftovers_rule(dev, ft_prio, flow_attr, 1649 dst); 1650 } else { 1651 err = -EINVAL; 1652 goto destroy_ft; 1653 } 1654 1655 if (IS_ERR(handler)) { 1656 err = PTR_ERR(handler); 1657 handler = NULL; 1658 goto destroy_ft; 1659 } 1660 1661 ft_prio->refcount++; 1662 mutex_unlock(&dev->flow_db.lock); 1663 kfree(dst); 1664 1665 return &handler->ibflow; 1666 1667 destroy_ft: 1668 put_flow_table(dev, ft_prio, false); 1669 unlock: 1670 mutex_unlock(&dev->flow_db.lock); 1671 kfree(dst); 1672 kfree(handler); 1673 return ERR_PTR(err); 1674 } 1675 1676 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 1677 { 1678 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1679 int err; 1680 1681 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 1682 if (err) 1683 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 1684 ibqp->qp_num, gid->raw); 1685 1686 return err; 1687 } 1688 1689 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 1690 { 1691 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1692 int err; 1693 1694 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 1695 if (err) 1696 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 1697 ibqp->qp_num, gid->raw); 1698 1699 return err; 1700 } 1701 1702 static int init_node_data(struct mlx5_ib_dev *dev) 1703 { 1704 int err; 1705 1706 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); 1707 if (err) 1708 return err; 1709 1710 dev->mdev->rev_id = dev->mdev->pdev->revision; 1711 1712 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 1713 } 1714 1715 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, 1716 char *buf) 1717 { 1718 struct mlx5_ib_dev *dev = 1719 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1720 1721 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 1722 } 1723 1724 static ssize_t show_reg_pages(struct device *device, 1725 struct device_attribute *attr, char *buf) 1726 { 1727 struct mlx5_ib_dev *dev = 1728 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1729 1730 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 1731 } 1732 1733 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1734 char *buf) 1735 { 1736 struct mlx5_ib_dev *dev = 1737 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1738 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 1739 } 1740 1741 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, 1742 char *buf) 1743 { 1744 struct mlx5_ib_dev *dev = 1745 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1746 return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev), 1747 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); 1748 } 1749 1750 static ssize_t show_rev(struct device *device, struct 
device_attribute *attr, 1751 char *buf) 1752 { 1753 struct mlx5_ib_dev *dev = 1754 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1755 return sprintf(buf, "%x\n", dev->mdev->rev_id); 1756 } 1757 1758 static ssize_t show_board(struct device *device, struct device_attribute *attr, 1759 char *buf) 1760 { 1761 struct mlx5_ib_dev *dev = 1762 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1763 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 1764 dev->mdev->board_id); 1765 } 1766 1767 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1768 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 1769 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1770 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1771 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 1772 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 1773 1774 static struct device_attribute *mlx5_class_attributes[] = { 1775 &dev_attr_hw_rev, 1776 &dev_attr_fw_ver, 1777 &dev_attr_hca_type, 1778 &dev_attr_board_id, 1779 &dev_attr_fw_pages, 1780 &dev_attr_reg_pages, 1781 }; 1782 1783 static void pkey_change_handler(struct work_struct *work) 1784 { 1785 struct mlx5_ib_port_resources *ports = 1786 container_of(work, struct mlx5_ib_port_resources, 1787 pkey_change_work); 1788 1789 mutex_lock(&ports->devr->mutex); 1790 mlx5_ib_gsi_pkey_change(ports->gsi); 1791 mutex_unlock(&ports->devr->mutex); 1792 } 1793 1794 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 1795 enum mlx5_dev_event event, unsigned long param) 1796 { 1797 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; 1798 struct ib_event ibev; 1799 1800 u8 port = 0; 1801 1802 switch (event) { 1803 case MLX5_DEV_EVENT_SYS_ERROR: 1804 ibdev->ib_active = false; 1805 ibev.event = IB_EVENT_DEVICE_FATAL; 1806 break; 1807 1808 case MLX5_DEV_EVENT_PORT_UP: 1809 ibev.event = IB_EVENT_PORT_ACTIVE; 1810 port = (u8)param; 1811 break; 1812 1813 case MLX5_DEV_EVENT_PORT_DOWN: 1814 ibev.event = IB_EVENT_PORT_ERR; 1815 port = (u8)param; 1816 break; 1817 1818 case MLX5_DEV_EVENT_PORT_INITIALIZED: 1819 /* not used by ULPs */ 1820 return; 1821 1822 case MLX5_DEV_EVENT_LID_CHANGE: 1823 ibev.event = IB_EVENT_LID_CHANGE; 1824 port = (u8)param; 1825 break; 1826 1827 case MLX5_DEV_EVENT_PKEY_CHANGE: 1828 ibev.event = IB_EVENT_PKEY_CHANGE; 1829 port = (u8)param; 1830 1831 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 1832 break; 1833 1834 case MLX5_DEV_EVENT_GUID_CHANGE: 1835 ibev.event = IB_EVENT_GID_CHANGE; 1836 port = (u8)param; 1837 break; 1838 1839 case MLX5_DEV_EVENT_CLIENT_REREG: 1840 ibev.event = IB_EVENT_CLIENT_REREGISTER; 1841 port = (u8)param; 1842 break; 1843 } 1844 1845 ibev.device = &ibdev->ib_dev; 1846 ibev.element.port_num = port; 1847 1848 if (port < 1 || port > ibdev->num_ports) { 1849 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); 1850 return; 1851 } 1852 1853 if (ibdev->ib_active) 1854 ib_dispatch_event(&ibev); 1855 } 1856 1857 static void get_ext_port_caps(struct mlx5_ib_dev *dev) 1858 { 1859 int port; 1860 1861 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) 1862 mlx5_query_ext_port_caps(dev, port); 1863 } 1864 1865 static int get_port_caps(struct mlx5_ib_dev *dev) 1866 { 1867 struct ib_device_attr *dprops = NULL; 1868 struct ib_port_attr *pprops = NULL; 1869 int err = -ENOMEM; 1870 int port; 1871 struct ib_udata uhw = {.inlen = 0, .outlen = 0}; 1872 1873 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); 1874 if (!pprops) 1875 goto out; 1876 1877 dprops = 
kmalloc(sizeof(*dprops), GFP_KERNEL); 1878 if (!dprops) 1879 goto out; 1880 1881 err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); 1882 if (err) { 1883 mlx5_ib_warn(dev, "query_device failed %d\n", err); 1884 goto out; 1885 } 1886 1887 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 1888 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 1889 if (err) { 1890 mlx5_ib_warn(dev, "query_port %d failed %d\n", 1891 port, err); 1892 break; 1893 } 1894 dev->mdev->port_caps[port - 1].pkey_table_len = 1895 dprops->max_pkeys; 1896 dev->mdev->port_caps[port - 1].gid_table_len = 1897 pprops->gid_tbl_len; 1898 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", 1899 dprops->max_pkeys, pprops->gid_tbl_len); 1900 } 1901 1902 out: 1903 kfree(pprops); 1904 kfree(dprops); 1905 1906 return err; 1907 } 1908 1909 static void destroy_umrc_res(struct mlx5_ib_dev *dev) 1910 { 1911 int err; 1912 1913 err = mlx5_mr_cache_cleanup(dev); 1914 if (err) 1915 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 1916 1917 mlx5_ib_destroy_qp(dev->umrc.qp); 1918 ib_free_cq(dev->umrc.cq); 1919 ib_dealloc_pd(dev->umrc.pd); 1920 } 1921 1922 enum { 1923 MAX_UMR_WR = 128, 1924 }; 1925 1926 static int create_umr_res(struct mlx5_ib_dev *dev) 1927 { 1928 struct ib_qp_init_attr *init_attr = NULL; 1929 struct ib_qp_attr *attr = NULL; 1930 struct ib_pd *pd; 1931 struct ib_cq *cq; 1932 struct ib_qp *qp; 1933 int ret; 1934 1935 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 1936 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 1937 if (!attr || !init_attr) { 1938 ret = -ENOMEM; 1939 goto error_0; 1940 } 1941 1942 pd = ib_alloc_pd(&dev->ib_dev); 1943 if (IS_ERR(pd)) { 1944 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 1945 ret = PTR_ERR(pd); 1946 goto error_0; 1947 } 1948 1949 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 1950 if (IS_ERR(cq)) { 1951 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 1952 ret = PTR_ERR(cq); 1953 goto error_2; 1954 } 1955 1956 init_attr->send_cq = cq; 1957 init_attr->recv_cq = cq; 1958 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 1959 init_attr->cap.max_send_wr = MAX_UMR_WR; 1960 init_attr->cap.max_send_sge = 1; 1961 init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 1962 init_attr->port_num = 1; 1963 qp = mlx5_ib_create_qp(pd, init_attr, NULL); 1964 if (IS_ERR(qp)) { 1965 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 1966 ret = PTR_ERR(qp); 1967 goto error_3; 1968 } 1969 qp->device = &dev->ib_dev; 1970 qp->real_qp = qp; 1971 qp->uobject = NULL; 1972 qp->qp_type = MLX5_IB_QPT_REG_UMR; 1973 1974 attr->qp_state = IB_QPS_INIT; 1975 attr->port_num = 1; 1976 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 1977 IB_QP_PORT, NULL); 1978 if (ret) { 1979 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 1980 goto error_4; 1981 } 1982 1983 memset(attr, 0, sizeof(*attr)); 1984 attr->qp_state = IB_QPS_RTR; 1985 attr->path_mtu = IB_MTU_256; 1986 1987 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 1988 if (ret) { 1989 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 1990 goto error_4; 1991 } 1992 1993 memset(attr, 0, sizeof(*attr)); 1994 attr->qp_state = IB_QPS_RTS; 1995 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 1996 if (ret) { 1997 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 1998 goto error_4; 1999 } 2000 2001 dev->umrc.qp = qp; 2002 dev->umrc.cq = cq; 2003 dev->umrc.pd = pd; 2004 2005 sema_init(&dev->umrc.sem, MAX_UMR_WR); 2006 ret = mlx5_mr_cache_init(dev); 2007 if (ret) { 2008 mlx5_ib_warn(dev, "mr cache init 
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	struct ib_cq_init_attr cq_attr = {.cqe = 1};
	int port;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	mutex_init(&devr->mutex);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device = &dev->ib_dev;
	devr->c0->uobject = NULL;
	devr->c0->comp_handler = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device = &dev->ib_dev;
	devr->s0->pd = devr->p0;
	devr->s0->uobject = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context = NULL;
	devr->s0->srq_type = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd = devr->x0;
	devr->s0->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device = &dev->ib_dev;
	devr->s1->pd = devr->p0;
	devr->s1->uobject = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context = NULL;
	devr->s1->srq_type = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
		INIT_WORK(&devr->ports[port].pkey_change_work,
			  pkey_change_handler);
		devr->ports[port].devr = devr;
	}

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	struct mlx5_ib_dev *dev =
		container_of(devr, struct mlx5_ib_dev, devr);
	int port;

	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);

	/* Make sure no change P_Key work items are still executing */
	for (port = 0; port < dev->num_ports; ++port)
		cancel_work_sync(&devr->ports[port].pkey_change_work);
}
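
/*
 * Compute the rdma core capability flags reported through the port
 * immutable data: InfiniBand ports simply report RDMA_CORE_PORT_IBA_IB,
 * while Ethernet ports report RoCE v1 and/or RoCE v2 (UDP encap) only
 * if the device supports both the IPv4 and IPv6 L3 types.
 */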
static u32 get_core_cap_flags(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
	u32 ret = 0;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return RDMA_CORE_PORT_IBA_IB;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
		return 0;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
		return 0;

	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE;

	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return ret;
}

static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = mlx5_ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = get_core_cap_flags(ibdev);
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
	int err;

	dev->roce.nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier(&dev->roce.nb);
	if (err)
		return err;

	err = mlx5_nic_vport_enable_roce(dev->mdev);
	if (err)
		goto err_unregister_netdevice_notifier;

	return 0;

err_unregister_netdevice_notifier:
	unregister_netdevice_notifier(&dev->roce.nb);
	return err;
}

static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
{
	mlx5_nic_vport_disable_roce(dev->mdev);
	unregister_netdevice_notifier(&dev->roce.nb);
}
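
/*
 * 'add' callback of the mlx5_core interface: allocate the IB device,
 * fill in the verbs ops and uverbs command masks, set up RoCE, device
 * resources, ODP and the UMR QP as needed, then register with the RDMA
 * core and create the sysfs attributes.  Returns the context later
 * passed to mlx5_ib_remove()/mlx5_ib_event(), or NULL on failure.
 */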
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	enum rdma_link_layer ll;
	int port_type_cap;
	int err;
	int i;

	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

	if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
		return NULL;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	rwlock_init(&dev->roce.netdev_lock);
	err = get_port_caps(dev);
	if (err)
		goto err_dealloc;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_REREG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
	dev->ib_dev.uverbs_ex_cmd_mask =
		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
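
	/*
	 * Verbs entry points implemented by this driver.  Optional ops
	 * (netdev lookup, memory windows, XRC, flow steering, SR-IOV
	 * configuration) are only wired up when the corresponding
	 * capability or link layer applies.
	 */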
	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	if (ll == IB_LINK_LAYER_ETHERNET)
		dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.add_gid = mlx5_ib_add_gid;
	dev->ib_dev.del_gid = mlx5_ib_del_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
	dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
	dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
	dev->ib_dev.get_port_immutable = mlx5_port_immutable;
	if (mlx5_core_is_pf(mdev)) {
		dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
		dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
		dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
		dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
	}

	mlx5_ib_internal_fill_odp_caps(dev);

	if (MLX5_CAP_GEN(mdev, imaicl)) {
		dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
		dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
	}

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		dev->ib_dev.create_flow = mlx5_ib_create_flow;
		dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
		dev->ib_dev.uverbs_ex_cmd_mask |=
			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
	}

	err = init_node_data(dev);
	if (err)
		goto err_dealloc;

	mutex_init(&dev->flow_db.lock);
	mutex_init(&dev->cap_mask_mutex);

	if (ll == IB_LINK_LAYER_ETHERNET) {
		err = mlx5_enable_roce(dev);
		if (err)
			goto err_dealloc;
	}

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_odp_init_one(dev);
	if (err)
		goto err_rsrc;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_odp;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_umrc;
	}

	dev->ib_active = true;

	return dev;

err_umrc:
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_odp:
	mlx5_ib_odp_remove_one(dev);

err_rsrc:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (ll == IB_LINK_LAYER_ETHERNET)
		mlx5_disable_roce(dev);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}
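
/*
 * 'remove' callback: unregister from the RDMA core first, then release
 * the UMR, ODP and device resources and, for Ethernet link layer,
 * disable RoCE before freeing the IB device.
 */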
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);

	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	mlx5_ib_odp_remove_one(dev);
	destroy_dev_resources(&dev->devr);
	if (ll == IB_LINK_LAYER_ETHERNET)
		mlx5_disable_roce(dev);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add = mlx5_ib_add,
	.remove = mlx5_ib_remove,
	.event = mlx5_ib_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_ib_odp_init();
	if (err)
		return err;

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		goto clean_odp;

	return err;

clean_odp:
	mlx5_ib_odp_cleanup();
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	mlx5_unregister_interface(&mlx5_ib_interface);
	mlx5_ib_odp_cleanup();
}

module_init(mlx5_ib_init);
module_exit(mlx5_ib_cleanup);