1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #include <linux/module.h> 37 #include <linux/init.h> 38 #include <linux/errno.h> 39 #include <linux/pci.h> 40 #include <linux/dma-mapping.h> 41 #include <linux/slab.h> 42 #include <linux/io-mapping.h> 43 #include <linux/delay.h> 44 #include <linux/netdevice.h> 45 #include <linux/kmod.h> 46 47 #include <linux/mlx4/device.h> 48 #include <linux/mlx4/doorbell.h> 49 50 #include "mlx4.h" 51 #include "fw.h" 52 #include "icm.h" 53 54 MODULE_AUTHOR("Roland Dreier"); 55 MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); 56 MODULE_LICENSE("Dual BSD/GPL"); 57 MODULE_VERSION(DRV_VERSION); 58 59 struct workqueue_struct *mlx4_wq; 60 61 #ifdef CONFIG_MLX4_DEBUG 62 63 int mlx4_debug_level = 0; 64 module_param_named(debug_level, mlx4_debug_level, int, 0644); 65 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 66 67 #endif /* CONFIG_MLX4_DEBUG */ 68 69 #ifdef CONFIG_PCI_MSI 70 71 static int msi_x = 1; 72 module_param(msi_x, int, 0444); 73 MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); 74 75 #else /* CONFIG_PCI_MSI */ 76 77 #define msi_x (0) 78 79 #endif /* CONFIG_PCI_MSI */ 80 81 static int num_vfs; 82 module_param(num_vfs, int, 0444); 83 MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0"); 84 85 static int probe_vf; 86 module_param(probe_vf, int, 0644); 87 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); 88 89 int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 90 module_param_named(log_num_mgm_entry_size, 91 mlx4_log_num_mgm_entry_size, int, 0444); 92 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" 93 " of qp per mcg, for example:" 94 " 10 gives 248.range: 7 <=" 95 " log_num_mgm_entry_size <= 12." 96 " To activate device managed" 97 " flow steering when available, set to -1"); 98 99 static bool enable_64b_cqe_eqe = true; 100 module_param(enable_64b_cqe_eqe, bool, 0444); 101 MODULE_PARM_DESC(enable_64b_cqe_eqe, 102 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)"); 103 104 #define HCA_GLOBAL_CAP_MASK 0 105 106 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE 107 108 static char mlx4_version[] = 109 DRV_NAME ": Mellanox ConnectX core driver v" 110 DRV_VERSION " (" DRV_RELDATE ")\n"; 111 112 static struct mlx4_profile default_profile = { 113 .num_qp = 1 << 18, 114 .num_srq = 1 << 16, 115 .rdmarc_per_qp = 1 << 4, 116 .num_cq = 1 << 16, 117 .num_mcg = 1 << 13, 118 .num_mpt = 1 << 19, 119 .num_mtt = 1 << 20, /* It is really num mtt segements */ 120 }; 121 122 static int log_num_mac = 7; 123 module_param_named(log_num_mac, log_num_mac, int, 0444); 124 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); 125 126 static int log_num_vlan; 127 module_param_named(log_num_vlan, log_num_vlan, int, 0444); 128 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); 129 /* Log2 max number of VLANs per ETH port (0-7) */ 130 #define MLX4_LOG_NUM_VLANS 7 131 132 static bool use_prio; 133 module_param_named(use_prio, use_prio, bool, 0444); 134 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " 135 "(0/1, default 0)"); 136 137 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); 138 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 139 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)"); 140 141 static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE}; 142 static int arr_argc = 2; 143 module_param_array(port_type_array, int, &arr_argc, 0444); 144 MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default " 145 "1 for IB, 2 for Ethernet"); 146 147 struct mlx4_port_config { 148 struct list_head list; 149 enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; 150 struct pci_dev *pdev; 151 }; 152 153 int mlx4_check_port_params(struct mlx4_dev *dev, 154 enum mlx4_port_type *port_type) 155 { 156 int i; 157 158 for (i = 0; i < dev->caps.num_ports - 1; i++) { 159 if (port_type[i] != port_type[i + 1]) { 160 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 161 mlx4_err(dev, "Only same port types supported " 162 "on this HCA, aborting.\n"); 163 return -EINVAL; 164 } 165 } 166 } 167 168 for (i = 0; i < dev->caps.num_ports; i++) { 169 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 170 mlx4_err(dev, "Requested port type for port %d is not " 171 "supported on this HCA\n", i + 1); 172 return -EINVAL; 173 } 174 } 175 return 0; 176 } 177 178 static void mlx4_set_port_mask(struct mlx4_dev *dev) 179 { 180 int i; 181 182 for (i = 1; i <= dev->caps.num_ports; ++i) 183 dev->caps.port_mask[i] = dev->caps.port_type[i]; 184 } 185 186 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 187 { 188 int err; 189 int i; 190 191 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 192 if (err) { 193 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 194 return err; 195 } 196 197 if (dev_cap->min_page_sz > PAGE_SIZE) { 198 mlx4_err(dev, "HCA minimum page size of %d bigger than " 199 "kernel PAGE_SIZE of %ld, aborting.\n", 200 dev_cap->min_page_sz, PAGE_SIZE); 201 return -ENODEV; 202 } 203 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 204 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 205 "aborting.\n", 206 dev_cap->num_ports, MLX4_MAX_PORTS); 207 return -ENODEV; 208 } 209 210 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { 211 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " 212 "PCI resource 2 size of 0x%llx, aborting.\n", 213 dev_cap->uar_size, 214 (unsigned long long) pci_resource_len(dev->pdev, 2)); 215 return -ENODEV; 216 } 217 218 dev->caps.num_ports = dev_cap->num_ports; 219 dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; 220 for (i = 1; i <= dev->caps.num_ports; ++i) { 221 dev->caps.vl_cap[i] = dev_cap->max_vl[i]; 222 dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; 223 dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; 224 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; 225 /* set gid and pkey table operating lengths by default 226 * to non-sriov values */ 227 dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; 228 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; 229 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; 230 dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; 231 dev->caps.def_mac[i] = dev_cap->def_mac[i]; 232 dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; 233 dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; 234 dev->caps.default_sense[i] = dev_cap->default_sense[i]; 235 dev->caps.trans_type[i] = dev_cap->trans_type[i]; 236 dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; 237 dev->caps.wavelength[i] = dev_cap->wavelength[i]; 238 dev->caps.trans_code[i] = dev_cap->trans_code[i]; 239 } 240 241 dev->caps.uar_page_size = PAGE_SIZE; 242 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; 243 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 244 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 245 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 246 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 247 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 248 dev->caps.max_wqes = dev_cap->max_qp_sz; 249 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 250 dev->caps.max_srq_wqes = dev_cap->max_srq_sz; 251 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 252 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 253 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 254 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 255 /* 256 * Subtract 1 from the limit because we need to allocate a 257 * spare CQE so the HCA HW can tell the difference between an 258 * empty CQ and a full CQ. 259 */ 260 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 261 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 262 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 263 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 264 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 265 266 /* The first 128 UARs are used for EQ doorbells */ 267 dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); 268 dev->caps.reserved_pds = dev_cap->reserved_pds; 269 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 270 dev_cap->reserved_xrcds : 0; 271 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 272 dev_cap->max_xrcds : 0; 273 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 274 275 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 276 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 277 dev->caps.flags = dev_cap->flags; 278 dev->caps.flags2 = dev_cap->flags2; 279 dev->caps.bmme_flags = dev_cap->bmme_flags; 280 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 281 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 282 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 283 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 284 285 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 286 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 287 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 288 /* Don't do sense port on multifunction devices (for now at least) */ 289 if (mlx4_is_mfunc(dev)) 290 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 291 292 dev->caps.log_num_macs = log_num_mac; 293 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 294 dev->caps.log_num_prios = use_prio ? 3 : 0; 295 296 for (i = 1; i <= dev->caps.num_ports; ++i) { 297 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 298 if (dev->caps.supported_type[i]) { 299 /* if only ETH is supported - assign ETH */ 300 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 301 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 302 /* if only IB is supported, assign IB */ 303 else if (dev->caps.supported_type[i] == 304 MLX4_PORT_TYPE_IB) 305 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 306 else { 307 /* if IB and ETH are supported, we set the port 308 * type according to user selection of port type; 309 * if user selected none, take the FW hint */ 310 if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) 311 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 312 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 313 else 314 dev->caps.port_type[i] = port_type_array[i - 1]; 315 } 316 } 317 /* 318 * Link sensing is allowed on the port if 3 conditions are true: 319 * 1. Both protocols are supported on the port. 320 * 2. Different types are supported on the port 321 * 3. FW declared that it supports link sensing 322 */ 323 mlx4_priv(dev)->sense.sense_allowed[i] = 324 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 325 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 326 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 327 328 /* 329 * If "default_sense" bit is set, we move the port to "AUTO" mode 330 * and perform sense_port FW command to try and set the correct 331 * port type from beginning 332 */ 333 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 334 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 335 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 336 mlx4_SENSE_PORT(dev, i, &sensed_port); 337 if (sensed_port != MLX4_PORT_TYPE_NONE) 338 dev->caps.port_type[i] = sensed_port; 339 } else { 340 dev->caps.possible_type[i] = dev->caps.port_type[i]; 341 } 342 343 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { 344 dev->caps.log_num_macs = dev_cap->log_max_macs[i]; 345 mlx4_warn(dev, "Requested number of MACs is too much " 346 "for port %d, reducing to %d.\n", 347 i, 1 << dev->caps.log_num_macs); 348 } 349 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { 350 dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; 351 mlx4_warn(dev, "Requested number of VLANs is too much " 352 "for port %d, reducing to %d.\n", 353 i, 1 << dev->caps.log_num_vlans); 354 } 355 } 356 357 dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters); 358 359 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 360 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 361 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 362 (1 << dev->caps.log_num_macs) * 363 (1 << dev->caps.log_num_vlans) * 364 (1 << dev->caps.log_num_prios) * 365 dev->caps.num_ports; 366 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 367 368 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 369 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 370 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 371 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 372 373 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; 374 375 if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 376 if (dev_cap->flags & 377 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 378 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 379 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 380 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 381 } 382 } 383 384 if ((dev->caps.flags & 385 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 386 mlx4_is_master(dev)) 387 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 388 389 return 0; 390 } 391 392 static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, 393 enum pci_bus_speed *speed, 394 enum pcie_link_width *width) 395 { 396 u32 lnkcap1, lnkcap2; 397 int err1, err2; 398 399 #define PCIE_MLW_CAP_SHIFT 4 /* start of MLW mask in link capabilities */ 400 401 *speed = PCI_SPEED_UNKNOWN; 402 *width = PCIE_LNK_WIDTH_UNKNOWN; 403 404 err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); 405 err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); 406 if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ 407 if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) 408 *speed = PCIE_SPEED_8_0GT; 409 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB) 410 *speed = PCIE_SPEED_5_0GT; 411 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB) 412 *speed = PCIE_SPEED_2_5GT; 413 } 414 if (!err1) { 415 *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT; 416 if (!lnkcap2) { /* pre-r3.0 */ 417 if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB) 418 *speed = PCIE_SPEED_5_0GT; 419 else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB) 420 *speed = PCIE_SPEED_2_5GT; 421 } 422 } 423 424 if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) { 425 return err1 ? err1 : 426 err2 ? err2 : -EINVAL; 427 } 428 return 0; 429 } 430 431 static void mlx4_check_pcie_caps(struct mlx4_dev *dev) 432 { 433 enum pcie_link_width width, width_cap; 434 enum pci_bus_speed speed, speed_cap; 435 int err; 436 437 #define PCIE_SPEED_STR(speed) \ 438 (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \ 439 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \ 440 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \ 441 "Unknown") 442 443 err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap); 444 if (err) { 445 mlx4_warn(dev, 446 "Unable to determine PCIe device BW capabilities\n"); 447 return; 448 } 449 450 err = pcie_get_minimum_link(dev->pdev, &speed, &width); 451 if (err || speed == PCI_SPEED_UNKNOWN || 452 width == PCIE_LNK_WIDTH_UNKNOWN) { 453 mlx4_warn(dev, 454 "Unable to determine PCI device chain minimum BW\n"); 455 return; 456 } 457 458 if (width != width_cap || speed != speed_cap) 459 mlx4_warn(dev, 460 "PCIe BW is different than device's capability\n"); 461 462 mlx4_info(dev, "PCIe link speed is %s, device supports %s\n", 463 PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap)); 464 mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n", 465 width, width_cap); 466 return; 467 } 468 469 /*The function checks if there are live vf, return the num of them*/ 470 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 471 { 472 struct mlx4_priv *priv = mlx4_priv(dev); 473 struct mlx4_slave_state *s_state; 474 int i; 475 int ret = 0; 476 477 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 478 s_state = &priv->mfunc.master.slave_state[i]; 479 if (s_state->active && s_state->last_cmd != 480 MLX4_COMM_CMD_RESET) { 481 mlx4_warn(dev, "%s: slave: %d is still active\n", 482 __func__, i); 483 ret++; 484 } 485 } 486 return ret; 487 } 488 489 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 490 { 491 u32 qk = MLX4_RESERVED_QKEY_BASE; 492 493 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 494 qpn < dev->phys_caps.base_proxy_sqpn) 495 return -EINVAL; 496 497 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 498 /* tunnel qp */ 499 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 500 else 501 qk += qpn - dev->phys_caps.base_proxy_sqpn; 502 *qkey = qk; 503 return 0; 504 } 505 EXPORT_SYMBOL(mlx4_get_parav_qkey); 506 507 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 508 { 509 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 510 511 if (!mlx4_is_master(dev)) 512 return; 513 514 priv->virt2phys_pkey[slave][port - 1][i] = val; 515 } 516 EXPORT_SYMBOL(mlx4_sync_pkey_table); 517 518 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 519 { 520 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 521 522 if (!mlx4_is_master(dev)) 523 return; 524 525 priv->slave_node_guids[slave] = guid; 526 } 527 EXPORT_SYMBOL(mlx4_put_slave_node_guid); 528 529 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 530 { 531 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 532 533 if (!mlx4_is_master(dev)) 534 return 0; 535 536 return priv->slave_node_guids[slave]; 537 } 538 EXPORT_SYMBOL(mlx4_get_slave_node_guid); 539 540 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 541 { 542 struct mlx4_priv *priv = mlx4_priv(dev); 543 struct mlx4_slave_state *s_slave; 544 545 if (!mlx4_is_master(dev)) 546 return 0; 547 548 s_slave = &priv->mfunc.master.slave_state[slave]; 549 return !!s_slave->active; 550 } 551 EXPORT_SYMBOL(mlx4_is_slave_active); 552 553 static void slave_adjust_steering_mode(struct mlx4_dev *dev, 554 struct mlx4_dev_cap *dev_cap, 555 struct mlx4_init_hca_param *hca_param) 556 { 557 dev->caps.steering_mode = hca_param->steering_mode; 558 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { 559 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 560 dev->caps.fs_log_max_ucast_qp_range_size = 561 dev_cap->fs_log_max_ucast_qp_range_size; 562 } else 563 dev->caps.num_qp_per_mgm = 564 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); 565 566 mlx4_dbg(dev, "Steering mode is: %s\n", 567 mlx4_steering_mode_str(dev->caps.steering_mode)); 568 } 569 570 static int mlx4_slave_cap(struct mlx4_dev *dev) 571 { 572 int err; 573 u32 page_size; 574 struct mlx4_dev_cap dev_cap; 575 struct mlx4_func_cap func_cap; 576 struct mlx4_init_hca_param hca_param; 577 int i; 578 579 memset(&hca_param, 0, sizeof(hca_param)); 580 err = mlx4_QUERY_HCA(dev, &hca_param); 581 if (err) { 582 mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); 583 return err; 584 } 585 586 /*fail if the hca has an unknown capability */ 587 if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != 588 HCA_GLOBAL_CAP_MASK) { 589 mlx4_err(dev, "Unknown hca global capabilities\n"); 590 return -ENOSYS; 591 } 592 593 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 594 595 dev->caps.hca_core_clock = hca_param.hca_core_clock; 596 597 memset(&dev_cap, 0, sizeof(dev_cap)); 598 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 599 err = mlx4_dev_cap(dev, &dev_cap); 600 if (err) { 601 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 602 return err; 603 } 604 605 err = mlx4_QUERY_FW(dev); 606 if (err) 607 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 608 609 page_size = ~dev->caps.page_size_cap + 1; 610 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 611 if (page_size > PAGE_SIZE) { 612 mlx4_err(dev, "HCA minimum page size of %d bigger than " 613 "kernel PAGE_SIZE of %ld, aborting.\n", 614 page_size, PAGE_SIZE); 615 return -ENODEV; 616 } 617 618 /* slave gets uar page size from QUERY_HCA fw command */ 619 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 620 621 /* TODO: relax this assumption */ 622 if (dev->caps.uar_page_size != PAGE_SIZE) { 623 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n", 624 dev->caps.uar_page_size, PAGE_SIZE); 625 return -ENODEV; 626 } 627 628 memset(&func_cap, 0, sizeof(func_cap)); 629 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 630 if (err) { 631 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 632 err); 633 return err; 634 } 635 636 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 637 PF_CONTEXT_BEHAVIOUR_MASK) { 638 mlx4_err(dev, "Unknown pf context behaviour\n"); 639 return -ENOSYS; 640 } 641 642 dev->caps.num_ports = func_cap.num_ports; 643 dev->quotas.qp = func_cap.qp_quota; 644 dev->quotas.srq = func_cap.srq_quota; 645 dev->quotas.cq = func_cap.cq_quota; 646 dev->quotas.mpt = func_cap.mpt_quota; 647 dev->quotas.mtt = func_cap.mtt_quota; 648 dev->caps.num_qps = 1 << hca_param.log_num_qps; 649 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 650 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 651 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 652 dev->caps.num_eqs = func_cap.max_eq; 653 dev->caps.reserved_eqs = func_cap.reserved_eq; 654 dev->caps.num_pds = MLX4_NUM_PDS; 655 dev->caps.num_mgms = 0; 656 dev->caps.num_amgms = 0; 657 658 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 659 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 660 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 661 return -ENODEV; 662 } 663 664 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 665 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 666 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 667 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 668 669 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 670 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 671 err = -ENOMEM; 672 goto err_mem; 673 } 674 675 for (i = 1; i <= dev->caps.num_ports; ++i) { 676 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 677 if (err) { 678 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 679 " port %d, aborting (%d).\n", i, err); 680 goto err_mem; 681 } 682 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 683 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 684 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 685 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 686 dev->caps.port_mask[i] = dev->caps.port_type[i]; 687 dev->caps.phys_port_id[i] = func_cap.phys_port_id; 688 if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, 689 &dev->caps.gid_table_len[i], 690 &dev->caps.pkey_table_len[i])) 691 goto err_mem; 692 } 693 694 if (dev->caps.uar_page_size * (dev->caps.num_uars - 695 dev->caps.reserved_uars) > 696 pci_resource_len(dev->pdev, 2)) { 697 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 698 "PCI resource 2 size of 0x%llx, aborting.\n", 699 dev->caps.uar_page_size * dev->caps.num_uars, 700 (unsigned long long) pci_resource_len(dev->pdev, 2)); 701 goto err_mem; 702 } 703 704 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 705 dev->caps.eqe_size = 64; 706 dev->caps.eqe_factor = 1; 707 } else { 708 dev->caps.eqe_size = 32; 709 dev->caps.eqe_factor = 0; 710 } 711 712 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 713 dev->caps.cqe_size = 64; 714 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 715 } else { 716 dev->caps.cqe_size = 32; 717 } 718 719 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 720 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 721 722 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 723 724 return 0; 725 726 err_mem: 727 kfree(dev->caps.qp0_tunnel); 728 kfree(dev->caps.qp0_proxy); 729 kfree(dev->caps.qp1_tunnel); 730 kfree(dev->caps.qp1_proxy); 731 dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 732 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 733 734 return err; 735 } 736 737 static void mlx4_request_modules(struct mlx4_dev *dev) 738 { 739 int port; 740 int has_ib_port = false; 741 int has_eth_port = false; 742 #define EN_DRV_NAME "mlx4_en" 743 #define IB_DRV_NAME "mlx4_ib" 744 745 for (port = 1; port <= dev->caps.num_ports; port++) { 746 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 747 has_ib_port = true; 748 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 749 has_eth_port = true; 750 } 751 752 if (has_ib_port) 753 request_module_nowait(IB_DRV_NAME); 754 if (has_eth_port) 755 request_module_nowait(EN_DRV_NAME); 756 } 757 758 /* 759 * Change the port configuration of the device. 760 * Every user of this function must hold the port mutex. 761 */ 762 int mlx4_change_port_types(struct mlx4_dev *dev, 763 enum mlx4_port_type *port_types) 764 { 765 int err = 0; 766 int change = 0; 767 int port; 768 769 for (port = 0; port < dev->caps.num_ports; port++) { 770 /* Change the port type only if the new type is different 771 * from the current, and not set to Auto */ 772 if (port_types[port] != dev->caps.port_type[port + 1]) 773 change = 1; 774 } 775 if (change) { 776 mlx4_unregister_device(dev); 777 for (port = 1; port <= dev->caps.num_ports; port++) { 778 mlx4_CLOSE_PORT(dev, port); 779 dev->caps.port_type[port] = port_types[port - 1]; 780 err = mlx4_SET_PORT(dev, port, -1); 781 if (err) { 782 mlx4_err(dev, "Failed to set port %d, " 783 "aborting\n", port); 784 goto out; 785 } 786 } 787 mlx4_set_port_mask(dev); 788 err = mlx4_register_device(dev); 789 if (err) { 790 mlx4_err(dev, "Failed to register device\n"); 791 goto out; 792 } 793 mlx4_request_modules(dev); 794 } 795 796 out: 797 return err; 798 } 799 800 static ssize_t show_port_type(struct device *dev, 801 struct device_attribute *attr, 802 char *buf) 803 { 804 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 805 port_attr); 806 struct mlx4_dev *mdev = info->dev; 807 char type[8]; 808 809 sprintf(type, "%s", 810 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 811 "ib" : "eth"); 812 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 813 sprintf(buf, "auto (%s)\n", type); 814 else 815 sprintf(buf, "%s\n", type); 816 817 return strlen(buf); 818 } 819 820 static ssize_t set_port_type(struct device *dev, 821 struct device_attribute *attr, 822 const char *buf, size_t count) 823 { 824 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 825 port_attr); 826 struct mlx4_dev *mdev = info->dev; 827 struct mlx4_priv *priv = mlx4_priv(mdev); 828 enum mlx4_port_type types[MLX4_MAX_PORTS]; 829 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 830 int i; 831 int err = 0; 832 833 if (!strcmp(buf, "ib\n")) 834 info->tmp_type = MLX4_PORT_TYPE_IB; 835 else if (!strcmp(buf, "eth\n")) 836 info->tmp_type = MLX4_PORT_TYPE_ETH; 837 else if (!strcmp(buf, "auto\n")) 838 info->tmp_type = MLX4_PORT_TYPE_AUTO; 839 else { 840 mlx4_err(mdev, "%s is not supported port type\n", buf); 841 return -EINVAL; 842 } 843 844 mlx4_stop_sense(mdev); 845 mutex_lock(&priv->port_mutex); 846 /* Possible type is always the one that was delivered */ 847 mdev->caps.possible_type[info->port] = info->tmp_type; 848 849 for (i = 0; i < mdev->caps.num_ports; i++) { 850 types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : 851 mdev->caps.possible_type[i+1]; 852 if (types[i] == MLX4_PORT_TYPE_AUTO) 853 types[i] = mdev->caps.port_type[i+1]; 854 } 855 856 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 857 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 858 for (i = 1; i <= mdev->caps.num_ports; i++) { 859 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 860 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 861 err = -EINVAL; 862 } 863 } 864 } 865 if (err) { 866 mlx4_err(mdev, "Auto sensing is not supported on this HCA. " 867 "Set only 'eth' or 'ib' for both ports " 868 "(should be the same)\n"); 869 goto out; 870 } 871 872 mlx4_do_sense_ports(mdev, new_types, types); 873 874 err = mlx4_check_port_params(mdev, new_types); 875 if (err) 876 goto out; 877 878 /* We are about to apply the changes after the configuration 879 * was verified, no need to remember the temporary types 880 * any more */ 881 for (i = 0; i < mdev->caps.num_ports; i++) 882 priv->port[i + 1].tmp_type = 0; 883 884 err = mlx4_change_port_types(mdev, new_types); 885 886 out: 887 mlx4_start_sense(mdev); 888 mutex_unlock(&priv->port_mutex); 889 return err ? err : count; 890 } 891 892 enum ibta_mtu { 893 IB_MTU_256 = 1, 894 IB_MTU_512 = 2, 895 IB_MTU_1024 = 3, 896 IB_MTU_2048 = 4, 897 IB_MTU_4096 = 5 898 }; 899 900 static inline int int_to_ibta_mtu(int mtu) 901 { 902 switch (mtu) { 903 case 256: return IB_MTU_256; 904 case 512: return IB_MTU_512; 905 case 1024: return IB_MTU_1024; 906 case 2048: return IB_MTU_2048; 907 case 4096: return IB_MTU_4096; 908 default: return -1; 909 } 910 } 911 912 static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 913 { 914 switch (mtu) { 915 case IB_MTU_256: return 256; 916 case IB_MTU_512: return 512; 917 case IB_MTU_1024: return 1024; 918 case IB_MTU_2048: return 2048; 919 case IB_MTU_4096: return 4096; 920 default: return -1; 921 } 922 } 923 924 static ssize_t show_port_ib_mtu(struct device *dev, 925 struct device_attribute *attr, 926 char *buf) 927 { 928 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 929 port_mtu_attr); 930 struct mlx4_dev *mdev = info->dev; 931 932 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 933 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 934 935 sprintf(buf, "%d\n", 936 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 937 return strlen(buf); 938 } 939 940 static ssize_t set_port_ib_mtu(struct device *dev, 941 struct device_attribute *attr, 942 const char *buf, size_t count) 943 { 944 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 945 port_mtu_attr); 946 struct mlx4_dev *mdev = info->dev; 947 struct mlx4_priv *priv = mlx4_priv(mdev); 948 int err, port, mtu, ibta_mtu = -1; 949 950 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 951 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 952 return -EINVAL; 953 } 954 955 err = kstrtoint(buf, 0, &mtu); 956 if (!err) 957 ibta_mtu = int_to_ibta_mtu(mtu); 958 959 if (err || ibta_mtu < 0) { 960 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 961 return -EINVAL; 962 } 963 964 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 965 966 mlx4_stop_sense(mdev); 967 mutex_lock(&priv->port_mutex); 968 mlx4_unregister_device(mdev); 969 for (port = 1; port <= mdev->caps.num_ports; port++) { 970 mlx4_CLOSE_PORT(mdev, port); 971 err = mlx4_SET_PORT(mdev, port, -1); 972 if (err) { 973 mlx4_err(mdev, "Failed to set port %d, " 974 "aborting\n", port); 975 goto err_set_port; 976 } 977 } 978 err = mlx4_register_device(mdev); 979 err_set_port: 980 mutex_unlock(&priv->port_mutex); 981 mlx4_start_sense(mdev); 982 return err ? err : count; 983 } 984 985 static int mlx4_load_fw(struct mlx4_dev *dev) 986 { 987 struct mlx4_priv *priv = mlx4_priv(dev); 988 int err; 989 990 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 991 GFP_HIGHUSER | __GFP_NOWARN, 0); 992 if (!priv->fw.fw_icm) { 993 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 994 return -ENOMEM; 995 } 996 997 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 998 if (err) { 999 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1000 goto err_free; 1001 } 1002 1003 err = mlx4_RUN_FW(dev); 1004 if (err) { 1005 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1006 goto err_unmap_fa; 1007 } 1008 1009 return 0; 1010 1011 err_unmap_fa: 1012 mlx4_UNMAP_FA(dev); 1013 1014 err_free: 1015 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1016 return err; 1017 } 1018 1019 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1020 int cmpt_entry_sz) 1021 { 1022 struct mlx4_priv *priv = mlx4_priv(dev); 1023 int err; 1024 int num_eqs; 1025 1026 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1027 cmpt_base + 1028 ((u64) (MLX4_CMPT_TYPE_QP * 1029 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1030 cmpt_entry_sz, dev->caps.num_qps, 1031 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1032 0, 0); 1033 if (err) 1034 goto err; 1035 1036 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1037 cmpt_base + 1038 ((u64) (MLX4_CMPT_TYPE_SRQ * 1039 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1040 cmpt_entry_sz, dev->caps.num_srqs, 1041 dev->caps.reserved_srqs, 0, 0); 1042 if (err) 1043 goto err_qp; 1044 1045 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1046 cmpt_base + 1047 ((u64) (MLX4_CMPT_TYPE_CQ * 1048 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1049 cmpt_entry_sz, dev->caps.num_cqs, 1050 dev->caps.reserved_cqs, 0, 0); 1051 if (err) 1052 goto err_srq; 1053 1054 num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : 1055 dev->caps.num_eqs; 1056 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1057 cmpt_base + 1058 ((u64) (MLX4_CMPT_TYPE_EQ * 1059 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1060 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1061 if (err) 1062 goto err_cq; 1063 1064 return 0; 1065 1066 err_cq: 1067 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1068 1069 err_srq: 1070 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1071 1072 err_qp: 1073 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1074 1075 err: 1076 return err; 1077 } 1078 1079 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1080 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1081 { 1082 struct mlx4_priv *priv = mlx4_priv(dev); 1083 u64 aux_pages; 1084 int num_eqs; 1085 int err; 1086 1087 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1088 if (err) { 1089 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1090 return err; 1091 } 1092 1093 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1094 (unsigned long long) icm_size >> 10, 1095 (unsigned long long) aux_pages << 2); 1096 1097 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1098 GFP_HIGHUSER | __GFP_NOWARN, 0); 1099 if (!priv->fw.aux_icm) { 1100 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1101 return -ENOMEM; 1102 } 1103 1104 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1105 if (err) { 1106 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1107 goto err_free_aux; 1108 } 1109 1110 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1111 if (err) { 1112 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1113 goto err_unmap_aux; 1114 } 1115 1116 1117 num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : 1118 dev->caps.num_eqs; 1119 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1120 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1121 num_eqs, num_eqs, 0, 0); 1122 if (err) { 1123 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 1124 goto err_unmap_cmpt; 1125 } 1126 1127 /* 1128 * Reserved MTT entries must be aligned up to a cacheline 1129 * boundary, since the FW will write to them, while the driver 1130 * writes to all other MTT entries. (The variable 1131 * dev->caps.mtt_entry_sz below is really the MTT segment 1132 * size, not the raw entry size) 1133 */ 1134 dev->caps.reserved_mtts = 1135 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1136 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1137 1138 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1139 init_hca->mtt_base, 1140 dev->caps.mtt_entry_sz, 1141 dev->caps.num_mtts, 1142 dev->caps.reserved_mtts, 1, 0); 1143 if (err) { 1144 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1145 goto err_unmap_eq; 1146 } 1147 1148 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1149 init_hca->dmpt_base, 1150 dev_cap->dmpt_entry_sz, 1151 dev->caps.num_mpts, 1152 dev->caps.reserved_mrws, 1, 1); 1153 if (err) { 1154 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1155 goto err_unmap_mtt; 1156 } 1157 1158 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1159 init_hca->qpc_base, 1160 dev_cap->qpc_entry_sz, 1161 dev->caps.num_qps, 1162 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1163 0, 0); 1164 if (err) { 1165 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1166 goto err_unmap_dmpt; 1167 } 1168 1169 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1170 init_hca->auxc_base, 1171 dev_cap->aux_entry_sz, 1172 dev->caps.num_qps, 1173 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1174 0, 0); 1175 if (err) { 1176 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1177 goto err_unmap_qp; 1178 } 1179 1180 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1181 init_hca->altc_base, 1182 dev_cap->altc_entry_sz, 1183 dev->caps.num_qps, 1184 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1185 0, 0); 1186 if (err) { 1187 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 1188 goto err_unmap_auxc; 1189 } 1190 1191 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1192 init_hca->rdmarc_base, 1193 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1194 dev->caps.num_qps, 1195 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1196 0, 0); 1197 if (err) { 1198 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1199 goto err_unmap_altc; 1200 } 1201 1202 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1203 init_hca->cqc_base, 1204 dev_cap->cqc_entry_sz, 1205 dev->caps.num_cqs, 1206 dev->caps.reserved_cqs, 0, 0); 1207 if (err) { 1208 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1209 goto err_unmap_rdmarc; 1210 } 1211 1212 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1213 init_hca->srqc_base, 1214 dev_cap->srq_entry_sz, 1215 dev->caps.num_srqs, 1216 dev->caps.reserved_srqs, 0, 0); 1217 if (err) { 1218 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1219 goto err_unmap_cq; 1220 } 1221 1222 /* 1223 * For flow steering device managed mode it is required to use 1224 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1225 * required, but for simplicity just map the whole multicast 1226 * group table now. The table isn't very big and it's a lot 1227 * easier than trying to track ref counts. 1228 */ 1229 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1230 init_hca->mc_base, 1231 mlx4_get_mgm_entry_size(dev), 1232 dev->caps.num_mgms + dev->caps.num_amgms, 1233 dev->caps.num_mgms + dev->caps.num_amgms, 1234 0, 0); 1235 if (err) { 1236 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1237 goto err_unmap_srq; 1238 } 1239 1240 return 0; 1241 1242 err_unmap_srq: 1243 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1244 1245 err_unmap_cq: 1246 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1247 1248 err_unmap_rdmarc: 1249 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1250 1251 err_unmap_altc: 1252 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1253 1254 err_unmap_auxc: 1255 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1256 1257 err_unmap_qp: 1258 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1259 1260 err_unmap_dmpt: 1261 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1262 1263 err_unmap_mtt: 1264 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1265 1266 err_unmap_eq: 1267 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1268 1269 err_unmap_cmpt: 1270 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1271 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1272 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1273 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1274 1275 err_unmap_aux: 1276 mlx4_UNMAP_ICM_AUX(dev); 1277 1278 err_free_aux: 1279 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1280 1281 return err; 1282 } 1283 1284 static void mlx4_free_icms(struct mlx4_dev *dev) 1285 { 1286 struct mlx4_priv *priv = mlx4_priv(dev); 1287 1288 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1289 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1290 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1291 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1292 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1293 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1294 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1295 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1296 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1297 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1298 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1299 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1300 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1301 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1302 1303 mlx4_UNMAP_ICM_AUX(dev); 1304 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1305 } 1306 1307 static void mlx4_slave_exit(struct mlx4_dev *dev) 1308 { 1309 struct mlx4_priv *priv = mlx4_priv(dev); 1310 1311 mutex_lock(&priv->cmd.slave_cmd_mutex); 1312 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1313 mlx4_warn(dev, "Failed to close slave function.\n"); 1314 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1315 } 1316 1317 static int map_bf_area(struct mlx4_dev *dev) 1318 { 1319 struct mlx4_priv *priv = mlx4_priv(dev); 1320 resource_size_t bf_start; 1321 resource_size_t bf_len; 1322 int err = 0; 1323 1324 if (!dev->caps.bf_reg_size) 1325 return -ENXIO; 1326 1327 bf_start = pci_resource_start(dev->pdev, 2) + 1328 (dev->caps.num_uars << PAGE_SHIFT); 1329 bf_len = pci_resource_len(dev->pdev, 2) - 1330 (dev->caps.num_uars << PAGE_SHIFT); 1331 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1332 if (!priv->bf_mapping) 1333 err = -ENOMEM; 1334 1335 return err; 1336 } 1337 1338 static void unmap_bf_area(struct mlx4_dev *dev) 1339 { 1340 if (mlx4_priv(dev)->bf_mapping) 1341 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1342 } 1343 1344 cycle_t mlx4_read_clock(struct mlx4_dev *dev) 1345 { 1346 u32 clockhi, clocklo, clockhi1; 1347 cycle_t cycles; 1348 int i; 1349 struct mlx4_priv *priv = mlx4_priv(dev); 1350 1351 for (i = 0; i < 10; i++) { 1352 clockhi = swab32(readl(priv->clock_mapping)); 1353 clocklo = swab32(readl(priv->clock_mapping + 4)); 1354 clockhi1 = swab32(readl(priv->clock_mapping)); 1355 if (clockhi == clockhi1) 1356 break; 1357 } 1358 1359 cycles = (u64) clockhi << 32 | (u64) clocklo; 1360 1361 return cycles; 1362 } 1363 EXPORT_SYMBOL_GPL(mlx4_read_clock); 1364 1365 1366 static int map_internal_clock(struct mlx4_dev *dev) 1367 { 1368 struct mlx4_priv *priv = mlx4_priv(dev); 1369 1370 priv->clock_mapping = 1371 ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + 1372 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1373 1374 if (!priv->clock_mapping) 1375 return -ENOMEM; 1376 1377 return 0; 1378 } 1379 1380 static void unmap_internal_clock(struct mlx4_dev *dev) 1381 { 1382 struct mlx4_priv *priv = mlx4_priv(dev); 1383 1384 if (priv->clock_mapping) 1385 iounmap(priv->clock_mapping); 1386 } 1387 1388 static void mlx4_close_hca(struct mlx4_dev *dev) 1389 { 1390 unmap_internal_clock(dev); 1391 unmap_bf_area(dev); 1392 if (mlx4_is_slave(dev)) 1393 mlx4_slave_exit(dev); 1394 else { 1395 mlx4_CLOSE_HCA(dev, 0); 1396 mlx4_free_icms(dev); 1397 mlx4_UNMAP_FA(dev); 1398 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1399 } 1400 } 1401 1402 static int mlx4_init_slave(struct mlx4_dev *dev) 1403 { 1404 struct mlx4_priv *priv = mlx4_priv(dev); 1405 u64 dma = (u64) priv->mfunc.vhcr_dma; 1406 int ret_from_reset = 0; 1407 u32 slave_read; 1408 u32 cmd_channel_ver; 1409 1410 mutex_lock(&priv->cmd.slave_cmd_mutex); 1411 priv->cmd.max_cmds = 1; 1412 mlx4_warn(dev, "Sending reset\n"); 1413 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1414 MLX4_COMM_TIME); 1415 /* if we are in the middle of flr the slave will try 1416 * NUM_OF_RESET_RETRIES times before leaving.*/ 1417 if (ret_from_reset) { 1418 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1419 mlx4_warn(dev, "slave is currently in the " 1420 "middle of FLR. Deferring probe.\n"); 1421 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1422 return -EPROBE_DEFER; 1423 } else 1424 goto err; 1425 } 1426 1427 /* check the driver version - the slave I/F revision 1428 * must match the master's */ 1429 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1430 cmd_channel_ver = mlx4_comm_get_version(); 1431 1432 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1433 MLX4_COMM_GET_IF_REV(slave_read)) { 1434 mlx4_err(dev, "slave driver version is not supported" 1435 " by the master\n"); 1436 goto err; 1437 } 1438 1439 mlx4_warn(dev, "Sending vhcr0\n"); 1440 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1441 MLX4_COMM_TIME)) 1442 goto err; 1443 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1444 MLX4_COMM_TIME)) 1445 goto err; 1446 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1447 MLX4_COMM_TIME)) 1448 goto err; 1449 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1450 goto err; 1451 1452 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1453 return 0; 1454 1455 err: 1456 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1457 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1458 return -EIO; 1459 } 1460 1461 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1462 { 1463 int i; 1464 1465 for (i = 1; i <= dev->caps.num_ports; i++) { 1466 dev->caps.gid_table_len[i] = 1; 1467 dev->caps.pkey_table_len[i] = 1468 dev->phys_caps.pkey_phys_table_len[i] - 1; 1469 } 1470 } 1471 1472 static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1473 { 1474 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1475 1476 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1477 i++) { 1478 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1479 break; 1480 } 1481 1482 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1483 } 1484 1485 static void choose_steering_mode(struct mlx4_dev *dev, 1486 struct mlx4_dev_cap *dev_cap) 1487 { 1488 if (mlx4_log_num_mgm_entry_size == -1 && 1489 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1490 (!mlx4_is_mfunc(dev) || 1491 (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && 1492 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1493 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1494 dev->oper_log_mgm_entry_size = 1495 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1496 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1497 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1498 dev->caps.fs_log_max_ucast_qp_range_size = 1499 dev_cap->fs_log_max_ucast_qp_range_size; 1500 } else { 1501 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 1502 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1503 dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 1504 else { 1505 dev->caps.steering_mode = MLX4_STEERING_MODE_A0; 1506 1507 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || 1508 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1509 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " 1510 "set to use B0 steering. Falling back to A0 steering mode.\n"); 1511 } 1512 dev->oper_log_mgm_entry_size = 1513 mlx4_log_num_mgm_entry_size > 0 ? 1514 mlx4_log_num_mgm_entry_size : 1515 MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 1516 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); 1517 } 1518 mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " 1519 "modparam log_num_mgm_entry_size = %d\n", 1520 mlx4_steering_mode_str(dev->caps.steering_mode), 1521 dev->oper_log_mgm_entry_size, 1522 mlx4_log_num_mgm_entry_size); 1523 } 1524 1525 static void choose_tunnel_offload_mode(struct mlx4_dev *dev, 1526 struct mlx4_dev_cap *dev_cap) 1527 { 1528 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && 1529 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) 1530 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN; 1531 else 1532 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE; 1533 1534 mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode 1535 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none"); 1536 } 1537 1538 static int mlx4_init_hca(struct mlx4_dev *dev) 1539 { 1540 struct mlx4_priv *priv = mlx4_priv(dev); 1541 struct mlx4_adapter adapter; 1542 struct mlx4_dev_cap dev_cap; 1543 struct mlx4_mod_stat_cfg mlx4_cfg; 1544 struct mlx4_profile profile; 1545 struct mlx4_init_hca_param init_hca; 1546 u64 icm_size; 1547 int err; 1548 1549 if (!mlx4_is_slave(dev)) { 1550 err = mlx4_QUERY_FW(dev); 1551 if (err) { 1552 if (err == -EACCES) 1553 mlx4_info(dev, "non-primary physical function, skipping.\n"); 1554 else 1555 mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); 1556 return err; 1557 } 1558 1559 err = mlx4_load_fw(dev); 1560 if (err) { 1561 mlx4_err(dev, "Failed to start FW, aborting.\n"); 1562 return err; 1563 } 1564 1565 mlx4_cfg.log_pg_sz_m = 1; 1566 mlx4_cfg.log_pg_sz = 0; 1567 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); 1568 if (err) 1569 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); 1570 1571 err = mlx4_dev_cap(dev, &dev_cap); 1572 if (err) { 1573 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 1574 goto err_stop_fw; 1575 } 1576 1577 choose_steering_mode(dev, &dev_cap); 1578 choose_tunnel_offload_mode(dev, &dev_cap); 1579 1580 err = mlx4_get_phys_port_id(dev); 1581 if (err) 1582 mlx4_err(dev, "Fail to get physical port id\n"); 1583 1584 if (mlx4_is_master(dev)) 1585 mlx4_parav_master_pf_caps(dev); 1586 1587 profile = default_profile; 1588 if (dev->caps.steering_mode == 1589 MLX4_STEERING_MODE_DEVICE_MANAGED) 1590 profile.num_mcg = MLX4_FS_NUM_MCG; 1591 1592 icm_size = mlx4_make_profile(dev, &profile, &dev_cap, 1593 &init_hca); 1594 if ((long long) icm_size < 0) { 1595 err = icm_size; 1596 goto err_stop_fw; 1597 } 1598 1599 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; 1600 1601 init_hca.log_uar_sz = ilog2(dev->caps.num_uars); 1602 init_hca.uar_page_sz = PAGE_SHIFT - 12; 1603 init_hca.mw_enabled = 0; 1604 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || 1605 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) 1606 init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE; 1607 1608 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); 1609 if (err) 1610 goto err_stop_fw; 1611 1612 err = mlx4_INIT_HCA(dev, &init_hca); 1613 if (err) { 1614 mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); 1615 goto err_free_icm; 1616 } 1617 /* 1618 * If TS is supported by FW 1619 * read HCA frequency by QUERY_HCA command 1620 */ 1621 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { 1622 memset(&init_hca, 0, sizeof(init_hca)); 1623 err = mlx4_QUERY_HCA(dev, &init_hca); 1624 if (err) { 1625 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); 1626 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1627 } else { 1628 dev->caps.hca_core_clock = 1629 init_hca.hca_core_clock; 1630 } 1631 1632 /* In case we got HCA frequency 0 - disable timestamping 1633 * to avoid dividing by zero 1634 */ 1635 if (!dev->caps.hca_core_clock) { 1636 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1637 mlx4_err(dev, 1638 "HCA frequency is 0. Timestamping is not supported."); 1639 } else if (map_internal_clock(dev)) { 1640 /* 1641 * Map internal clock, 1642 * in case of failure disable timestamping 1643 */ 1644 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1645 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); 1646 } 1647 } 1648 } else { 1649 err = mlx4_init_slave(dev); 1650 if (err) { 1651 if (err != -EPROBE_DEFER) 1652 mlx4_err(dev, "Failed to initialize slave\n"); 1653 return err; 1654 } 1655 1656 err = mlx4_slave_cap(dev); 1657 if (err) { 1658 mlx4_err(dev, "Failed to obtain slave caps\n"); 1659 goto err_close; 1660 } 1661 } 1662 1663 if (map_bf_area(dev)) 1664 mlx4_dbg(dev, "Failed to map blue flame area\n"); 1665 1666 /*Only the master set the ports, all the rest got it from it.*/ 1667 if (!mlx4_is_slave(dev)) 1668 mlx4_set_port_mask(dev); 1669 1670 err = mlx4_QUERY_ADAPTER(dev, &adapter); 1671 if (err) { 1672 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); 1673 goto unmap_bf; 1674 } 1675 1676 priv->eq_table.inta_pin = adapter.inta_pin; 1677 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); 1678 1679 return 0; 1680 1681 unmap_bf: 1682 unmap_internal_clock(dev); 1683 unmap_bf_area(dev); 1684 1685 err_close: 1686 if (mlx4_is_slave(dev)) 1687 mlx4_slave_exit(dev); 1688 else 1689 mlx4_CLOSE_HCA(dev, 0); 1690 1691 err_free_icm: 1692 if (!mlx4_is_slave(dev)) 1693 mlx4_free_icms(dev); 1694 1695 err_stop_fw: 1696 if (!mlx4_is_slave(dev)) { 1697 mlx4_UNMAP_FA(dev); 1698 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1699 } 1700 return err; 1701 } 1702 1703 static int mlx4_init_counters_table(struct mlx4_dev *dev) 1704 { 1705 struct mlx4_priv *priv = mlx4_priv(dev); 1706 int nent; 1707 1708 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 1709 return -ENOENT; 1710 1711 nent = dev->caps.max_counters; 1712 return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0); 1713 } 1714 1715 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) 1716 { 1717 mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); 1718 } 1719 1720 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) 1721 { 1722 struct mlx4_priv *priv = mlx4_priv(dev); 1723 1724 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 1725 return -ENOENT; 1726 1727 *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); 1728 if (*idx == -1) 1729 return -ENOMEM; 1730 1731 return 0; 1732 } 1733 1734 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) 1735 { 1736 u64 out_param; 1737 int err; 1738 1739 if (mlx4_is_mfunc(dev)) { 1740 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, 1741 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 1742 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 1743 if (!err) 1744 *idx = get_param_l(&out_param); 1745 1746 return err; 1747 } 1748 return __mlx4_counter_alloc(dev, idx); 1749 } 1750 EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 1751 1752 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) 1753 { 1754 mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); 1755 return; 1756 } 1757 1758 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) 1759 { 1760 u64 in_param = 0; 1761 1762 if (mlx4_is_mfunc(dev)) { 1763 set_param_l(&in_param, idx); 1764 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE, 1765 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 1766 MLX4_CMD_WRAPPED); 1767 return; 1768 } 1769 __mlx4_counter_free(dev, idx); 1770 } 1771 EXPORT_SYMBOL_GPL(mlx4_counter_free); 1772 1773 static int mlx4_setup_hca(struct mlx4_dev *dev) 1774 { 1775 struct mlx4_priv *priv = mlx4_priv(dev); 1776 int err; 1777 int port; 1778 __be32 ib_port_default_caps; 1779 1780 err = mlx4_init_uar_table(dev); 1781 if (err) { 1782 mlx4_err(dev, "Failed to initialize " 1783 "user access region table, aborting.\n"); 1784 return err; 1785 } 1786 1787 err = mlx4_uar_alloc(dev, &priv->driver_uar); 1788 if (err) { 1789 mlx4_err(dev, "Failed to allocate driver access region, " 1790 "aborting.\n"); 1791 goto err_uar_table_free; 1792 } 1793 1794 priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 1795 if (!priv->kar) { 1796 mlx4_err(dev, "Couldn't map kernel access region, " 1797 "aborting.\n"); 1798 err = -ENOMEM; 1799 goto err_uar_free; 1800 } 1801 1802 err = mlx4_init_pd_table(dev); 1803 if (err) { 1804 mlx4_err(dev, "Failed to initialize " 1805 "protection domain table, aborting.\n"); 1806 goto err_kar_unmap; 1807 } 1808 1809 err = mlx4_init_xrcd_table(dev); 1810 if (err) { 1811 mlx4_err(dev, "Failed to initialize " 1812 "reliable connection domain table, aborting.\n"); 1813 goto err_pd_table_free; 1814 } 1815 1816 err = mlx4_init_mr_table(dev); 1817 if (err) { 1818 mlx4_err(dev, "Failed to initialize " 1819 "memory region table, aborting.\n"); 1820 goto err_xrcd_table_free; 1821 } 1822 1823 if (!mlx4_is_slave(dev)) { 1824 err = mlx4_init_mcg_table(dev); 1825 if (err) { 1826 mlx4_err(dev, "Failed to initialize multicast group table, aborting.\n"); 1827 goto err_mr_table_free; 1828 } 1829 } 1830 1831 err = mlx4_init_eq_table(dev); 1832 if (err) { 1833 mlx4_err(dev, "Failed to initialize " 1834 "event queue table, aborting.\n"); 1835 goto err_mcg_table_free; 1836 } 1837 1838 err = mlx4_cmd_use_events(dev); 1839 if (err) { 1840 mlx4_err(dev, "Failed to switch to event-driven " 1841 "firmware commands, aborting.\n"); 1842 goto err_eq_table_free; 1843 } 1844 1845 err = mlx4_NOP(dev); 1846 if (err) { 1847 if (dev->flags & MLX4_FLAG_MSI_X) { 1848 mlx4_warn(dev, "NOP command failed to generate MSI-X " 1849 "interrupt IRQ %d).\n", 1850 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 1851 mlx4_warn(dev, "Trying again without MSI-X.\n"); 1852 } else { 1853 mlx4_err(dev, "NOP command failed to generate interrupt " 1854 "(IRQ %d), aborting.\n", 1855 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 1856 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 1857 } 1858 1859 goto err_cmd_poll; 1860 } 1861 1862 mlx4_dbg(dev, "NOP command IRQ test passed\n"); 1863 1864 err = mlx4_init_cq_table(dev); 1865 if (err) { 1866 mlx4_err(dev, "Failed to initialize " 1867 "completion queue table, aborting.\n"); 1868 goto err_cmd_poll; 1869 } 1870 1871 err = mlx4_init_srq_table(dev); 1872 if (err) { 1873 mlx4_err(dev, "Failed to initialize " 1874 "shared receive queue table, aborting.\n"); 1875 goto err_cq_table_free; 1876 } 1877 1878 err = mlx4_init_qp_table(dev); 1879 if (err) { 1880 mlx4_err(dev, "Failed to initialize " 1881 "queue pair table, aborting.\n"); 1882 goto err_srq_table_free; 1883 } 1884 1885 err = mlx4_init_counters_table(dev); 1886 if (err && err != -ENOENT) { 1887 mlx4_err(dev, "Failed to initialize counters table, aborting.\n"); 1888 goto err_qp_table_free; 1889 } 1890 1891 if (!mlx4_is_slave(dev)) { 1892 for (port = 1; port <= dev->caps.num_ports; port++) { 1893 ib_port_default_caps = 0; 1894 err = mlx4_get_port_ib_caps(dev, port, 1895 &ib_port_default_caps); 1896 if (err) 1897 mlx4_warn(dev, "failed to get port %d default " 1898 "ib capabilities (%d). Continuing " 1899 "with caps = 0\n", port, err); 1900 dev->caps.ib_port_def_cap[port] = ib_port_default_caps; 1901 1902 /* initialize per-slave default ib port capabilities */ 1903 if (mlx4_is_master(dev)) { 1904 int i; 1905 for (i = 0; i < dev->num_slaves; i++) { 1906 if (i == mlx4_master_func_num(dev)) 1907 continue; 1908 priv->mfunc.master.slave_state[i].ib_cap_mask[port] = 1909 ib_port_default_caps; 1910 } 1911 } 1912 1913 if (mlx4_is_mfunc(dev)) 1914 dev->caps.port_ib_mtu[port] = IB_MTU_2048; 1915 else 1916 dev->caps.port_ib_mtu[port] = IB_MTU_4096; 1917 1918 err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? 1919 dev->caps.pkey_table_len[port] : -1); 1920 if (err) { 1921 mlx4_err(dev, "Failed to set port %d, aborting\n", 1922 port); 1923 goto err_counters_table_free; 1924 } 1925 } 1926 } 1927 1928 return 0; 1929 1930 err_counters_table_free: 1931 mlx4_cleanup_counters_table(dev); 1932 1933 err_qp_table_free: 1934 mlx4_cleanup_qp_table(dev); 1935 1936 err_srq_table_free: 1937 mlx4_cleanup_srq_table(dev); 1938 1939 err_cq_table_free: 1940 mlx4_cleanup_cq_table(dev); 1941 1942 err_cmd_poll: 1943 mlx4_cmd_use_polling(dev); 1944 1945 err_eq_table_free: 1946 mlx4_cleanup_eq_table(dev); 1947 1948 err_mcg_table_free: 1949 if (!mlx4_is_slave(dev)) 1950 mlx4_cleanup_mcg_table(dev); 1951 1952 err_mr_table_free: 1953 mlx4_cleanup_mr_table(dev); 1954 1955 err_xrcd_table_free: 1956 mlx4_cleanup_xrcd_table(dev); 1957 1958 err_pd_table_free: 1959 mlx4_cleanup_pd_table(dev); 1960 1961 err_kar_unmap: 1962 iounmap(priv->kar); 1963 1964 err_uar_free: 1965 mlx4_uar_free(dev, &priv->driver_uar); 1966 1967 err_uar_table_free: 1968 mlx4_cleanup_uar_table(dev); 1969 return err; 1970 } 1971 1972 static void mlx4_enable_msi_x(struct mlx4_dev *dev) 1973 { 1974 struct mlx4_priv *priv = mlx4_priv(dev); 1975 struct msix_entry *entries; 1976 int nreq = min_t(int, dev->caps.num_ports * 1977 min_t(int, netif_get_num_default_rss_queues() + 1, 1978 MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); 1979 int err; 1980 int i; 1981 1982 if (msi_x) { 1983 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, 1984 nreq); 1985 1986 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); 1987 if (!entries) 1988 goto no_msi; 1989 1990 for (i = 0; i < nreq; ++i) 1991 entries[i].entry = i; 1992 1993 retry: 1994 err = pci_enable_msix(dev->pdev, entries, nreq); 1995 if (err) { 1996 /* Try again if at least 2 vectors are available */ 1997 if (err > 1) { 1998 mlx4_info(dev, "Requested %d vectors, " 1999 "but only %d MSI-X vectors available, " 2000 "trying again\n", nreq, err); 2001 nreq = err; 2002 goto retry; 2003 } 2004 kfree(entries); 2005 goto no_msi; 2006 } 2007 2008 if (nreq < 2009 MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { 2010 /*Working in legacy mode , all EQ's shared*/ 2011 dev->caps.comp_pool = 0; 2012 dev->caps.num_comp_vectors = nreq - 1; 2013 } else { 2014 dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; 2015 dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; 2016 } 2017 for (i = 0; i < nreq; ++i) 2018 priv->eq_table.eq[i].irq = entries[i].vector; 2019 2020 dev->flags |= MLX4_FLAG_MSI_X; 2021 2022 kfree(entries); 2023 return; 2024 } 2025 2026 no_msi: 2027 dev->caps.num_comp_vectors = 1; 2028 dev->caps.comp_pool = 0; 2029 2030 for (i = 0; i < 2; ++i) 2031 priv->eq_table.eq[i].irq = dev->pdev->irq; 2032 } 2033 2034 static int mlx4_init_port_info(struct mlx4_dev *dev, int port) 2035 { 2036 struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; 2037 int err = 0; 2038 2039 info->dev = dev; 2040 info->port = port; 2041 if (!mlx4_is_slave(dev)) { 2042 mlx4_init_mac_table(dev, &info->mac_table); 2043 mlx4_init_vlan_table(dev, &info->vlan_table); 2044 info->base_qpn = mlx4_get_base_qpn(dev, port); 2045 } 2046 2047 sprintf(info->dev_name, "mlx4_port%d", port); 2048 info->port_attr.attr.name = info->dev_name; 2049 if (mlx4_is_mfunc(dev)) 2050 info->port_attr.attr.mode = S_IRUGO; 2051 else { 2052 info->port_attr.attr.mode = S_IRUGO | S_IWUSR; 2053 info->port_attr.store = set_port_type; 2054 } 2055 info->port_attr.show = show_port_type; 2056 sysfs_attr_init(&info->port_attr.attr); 2057 2058 err = device_create_file(&dev->pdev->dev, &info->port_attr); 2059 if (err) { 2060 mlx4_err(dev, "Failed to create file for port %d\n", port); 2061 info->port = -1; 2062 } 2063 2064 sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); 2065 info->port_mtu_attr.attr.name = info->dev_mtu_name; 2066 if (mlx4_is_mfunc(dev)) 2067 info->port_mtu_attr.attr.mode = S_IRUGO; 2068 else { 2069 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; 2070 info->port_mtu_attr.store = set_port_ib_mtu; 2071 } 2072 info->port_mtu_attr.show = show_port_ib_mtu; 2073 sysfs_attr_init(&info->port_mtu_attr.attr); 2074 2075 err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); 2076 if (err) { 2077 mlx4_err(dev, "Failed to create mtu file for port %d\n", port); 2078 device_remove_file(&info->dev->pdev->dev, &info->port_attr); 2079 info->port = -1; 2080 } 2081 2082 return err; 2083 } 2084 2085 static void mlx4_cleanup_port_info(struct mlx4_port_info *info) 2086 { 2087 if (info->port < 0) 2088 return; 2089 2090 device_remove_file(&info->dev->pdev->dev, &info->port_attr); 2091 device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); 2092 } 2093 2094 static int mlx4_init_steering(struct mlx4_dev *dev) 2095 { 2096 struct mlx4_priv *priv = mlx4_priv(dev); 2097 int num_entries = dev->caps.num_ports; 2098 int i, j; 2099 2100 priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); 2101 if (!priv->steer) 2102 return -ENOMEM; 2103 2104 for (i = 0; i < num_entries; i++) 2105 for (j = 0; j < MLX4_NUM_STEERS; j++) { 2106 INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); 2107 INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); 2108 } 2109 return 0; 2110 } 2111 2112 static void mlx4_clear_steering(struct mlx4_dev *dev) 2113 { 2114 struct mlx4_priv *priv = mlx4_priv(dev); 2115 struct mlx4_steer_index *entry, *tmp_entry; 2116 struct mlx4_promisc_qp *pqp, *tmp_pqp; 2117 int num_entries = dev->caps.num_ports; 2118 int i, j; 2119 2120 for (i = 0; i < num_entries; i++) { 2121 for (j = 0; j < MLX4_NUM_STEERS; j++) { 2122 list_for_each_entry_safe(pqp, tmp_pqp, 2123 &priv->steer[i].promisc_qps[j], 2124 list) { 2125 list_del(&pqp->list); 2126 kfree(pqp); 2127 } 2128 list_for_each_entry_safe(entry, tmp_entry, 2129 &priv->steer[i].steer_entries[j], 2130 list) { 2131 list_del(&entry->list); 2132 list_for_each_entry_safe(pqp, tmp_pqp, 2133 &entry->duplicates, 2134 list) { 2135 list_del(&pqp->list); 2136 kfree(pqp); 2137 } 2138 kfree(entry); 2139 } 2140 } 2141 } 2142 kfree(priv->steer); 2143 } 2144 2145 static int extended_func_num(struct pci_dev *pdev) 2146 { 2147 return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); 2148 } 2149 2150 #define MLX4_OWNER_BASE 0x8069c 2151 #define MLX4_OWNER_SIZE 4 2152 2153 static int mlx4_get_ownership(struct mlx4_dev *dev) 2154 { 2155 void __iomem *owner; 2156 u32 ret; 2157 2158 if (pci_channel_offline(dev->pdev)) 2159 return -EIO; 2160 2161 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 2162 MLX4_OWNER_SIZE); 2163 if (!owner) { 2164 mlx4_err(dev, "Failed to obtain ownership bit\n"); 2165 return -ENOMEM; 2166 } 2167 2168 ret = readl(owner); 2169 iounmap(owner); 2170 return (int) !!ret; 2171 } 2172 2173 static void mlx4_free_ownership(struct mlx4_dev *dev) 2174 { 2175 void __iomem *owner; 2176 2177 if (pci_channel_offline(dev->pdev)) 2178 return; 2179 2180 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 2181 MLX4_OWNER_SIZE); 2182 if (!owner) { 2183 mlx4_err(dev, "Failed to obtain ownership bit\n"); 2184 return; 2185 } 2186 writel(0, owner); 2187 msleep(1000); 2188 iounmap(owner); 2189 } 2190 2191 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) 2192 { 2193 struct mlx4_priv *priv; 2194 struct mlx4_dev *dev; 2195 int err; 2196 int port; 2197 2198 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); 2199 2200 err = pci_enable_device(pdev); 2201 if (err) { 2202 dev_err(&pdev->dev, "Cannot enable PCI device, " 2203 "aborting.\n"); 2204 return err; 2205 } 2206 2207 /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS 2208 * per port, we must limit the number of VFs to 63 (since their are 2209 * 128 MACs) 2210 */ 2211 if (num_vfs >= MLX4_MAX_NUM_VF) { 2212 dev_err(&pdev->dev, 2213 "Requested more VF's (%d) than allowed (%d)\n", 2214 num_vfs, MLX4_MAX_NUM_VF - 1); 2215 return -EINVAL; 2216 } 2217 2218 if (num_vfs < 0) { 2219 pr_err("num_vfs module parameter cannot be negative\n"); 2220 return -EINVAL; 2221 } 2222 /* 2223 * Check for BARs. 2224 */ 2225 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && 2226 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 2227 dev_err(&pdev->dev, "Missing DCS, aborting." 2228 "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", 2229 pci_dev_data, pci_resource_flags(pdev, 0)); 2230 err = -ENODEV; 2231 goto err_disable_pdev; 2232 } 2233 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { 2234 dev_err(&pdev->dev, "Missing UAR, aborting.\n"); 2235 err = -ENODEV; 2236 goto err_disable_pdev; 2237 } 2238 2239 err = pci_request_regions(pdev, DRV_NAME); 2240 if (err) { 2241 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 2242 goto err_disable_pdev; 2243 } 2244 2245 pci_set_master(pdev); 2246 2247 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 2248 if (err) { 2249 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); 2250 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 2251 if (err) { 2252 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); 2253 goto err_release_regions; 2254 } 2255 } 2256 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 2257 if (err) { 2258 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " 2259 "consistent PCI DMA mask.\n"); 2260 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 2261 if (err) { 2262 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " 2263 "aborting.\n"); 2264 goto err_release_regions; 2265 } 2266 } 2267 2268 /* Allow large DMA segments, up to the firmware limit of 1 GB */ 2269 dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); 2270 2271 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 2272 if (!priv) { 2273 err = -ENOMEM; 2274 goto err_release_regions; 2275 } 2276 2277 dev = &priv->dev; 2278 dev->pdev = pdev; 2279 INIT_LIST_HEAD(&priv->ctx_list); 2280 spin_lock_init(&priv->ctx_lock); 2281 2282 mutex_init(&priv->port_mutex); 2283 2284 INIT_LIST_HEAD(&priv->pgdir_list); 2285 mutex_init(&priv->pgdir_mutex); 2286 2287 INIT_LIST_HEAD(&priv->bf_list); 2288 mutex_init(&priv->bf_mutex); 2289 2290 dev->rev_id = pdev->revision; 2291 dev->numa_node = dev_to_node(&pdev->dev); 2292 /* Detect if this device is a virtual function */ 2293 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { 2294 /* When acting as pf, we normally skip vfs unless explicitly 2295 * requested to probe them. */ 2296 if (num_vfs && extended_func_num(pdev) > probe_vf) { 2297 mlx4_warn(dev, "Skipping virtual function:%d\n", 2298 extended_func_num(pdev)); 2299 err = -ENODEV; 2300 goto err_free_dev; 2301 } 2302 mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); 2303 dev->flags |= MLX4_FLAG_SLAVE; 2304 } else { 2305 /* We reset the device and enable SRIOV only for physical 2306 * devices. Try to claim ownership on the device; 2307 * if already taken, skip -- do not allow multiple PFs */ 2308 err = mlx4_get_ownership(dev); 2309 if (err) { 2310 if (err < 0) 2311 goto err_free_dev; 2312 else { 2313 mlx4_warn(dev, "Multiple PFs not yet supported." 2314 " Skipping PF.\n"); 2315 err = -EINVAL; 2316 goto err_free_dev; 2317 } 2318 } 2319 2320 if (num_vfs) { 2321 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); 2322 err = pci_enable_sriov(pdev, num_vfs); 2323 if (err) { 2324 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", 2325 err); 2326 err = 0; 2327 } else { 2328 mlx4_warn(dev, "Running in master mode\n"); 2329 dev->flags |= MLX4_FLAG_SRIOV | 2330 MLX4_FLAG_MASTER; 2331 dev->num_vfs = num_vfs; 2332 } 2333 } 2334 2335 atomic_set(&priv->opreq_count, 0); 2336 INIT_WORK(&priv->opreq_task, mlx4_opreq_action); 2337 2338 /* 2339 * Now reset the HCA before we touch the PCI capabilities or 2340 * attempt a firmware command, since a boot ROM may have left 2341 * the HCA in an undefined state. 2342 */ 2343 err = mlx4_reset(dev); 2344 if (err) { 2345 mlx4_err(dev, "Failed to reset HCA, aborting.\n"); 2346 goto err_rel_own; 2347 } 2348 } 2349 2350 slave_start: 2351 err = mlx4_cmd_init(dev); 2352 if (err) { 2353 mlx4_err(dev, "Failed to init command interface, aborting.\n"); 2354 goto err_sriov; 2355 } 2356 2357 /* In slave functions, the communication channel must be initialized 2358 * before posting commands. Also, init num_slaves before calling 2359 * mlx4_init_hca */ 2360 if (mlx4_is_mfunc(dev)) { 2361 if (mlx4_is_master(dev)) 2362 dev->num_slaves = MLX4_MAX_NUM_SLAVES; 2363 else { 2364 dev->num_slaves = 0; 2365 err = mlx4_multi_func_init(dev); 2366 if (err) { 2367 mlx4_err(dev, "Failed to init slave mfunc" 2368 " interface, aborting.\n"); 2369 goto err_cmd; 2370 } 2371 } 2372 } 2373 2374 err = mlx4_init_hca(dev); 2375 if (err) { 2376 if (err == -EACCES) { 2377 /* Not primary Physical function 2378 * Running in slave mode */ 2379 mlx4_cmd_cleanup(dev); 2380 dev->flags |= MLX4_FLAG_SLAVE; 2381 dev->flags &= ~MLX4_FLAG_MASTER; 2382 goto slave_start; 2383 } else 2384 goto err_mfunc; 2385 } 2386 2387 /* check if the device is functioning at its maximum possible speed. 2388 * No return code for this call, just warn the user in case of PCI 2389 * express device capabilities are under-satisfied by the bus. 2390 */ 2391 mlx4_check_pcie_caps(dev); 2392 2393 /* In master functions, the communication channel must be initialized 2394 * after obtaining its address from fw */ 2395 if (mlx4_is_master(dev)) { 2396 err = mlx4_multi_func_init(dev); 2397 if (err) { 2398 mlx4_err(dev, "Failed to init master mfunc" 2399 "interface, aborting.\n"); 2400 goto err_close; 2401 } 2402 } 2403 2404 err = mlx4_alloc_eq_table(dev); 2405 if (err) 2406 goto err_master_mfunc; 2407 2408 priv->msix_ctl.pool_bm = 0; 2409 mutex_init(&priv->msix_ctl.pool_lock); 2410 2411 mlx4_enable_msi_x(dev); 2412 if ((mlx4_is_mfunc(dev)) && 2413 !(dev->flags & MLX4_FLAG_MSI_X)) { 2414 err = -ENOSYS; 2415 mlx4_err(dev, "INTx is not supported in multi-function mode." 2416 " aborting.\n"); 2417 goto err_free_eq; 2418 } 2419 2420 if (!mlx4_is_slave(dev)) { 2421 err = mlx4_init_steering(dev); 2422 if (err) 2423 goto err_free_eq; 2424 } 2425 2426 err = mlx4_setup_hca(dev); 2427 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && 2428 !mlx4_is_mfunc(dev)) { 2429 dev->flags &= ~MLX4_FLAG_MSI_X; 2430 dev->caps.num_comp_vectors = 1; 2431 dev->caps.comp_pool = 0; 2432 pci_disable_msix(pdev); 2433 err = mlx4_setup_hca(dev); 2434 } 2435 2436 if (err) 2437 goto err_steer; 2438 2439 mlx4_init_quotas(dev); 2440 2441 for (port = 1; port <= dev->caps.num_ports; port++) { 2442 err = mlx4_init_port_info(dev, port); 2443 if (err) 2444 goto err_port; 2445 } 2446 2447 err = mlx4_register_device(dev); 2448 if (err) 2449 goto err_port; 2450 2451 mlx4_request_modules(dev); 2452 2453 mlx4_sense_init(dev); 2454 mlx4_start_sense(dev); 2455 2456 priv->pci_dev_data = pci_dev_data; 2457 pci_set_drvdata(pdev, dev); 2458 2459 return 0; 2460 2461 err_port: 2462 for (--port; port >= 1; --port) 2463 mlx4_cleanup_port_info(&priv->port[port]); 2464 2465 mlx4_cleanup_counters_table(dev); 2466 mlx4_cleanup_qp_table(dev); 2467 mlx4_cleanup_srq_table(dev); 2468 mlx4_cleanup_cq_table(dev); 2469 mlx4_cmd_use_polling(dev); 2470 mlx4_cleanup_eq_table(dev); 2471 mlx4_cleanup_mcg_table(dev); 2472 mlx4_cleanup_mr_table(dev); 2473 mlx4_cleanup_xrcd_table(dev); 2474 mlx4_cleanup_pd_table(dev); 2475 mlx4_cleanup_uar_table(dev); 2476 2477 err_steer: 2478 if (!mlx4_is_slave(dev)) 2479 mlx4_clear_steering(dev); 2480 2481 err_free_eq: 2482 mlx4_free_eq_table(dev); 2483 2484 err_master_mfunc: 2485 if (mlx4_is_master(dev)) 2486 mlx4_multi_func_cleanup(dev); 2487 2488 err_close: 2489 if (dev->flags & MLX4_FLAG_MSI_X) 2490 pci_disable_msix(pdev); 2491 2492 mlx4_close_hca(dev); 2493 2494 err_mfunc: 2495 if (mlx4_is_slave(dev)) 2496 mlx4_multi_func_cleanup(dev); 2497 2498 err_cmd: 2499 mlx4_cmd_cleanup(dev); 2500 2501 err_sriov: 2502 if (dev->flags & MLX4_FLAG_SRIOV) 2503 pci_disable_sriov(pdev); 2504 2505 err_rel_own: 2506 if (!mlx4_is_slave(dev)) 2507 mlx4_free_ownership(dev); 2508 2509 err_free_dev: 2510 kfree(priv); 2511 2512 err_release_regions: 2513 pci_release_regions(pdev); 2514 2515 err_disable_pdev: 2516 pci_disable_device(pdev); 2517 pci_set_drvdata(pdev, NULL); 2518 return err; 2519 } 2520 2521 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) 2522 { 2523 printk_once(KERN_INFO "%s", mlx4_version); 2524 2525 return __mlx4_init_one(pdev, id->driver_data); 2526 } 2527 2528 static void mlx4_remove_one(struct pci_dev *pdev) 2529 { 2530 struct mlx4_dev *dev = pci_get_drvdata(pdev); 2531 struct mlx4_priv *priv = mlx4_priv(dev); 2532 int p; 2533 2534 if (dev) { 2535 /* in SRIOV it is not allowed to unload the pf's 2536 * driver while there are alive vf's */ 2537 if (mlx4_is_master(dev)) { 2538 if (mlx4_how_many_lives_vf(dev)) 2539 printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n"); 2540 } 2541 mlx4_stop_sense(dev); 2542 mlx4_unregister_device(dev); 2543 2544 for (p = 1; p <= dev->caps.num_ports; p++) { 2545 mlx4_cleanup_port_info(&priv->port[p]); 2546 mlx4_CLOSE_PORT(dev, p); 2547 } 2548 2549 if (mlx4_is_master(dev)) 2550 mlx4_free_resource_tracker(dev, 2551 RES_TR_FREE_SLAVES_ONLY); 2552 2553 mlx4_cleanup_counters_table(dev); 2554 mlx4_cleanup_qp_table(dev); 2555 mlx4_cleanup_srq_table(dev); 2556 mlx4_cleanup_cq_table(dev); 2557 mlx4_cmd_use_polling(dev); 2558 mlx4_cleanup_eq_table(dev); 2559 mlx4_cleanup_mcg_table(dev); 2560 mlx4_cleanup_mr_table(dev); 2561 mlx4_cleanup_xrcd_table(dev); 2562 mlx4_cleanup_pd_table(dev); 2563 2564 if (mlx4_is_master(dev)) 2565 mlx4_free_resource_tracker(dev, 2566 RES_TR_FREE_STRUCTS_ONLY); 2567 2568 iounmap(priv->kar); 2569 mlx4_uar_free(dev, &priv->driver_uar); 2570 mlx4_cleanup_uar_table(dev); 2571 if (!mlx4_is_slave(dev)) 2572 mlx4_clear_steering(dev); 2573 mlx4_free_eq_table(dev); 2574 if (mlx4_is_master(dev)) 2575 mlx4_multi_func_cleanup(dev); 2576 mlx4_close_hca(dev); 2577 if (mlx4_is_slave(dev)) 2578 mlx4_multi_func_cleanup(dev); 2579 mlx4_cmd_cleanup(dev); 2580 2581 if (dev->flags & MLX4_FLAG_MSI_X) 2582 pci_disable_msix(pdev); 2583 if (dev->flags & MLX4_FLAG_SRIOV) { 2584 mlx4_warn(dev, "Disabling SR-IOV\n"); 2585 pci_disable_sriov(pdev); 2586 } 2587 2588 if (!mlx4_is_slave(dev)) 2589 mlx4_free_ownership(dev); 2590 2591 kfree(dev->caps.qp0_tunnel); 2592 kfree(dev->caps.qp0_proxy); 2593 kfree(dev->caps.qp1_tunnel); 2594 kfree(dev->caps.qp1_proxy); 2595 2596 kfree(priv); 2597 pci_release_regions(pdev); 2598 pci_disable_device(pdev); 2599 pci_set_drvdata(pdev, NULL); 2600 } 2601 } 2602 2603 int mlx4_restart_one(struct pci_dev *pdev) 2604 { 2605 struct mlx4_dev *dev = pci_get_drvdata(pdev); 2606 struct mlx4_priv *priv = mlx4_priv(dev); 2607 int pci_dev_data; 2608 2609 pci_dev_data = priv->pci_dev_data; 2610 mlx4_remove_one(pdev); 2611 return __mlx4_init_one(pdev, pci_dev_data); 2612 } 2613 2614 static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { 2615 /* MT25408 "Hermon" SDR */ 2616 { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2617 /* MT25408 "Hermon" DDR */ 2618 { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2619 /* MT25408 "Hermon" QDR */ 2620 { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2621 /* MT25408 "Hermon" DDR PCIe gen2 */ 2622 { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2623 /* MT25408 "Hermon" QDR PCIe gen2 */ 2624 { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2625 /* MT25408 "Hermon" EN 10GigE */ 2626 { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2627 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 2628 { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2629 /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 2630 { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2631 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 2632 { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2633 /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 2634 { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2635 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 2636 { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2637 /* MT26478 ConnectX2 40GigE PCIe gen2 */ 2638 { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 2639 /* MT25400 Family [ConnectX-2 Virtual Function] */ 2640 { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, 2641 /* MT27500 Family [ConnectX-3] */ 2642 { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, 2643 /* MT27500 Family [ConnectX-3 Virtual Function] */ 2644 { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, 2645 { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ 2646 { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ 2647 { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ 2648 { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ 2649 { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ 2650 { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ 2651 { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ 2652 { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ 2653 { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ 2654 { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ 2655 { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ 2656 { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ 2657 { 0, } 2658 }; 2659 2660 MODULE_DEVICE_TABLE(pci, mlx4_pci_table); 2661 2662 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, 2663 pci_channel_state_t state) 2664 { 2665 mlx4_remove_one(pdev); 2666 2667 return state == pci_channel_io_perm_failure ? 2668 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 2669 } 2670 2671 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) 2672 { 2673 int ret = __mlx4_init_one(pdev, 0); 2674 2675 return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 2676 } 2677 2678 static const struct pci_error_handlers mlx4_err_handler = { 2679 .error_detected = mlx4_pci_err_detected, 2680 .slot_reset = mlx4_pci_slot_reset, 2681 }; 2682 2683 static struct pci_driver mlx4_driver = { 2684 .name = DRV_NAME, 2685 .id_table = mlx4_pci_table, 2686 .probe = mlx4_init_one, 2687 .remove = mlx4_remove_one, 2688 .err_handler = &mlx4_err_handler, 2689 }; 2690 2691 static int __init mlx4_verify_params(void) 2692 { 2693 if ((log_num_mac < 0) || (log_num_mac > 7)) { 2694 pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); 2695 return -1; 2696 } 2697 2698 if (log_num_vlan != 0) 2699 pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", 2700 MLX4_LOG_NUM_VLANS); 2701 2702 if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) { 2703 pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); 2704 return -1; 2705 } 2706 2707 /* Check if module param for ports type has legal combination */ 2708 if (port_type_array[0] == false && port_type_array[1] == true) { 2709 printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n"); 2710 port_type_array[0] = true; 2711 } 2712 2713 if (mlx4_log_num_mgm_entry_size != -1 && 2714 (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || 2715 mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { 2716 pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " 2717 "in legal range (-1 or %d..%d)\n", 2718 mlx4_log_num_mgm_entry_size, 2719 MLX4_MIN_MGM_LOG_ENTRY_SIZE, 2720 MLX4_MAX_MGM_LOG_ENTRY_SIZE); 2721 return -1; 2722 } 2723 2724 return 0; 2725 } 2726 2727 static int __init mlx4_init(void) 2728 { 2729 int ret; 2730 2731 if (mlx4_verify_params()) 2732 return -EINVAL; 2733 2734 mlx4_catas_init(); 2735 2736 mlx4_wq = create_singlethread_workqueue("mlx4"); 2737 if (!mlx4_wq) 2738 return -ENOMEM; 2739 2740 ret = pci_register_driver(&mlx4_driver); 2741 if (ret < 0) 2742 destroy_workqueue(mlx4_wq); 2743 return ret < 0 ? ret : 0; 2744 } 2745 2746 static void __exit mlx4_cleanup(void) 2747 { 2748 pci_unregister_driver(&mlx4_driver); 2749 destroy_workqueue(mlx4_wq); 2750 } 2751 2752 module_init(mlx4_init); 2753 module_exit(mlx4_cleanup); 2754