/*
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
34 */ 35 36 #include <linux/module.h> 37 #include <linux/init.h> 38 #include <linux/errno.h> 39 #include <linux/pci.h> 40 #include <linux/dma-mapping.h> 41 #include <linux/slab.h> 42 #include <linux/io-mapping.h> 43 #include <linux/delay.h> 44 #include <linux/kmod.h> 45 #include <net/devlink.h> 46 47 #include <linux/mlx4/device.h> 48 #include <linux/mlx4/doorbell.h> 49 50 #include "mlx4.h" 51 #include "fw.h" 52 #include "icm.h" 53 54 MODULE_AUTHOR("Roland Dreier"); 55 MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); 56 MODULE_LICENSE("Dual BSD/GPL"); 57 MODULE_VERSION(DRV_VERSION); 58 59 struct workqueue_struct *mlx4_wq; 60 61 #ifdef CONFIG_MLX4_DEBUG 62 63 int mlx4_debug_level = 0; 64 module_param_named(debug_level, mlx4_debug_level, int, 0644); 65 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 66 67 #endif /* CONFIG_MLX4_DEBUG */ 68 69 #ifdef CONFIG_PCI_MSI 70 71 static int msi_x = 1; 72 module_param(msi_x, int, 0444); 73 MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); 74 75 #else /* CONFIG_PCI_MSI */ 76 77 #define msi_x (0) 78 79 #endif /* CONFIG_PCI_MSI */ 80 81 static uint8_t num_vfs[3] = {0, 0, 0}; 82 static int num_vfs_argc; 83 module_param_array(num_vfs, byte , &num_vfs_argc, 0444); 84 MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n" 85 "num_vfs=port1,port2,port1+2"); 86 87 static uint8_t probe_vf[3] = {0, 0, 0}; 88 static int probe_vfs_argc; 89 module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); 90 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" 91 "probe_vf=port1,port2,port1+2"); 92 93 int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 94 module_param_named(log_num_mgm_entry_size, 95 mlx4_log_num_mgm_entry_size, int, 0444); 96 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" 97 " of qp per mcg, for example:" 98 " 10 gives 248.range: 7 <=" 99 " log_num_mgm_entry_size <= 12." 
100 " To activate device managed" 101 " flow steering when available, set to -1"); 102 103 static bool enable_64b_cqe_eqe = true; 104 module_param(enable_64b_cqe_eqe, bool, 0444); 105 MODULE_PARM_DESC(enable_64b_cqe_eqe, 106 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)"); 107 108 static bool enable_4k_uar; 109 module_param(enable_4k_uar, bool, 0444); 110 MODULE_PARM_DESC(enable_4k_uar, 111 "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)"); 112 113 #define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \ 114 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ 115 MLX4_FUNC_CAP_DMFS_A0_STATIC) 116 117 #define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) 118 119 static char mlx4_version[] = 120 DRV_NAME ": Mellanox ConnectX core driver v" 121 DRV_VERSION " (" DRV_RELDATE ")\n"; 122 123 static struct mlx4_profile default_profile = { 124 .num_qp = 1 << 18, 125 .num_srq = 1 << 16, 126 .rdmarc_per_qp = 1 << 4, 127 .num_cq = 1 << 16, 128 .num_mcg = 1 << 13, 129 .num_mpt = 1 << 19, 130 .num_mtt = 1 << 20, /* It is really num mtt segements */ 131 }; 132 133 static struct mlx4_profile low_mem_profile = { 134 .num_qp = 1 << 17, 135 .num_srq = 1 << 6, 136 .rdmarc_per_qp = 1 << 4, 137 .num_cq = 1 << 8, 138 .num_mcg = 1 << 8, 139 .num_mpt = 1 << 9, 140 .num_mtt = 1 << 7, 141 }; 142 143 static int log_num_mac = 7; 144 module_param_named(log_num_mac, log_num_mac, int, 0444); 145 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); 146 147 static int log_num_vlan; 148 module_param_named(log_num_vlan, log_num_vlan, int, 0444); 149 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); 150 /* Log2 max number of VLANs per ETH port (0-7) */ 151 #define MLX4_LOG_NUM_VLANS 7 152 #define MLX4_MIN_LOG_NUM_VLANS 0 153 #define MLX4_MIN_LOG_NUM_MAC 1 154 155 static bool use_prio; 156 module_param_named(use_prio, use_prio, bool, 0444); 157 MODULE_PARM_DESC(use_prio, "Enable steering 
by VLAN priority on ETH ports (deprecated)"); 158 159 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); 160 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 161 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)"); 162 163 static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE}; 164 static int arr_argc = 2; 165 module_param_array(port_type_array, int, &arr_argc, 0444); 166 MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default " 167 "1 for IB, 2 for Ethernet"); 168 169 struct mlx4_port_config { 170 struct list_head list; 171 enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; 172 struct pci_dev *pdev; 173 }; 174 175 static atomic_t pf_loading = ATOMIC_INIT(0); 176 177 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, 178 struct mlx4_dev_cap *dev_cap) 179 { 180 /* The reserved_uars is calculated by system page size unit. 181 * Therefore, adjustment is added when the uar page size is less 182 * than the system page size 183 */ 184 dev->caps.reserved_uars = 185 max_t(int, 186 mlx4_get_num_reserved_uar(dev), 187 dev_cap->reserved_uars / 188 (1 << (PAGE_SHIFT - dev->uar_page_shift))); 189 } 190 191 int mlx4_check_port_params(struct mlx4_dev *dev, 192 enum mlx4_port_type *port_type) 193 { 194 int i; 195 196 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 197 for (i = 0; i < dev->caps.num_ports - 1; i++) { 198 if (port_type[i] != port_type[i + 1]) { 199 mlx4_err(dev, "Only same port types supported on this HCA, aborting\n"); 200 return -EINVAL; 201 } 202 } 203 } 204 205 for (i = 0; i < dev->caps.num_ports; i++) { 206 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 207 mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n", 208 i + 1); 209 return -EINVAL; 210 } 211 } 212 return 0; 213 } 214 215 static void mlx4_set_port_mask(struct mlx4_dev *dev) 216 { 217 int i; 218 219 for (i = 1; i <= dev->caps.num_ports; ++i) 220 
dev->caps.port_mask[i] = dev->caps.port_type[i]; 221 } 222 223 enum { 224 MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, 225 }; 226 227 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 228 { 229 int err = 0; 230 struct mlx4_func func; 231 232 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { 233 err = mlx4_QUERY_FUNC(dev, &func, 0); 234 if (err) { 235 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 236 return err; 237 } 238 dev_cap->max_eqs = func.max_eq; 239 dev_cap->reserved_eqs = func.rsvd_eqs; 240 dev_cap->reserved_uars = func.rsvd_uars; 241 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; 242 } 243 return err; 244 } 245 246 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) 247 { 248 struct mlx4_caps *dev_cap = &dev->caps; 249 250 /* FW not supporting or cancelled by user */ 251 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) || 252 !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) 253 return; 254 255 /* Must have 64B CQE_EQE enabled by FW to use bigger stride 256 * When FW has NCSI it may decide not to report 64B CQE/EQEs 257 */ 258 if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) || 259 !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) { 260 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; 261 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; 262 return; 263 } 264 265 if (cache_line_size() == 128 || cache_line_size() == 256) { 266 mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n"); 267 /* Changing the real data inside CQE size to 32B */ 268 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 269 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 270 271 if (mlx4_is_master(dev)) 272 dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE; 273 } else { 274 if (cache_line_size() != 32 && cache_line_size() != 64) 275 mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n"); 276 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; 277 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; 278 } 279 } 280 281 static int 
_mlx4_dev_port(struct mlx4_dev *dev, int port, 282 struct mlx4_port_cap *port_cap) 283 { 284 dev->caps.vl_cap[port] = port_cap->max_vl; 285 dev->caps.ib_mtu_cap[port] = port_cap->ib_mtu; 286 dev->phys_caps.gid_phys_table_len[port] = port_cap->max_gids; 287 dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys; 288 /* set gid and pkey table operating lengths by default 289 * to non-sriov values 290 */ 291 dev->caps.gid_table_len[port] = port_cap->max_gids; 292 dev->caps.pkey_table_len[port] = port_cap->max_pkeys; 293 dev->caps.port_width_cap[port] = port_cap->max_port_width; 294 dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; 295 dev->caps.def_mac[port] = port_cap->def_mac; 296 dev->caps.supported_type[port] = port_cap->supported_port_types; 297 dev->caps.suggested_type[port] = port_cap->suggested_type; 298 dev->caps.default_sense[port] = port_cap->default_sense; 299 dev->caps.trans_type[port] = port_cap->trans_type; 300 dev->caps.vendor_oui[port] = port_cap->vendor_oui; 301 dev->caps.wavelength[port] = port_cap->wavelength; 302 dev->caps.trans_code[port] = port_cap->trans_code; 303 304 return 0; 305 } 306 307 static int mlx4_dev_port(struct mlx4_dev *dev, int port, 308 struct mlx4_port_cap *port_cap) 309 { 310 int err = 0; 311 312 err = mlx4_QUERY_PORT(dev, port, port_cap); 313 314 if (err) 315 mlx4_err(dev, "QUERY_PORT command failed.\n"); 316 317 return err; 318 } 319 320 static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev) 321 { 322 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS)) 323 return; 324 325 if (mlx4_is_mfunc(dev)) { 326 mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS"); 327 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; 328 return; 329 } 330 331 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) { 332 mlx4_dbg(dev, 333 "Keep FCS is not supported - Disabling Ignore FCS"); 334 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS; 335 return; 336 } 337 } 338 339 #define MLX4_A0_STEERING_TABLE_SIZE 256 340 static int 
mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 341 { 342 int err; 343 int i; 344 345 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 346 if (err) { 347 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); 348 return err; 349 } 350 mlx4_dev_cap_dump(dev, dev_cap); 351 352 if (dev_cap->min_page_sz > PAGE_SIZE) { 353 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", 354 dev_cap->min_page_sz, PAGE_SIZE); 355 return -ENODEV; 356 } 357 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 358 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", 359 dev_cap->num_ports, MLX4_MAX_PORTS); 360 return -ENODEV; 361 } 362 363 if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { 364 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", 365 dev_cap->uar_size, 366 (unsigned long long) 367 pci_resource_len(dev->persist->pdev, 2)); 368 return -ENODEV; 369 } 370 371 dev->caps.num_ports = dev_cap->num_ports; 372 dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; 373 dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? 
374 dev->caps.num_sys_eqs : 375 MLX4_MAX_EQ_NUM; 376 for (i = 1; i <= dev->caps.num_ports; ++i) { 377 err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i); 378 if (err) { 379 mlx4_err(dev, "QUERY_PORT command failed, aborting\n"); 380 return err; 381 } 382 } 383 384 dev->caps.uar_page_size = PAGE_SIZE; 385 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; 386 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 387 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 388 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 389 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 390 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 391 dev->caps.max_wqes = dev_cap->max_qp_sz; 392 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 393 dev->caps.max_srq_wqes = dev_cap->max_srq_sz; 394 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 395 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 396 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 397 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 398 /* 399 * Subtract 1 from the limit because we need to allocate a 400 * spare CQE so the HCA HW can tell the difference between an 401 * empty CQ and a full CQ. 402 */ 403 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 404 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 405 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 406 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 407 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 408 409 dev->caps.reserved_pds = dev_cap->reserved_pds; 410 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 411 dev_cap->reserved_xrcds : 0; 412 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 
413 dev_cap->max_xrcds : 0; 414 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 415 416 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 417 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 418 dev->caps.flags = dev_cap->flags; 419 dev->caps.flags2 = dev_cap->flags2; 420 dev->caps.bmme_flags = dev_cap->bmme_flags; 421 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 422 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 423 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 424 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 425 426 /* Save uar page shift */ 427 if (!mlx4_is_slave(dev)) { 428 /* Virtual PCI function needs to determine UAR page size from 429 * firmware. Only master PCI function can set the uar page size 430 */ 431 if (enable_4k_uar) 432 dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; 433 else 434 dev->uar_page_shift = PAGE_SHIFT; 435 436 mlx4_set_num_reserved_uars(dev, dev_cap); 437 } 438 439 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { 440 struct mlx4_init_hca_param hca_param; 441 442 memset(&hca_param, 0, sizeof(hca_param)); 443 err = mlx4_QUERY_HCA(dev, &hca_param); 444 /* Turn off PHV_EN flag in case phv_check_en is set. 445 * phv_check_en is a HW check that parse the packet and verify 446 * phv bit was reported correctly in the wqe. To allow QinQ 447 * PHV_EN flag should be set and phv_check_en must be cleared 448 * otherwise QinQ packets will be drop by the HW. 
449 */ 450 if (err || hca_param.phv_check_en) 451 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN; 452 } 453 454 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 455 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 456 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 457 /* Don't do sense port on multifunction devices (for now at least) */ 458 if (mlx4_is_mfunc(dev)) 459 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 460 461 if (mlx4_low_memory_profile()) { 462 dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC; 463 dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS; 464 } else { 465 dev->caps.log_num_macs = log_num_mac; 466 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 467 } 468 469 for (i = 1; i <= dev->caps.num_ports; ++i) { 470 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 471 if (dev->caps.supported_type[i]) { 472 /* if only ETH is supported - assign ETH */ 473 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 474 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 475 /* if only IB is supported, assign IB */ 476 else if (dev->caps.supported_type[i] == 477 MLX4_PORT_TYPE_IB) 478 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 479 else { 480 /* if IB and ETH are supported, we set the port 481 * type according to user selection of port type; 482 * if user selected none, take the FW hint */ 483 if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) 484 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 485 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 486 else 487 dev->caps.port_type[i] = port_type_array[i - 1]; 488 } 489 } 490 /* 491 * Link sensing is allowed on the port if 3 conditions are true: 492 * 1. Both protocols are supported on the port. 493 * 2. Different types are supported on the port 494 * 3. 
FW declared that it supports link sensing 495 */ 496 mlx4_priv(dev)->sense.sense_allowed[i] = 497 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 498 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 499 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 500 501 /* 502 * If "default_sense" bit is set, we move the port to "AUTO" mode 503 * and perform sense_port FW command to try and set the correct 504 * port type from beginning 505 */ 506 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 507 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 508 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 509 mlx4_SENSE_PORT(dev, i, &sensed_port); 510 if (sensed_port != MLX4_PORT_TYPE_NONE) 511 dev->caps.port_type[i] = sensed_port; 512 } else { 513 dev->caps.possible_type[i] = dev->caps.port_type[i]; 514 } 515 516 if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) { 517 dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs; 518 mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n", 519 i, 1 << dev->caps.log_num_macs); 520 } 521 if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) { 522 dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans; 523 mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n", 524 i, 1 << dev->caps.log_num_vlans); 525 } 526 } 527 528 if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) && 529 (port_type_array[0] == MLX4_PORT_TYPE_IB) && 530 (port_type_array[1] == MLX4_PORT_TYPE_ETH)) { 531 mlx4_warn(dev, 532 "Granular QoS per VF not supported with IB/Eth configuration\n"); 533 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP; 534 } 535 536 dev->caps.max_counters = dev_cap->max_counters; 537 538 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 539 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 540 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 541 (1 << dev->caps.log_num_macs) * 542 (1 
<< dev->caps.log_num_vlans) * 543 dev->caps.num_ports; 544 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 545 546 if (dev_cap->dmfs_high_rate_qpn_base > 0 && 547 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) 548 dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base; 549 else 550 dev->caps.dmfs_high_rate_qpn_base = 551 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; 552 553 if (dev_cap->dmfs_high_rate_qpn_range > 0 && 554 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { 555 dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range; 556 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT; 557 dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0; 558 } else { 559 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED; 560 dev->caps.dmfs_high_rate_qpn_base = 561 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; 562 dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE; 563 } 564 565 dev->caps.rl_caps = dev_cap->rl_caps; 566 567 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] = 568 dev->caps.dmfs_high_rate_qpn_range; 569 570 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 571 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 572 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 573 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 574 575 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? 
MLX4_MAX_NUM_SLAVES : 0; 576 577 if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 578 if (dev_cap->flags & 579 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 580 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 581 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 582 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 583 } 584 585 if (dev_cap->flags2 & 586 (MLX4_DEV_CAP_FLAG2_CQE_STRIDE | 587 MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) { 588 mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n"); 589 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; 590 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; 591 } 592 } 593 594 if ((dev->caps.flags & 595 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 596 mlx4_is_master(dev)) 597 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 598 599 if (!mlx4_is_slave(dev)) { 600 mlx4_enable_cqe_eqe_stride(dev); 601 dev->caps.alloc_res_qp_mask = 602 (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) | 603 MLX4_RESERVE_A0_QP; 604 605 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) && 606 dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { 607 mlx4_warn(dev, "Old device ETS support detected\n"); 608 mlx4_warn(dev, "Consider upgrading device FW.\n"); 609 dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; 610 } 611 612 } else { 613 dev->caps.alloc_res_qp_mask = 0; 614 } 615 616 mlx4_enable_ignore_fcs(dev); 617 618 return 0; 619 } 620 621 static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, 622 enum pci_bus_speed *speed, 623 enum pcie_link_width *width) 624 { 625 u32 lnkcap1, lnkcap2; 626 int err1, err2; 627 628 #define PCIE_MLW_CAP_SHIFT 4 /* start of MLW mask in link capabilities */ 629 630 *speed = PCI_SPEED_UNKNOWN; 631 *width = PCIE_LNK_WIDTH_UNKNOWN; 632 633 err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, 634 &lnkcap1); 635 err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, 636 &lnkcap2); 637 if (!err2 && lnkcap2) { /* PCIe 
r3.0-compliant */ 638 if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) 639 *speed = PCIE_SPEED_8_0GT; 640 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB) 641 *speed = PCIE_SPEED_5_0GT; 642 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB) 643 *speed = PCIE_SPEED_2_5GT; 644 } 645 if (!err1) { 646 *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT; 647 if (!lnkcap2) { /* pre-r3.0 */ 648 if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB) 649 *speed = PCIE_SPEED_5_0GT; 650 else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB) 651 *speed = PCIE_SPEED_2_5GT; 652 } 653 } 654 655 if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) { 656 return err1 ? err1 : 657 err2 ? err2 : -EINVAL; 658 } 659 return 0; 660 } 661 662 static void mlx4_check_pcie_caps(struct mlx4_dev *dev) 663 { 664 enum pcie_link_width width, width_cap; 665 enum pci_bus_speed speed, speed_cap; 666 int err; 667 668 #define PCIE_SPEED_STR(speed) \ 669 (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \ 670 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \ 671 speed == PCIE_SPEED_2_5GT ? 
"2.5GT/s" : \ 672 "Unknown") 673 674 err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap); 675 if (err) { 676 mlx4_warn(dev, 677 "Unable to determine PCIe device BW capabilities\n"); 678 return; 679 } 680 681 err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); 682 if (err || speed == PCI_SPEED_UNKNOWN || 683 width == PCIE_LNK_WIDTH_UNKNOWN) { 684 mlx4_warn(dev, 685 "Unable to determine PCI device chain minimum BW\n"); 686 return; 687 } 688 689 if (width != width_cap || speed != speed_cap) 690 mlx4_warn(dev, 691 "PCIe BW is different than device's capability\n"); 692 693 mlx4_info(dev, "PCIe link speed is %s, device supports %s\n", 694 PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap)); 695 mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n", 696 width, width_cap); 697 return; 698 } 699 700 /*The function checks if there are live vf, return the num of them*/ 701 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 702 { 703 struct mlx4_priv *priv = mlx4_priv(dev); 704 struct mlx4_slave_state *s_state; 705 int i; 706 int ret = 0; 707 708 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 709 s_state = &priv->mfunc.master.slave_state[i]; 710 if (s_state->active && s_state->last_cmd != 711 MLX4_COMM_CMD_RESET) { 712 mlx4_warn(dev, "%s: slave: %d is still active\n", 713 __func__, i); 714 ret++; 715 } 716 } 717 return ret; 718 } 719 720 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 721 { 722 u32 qk = MLX4_RESERVED_QKEY_BASE; 723 724 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 725 qpn < dev->phys_caps.base_proxy_sqpn) 726 return -EINVAL; 727 728 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 729 /* tunnel qp */ 730 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 731 else 732 qk += qpn - dev->phys_caps.base_proxy_sqpn; 733 *qkey = qk; 734 return 0; 735 } 736 EXPORT_SYMBOL(mlx4_get_parav_qkey); 737 738 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 739 { 740 
struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 741 742 if (!mlx4_is_master(dev)) 743 return; 744 745 priv->virt2phys_pkey[slave][port - 1][i] = val; 746 } 747 EXPORT_SYMBOL(mlx4_sync_pkey_table); 748 749 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 750 { 751 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 752 753 if (!mlx4_is_master(dev)) 754 return; 755 756 priv->slave_node_guids[slave] = guid; 757 } 758 EXPORT_SYMBOL(mlx4_put_slave_node_guid); 759 760 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 761 { 762 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 763 764 if (!mlx4_is_master(dev)) 765 return 0; 766 767 return priv->slave_node_guids[slave]; 768 } 769 EXPORT_SYMBOL(mlx4_get_slave_node_guid); 770 771 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 772 { 773 struct mlx4_priv *priv = mlx4_priv(dev); 774 struct mlx4_slave_state *s_slave; 775 776 if (!mlx4_is_master(dev)) 777 return 0; 778 779 s_slave = &priv->mfunc.master.slave_state[slave]; 780 return !!s_slave->active; 781 } 782 EXPORT_SYMBOL(mlx4_is_slave_active); 783 784 static void slave_adjust_steering_mode(struct mlx4_dev *dev, 785 struct mlx4_dev_cap *dev_cap, 786 struct mlx4_init_hca_param *hca_param) 787 { 788 dev->caps.steering_mode = hca_param->steering_mode; 789 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { 790 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 791 dev->caps.fs_log_max_ucast_qp_range_size = 792 dev_cap->fs_log_max_ucast_qp_range_size; 793 } else 794 dev->caps.num_qp_per_mgm = 795 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); 796 797 mlx4_dbg(dev, "Steering mode is: %s\n", 798 mlx4_steering_mode_str(dev->caps.steering_mode)); 799 } 800 801 static int mlx4_slave_cap(struct mlx4_dev *dev) 802 { 803 int err; 804 u32 page_size; 805 struct mlx4_dev_cap dev_cap; 806 struct mlx4_func_cap func_cap; 807 struct mlx4_init_hca_param hca_param; 808 
u8 i; 809 810 memset(&hca_param, 0, sizeof(hca_param)); 811 err = mlx4_QUERY_HCA(dev, &hca_param); 812 if (err) { 813 mlx4_err(dev, "QUERY_HCA command failed, aborting\n"); 814 return err; 815 } 816 817 /* fail if the hca has an unknown global capability 818 * at this time global_caps should be always zeroed 819 */ 820 if (hca_param.global_caps) { 821 mlx4_err(dev, "Unknown hca global capabilities\n"); 822 return -ENOSYS; 823 } 824 825 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 826 827 dev->caps.hca_core_clock = hca_param.hca_core_clock; 828 829 memset(&dev_cap, 0, sizeof(dev_cap)); 830 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 831 err = mlx4_dev_cap(dev, &dev_cap); 832 if (err) { 833 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); 834 return err; 835 } 836 837 err = mlx4_QUERY_FW(dev); 838 if (err) 839 mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n"); 840 841 page_size = ~dev->caps.page_size_cap + 1; 842 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 843 if (page_size > PAGE_SIZE) { 844 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", 845 page_size, PAGE_SIZE); 846 return -ENODEV; 847 } 848 849 /* Set uar_page_shift for VF */ 850 dev->uar_page_shift = hca_param.uar_page_sz + 12; 851 852 /* Make sure the master uar page size is valid */ 853 if (dev->uar_page_shift > PAGE_SHIFT) { 854 mlx4_err(dev, 855 "Invalid configuration: uar page size is larger than system page size\n"); 856 return -ENODEV; 857 } 858 859 /* Set reserved_uars based on the uar_page_shift */ 860 mlx4_set_num_reserved_uars(dev, &dev_cap); 861 862 /* Although uar page size in FW differs from system page size, 863 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) 864 * still works with assumption that uar page size == system page size 865 */ 866 dev->caps.uar_page_size = PAGE_SIZE; 867 868 memset(&func_cap, 0, sizeof(func_cap)); 869 err = mlx4_QUERY_FUNC_CAP(dev, 0, 
&func_cap); 870 if (err) { 871 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n", 872 err); 873 return err; 874 } 875 876 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 877 PF_CONTEXT_BEHAVIOUR_MASK) { 878 mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", 879 func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); 880 return -ENOSYS; 881 } 882 883 dev->caps.num_ports = func_cap.num_ports; 884 dev->quotas.qp = func_cap.qp_quota; 885 dev->quotas.srq = func_cap.srq_quota; 886 dev->quotas.cq = func_cap.cq_quota; 887 dev->quotas.mpt = func_cap.mpt_quota; 888 dev->quotas.mtt = func_cap.mtt_quota; 889 dev->caps.num_qps = 1 << hca_param.log_num_qps; 890 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 891 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 892 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 893 dev->caps.num_eqs = func_cap.max_eq; 894 dev->caps.reserved_eqs = func_cap.reserved_eq; 895 dev->caps.reserved_lkey = func_cap.reserved_lkey; 896 dev->caps.num_pds = MLX4_NUM_PDS; 897 dev->caps.num_mgms = 0; 898 dev->caps.num_amgms = 0; 899 900 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 901 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", 902 dev->caps.num_ports, MLX4_MAX_PORTS); 903 return -ENODEV; 904 } 905 906 mlx4_replace_zero_macs(dev); 907 908 dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); 909 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 910 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 911 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 912 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 913 914 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 915 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || 916 !dev->caps.qp0_qkey) { 917 err = -ENOMEM; 918 goto err_mem; 919 } 920 921 for (i = 1; i <= dev->caps.num_ports; ++i) { 922 err 
= mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); 923 if (err) { 924 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", 925 i, err); 926 goto err_mem; 927 } 928 dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; 929 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 930 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 931 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 932 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 933 dev->caps.port_mask[i] = dev->caps.port_type[i]; 934 dev->caps.phys_port_id[i] = func_cap.phys_port_id; 935 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 936 &dev->caps.gid_table_len[i], 937 &dev->caps.pkey_table_len[i]); 938 if (err) 939 goto err_mem; 940 } 941 942 if (dev->caps.uar_page_size * (dev->caps.num_uars - 943 dev->caps.reserved_uars) > 944 pci_resource_len(dev->persist->pdev, 945 2)) { 946 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", 947 dev->caps.uar_page_size * dev->caps.num_uars, 948 (unsigned long long) 949 pci_resource_len(dev->persist->pdev, 2)); 950 err = -ENOMEM; 951 goto err_mem; 952 } 953 954 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 955 dev->caps.eqe_size = 64; 956 dev->caps.eqe_factor = 1; 957 } else { 958 dev->caps.eqe_size = 32; 959 dev->caps.eqe_factor = 0; 960 } 961 962 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 963 dev->caps.cqe_size = 64; 964 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; 965 } else { 966 dev->caps.cqe_size = 32; 967 } 968 969 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { 970 dev->caps.eqe_size = hca_param.eqe_size; 971 dev->caps.eqe_factor = 0; 972 } 973 974 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { 975 dev->caps.cqe_size = hca_param.cqe_size; 976 /* User still need to know when CQE > 32B */ 977 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; 978 } 979 980 dev->caps.flags2 &= 
~MLX4_DEV_CAP_FLAG2_TS; 981 mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); 982 983 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 984 mlx4_dbg(dev, "RSS support for IP fragments is %s\n", 985 hca_param.rss_ip_frags ? "on" : "off"); 986 987 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && 988 dev->caps.bf_reg_size) 989 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; 990 991 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) 992 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; 993 994 return 0; 995 996 err_mem: 997 kfree(dev->caps.qp0_qkey); 998 kfree(dev->caps.qp0_tunnel); 999 kfree(dev->caps.qp0_proxy); 1000 kfree(dev->caps.qp1_tunnel); 1001 kfree(dev->caps.qp1_proxy); 1002 dev->caps.qp0_qkey = NULL; 1003 dev->caps.qp0_tunnel = NULL; 1004 dev->caps.qp0_proxy = NULL; 1005 dev->caps.qp1_tunnel = NULL; 1006 dev->caps.qp1_proxy = NULL; 1007 1008 return err; 1009 } 1010 1011 static void mlx4_request_modules(struct mlx4_dev *dev) 1012 { 1013 int port; 1014 int has_ib_port = false; 1015 int has_eth_port = false; 1016 #define EN_DRV_NAME "mlx4_en" 1017 #define IB_DRV_NAME "mlx4_ib" 1018 1019 for (port = 1; port <= dev->caps.num_ports; port++) { 1020 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1021 has_ib_port = true; 1022 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1023 has_eth_port = true; 1024 } 1025 1026 if (has_eth_port) 1027 request_module_nowait(EN_DRV_NAME); 1028 if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) 1029 request_module_nowait(IB_DRV_NAME); 1030 } 1031 1032 /* 1033 * Change the port configuration of the device. 1034 * Every user of this function must hold the port mutex. 
1035 */ 1036 int mlx4_change_port_types(struct mlx4_dev *dev, 1037 enum mlx4_port_type *port_types) 1038 { 1039 int err = 0; 1040 int change = 0; 1041 int port; 1042 1043 for (port = 0; port < dev->caps.num_ports; port++) { 1044 /* Change the port type only if the new type is different 1045 * from the current, and not set to Auto */ 1046 if (port_types[port] != dev->caps.port_type[port + 1]) 1047 change = 1; 1048 } 1049 if (change) { 1050 mlx4_unregister_device(dev); 1051 for (port = 1; port <= dev->caps.num_ports; port++) { 1052 mlx4_CLOSE_PORT(dev, port); 1053 dev->caps.port_type[port] = port_types[port - 1]; 1054 err = mlx4_SET_PORT(dev, port, -1); 1055 if (err) { 1056 mlx4_err(dev, "Failed to set port %d, aborting\n", 1057 port); 1058 goto out; 1059 } 1060 } 1061 mlx4_set_port_mask(dev); 1062 err = mlx4_register_device(dev); 1063 if (err) { 1064 mlx4_err(dev, "Failed to register device\n"); 1065 goto out; 1066 } 1067 mlx4_request_modules(dev); 1068 } 1069 1070 out: 1071 return err; 1072 } 1073 1074 static ssize_t show_port_type(struct device *dev, 1075 struct device_attribute *attr, 1076 char *buf) 1077 { 1078 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1079 port_attr); 1080 struct mlx4_dev *mdev = info->dev; 1081 char type[8]; 1082 1083 sprintf(type, "%s", 1084 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 
1085 "ib" : "eth"); 1086 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1087 sprintf(buf, "auto (%s)\n", type); 1088 else 1089 sprintf(buf, "%s\n", type); 1090 1091 return strlen(buf); 1092 } 1093 1094 static int __set_port_type(struct mlx4_port_info *info, 1095 enum mlx4_port_type port_type) 1096 { 1097 struct mlx4_dev *mdev = info->dev; 1098 struct mlx4_priv *priv = mlx4_priv(mdev); 1099 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1100 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1101 int i; 1102 int err = 0; 1103 1104 mlx4_stop_sense(mdev); 1105 mutex_lock(&priv->port_mutex); 1106 info->tmp_type = port_type; 1107 1108 /* Possible type is always the one that was delivered */ 1109 mdev->caps.possible_type[info->port] = info->tmp_type; 1110 1111 for (i = 0; i < mdev->caps.num_ports; i++) { 1112 types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : 1113 mdev->caps.possible_type[i+1]; 1114 if (types[i] == MLX4_PORT_TYPE_AUTO) 1115 types[i] = mdev->caps.port_type[i+1]; 1116 } 1117 1118 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1119 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1120 for (i = 1; i <= mdev->caps.num_ports; i++) { 1121 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1122 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1123 err = -EINVAL; 1124 } 1125 } 1126 } 1127 if (err) { 1128 mlx4_err(mdev, "Auto sensing is not supported on this HCA. 
Set only 'eth' or 'ib' for both ports (should be the same)\n"); 1129 goto out; 1130 } 1131 1132 mlx4_do_sense_ports(mdev, new_types, types); 1133 1134 err = mlx4_check_port_params(mdev, new_types); 1135 if (err) 1136 goto out; 1137 1138 /* We are about to apply the changes after the configuration 1139 * was verified, no need to remember the temporary types 1140 * any more */ 1141 for (i = 0; i < mdev->caps.num_ports; i++) 1142 priv->port[i + 1].tmp_type = 0; 1143 1144 err = mlx4_change_port_types(mdev, new_types); 1145 1146 out: 1147 mlx4_start_sense(mdev); 1148 mutex_unlock(&priv->port_mutex); 1149 1150 return err; 1151 } 1152 1153 static ssize_t set_port_type(struct device *dev, 1154 struct device_attribute *attr, 1155 const char *buf, size_t count) 1156 { 1157 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1158 port_attr); 1159 struct mlx4_dev *mdev = info->dev; 1160 enum mlx4_port_type port_type; 1161 static DEFINE_MUTEX(set_port_type_mutex); 1162 int err; 1163 1164 mutex_lock(&set_port_type_mutex); 1165 1166 if (!strcmp(buf, "ib\n")) { 1167 port_type = MLX4_PORT_TYPE_IB; 1168 } else if (!strcmp(buf, "eth\n")) { 1169 port_type = MLX4_PORT_TYPE_ETH; 1170 } else if (!strcmp(buf, "auto\n")) { 1171 port_type = MLX4_PORT_TYPE_AUTO; 1172 } else { 1173 mlx4_err(mdev, "%s is not supported port type\n", buf); 1174 err = -EINVAL; 1175 goto err_out; 1176 } 1177 1178 err = __set_port_type(info, port_type); 1179 1180 err_out: 1181 mutex_unlock(&set_port_type_mutex); 1182 1183 return err ? 
err : count; 1184 } 1185 1186 enum ibta_mtu { 1187 IB_MTU_256 = 1, 1188 IB_MTU_512 = 2, 1189 IB_MTU_1024 = 3, 1190 IB_MTU_2048 = 4, 1191 IB_MTU_4096 = 5 1192 }; 1193 1194 static inline int int_to_ibta_mtu(int mtu) 1195 { 1196 switch (mtu) { 1197 case 256: return IB_MTU_256; 1198 case 512: return IB_MTU_512; 1199 case 1024: return IB_MTU_1024; 1200 case 2048: return IB_MTU_2048; 1201 case 4096: return IB_MTU_4096; 1202 default: return -1; 1203 } 1204 } 1205 1206 static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1207 { 1208 switch (mtu) { 1209 case IB_MTU_256: return 256; 1210 case IB_MTU_512: return 512; 1211 case IB_MTU_1024: return 1024; 1212 case IB_MTU_2048: return 2048; 1213 case IB_MTU_4096: return 4096; 1214 default: return -1; 1215 } 1216 } 1217 1218 static ssize_t show_port_ib_mtu(struct device *dev, 1219 struct device_attribute *attr, 1220 char *buf) 1221 { 1222 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1223 port_mtu_attr); 1224 struct mlx4_dev *mdev = info->dev; 1225 1226 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1227 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1228 1229 sprintf(buf, "%d\n", 1230 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1231 return strlen(buf); 1232 } 1233 1234 static ssize_t set_port_ib_mtu(struct device *dev, 1235 struct device_attribute *attr, 1236 const char *buf, size_t count) 1237 { 1238 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1239 port_mtu_attr); 1240 struct mlx4_dev *mdev = info->dev; 1241 struct mlx4_priv *priv = mlx4_priv(mdev); 1242 int err, port, mtu, ibta_mtu = -1; 1243 1244 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1245 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1246 return -EINVAL; 1247 } 1248 1249 err = kstrtoint(buf, 0, &mtu); 1250 if (!err) 1251 ibta_mtu = int_to_ibta_mtu(mtu); 1252 1253 if (err || ibta_mtu < 0) { 1254 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 
1255 return -EINVAL; 1256 } 1257 1258 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1259 1260 mlx4_stop_sense(mdev); 1261 mutex_lock(&priv->port_mutex); 1262 mlx4_unregister_device(mdev); 1263 for (port = 1; port <= mdev->caps.num_ports; port++) { 1264 mlx4_CLOSE_PORT(mdev, port); 1265 err = mlx4_SET_PORT(mdev, port, -1); 1266 if (err) { 1267 mlx4_err(mdev, "Failed to set port %d, aborting\n", 1268 port); 1269 goto err_set_port; 1270 } 1271 } 1272 err = mlx4_register_device(mdev); 1273 err_set_port: 1274 mutex_unlock(&priv->port_mutex); 1275 mlx4_start_sense(mdev); 1276 return err ? err : count; 1277 } 1278 1279 /* bond for multi-function device */ 1280 #define MAX_MF_BOND_ALLOWED_SLAVES 63 1281 static int mlx4_mf_bond(struct mlx4_dev *dev) 1282 { 1283 int err = 0; 1284 int nvfs; 1285 struct mlx4_slaves_pport slaves_port1; 1286 struct mlx4_slaves_pport slaves_port2; 1287 DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); 1288 1289 slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); 1290 slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); 1291 bitmap_and(slaves_port_1_2, 1292 slaves_port1.slaves, slaves_port2.slaves, 1293 dev->persist->num_vfs + 1); 1294 1295 /* only single port vfs are allowed */ 1296 if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { 1297 mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); 1298 return -EINVAL; 1299 } 1300 1301 /* number of virtual functions is number of total functions minus one 1302 * physical function for each port. 
1303 */ 1304 nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + 1305 bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2; 1306 1307 /* limit on maximum allowed VFs */ 1308 if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) { 1309 mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n", 1310 nvfs, MAX_MF_BOND_ALLOWED_SLAVES); 1311 return -EINVAL; 1312 } 1313 1314 if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { 1315 mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); 1316 return -EINVAL; 1317 } 1318 1319 err = mlx4_bond_mac_table(dev); 1320 if (err) 1321 return err; 1322 err = mlx4_bond_vlan_table(dev); 1323 if (err) 1324 goto err1; 1325 err = mlx4_bond_fs_rules(dev); 1326 if (err) 1327 goto err2; 1328 1329 return 0; 1330 err2: 1331 (void)mlx4_unbond_vlan_table(dev); 1332 err1: 1333 (void)mlx4_unbond_mac_table(dev); 1334 return err; 1335 } 1336 1337 static int mlx4_mf_unbond(struct mlx4_dev *dev) 1338 { 1339 int ret, ret1; 1340 1341 ret = mlx4_unbond_fs_rules(dev); 1342 if (ret) 1343 mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); 1344 ret1 = mlx4_unbond_mac_table(dev); 1345 if (ret1) { 1346 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); 1347 ret = ret1; 1348 } 1349 ret1 = mlx4_unbond_vlan_table(dev); 1350 if (ret1) { 1351 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1); 1352 ret = ret1; 1353 } 1354 return ret; 1355 } 1356 1357 int mlx4_bond(struct mlx4_dev *dev) 1358 { 1359 int ret = 0; 1360 struct mlx4_priv *priv = mlx4_priv(dev); 1361 1362 mutex_lock(&priv->bond_mutex); 1363 1364 if (!mlx4_is_bonded(dev)) { 1365 ret = mlx4_do_bond(dev, true); 1366 if (ret) 1367 mlx4_err(dev, "Failed to bond device: %d\n", ret); 1368 if (!ret && mlx4_is_master(dev)) { 1369 ret = mlx4_mf_bond(dev); 1370 if (ret) { 1371 mlx4_err(dev, "bond for multifunction failed\n"); 1372 mlx4_do_bond(dev, false); 1373 } 1374 } 1375 } 1376 1377 
mutex_unlock(&priv->bond_mutex); 1378 if (!ret) 1379 mlx4_dbg(dev, "Device is bonded\n"); 1380 1381 return ret; 1382 } 1383 EXPORT_SYMBOL_GPL(mlx4_bond); 1384 1385 int mlx4_unbond(struct mlx4_dev *dev) 1386 { 1387 int ret = 0; 1388 struct mlx4_priv *priv = mlx4_priv(dev); 1389 1390 mutex_lock(&priv->bond_mutex); 1391 1392 if (mlx4_is_bonded(dev)) { 1393 int ret2 = 0; 1394 1395 ret = mlx4_do_bond(dev, false); 1396 if (ret) 1397 mlx4_err(dev, "Failed to unbond device: %d\n", ret); 1398 if (mlx4_is_master(dev)) 1399 ret2 = mlx4_mf_unbond(dev); 1400 if (ret2) { 1401 mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2); 1402 ret = ret2; 1403 } 1404 } 1405 1406 mutex_unlock(&priv->bond_mutex); 1407 if (!ret) 1408 mlx4_dbg(dev, "Device is unbonded\n"); 1409 1410 return ret; 1411 } 1412 EXPORT_SYMBOL_GPL(mlx4_unbond); 1413 1414 1415 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) 1416 { 1417 u8 port1 = v2p->port1; 1418 u8 port2 = v2p->port2; 1419 struct mlx4_priv *priv = mlx4_priv(dev); 1420 int err; 1421 1422 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) 1423 return -ENOTSUPP; 1424 1425 mutex_lock(&priv->bond_mutex); 1426 1427 /* zero means keep current mapping for this port */ 1428 if (port1 == 0) 1429 port1 = priv->v2p.port1; 1430 if (port2 == 0) 1431 port2 = priv->v2p.port2; 1432 1433 if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) || 1434 (port2 < 1) || (port2 > MLX4_MAX_PORTS) || 1435 (port1 == 2 && port2 == 1)) { 1436 /* besides boundary checks cross mapping makes 1437 * no sense and therefore not allowed */ 1438 err = -EINVAL; 1439 } else if ((port1 == priv->v2p.port1) && 1440 (port2 == priv->v2p.port2)) { 1441 err = 0; 1442 } else { 1443 err = mlx4_virt2phy_port_map(dev, port1, port2); 1444 if (!err) { 1445 mlx4_dbg(dev, "port map changed: [%d][%d]\n", 1446 port1, port2); 1447 priv->v2p.port1 = port1; 1448 priv->v2p.port2 = port2; 1449 } else { 1450 mlx4_err(dev, "Failed to change port mape: %d\n", err); 1451 } 
1452 } 1453 1454 mutex_unlock(&priv->bond_mutex); 1455 return err; 1456 } 1457 EXPORT_SYMBOL_GPL(mlx4_port_map_set); 1458 1459 static int mlx4_load_fw(struct mlx4_dev *dev) 1460 { 1461 struct mlx4_priv *priv = mlx4_priv(dev); 1462 int err; 1463 1464 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1465 GFP_HIGHUSER | __GFP_NOWARN, 0); 1466 if (!priv->fw.fw_icm) { 1467 mlx4_err(dev, "Couldn't allocate FW area, aborting\n"); 1468 return -ENOMEM; 1469 } 1470 1471 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1472 if (err) { 1473 mlx4_err(dev, "MAP_FA command failed, aborting\n"); 1474 goto err_free; 1475 } 1476 1477 err = mlx4_RUN_FW(dev); 1478 if (err) { 1479 mlx4_err(dev, "RUN_FW command failed, aborting\n"); 1480 goto err_unmap_fa; 1481 } 1482 1483 return 0; 1484 1485 err_unmap_fa: 1486 mlx4_UNMAP_FA(dev); 1487 1488 err_free: 1489 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1490 return err; 1491 } 1492 1493 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1494 int cmpt_entry_sz) 1495 { 1496 struct mlx4_priv *priv = mlx4_priv(dev); 1497 int err; 1498 int num_eqs; 1499 1500 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1501 cmpt_base + 1502 ((u64) (MLX4_CMPT_TYPE_QP * 1503 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1504 cmpt_entry_sz, dev->caps.num_qps, 1505 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1506 0, 0); 1507 if (err) 1508 goto err; 1509 1510 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1511 cmpt_base + 1512 ((u64) (MLX4_CMPT_TYPE_SRQ * 1513 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1514 cmpt_entry_sz, dev->caps.num_srqs, 1515 dev->caps.reserved_srqs, 0, 0); 1516 if (err) 1517 goto err_qp; 1518 1519 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1520 cmpt_base + 1521 ((u64) (MLX4_CMPT_TYPE_CQ * 1522 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1523 cmpt_entry_sz, dev->caps.num_cqs, 1524 dev->caps.reserved_cqs, 0, 0); 1525 if (err) 1526 goto err_srq; 1527 1528 num_eqs = dev->phys_caps.num_phys_eqs; 1529 err = 
mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1530 cmpt_base + 1531 ((u64) (MLX4_CMPT_TYPE_EQ * 1532 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1533 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1534 if (err) 1535 goto err_cq; 1536 1537 return 0; 1538 1539 err_cq: 1540 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1541 1542 err_srq: 1543 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1544 1545 err_qp: 1546 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1547 1548 err: 1549 return err; 1550 } 1551 1552 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1553 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1554 { 1555 struct mlx4_priv *priv = mlx4_priv(dev); 1556 u64 aux_pages; 1557 int num_eqs; 1558 int err; 1559 1560 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1561 if (err) { 1562 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n"); 1563 return err; 1564 } 1565 1566 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n", 1567 (unsigned long long) icm_size >> 10, 1568 (unsigned long long) aux_pages << 2); 1569 1570 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1571 GFP_HIGHUSER | __GFP_NOWARN, 0); 1572 if (!priv->fw.aux_icm) { 1573 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n"); 1574 return -ENOMEM; 1575 } 1576 1577 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1578 if (err) { 1579 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n"); 1580 goto err_free_aux; 1581 } 1582 1583 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1584 if (err) { 1585 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n"); 1586 goto err_unmap_aux; 1587 } 1588 1589 1590 num_eqs = dev->phys_caps.num_phys_eqs; 1591 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1592 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1593 num_eqs, num_eqs, 0, 0); 1594 if (err) { 1595 mlx4_err(dev, "Failed to map EQ context memory, aborting\n"); 1596 goto err_unmap_cmpt; 1597 } 
1598 1599 /* 1600 * Reserved MTT entries must be aligned up to a cacheline 1601 * boundary, since the FW will write to them, while the driver 1602 * writes to all other MTT entries. (The variable 1603 * dev->caps.mtt_entry_sz below is really the MTT segment 1604 * size, not the raw entry size) 1605 */ 1606 dev->caps.reserved_mtts = 1607 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1608 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1609 1610 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1611 init_hca->mtt_base, 1612 dev->caps.mtt_entry_sz, 1613 dev->caps.num_mtts, 1614 dev->caps.reserved_mtts, 1, 0); 1615 if (err) { 1616 mlx4_err(dev, "Failed to map MTT context memory, aborting\n"); 1617 goto err_unmap_eq; 1618 } 1619 1620 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1621 init_hca->dmpt_base, 1622 dev_cap->dmpt_entry_sz, 1623 dev->caps.num_mpts, 1624 dev->caps.reserved_mrws, 1, 1); 1625 if (err) { 1626 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n"); 1627 goto err_unmap_mtt; 1628 } 1629 1630 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1631 init_hca->qpc_base, 1632 dev_cap->qpc_entry_sz, 1633 dev->caps.num_qps, 1634 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1635 0, 0); 1636 if (err) { 1637 mlx4_err(dev, "Failed to map QP context memory, aborting\n"); 1638 goto err_unmap_dmpt; 1639 } 1640 1641 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1642 init_hca->auxc_base, 1643 dev_cap->aux_entry_sz, 1644 dev->caps.num_qps, 1645 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1646 0, 0); 1647 if (err) { 1648 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n"); 1649 goto err_unmap_qp; 1650 } 1651 1652 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1653 init_hca->altc_base, 1654 dev_cap->altc_entry_sz, 1655 dev->caps.num_qps, 1656 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1657 0, 0); 1658 if (err) { 1659 mlx4_err(dev, "Failed to map ALTC context memory, 
aborting\n"); 1660 goto err_unmap_auxc; 1661 } 1662 1663 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1664 init_hca->rdmarc_base, 1665 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1666 dev->caps.num_qps, 1667 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1668 0, 0); 1669 if (err) { 1670 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1671 goto err_unmap_altc; 1672 } 1673 1674 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1675 init_hca->cqc_base, 1676 dev_cap->cqc_entry_sz, 1677 dev->caps.num_cqs, 1678 dev->caps.reserved_cqs, 0, 0); 1679 if (err) { 1680 mlx4_err(dev, "Failed to map CQ context memory, aborting\n"); 1681 goto err_unmap_rdmarc; 1682 } 1683 1684 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1685 init_hca->srqc_base, 1686 dev_cap->srq_entry_sz, 1687 dev->caps.num_srqs, 1688 dev->caps.reserved_srqs, 0, 0); 1689 if (err) { 1690 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n"); 1691 goto err_unmap_cq; 1692 } 1693 1694 /* 1695 * For flow steering device managed mode it is required to use 1696 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1697 * required, but for simplicity just map the whole multicast 1698 * group table now. The table isn't very big and it's a lot 1699 * easier than trying to track ref counts. 
1700 */ 1701 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1702 init_hca->mc_base, 1703 mlx4_get_mgm_entry_size(dev), 1704 dev->caps.num_mgms + dev->caps.num_amgms, 1705 dev->caps.num_mgms + dev->caps.num_amgms, 1706 0, 0); 1707 if (err) { 1708 mlx4_err(dev, "Failed to map MCG context memory, aborting\n"); 1709 goto err_unmap_srq; 1710 } 1711 1712 return 0; 1713 1714 err_unmap_srq: 1715 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1716 1717 err_unmap_cq: 1718 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1719 1720 err_unmap_rdmarc: 1721 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1722 1723 err_unmap_altc: 1724 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1725 1726 err_unmap_auxc: 1727 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1728 1729 err_unmap_qp: 1730 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1731 1732 err_unmap_dmpt: 1733 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1734 1735 err_unmap_mtt: 1736 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1737 1738 err_unmap_eq: 1739 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1740 1741 err_unmap_cmpt: 1742 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1743 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1744 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1745 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1746 1747 err_unmap_aux: 1748 mlx4_UNMAP_ICM_AUX(dev); 1749 1750 err_free_aux: 1751 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1752 1753 return err; 1754 } 1755 1756 static void mlx4_free_icms(struct mlx4_dev *dev) 1757 { 1758 struct mlx4_priv *priv = mlx4_priv(dev); 1759 1760 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1761 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1762 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1763 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1764 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1765 
mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1766 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1767 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1768 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1769 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1770 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1771 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1772 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1773 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1774 1775 mlx4_UNMAP_ICM_AUX(dev); 1776 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1777 } 1778 1779 static void mlx4_slave_exit(struct mlx4_dev *dev) 1780 { 1781 struct mlx4_priv *priv = mlx4_priv(dev); 1782 1783 mutex_lock(&priv->cmd.slave_cmd_mutex); 1784 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 1785 MLX4_COMM_TIME)) 1786 mlx4_warn(dev, "Failed to close slave function\n"); 1787 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1788 } 1789 1790 static int map_bf_area(struct mlx4_dev *dev) 1791 { 1792 struct mlx4_priv *priv = mlx4_priv(dev); 1793 resource_size_t bf_start; 1794 resource_size_t bf_len; 1795 int err = 0; 1796 1797 if (!dev->caps.bf_reg_size) 1798 return -ENXIO; 1799 1800 bf_start = pci_resource_start(dev->persist->pdev, 2) + 1801 (dev->caps.num_uars << PAGE_SHIFT); 1802 bf_len = pci_resource_len(dev->persist->pdev, 2) - 1803 (dev->caps.num_uars << PAGE_SHIFT); 1804 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1805 if (!priv->bf_mapping) 1806 err = -ENOMEM; 1807 1808 return err; 1809 } 1810 1811 static void unmap_bf_area(struct mlx4_dev *dev) 1812 { 1813 if (mlx4_priv(dev)->bf_mapping) 1814 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1815 } 1816 1817 cycle_t mlx4_read_clock(struct mlx4_dev *dev) 1818 { 1819 u32 clockhi, clocklo, clockhi1; 1820 cycle_t cycles; 1821 int i; 1822 struct mlx4_priv *priv = mlx4_priv(dev); 1823 1824 for (i = 0; i < 10; i++) { 1825 clockhi = 
swab32(readl(priv->clock_mapping)); 1826 clocklo = swab32(readl(priv->clock_mapping + 4)); 1827 clockhi1 = swab32(readl(priv->clock_mapping)); 1828 if (clockhi == clockhi1) 1829 break; 1830 } 1831 1832 cycles = (u64) clockhi << 32 | (u64) clocklo; 1833 1834 return cycles; 1835 } 1836 EXPORT_SYMBOL_GPL(mlx4_read_clock); 1837 1838 1839 static int map_internal_clock(struct mlx4_dev *dev) 1840 { 1841 struct mlx4_priv *priv = mlx4_priv(dev); 1842 1843 priv->clock_mapping = 1844 ioremap(pci_resource_start(dev->persist->pdev, 1845 priv->fw.clock_bar) + 1846 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1847 1848 if (!priv->clock_mapping) 1849 return -ENOMEM; 1850 1851 return 0; 1852 } 1853 1854 int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1855 struct mlx4_clock_params *params) 1856 { 1857 struct mlx4_priv *priv = mlx4_priv(dev); 1858 1859 if (mlx4_is_slave(dev)) 1860 return -ENOTSUPP; 1861 1862 if (!params) 1863 return -EINVAL; 1864 1865 params->bar = priv->fw.clock_bar; 1866 params->offset = priv->fw.clock_offset; 1867 params->size = MLX4_CLOCK_SIZE; 1868 1869 return 0; 1870 } 1871 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1872 1873 static void unmap_internal_clock(struct mlx4_dev *dev) 1874 { 1875 struct mlx4_priv *priv = mlx4_priv(dev); 1876 1877 if (priv->clock_mapping) 1878 iounmap(priv->clock_mapping); 1879 } 1880 1881 static void mlx4_close_hca(struct mlx4_dev *dev) 1882 { 1883 unmap_internal_clock(dev); 1884 unmap_bf_area(dev); 1885 if (mlx4_is_slave(dev)) 1886 mlx4_slave_exit(dev); 1887 else { 1888 mlx4_CLOSE_HCA(dev, 0); 1889 mlx4_free_icms(dev); 1890 } 1891 } 1892 1893 static void mlx4_close_fw(struct mlx4_dev *dev) 1894 { 1895 if (!mlx4_is_slave(dev)) { 1896 mlx4_UNMAP_FA(dev); 1897 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1898 } 1899 } 1900 1901 static int mlx4_comm_check_offline(struct mlx4_dev *dev) 1902 { 1903 #define COMM_CHAN_OFFLINE_OFFSET 0x09 1904 1905 u32 comm_flags; 1906 u32 offline_bit; 1907 unsigned long end; 1908 struct 
mlx4_priv *priv = mlx4_priv(dev); 1909 1910 end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; 1911 while (time_before(jiffies, end)) { 1912 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + 1913 MLX4_COMM_CHAN_FLAGS)); 1914 offline_bit = (comm_flags & 1915 (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); 1916 if (!offline_bit) 1917 return 0; 1918 /* There are cases as part of AER/Reset flow that PF needs 1919 * around 100 msec to load. We therefore sleep for 100 msec 1920 * to allow other tasks to make use of that CPU during this 1921 * time interval. 1922 */ 1923 msleep(100); 1924 } 1925 mlx4_err(dev, "Communication channel is offline.\n"); 1926 return -EIO; 1927 } 1928 1929 static void mlx4_reset_vf_support(struct mlx4_dev *dev) 1930 { 1931 #define COMM_CHAN_RST_OFFSET 0x1e 1932 1933 struct mlx4_priv *priv = mlx4_priv(dev); 1934 u32 comm_rst; 1935 u32 comm_caps; 1936 1937 comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + 1938 MLX4_COMM_CHAN_CAPS)); 1939 comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); 1940 1941 if (comm_rst) 1942 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; 1943 } 1944 1945 static int mlx4_init_slave(struct mlx4_dev *dev) 1946 { 1947 struct mlx4_priv *priv = mlx4_priv(dev); 1948 u64 dma = (u64) priv->mfunc.vhcr_dma; 1949 int ret_from_reset = 0; 1950 u32 slave_read; 1951 u32 cmd_channel_ver; 1952 1953 if (atomic_read(&pf_loading)) { 1954 mlx4_warn(dev, "PF is not ready - Deferring probe\n"); 1955 return -EPROBE_DEFER; 1956 } 1957 1958 mutex_lock(&priv->cmd.slave_cmd_mutex); 1959 priv->cmd.max_cmds = 1; 1960 if (mlx4_comm_check_offline(dev)) { 1961 mlx4_err(dev, "PF is not responsive, skipping initialization\n"); 1962 goto err_offline; 1963 } 1964 1965 mlx4_reset_vf_support(dev); 1966 mlx4_warn(dev, "Sending reset\n"); 1967 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1968 MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); 1969 /* if we are in the middle of flr the slave will try 1970 * NUM_OF_RESET_RETRIES times 
before leaving.*/ 1971 if (ret_from_reset) { 1972 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1973 mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n"); 1974 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1975 return -EPROBE_DEFER; 1976 } else 1977 goto err; 1978 } 1979 1980 /* check the driver version - the slave I/F revision 1981 * must match the master's */ 1982 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1983 cmd_channel_ver = mlx4_comm_get_version(); 1984 1985 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1986 MLX4_COMM_GET_IF_REV(slave_read)) { 1987 mlx4_err(dev, "slave driver version is not supported by the master\n"); 1988 goto err; 1989 } 1990 1991 mlx4_warn(dev, "Sending vhcr0\n"); 1992 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1993 MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) 1994 goto err; 1995 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1996 MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) 1997 goto err; 1998 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1999 MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) 2000 goto err; 2001 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, 2002 MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) 2003 goto err; 2004 2005 mutex_unlock(&priv->cmd.slave_cmd_mutex); 2006 return 0; 2007 2008 err: 2009 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); 2010 err_offline: 2011 mutex_unlock(&priv->cmd.slave_cmd_mutex); 2012 return -EIO; 2013 } 2014 2015 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 2016 { 2017 int i; 2018 2019 for (i = 1; i <= dev->caps.num_ports; i++) { 2020 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 2021 dev->caps.gid_table_len[i] = 2022 mlx4_get_slave_num_gids(dev, 0, i); 2023 else 2024 dev->caps.gid_table_len[i] = 1; 2025 dev->caps.pkey_table_len[i] = 2026 dev->phys_caps.pkey_phys_table_len[i] - 1; 2027 } 2028 } 2029 2030 static int choose_log_fs_mgm_entry_size(int qp_per_entry) 2031 { 2032 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 2033 2034 
for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 2035 i++) { 2036 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 2037 break; 2038 } 2039 2040 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 2041 } 2042 2043 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode) 2044 { 2045 switch (dmfs_high_steer_mode) { 2046 case MLX4_STEERING_DMFS_A0_DEFAULT: 2047 return "default performance"; 2048 2049 case MLX4_STEERING_DMFS_A0_DYNAMIC: 2050 return "dynamic hybrid mode"; 2051 2052 case MLX4_STEERING_DMFS_A0_STATIC: 2053 return "performance optimized for limited rule configuration (static)"; 2054 2055 case MLX4_STEERING_DMFS_A0_DISABLE: 2056 return "disabled performance optimized steering"; 2057 2058 case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED: 2059 return "performance optimized steering not supported"; 2060 2061 default: 2062 return "Unrecognized mode"; 2063 } 2064 } 2065 2066 #define MLX4_DMFS_A0_STEERING (1UL << 2) 2067 2068 static void choose_steering_mode(struct mlx4_dev *dev, 2069 struct mlx4_dev_cap *dev_cap) 2070 { 2071 if (mlx4_log_num_mgm_entry_size <= 0) { 2072 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) { 2073 if (dev->caps.dmfs_high_steer_mode == 2074 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) 2075 mlx4_err(dev, "DMFS high rate mode not supported\n"); 2076 else 2077 dev->caps.dmfs_high_steer_mode = 2078 MLX4_STEERING_DMFS_A0_STATIC; 2079 } 2080 } 2081 2082 if (mlx4_log_num_mgm_entry_size <= 0 && 2083 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 2084 (!mlx4_is_mfunc(dev) || 2085 (dev_cap->fs_max_num_qp_per_entry >= 2086 (dev->persist->num_vfs + 1))) && 2087 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 2088 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 2089 dev->oper_log_mgm_entry_size = 2090 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 2091 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 2092 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 2093 
dev->caps.fs_log_max_ucast_qp_range_size = 2094 dev_cap->fs_log_max_ucast_qp_range_size; 2095 } else { 2096 if (dev->caps.dmfs_high_steer_mode != 2097 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) 2098 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE; 2099 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 2100 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 2101 dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 2102 else { 2103 dev->caps.steering_mode = MLX4_STEERING_MODE_A0; 2104 2105 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || 2106 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 2107 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n"); 2108 } 2109 dev->oper_log_mgm_entry_size = 2110 mlx4_log_num_mgm_entry_size > 0 ? 2111 mlx4_log_num_mgm_entry_size : 2112 MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 2113 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); 2114 } 2115 mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n", 2116 mlx4_steering_mode_str(dev->caps.steering_mode), 2117 dev->oper_log_mgm_entry_size, 2118 mlx4_log_num_mgm_entry_size); 2119 } 2120 2121 static void choose_tunnel_offload_mode(struct mlx4_dev *dev, 2122 struct mlx4_dev_cap *dev_cap) 2123 { 2124 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && 2125 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) 2126 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN; 2127 else 2128 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE; 2129 2130 mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode 2131 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? 
"vxlan" : "none"); 2132 } 2133 2134 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) 2135 { 2136 int i; 2137 struct mlx4_port_cap port_cap; 2138 2139 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) 2140 return -EINVAL; 2141 2142 for (i = 1; i <= dev->caps.num_ports; i++) { 2143 if (mlx4_dev_port(dev, i, &port_cap)) { 2144 mlx4_err(dev, 2145 "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n"); 2146 } else if ((dev->caps.dmfs_high_steer_mode != 2147 MLX4_STEERING_DMFS_A0_DEFAULT) && 2148 (port_cap.dmfs_optimized_state == 2149 !!(dev->caps.dmfs_high_steer_mode == 2150 MLX4_STEERING_DMFS_A0_DISABLE))) { 2151 mlx4_err(dev, 2152 "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n", 2153 dmfs_high_rate_steering_mode_str( 2154 dev->caps.dmfs_high_steer_mode), 2155 (port_cap.dmfs_optimized_state ? 2156 "enabled" : "disabled")); 2157 } 2158 } 2159 2160 return 0; 2161 } 2162 2163 static int mlx4_init_fw(struct mlx4_dev *dev) 2164 { 2165 struct mlx4_mod_stat_cfg mlx4_cfg; 2166 int err = 0; 2167 2168 if (!mlx4_is_slave(dev)) { 2169 err = mlx4_QUERY_FW(dev); 2170 if (err) { 2171 if (err == -EACCES) 2172 mlx4_info(dev, "non-primary physical function, skipping\n"); 2173 else 2174 mlx4_err(dev, "QUERY_FW command failed, aborting\n"); 2175 return err; 2176 } 2177 2178 err = mlx4_load_fw(dev); 2179 if (err) { 2180 mlx4_err(dev, "Failed to start FW, aborting\n"); 2181 return err; 2182 } 2183 2184 mlx4_cfg.log_pg_sz_m = 1; 2185 mlx4_cfg.log_pg_sz = 0; 2186 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); 2187 if (err) 2188 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); 2189 } 2190 2191 return err; 2192 } 2193 2194 static int mlx4_init_hca(struct mlx4_dev *dev) 2195 { 2196 struct mlx4_priv *priv = mlx4_priv(dev); 2197 struct mlx4_adapter adapter; 2198 struct mlx4_dev_cap dev_cap; 2199 struct mlx4_profile profile; 2200 struct mlx4_init_hca_param init_hca; 2201 u64 icm_size; 2202 struct 
mlx4_config_dev_params params; 2203 int err; 2204 2205 if (!mlx4_is_slave(dev)) { 2206 err = mlx4_dev_cap(dev, &dev_cap); 2207 if (err) { 2208 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); 2209 return err; 2210 } 2211 2212 choose_steering_mode(dev, &dev_cap); 2213 choose_tunnel_offload_mode(dev, &dev_cap); 2214 2215 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC && 2216 mlx4_is_master(dev)) 2217 dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC; 2218 2219 err = mlx4_get_phys_port_id(dev); 2220 if (err) 2221 mlx4_err(dev, "Fail to get physical port id\n"); 2222 2223 if (mlx4_is_master(dev)) 2224 mlx4_parav_master_pf_caps(dev); 2225 2226 if (mlx4_low_memory_profile()) { 2227 mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n"); 2228 profile = low_mem_profile; 2229 } else { 2230 profile = default_profile; 2231 } 2232 if (dev->caps.steering_mode == 2233 MLX4_STEERING_MODE_DEVICE_MANAGED) 2234 profile.num_mcg = MLX4_FS_NUM_MCG; 2235 2236 icm_size = mlx4_make_profile(dev, &profile, &dev_cap, 2237 &init_hca); 2238 if ((long long) icm_size < 0) { 2239 err = icm_size; 2240 return err; 2241 } 2242 2243 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; 2244 2245 if (enable_4k_uar) { 2246 init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + 2247 PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; 2248 init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; 2249 } else { 2250 init_hca.log_uar_sz = ilog2(dev->caps.num_uars); 2251 init_hca.uar_page_sz = PAGE_SHIFT - 12; 2252 } 2253 2254 init_hca.mw_enabled = 0; 2255 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || 2256 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) 2257 init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE; 2258 2259 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); 2260 if (err) 2261 return err; 2262 2263 err = mlx4_INIT_HCA(dev, &init_hca); 2264 if (err) { 2265 mlx4_err(dev, "INIT_HCA command failed, aborting\n"); 2266 goto err_free_icm; 2267 
} 2268 2269 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { 2270 err = mlx4_query_func(dev, &dev_cap); 2271 if (err < 0) { 2272 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); 2273 goto err_close; 2274 } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { 2275 dev->caps.num_eqs = dev_cap.max_eqs; 2276 dev->caps.reserved_eqs = dev_cap.reserved_eqs; 2277 dev->caps.reserved_uars = dev_cap.reserved_uars; 2278 } 2279 } 2280 2281 /* 2282 * If TS is supported by FW 2283 * read HCA frequency by QUERY_HCA command 2284 */ 2285 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { 2286 memset(&init_hca, 0, sizeof(init_hca)); 2287 err = mlx4_QUERY_HCA(dev, &init_hca); 2288 if (err) { 2289 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n"); 2290 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2291 } else { 2292 dev->caps.hca_core_clock = 2293 init_hca.hca_core_clock; 2294 } 2295 2296 /* In case we got HCA frequency 0 - disable timestamping 2297 * to avoid dividing by zero 2298 */ 2299 if (!dev->caps.hca_core_clock) { 2300 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2301 mlx4_err(dev, 2302 "HCA frequency is 0 - timestamping is not supported\n"); 2303 } else if (map_internal_clock(dev)) { 2304 /* 2305 * Map internal clock, 2306 * in case of failure disable timestamping 2307 */ 2308 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2309 mlx4_err(dev, "Failed to map internal clock. 
Timestamping is not supported\n"); 2310 } 2311 } 2312 2313 if (dev->caps.dmfs_high_steer_mode != 2314 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) { 2315 if (mlx4_validate_optimized_steering(dev)) 2316 mlx4_warn(dev, "Optimized steering validation failed\n"); 2317 2318 if (dev->caps.dmfs_high_steer_mode == 2319 MLX4_STEERING_DMFS_A0_DISABLE) { 2320 dev->caps.dmfs_high_rate_qpn_base = 2321 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; 2322 dev->caps.dmfs_high_rate_qpn_range = 2323 MLX4_A0_STEERING_TABLE_SIZE; 2324 } 2325 2326 mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n", 2327 dmfs_high_rate_steering_mode_str( 2328 dev->caps.dmfs_high_steer_mode)); 2329 } 2330 } else { 2331 err = mlx4_init_slave(dev); 2332 if (err) { 2333 if (err != -EPROBE_DEFER) 2334 mlx4_err(dev, "Failed to initialize slave\n"); 2335 return err; 2336 } 2337 2338 err = mlx4_slave_cap(dev); 2339 if (err) { 2340 mlx4_err(dev, "Failed to obtain slave caps\n"); 2341 goto err_close; 2342 } 2343 } 2344 2345 if (map_bf_area(dev)) 2346 mlx4_dbg(dev, "Failed to map blue flame area\n"); 2347 2348 /*Only the master set the ports, all the rest got it from it.*/ 2349 if (!mlx4_is_slave(dev)) 2350 mlx4_set_port_mask(dev); 2351 2352 err = mlx4_QUERY_ADAPTER(dev, &adapter); 2353 if (err) { 2354 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n"); 2355 goto unmap_bf; 2356 } 2357 2358 /* Query CONFIG_DEV parameters */ 2359 err = mlx4_config_dev_retrieval(dev, ¶ms); 2360 if (err && err != -ENOTSUPP) { 2361 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); 2362 } else if (!err) { 2363 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; 2364 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; 2365 } 2366 priv->eq_table.inta_pin = adapter.inta_pin; 2367 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); 2368 2369 return 0; 2370 2371 unmap_bf: 2372 unmap_internal_clock(dev); 2373 unmap_bf_area(dev); 2374 2375 if (mlx4_is_slave(dev)) { 2376 kfree(dev->caps.qp0_qkey); 
2377 kfree(dev->caps.qp0_tunnel); 2378 kfree(dev->caps.qp0_proxy); 2379 kfree(dev->caps.qp1_tunnel); 2380 kfree(dev->caps.qp1_proxy); 2381 } 2382 2383 err_close: 2384 if (mlx4_is_slave(dev)) 2385 mlx4_slave_exit(dev); 2386 else 2387 mlx4_CLOSE_HCA(dev, 0); 2388 2389 err_free_icm: 2390 if (!mlx4_is_slave(dev)) 2391 mlx4_free_icms(dev); 2392 2393 return err; 2394 } 2395 2396 static int mlx4_init_counters_table(struct mlx4_dev *dev) 2397 { 2398 struct mlx4_priv *priv = mlx4_priv(dev); 2399 int nent_pow2; 2400 2401 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2402 return -ENOENT; 2403 2404 if (!dev->caps.max_counters) 2405 return -ENOSPC; 2406 2407 nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); 2408 /* reserve last counter index for sink counter */ 2409 return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, 2410 nent_pow2 - 1, 0, 2411 nent_pow2 - dev->caps.max_counters + 1); 2412 } 2413 2414 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) 2415 { 2416 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2417 return; 2418 2419 if (!dev->caps.max_counters) 2420 return; 2421 2422 mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); 2423 } 2424 2425 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev) 2426 { 2427 struct mlx4_priv *priv = mlx4_priv(dev); 2428 int port; 2429 2430 for (port = 0; port < dev->caps.num_ports; port++) 2431 if (priv->def_counter[port] != -1) 2432 mlx4_counter_free(dev, priv->def_counter[port]); 2433 } 2434 2435 static int mlx4_allocate_default_counters(struct mlx4_dev *dev) 2436 { 2437 struct mlx4_priv *priv = mlx4_priv(dev); 2438 int port, err = 0; 2439 u32 idx; 2440 2441 for (port = 0; port < dev->caps.num_ports; port++) 2442 priv->def_counter[port] = -1; 2443 2444 for (port = 0; port < dev->caps.num_ports; port++) { 2445 err = mlx4_counter_alloc(dev, &idx); 2446 2447 if (!err || err == -ENOSPC) { 2448 priv->def_counter[port] = idx; 2449 } else if (err == -ENOENT) { 2450 err = 0; 2451 continue; 2452 
} else if (mlx4_is_slave(dev) && err == -EINVAL) { 2453 priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev); 2454 mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n", 2455 MLX4_SINK_COUNTER_INDEX(dev)); 2456 err = 0; 2457 } else { 2458 mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", 2459 __func__, port + 1, err); 2460 mlx4_cleanup_default_counters(dev); 2461 return err; 2462 } 2463 2464 mlx4_dbg(dev, "%s: default counter index %d for port %d\n", 2465 __func__, priv->def_counter[port], port + 1); 2466 } 2467 2468 return err; 2469 } 2470 2471 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) 2472 { 2473 struct mlx4_priv *priv = mlx4_priv(dev); 2474 2475 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2476 return -ENOENT; 2477 2478 *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); 2479 if (*idx == -1) { 2480 *idx = MLX4_SINK_COUNTER_INDEX(dev); 2481 return -ENOSPC; 2482 } 2483 2484 return 0; 2485 } 2486 2487 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) 2488 { 2489 u64 out_param; 2490 int err; 2491 2492 if (mlx4_is_mfunc(dev)) { 2493 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, 2494 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 2495 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 2496 if (!err) 2497 *idx = get_param_l(&out_param); 2498 2499 return err; 2500 } 2501 return __mlx4_counter_alloc(dev, idx); 2502 } 2503 EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 2504 2505 static int __mlx4_clear_if_stat(struct mlx4_dev *dev, 2506 u8 counter_index) 2507 { 2508 struct mlx4_cmd_mailbox *if_stat_mailbox; 2509 int err; 2510 u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET; 2511 2512 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2513 if (IS_ERR(if_stat_mailbox)) 2514 return PTR_ERR(if_stat_mailbox); 2515 2516 err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, 2517 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, 2518 MLX4_CMD_NATIVE); 2519 2520 mlx4_free_cmd_mailbox(dev, 
if_stat_mailbox); 2521 return err; 2522 } 2523 2524 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) 2525 { 2526 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2527 return; 2528 2529 if (idx == MLX4_SINK_COUNTER_INDEX(dev)) 2530 return; 2531 2532 __mlx4_clear_if_stat(dev, idx); 2533 2534 mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); 2535 return; 2536 } 2537 2538 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) 2539 { 2540 u64 in_param = 0; 2541 2542 if (mlx4_is_mfunc(dev)) { 2543 set_param_l(&in_param, idx); 2544 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE, 2545 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 2546 MLX4_CMD_WRAPPED); 2547 return; 2548 } 2549 __mlx4_counter_free(dev, idx); 2550 } 2551 EXPORT_SYMBOL_GPL(mlx4_counter_free); 2552 2553 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port) 2554 { 2555 struct mlx4_priv *priv = mlx4_priv(dev); 2556 2557 return priv->def_counter[port - 1]; 2558 } 2559 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index); 2560 2561 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port) 2562 { 2563 struct mlx4_priv *priv = mlx4_priv(dev); 2564 2565 priv->mfunc.master.vf_admin[entry].vport[port].guid = guid; 2566 } 2567 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid); 2568 2569 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port) 2570 { 2571 struct mlx4_priv *priv = mlx4_priv(dev); 2572 2573 return priv->mfunc.master.vf_admin[entry].vport[port].guid; 2574 } 2575 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid); 2576 2577 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port) 2578 { 2579 struct mlx4_priv *priv = mlx4_priv(dev); 2580 __be64 guid; 2581 2582 /* hw GUID */ 2583 if (entry == 0) 2584 return; 2585 2586 get_random_bytes((char *)&guid, sizeof(guid)); 2587 guid &= ~(cpu_to_be64(1ULL << 56)); 2588 guid |= cpu_to_be64(1ULL << 57); 2589 priv->mfunc.master.vf_admin[entry].vport[port].guid = guid; 2590 } 2591 2592 static int 
mlx4_setup_hca(struct mlx4_dev *dev) 2593 { 2594 struct mlx4_priv *priv = mlx4_priv(dev); 2595 int err; 2596 int port; 2597 __be32 ib_port_default_caps; 2598 2599 err = mlx4_init_uar_table(dev); 2600 if (err) { 2601 mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); 2602 return err; 2603 } 2604 2605 err = mlx4_uar_alloc(dev, &priv->driver_uar); 2606 if (err) { 2607 mlx4_err(dev, "Failed to allocate driver access region, aborting\n"); 2608 goto err_uar_table_free; 2609 } 2610 2611 priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 2612 if (!priv->kar) { 2613 mlx4_err(dev, "Couldn't map kernel access region, aborting\n"); 2614 err = -ENOMEM; 2615 goto err_uar_free; 2616 } 2617 2618 err = mlx4_init_pd_table(dev); 2619 if (err) { 2620 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n"); 2621 goto err_kar_unmap; 2622 } 2623 2624 err = mlx4_init_xrcd_table(dev); 2625 if (err) { 2626 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n"); 2627 goto err_pd_table_free; 2628 } 2629 2630 err = mlx4_init_mr_table(dev); 2631 if (err) { 2632 mlx4_err(dev, "Failed to initialize memory region table, aborting\n"); 2633 goto err_xrcd_table_free; 2634 } 2635 2636 if (!mlx4_is_slave(dev)) { 2637 err = mlx4_init_mcg_table(dev); 2638 if (err) { 2639 mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); 2640 goto err_mr_table_free; 2641 } 2642 err = mlx4_config_mad_demux(dev); 2643 if (err) { 2644 mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); 2645 goto err_mcg_table_free; 2646 } 2647 } 2648 2649 err = mlx4_init_eq_table(dev); 2650 if (err) { 2651 mlx4_err(dev, "Failed to initialize event queue table, aborting\n"); 2652 goto err_mcg_table_free; 2653 } 2654 2655 err = mlx4_cmd_use_events(dev); 2656 if (err) { 2657 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n"); 2658 goto err_eq_table_free; 2659 } 2660 2661 err = 
mlx4_NOP(dev); 2662 if (err) { 2663 if (dev->flags & MLX4_FLAG_MSI_X) { 2664 mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n", 2665 priv->eq_table.eq[MLX4_EQ_ASYNC].irq); 2666 mlx4_warn(dev, "Trying again without MSI-X\n"); 2667 } else { 2668 mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n", 2669 priv->eq_table.eq[MLX4_EQ_ASYNC].irq); 2670 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 2671 } 2672 2673 goto err_cmd_poll; 2674 } 2675 2676 mlx4_dbg(dev, "NOP command IRQ test passed\n"); 2677 2678 err = mlx4_init_cq_table(dev); 2679 if (err) { 2680 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n"); 2681 goto err_cmd_poll; 2682 } 2683 2684 err = mlx4_init_srq_table(dev); 2685 if (err) { 2686 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n"); 2687 goto err_cq_table_free; 2688 } 2689 2690 err = mlx4_init_qp_table(dev); 2691 if (err) { 2692 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n"); 2693 goto err_srq_table_free; 2694 } 2695 2696 if (!mlx4_is_slave(dev)) { 2697 err = mlx4_init_counters_table(dev); 2698 if (err && err != -ENOENT) { 2699 mlx4_err(dev, "Failed to initialize counters table, aborting\n"); 2700 goto err_qp_table_free; 2701 } 2702 } 2703 2704 err = mlx4_allocate_default_counters(dev); 2705 if (err) { 2706 mlx4_err(dev, "Failed to allocate default counters, aborting\n"); 2707 goto err_counters_table_free; 2708 } 2709 2710 if (!mlx4_is_slave(dev)) { 2711 for (port = 1; port <= dev->caps.num_ports; port++) { 2712 ib_port_default_caps = 0; 2713 err = mlx4_get_port_ib_caps(dev, port, 2714 &ib_port_default_caps); 2715 if (err) 2716 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). 
Continuing with caps = 0\n", 2717 port, err); 2718 dev->caps.ib_port_def_cap[port] = ib_port_default_caps; 2719 2720 /* initialize per-slave default ib port capabilities */ 2721 if (mlx4_is_master(dev)) { 2722 int i; 2723 for (i = 0; i < dev->num_slaves; i++) { 2724 if (i == mlx4_master_func_num(dev)) 2725 continue; 2726 priv->mfunc.master.slave_state[i].ib_cap_mask[port] = 2727 ib_port_default_caps; 2728 } 2729 } 2730 2731 if (mlx4_is_mfunc(dev)) 2732 dev->caps.port_ib_mtu[port] = IB_MTU_2048; 2733 else 2734 dev->caps.port_ib_mtu[port] = IB_MTU_4096; 2735 2736 err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? 2737 dev->caps.pkey_table_len[port] : -1); 2738 if (err) { 2739 mlx4_err(dev, "Failed to set port %d, aborting\n", 2740 port); 2741 goto err_default_countes_free; 2742 } 2743 } 2744 } 2745 2746 return 0; 2747 2748 err_default_countes_free: 2749 mlx4_cleanup_default_counters(dev); 2750 2751 err_counters_table_free: 2752 if (!mlx4_is_slave(dev)) 2753 mlx4_cleanup_counters_table(dev); 2754 2755 err_qp_table_free: 2756 mlx4_cleanup_qp_table(dev); 2757 2758 err_srq_table_free: 2759 mlx4_cleanup_srq_table(dev); 2760 2761 err_cq_table_free: 2762 mlx4_cleanup_cq_table(dev); 2763 2764 err_cmd_poll: 2765 mlx4_cmd_use_polling(dev); 2766 2767 err_eq_table_free: 2768 mlx4_cleanup_eq_table(dev); 2769 2770 err_mcg_table_free: 2771 if (!mlx4_is_slave(dev)) 2772 mlx4_cleanup_mcg_table(dev); 2773 2774 err_mr_table_free: 2775 mlx4_cleanup_mr_table(dev); 2776 2777 err_xrcd_table_free: 2778 mlx4_cleanup_xrcd_table(dev); 2779 2780 err_pd_table_free: 2781 mlx4_cleanup_pd_table(dev); 2782 2783 err_kar_unmap: 2784 iounmap(priv->kar); 2785 2786 err_uar_free: 2787 mlx4_uar_free(dev, &priv->driver_uar); 2788 2789 err_uar_table_free: 2790 mlx4_cleanup_uar_table(dev); 2791 return err; 2792 } 2793 2794 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn) 2795 { 2796 int requested_cpu = 0; 2797 struct mlx4_priv *priv = mlx4_priv(dev); 2798 struct mlx4_eq *eq; 2799 
int off = 0; 2800 int i; 2801 2802 if (eqn > dev->caps.num_comp_vectors) 2803 return -EINVAL; 2804 2805 for (i = 1; i < port; i++) 2806 off += mlx4_get_eqs_per_port(dev, i); 2807 2808 requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC); 2809 2810 /* Meaning EQs are shared, and this call comes from the second port */ 2811 if (requested_cpu < 0) 2812 return 0; 2813 2814 eq = &priv->eq_table.eq[eqn]; 2815 2816 if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL)) 2817 return -ENOMEM; 2818 2819 cpumask_set_cpu(requested_cpu, eq->affinity_mask); 2820 2821 return 0; 2822 } 2823 2824 static void mlx4_enable_msi_x(struct mlx4_dev *dev) 2825 { 2826 struct mlx4_priv *priv = mlx4_priv(dev); 2827 struct msix_entry *entries; 2828 int i; 2829 int port = 0; 2830 2831 if (msi_x) { 2832 int nreq = dev->caps.num_ports * num_online_cpus() + 1; 2833 2834 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, 2835 nreq); 2836 if (nreq > MAX_MSIX) 2837 nreq = MAX_MSIX; 2838 2839 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); 2840 if (!entries) 2841 goto no_msi; 2842 2843 for (i = 0; i < nreq; ++i) 2844 entries[i].entry = i; 2845 2846 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, 2847 nreq); 2848 2849 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) { 2850 kfree(entries); 2851 goto no_msi; 2852 } 2853 /* 1 is reserved for events (asyncrounous EQ) */ 2854 dev->caps.num_comp_vectors = nreq - 1; 2855 2856 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector; 2857 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports, 2858 dev->caps.num_ports); 2859 2860 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) { 2861 if (i == MLX4_EQ_ASYNC) 2862 continue; 2863 2864 priv->eq_table.eq[i].irq = 2865 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector; 2866 2867 if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) { 2868 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, 2869 dev->caps.num_ports); 2870 /* We don't set affinity hint when there 2871 * aren't enough EQs 2872 */ 2873 } else 
{ 2874 set_bit(port, 2875 priv->eq_table.eq[i].actv_ports.ports); 2876 if (mlx4_init_affinity_hint(dev, port + 1, i)) 2877 mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n", 2878 i); 2879 } 2880 /* We divide the Eqs evenly between the two ports. 2881 * (dev->caps.num_comp_vectors / dev->caps.num_ports) 2882 * refers to the number of Eqs per port 2883 * (i.e eqs_per_port). Theoretically, we would like to 2884 * write something like (i + 1) % eqs_per_port == 0. 2885 * However, since there's an asynchronous Eq, we have 2886 * to skip over it by comparing this condition to 2887 * !!((i + 1) > MLX4_EQ_ASYNC). 2888 */ 2889 if ((dev->caps.num_comp_vectors > dev->caps.num_ports) && 2890 ((i + 1) % 2891 (dev->caps.num_comp_vectors / dev->caps.num_ports)) == 2892 !!((i + 1) > MLX4_EQ_ASYNC)) 2893 /* If dev->caps.num_comp_vectors < dev->caps.num_ports, 2894 * everything is shared anyway. 2895 */ 2896 port++; 2897 } 2898 2899 dev->flags |= MLX4_FLAG_MSI_X; 2900 2901 kfree(entries); 2902 return; 2903 } 2904 2905 no_msi: 2906 dev->caps.num_comp_vectors = 1; 2907 2908 BUG_ON(MLX4_EQ_ASYNC >= 2); 2909 for (i = 0; i < 2; ++i) { 2910 priv->eq_table.eq[i].irq = dev->persist->pdev->irq; 2911 if (i != MLX4_EQ_ASYNC) { 2912 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, 2913 dev->caps.num_ports); 2914 } 2915 } 2916 } 2917 2918 static int mlx4_init_port_info(struct mlx4_dev *dev, int port) 2919 { 2920 struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); 2921 struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; 2922 int err; 2923 2924 err = devlink_port_register(devlink, &info->devlink_port, port); 2925 if (err) 2926 return err; 2927 2928 info->dev = dev; 2929 info->port = port; 2930 if (!mlx4_is_slave(dev)) { 2931 mlx4_init_mac_table(dev, &info->mac_table); 2932 mlx4_init_vlan_table(dev, &info->vlan_table); 2933 mlx4_init_roce_gid_table(dev, &info->gid_table); 2934 info->base_qpn = mlx4_get_base_qpn(dev, port); 2935 } 2936 2937 sprintf(info->dev_name, "mlx4_port%d", 
port); 2938 info->port_attr.attr.name = info->dev_name; 2939 if (mlx4_is_mfunc(dev)) 2940 info->port_attr.attr.mode = S_IRUGO; 2941 else { 2942 info->port_attr.attr.mode = S_IRUGO | S_IWUSR; 2943 info->port_attr.store = set_port_type; 2944 } 2945 info->port_attr.show = show_port_type; 2946 sysfs_attr_init(&info->port_attr.attr); 2947 2948 err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); 2949 if (err) { 2950 mlx4_err(dev, "Failed to create file for port %d\n", port); 2951 devlink_port_unregister(&info->devlink_port); 2952 info->port = -1; 2953 } 2954 2955 sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); 2956 info->port_mtu_attr.attr.name = info->dev_mtu_name; 2957 if (mlx4_is_mfunc(dev)) 2958 info->port_mtu_attr.attr.mode = S_IRUGO; 2959 else { 2960 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; 2961 info->port_mtu_attr.store = set_port_ib_mtu; 2962 } 2963 info->port_mtu_attr.show = show_port_ib_mtu; 2964 sysfs_attr_init(&info->port_mtu_attr.attr); 2965 2966 err = device_create_file(&dev->persist->pdev->dev, 2967 &info->port_mtu_attr); 2968 if (err) { 2969 mlx4_err(dev, "Failed to create mtu file for port %d\n", port); 2970 device_remove_file(&info->dev->persist->pdev->dev, 2971 &info->port_attr); 2972 info->port = -1; 2973 } 2974 2975 return err; 2976 } 2977 2978 static void mlx4_cleanup_port_info(struct mlx4_port_info *info) 2979 { 2980 if (info->port < 0) 2981 return; 2982 2983 device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); 2984 device_remove_file(&info->dev->persist->pdev->dev, 2985 &info->port_mtu_attr); 2986 #ifdef CONFIG_RFS_ACCEL 2987 free_irq_cpu_rmap(info->rmap); 2988 info->rmap = NULL; 2989 #endif 2990 } 2991 2992 static int mlx4_init_steering(struct mlx4_dev *dev) 2993 { 2994 struct mlx4_priv *priv = mlx4_priv(dev); 2995 int num_entries = dev->caps.num_ports; 2996 int i, j; 2997 2998 priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); 2999 if (!priv->steer) 3000 return -ENOMEM; 
3001 3002 for (i = 0; i < num_entries; i++) 3003 for (j = 0; j < MLX4_NUM_STEERS; j++) { 3004 INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); 3005 INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); 3006 } 3007 return 0; 3008 } 3009 3010 static void mlx4_clear_steering(struct mlx4_dev *dev) 3011 { 3012 struct mlx4_priv *priv = mlx4_priv(dev); 3013 struct mlx4_steer_index *entry, *tmp_entry; 3014 struct mlx4_promisc_qp *pqp, *tmp_pqp; 3015 int num_entries = dev->caps.num_ports; 3016 int i, j; 3017 3018 for (i = 0; i < num_entries; i++) { 3019 for (j = 0; j < MLX4_NUM_STEERS; j++) { 3020 list_for_each_entry_safe(pqp, tmp_pqp, 3021 &priv->steer[i].promisc_qps[j], 3022 list) { 3023 list_del(&pqp->list); 3024 kfree(pqp); 3025 } 3026 list_for_each_entry_safe(entry, tmp_entry, 3027 &priv->steer[i].steer_entries[j], 3028 list) { 3029 list_del(&entry->list); 3030 list_for_each_entry_safe(pqp, tmp_pqp, 3031 &entry->duplicates, 3032 list) { 3033 list_del(&pqp->list); 3034 kfree(pqp); 3035 } 3036 kfree(entry); 3037 } 3038 } 3039 } 3040 kfree(priv->steer); 3041 } 3042 3043 static int extended_func_num(struct pci_dev *pdev) 3044 { 3045 return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); 3046 } 3047 3048 #define MLX4_OWNER_BASE 0x8069c 3049 #define MLX4_OWNER_SIZE 4 3050 3051 static int mlx4_get_ownership(struct mlx4_dev *dev) 3052 { 3053 void __iomem *owner; 3054 u32 ret; 3055 3056 if (pci_channel_offline(dev->persist->pdev)) 3057 return -EIO; 3058 3059 owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + 3060 MLX4_OWNER_BASE, 3061 MLX4_OWNER_SIZE); 3062 if (!owner) { 3063 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3064 return -ENOMEM; 3065 } 3066 3067 ret = readl(owner); 3068 iounmap(owner); 3069 return (int) !!ret; 3070 } 3071 3072 static void mlx4_free_ownership(struct mlx4_dev *dev) 3073 { 3074 void __iomem *owner; 3075 3076 if (pci_channel_offline(dev->persist->pdev)) 3077 return; 3078 3079 owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + 3080 
MLX4_OWNER_BASE, 3081 MLX4_OWNER_SIZE); 3082 if (!owner) { 3083 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3084 return; 3085 } 3086 writel(0, owner); 3087 msleep(1000); 3088 iounmap(owner); 3089 } 3090 3091 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\ 3092 !!((flags) & MLX4_FLAG_MASTER)) 3093 3094 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, 3095 u8 total_vfs, int existing_vfs, int reset_flow) 3096 { 3097 u64 dev_flags = dev->flags; 3098 int err = 0; 3099 int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev), 3100 MLX4_MAX_NUM_VF); 3101 3102 if (reset_flow) { 3103 dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), 3104 GFP_KERNEL); 3105 if (!dev->dev_vfs) 3106 goto free_mem; 3107 return dev_flags; 3108 } 3109 3110 atomic_inc(&pf_loading); 3111 if (dev->flags & MLX4_FLAG_SRIOV) { 3112 if (existing_vfs != total_vfs) { 3113 mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", 3114 existing_vfs, total_vfs); 3115 total_vfs = existing_vfs; 3116 } 3117 } 3118 3119 dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL); 3120 if (NULL == dev->dev_vfs) { 3121 mlx4_err(dev, "Failed to allocate memory for VFs\n"); 3122 goto disable_sriov; 3123 } 3124 3125 if (!(dev->flags & MLX4_FLAG_SRIOV)) { 3126 if (total_vfs > fw_enabled_sriov_vfs) { 3127 mlx4_err(dev, "requested vfs (%d) > available vfs (%d). 
Continuing without SR_IOV\n", 3128 total_vfs, fw_enabled_sriov_vfs); 3129 err = -ENOMEM; 3130 goto disable_sriov; 3131 } 3132 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); 3133 err = pci_enable_sriov(pdev, total_vfs); 3134 } 3135 if (err) { 3136 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", 3137 err); 3138 goto disable_sriov; 3139 } else { 3140 mlx4_warn(dev, "Running in master mode\n"); 3141 dev_flags |= MLX4_FLAG_SRIOV | 3142 MLX4_FLAG_MASTER; 3143 dev_flags &= ~MLX4_FLAG_SLAVE; 3144 dev->persist->num_vfs = total_vfs; 3145 } 3146 return dev_flags; 3147 3148 disable_sriov: 3149 atomic_dec(&pf_loading); 3150 free_mem: 3151 dev->persist->num_vfs = 0; 3152 kfree(dev->dev_vfs); 3153 dev->dev_vfs = NULL; 3154 return dev_flags & ~MLX4_FLAG_MASTER; 3155 } 3156 3157 enum { 3158 MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1, 3159 }; 3160 3161 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 3162 int *nvfs) 3163 { 3164 int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2]; 3165 /* Checking for 64 VFs as a limitation of CX2 */ 3166 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) && 3167 requested_vfs >= 64) { 3168 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n", 3169 requested_vfs); 3170 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64; 3171 } 3172 return 0; 3173 } 3174 3175 static int mlx4_pci_enable_device(struct mlx4_dev *dev) 3176 { 3177 struct pci_dev *pdev = dev->persist->pdev; 3178 int err = 0; 3179 3180 mutex_lock(&dev->persist->pci_status_mutex); 3181 if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) { 3182 err = pci_enable_device(pdev); 3183 if (!err) 3184 dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED; 3185 } 3186 mutex_unlock(&dev->persist->pci_status_mutex); 3187 3188 return err; 3189 } 3190 3191 static void mlx4_pci_disable_device(struct mlx4_dev *dev) 3192 { 3193 struct pci_dev *pdev = dev->persist->pdev; 3194 3195 
mutex_lock(&dev->persist->pci_status_mutex); 3196 if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) { 3197 pci_disable_device(pdev); 3198 dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED; 3199 } 3200 mutex_unlock(&dev->persist->pci_status_mutex); 3201 } 3202 3203 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, 3204 int total_vfs, int *nvfs, struct mlx4_priv *priv, 3205 int reset_flow) 3206 { 3207 struct mlx4_dev *dev; 3208 unsigned sum = 0; 3209 int err; 3210 int port; 3211 int i; 3212 struct mlx4_dev_cap *dev_cap = NULL; 3213 int existing_vfs = 0; 3214 3215 dev = &priv->dev; 3216 3217 INIT_LIST_HEAD(&priv->ctx_list); 3218 spin_lock_init(&priv->ctx_lock); 3219 3220 mutex_init(&priv->port_mutex); 3221 mutex_init(&priv->bond_mutex); 3222 3223 INIT_LIST_HEAD(&priv->pgdir_list); 3224 mutex_init(&priv->pgdir_mutex); 3225 spin_lock_init(&priv->cmd.context_lock); 3226 3227 INIT_LIST_HEAD(&priv->bf_list); 3228 mutex_init(&priv->bf_mutex); 3229 3230 dev->rev_id = pdev->revision; 3231 dev->numa_node = dev_to_node(&pdev->dev); 3232 3233 /* Detect if this device is a virtual function */ 3234 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { 3235 mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); 3236 dev->flags |= MLX4_FLAG_SLAVE; 3237 } else { 3238 /* We reset the device and enable SRIOV only for physical 3239 * devices. Try to claim ownership on the device; 3240 * if already taken, skip -- do not allow multiple PFs */ 3241 err = mlx4_get_ownership(dev); 3242 if (err) { 3243 if (err < 0) 3244 return err; 3245 else { 3246 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n"); 3247 return -EINVAL; 3248 } 3249 } 3250 3251 atomic_set(&priv->opreq_count, 0); 3252 INIT_WORK(&priv->opreq_task, mlx4_opreq_action); 3253 3254 /* 3255 * Now reset the HCA before we touch the PCI capabilities or 3256 * attempt a firmware command, since a boot ROM may have left 3257 * the HCA in an undefined state. 
3258 */ 3259 err = mlx4_reset(dev); 3260 if (err) { 3261 mlx4_err(dev, "Failed to reset HCA, aborting\n"); 3262 goto err_sriov; 3263 } 3264 3265 if (total_vfs) { 3266 dev->flags = MLX4_FLAG_MASTER; 3267 existing_vfs = pci_num_vf(pdev); 3268 if (existing_vfs) 3269 dev->flags |= MLX4_FLAG_SRIOV; 3270 dev->persist->num_vfs = total_vfs; 3271 } 3272 } 3273 3274 /* on load remove any previous indication of internal error, 3275 * device is up. 3276 */ 3277 dev->persist->state = MLX4_DEVICE_STATE_UP; 3278 3279 slave_start: 3280 err = mlx4_cmd_init(dev); 3281 if (err) { 3282 mlx4_err(dev, "Failed to init command interface, aborting\n"); 3283 goto err_sriov; 3284 } 3285 3286 /* In slave functions, the communication channel must be initialized 3287 * before posting commands. Also, init num_slaves before calling 3288 * mlx4_init_hca */ 3289 if (mlx4_is_mfunc(dev)) { 3290 if (mlx4_is_master(dev)) { 3291 dev->num_slaves = MLX4_MAX_NUM_SLAVES; 3292 3293 } else { 3294 dev->num_slaves = 0; 3295 err = mlx4_multi_func_init(dev); 3296 if (err) { 3297 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n"); 3298 goto err_cmd; 3299 } 3300 } 3301 } 3302 3303 err = mlx4_init_fw(dev); 3304 if (err) { 3305 mlx4_err(dev, "Failed to init fw, aborting.\n"); 3306 goto err_mfunc; 3307 } 3308 3309 if (mlx4_is_master(dev)) { 3310 /* when we hit the goto slave_start below, dev_cap already initialized */ 3311 if (!dev_cap) { 3312 dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); 3313 3314 if (!dev_cap) { 3315 err = -ENOMEM; 3316 goto err_fw; 3317 } 3318 3319 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 3320 if (err) { 3321 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 3322 goto err_fw; 3323 } 3324 3325 if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) 3326 goto err_fw; 3327 3328 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { 3329 u64 dev_flags = mlx4_enable_sriov(dev, pdev, 3330 total_vfs, 3331 existing_vfs, 3332 reset_flow); 3333 3334 mlx4_close_fw(dev); 3335 
mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); 3336 dev->flags = dev_flags; 3337 if (!SRIOV_VALID_STATE(dev->flags)) { 3338 mlx4_err(dev, "Invalid SRIOV state\n"); 3339 goto err_sriov; 3340 } 3341 err = mlx4_reset(dev); 3342 if (err) { 3343 mlx4_err(dev, "Failed to reset HCA, aborting.\n"); 3344 goto err_sriov; 3345 } 3346 goto slave_start; 3347 } 3348 } else { 3349 /* Legacy mode FW requires SRIOV to be enabled before 3350 * doing QUERY_DEV_CAP, since max_eq's value is different if 3351 * SRIOV is enabled. 3352 */ 3353 memset(dev_cap, 0, sizeof(*dev_cap)); 3354 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 3355 if (err) { 3356 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 3357 goto err_fw; 3358 } 3359 3360 if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) 3361 goto err_fw; 3362 } 3363 } 3364 3365 err = mlx4_init_hca(dev); 3366 if (err) { 3367 if (err == -EACCES) { 3368 /* Not primary Physical function 3369 * Running in slave mode */ 3370 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); 3371 /* We're not a PF */ 3372 if (dev->flags & MLX4_FLAG_SRIOV) { 3373 if (!existing_vfs) 3374 pci_disable_sriov(pdev); 3375 if (mlx4_is_master(dev) && !reset_flow) 3376 atomic_dec(&pf_loading); 3377 dev->flags &= ~MLX4_FLAG_SRIOV; 3378 } 3379 if (!mlx4_is_slave(dev)) 3380 mlx4_free_ownership(dev); 3381 dev->flags |= MLX4_FLAG_SLAVE; 3382 dev->flags &= ~MLX4_FLAG_MASTER; 3383 goto slave_start; 3384 } else 3385 goto err_fw; 3386 } 3387 3388 if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { 3389 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, 3390 existing_vfs, reset_flow); 3391 3392 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { 3393 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); 3394 dev->flags = dev_flags; 3395 err = mlx4_cmd_init(dev); 3396 if (err) { 3397 /* Only VHCR is cleaned up, so could still 3398 * send FW commands 3399 */ 3400 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n"); 3401 goto err_close; 3402 
} 3403 } else { 3404 dev->flags = dev_flags; 3405 } 3406 3407 if (!SRIOV_VALID_STATE(dev->flags)) { 3408 mlx4_err(dev, "Invalid SRIOV state\n"); 3409 goto err_close; 3410 } 3411 } 3412 3413 /* check if the device is functioning at its maximum possible speed. 3414 * No return code for this call, just warn the user in case of PCI 3415 * express device capabilities are under-satisfied by the bus. 3416 */ 3417 if (!mlx4_is_slave(dev)) 3418 mlx4_check_pcie_caps(dev); 3419 3420 /* In master functions, the communication channel must be initialized 3421 * after obtaining its address from fw */ 3422 if (mlx4_is_master(dev)) { 3423 if (dev->caps.num_ports < 2 && 3424 num_vfs_argc > 1) { 3425 err = -EINVAL; 3426 mlx4_err(dev, 3427 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n", 3428 dev->caps.num_ports); 3429 goto err_close; 3430 } 3431 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); 3432 3433 for (i = 0; 3434 i < sizeof(dev->persist->nvfs)/ 3435 sizeof(dev->persist->nvfs[0]); i++) { 3436 unsigned j; 3437 3438 for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { 3439 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; 3440 dev->dev_vfs[sum].n_ports = i < 2 ? 
1 : 3441 dev->caps.num_ports; 3442 } 3443 } 3444 3445 /* In master functions, the communication channel 3446 * must be initialized after obtaining its address from fw 3447 */ 3448 err = mlx4_multi_func_init(dev); 3449 if (err) { 3450 mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n"); 3451 goto err_close; 3452 } 3453 } 3454 3455 err = mlx4_alloc_eq_table(dev); 3456 if (err) 3457 goto err_master_mfunc; 3458 3459 bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX); 3460 mutex_init(&priv->msix_ctl.pool_lock); 3461 3462 mlx4_enable_msi_x(dev); 3463 if ((mlx4_is_mfunc(dev)) && 3464 !(dev->flags & MLX4_FLAG_MSI_X)) { 3465 err = -ENOSYS; 3466 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n"); 3467 goto err_free_eq; 3468 } 3469 3470 if (!mlx4_is_slave(dev)) { 3471 err = mlx4_init_steering(dev); 3472 if (err) 3473 goto err_disable_msix; 3474 } 3475 3476 err = mlx4_setup_hca(dev); 3477 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && 3478 !mlx4_is_mfunc(dev)) { 3479 dev->flags &= ~MLX4_FLAG_MSI_X; 3480 dev->caps.num_comp_vectors = 1; 3481 pci_disable_msix(pdev); 3482 err = mlx4_setup_hca(dev); 3483 } 3484 3485 if (err) 3486 goto err_steer; 3487 3488 mlx4_init_quotas(dev); 3489 /* When PF resources are ready arm its comm channel to enable 3490 * getting commands 3491 */ 3492 if (mlx4_is_master(dev)) { 3493 err = mlx4_ARM_COMM_CHANNEL(dev); 3494 if (err) { 3495 mlx4_err(dev, " Failed to arm comm channel eq: %x\n", 3496 err); 3497 goto err_steer; 3498 } 3499 } 3500 3501 for (port = 1; port <= dev->caps.num_ports; port++) { 3502 err = mlx4_init_port_info(dev, port); 3503 if (err) 3504 goto err_port; 3505 } 3506 3507 priv->v2p.port1 = 1; 3508 priv->v2p.port2 = 2; 3509 3510 err = mlx4_register_device(dev); 3511 if (err) 3512 goto err_port; 3513 3514 mlx4_request_modules(dev); 3515 3516 mlx4_sense_init(dev); 3517 mlx4_start_sense(dev); 3518 3519 priv->removed = 0; 3520 3521 if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) 
3522 atomic_dec(&pf_loading); 3523 3524 kfree(dev_cap); 3525 return 0; 3526 3527 err_port: 3528 for (--port; port >= 1; --port) 3529 mlx4_cleanup_port_info(&priv->port[port]); 3530 3531 mlx4_cleanup_default_counters(dev); 3532 if (!mlx4_is_slave(dev)) 3533 mlx4_cleanup_counters_table(dev); 3534 mlx4_cleanup_qp_table(dev); 3535 mlx4_cleanup_srq_table(dev); 3536 mlx4_cleanup_cq_table(dev); 3537 mlx4_cmd_use_polling(dev); 3538 mlx4_cleanup_eq_table(dev); 3539 mlx4_cleanup_mcg_table(dev); 3540 mlx4_cleanup_mr_table(dev); 3541 mlx4_cleanup_xrcd_table(dev); 3542 mlx4_cleanup_pd_table(dev); 3543 mlx4_cleanup_uar_table(dev); 3544 3545 err_steer: 3546 if (!mlx4_is_slave(dev)) 3547 mlx4_clear_steering(dev); 3548 3549 err_disable_msix: 3550 if (dev->flags & MLX4_FLAG_MSI_X) 3551 pci_disable_msix(pdev); 3552 3553 err_free_eq: 3554 mlx4_free_eq_table(dev); 3555 3556 err_master_mfunc: 3557 if (mlx4_is_master(dev)) { 3558 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); 3559 mlx4_multi_func_cleanup(dev); 3560 } 3561 3562 if (mlx4_is_slave(dev)) { 3563 kfree(dev->caps.qp0_qkey); 3564 kfree(dev->caps.qp0_tunnel); 3565 kfree(dev->caps.qp0_proxy); 3566 kfree(dev->caps.qp1_tunnel); 3567 kfree(dev->caps.qp1_proxy); 3568 } 3569 3570 err_close: 3571 mlx4_close_hca(dev); 3572 3573 err_fw: 3574 mlx4_close_fw(dev); 3575 3576 err_mfunc: 3577 if (mlx4_is_slave(dev)) 3578 mlx4_multi_func_cleanup(dev); 3579 3580 err_cmd: 3581 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); 3582 3583 err_sriov: 3584 if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { 3585 pci_disable_sriov(pdev); 3586 dev->flags &= ~MLX4_FLAG_SRIOV; 3587 } 3588 3589 if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) 3590 atomic_dec(&pf_loading); 3591 3592 kfree(priv->dev.dev_vfs); 3593 3594 if (!mlx4_is_slave(dev)) 3595 mlx4_free_ownership(dev); 3596 3597 kfree(dev_cap); 3598 return err; 3599 } 3600 3601 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, 3602 struct mlx4_priv *priv) 3603 { 
3604 int err; 3605 int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; 3606 int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0}; 3607 const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = { 3608 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} }; 3609 unsigned total_vfs = 0; 3610 unsigned int i; 3611 3612 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); 3613 3614 err = mlx4_pci_enable_device(&priv->dev); 3615 if (err) { 3616 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); 3617 return err; 3618 } 3619 3620 /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS 3621 * per port, we must limit the number of VFs to 63 (since their are 3622 * 128 MACs) 3623 */ 3624 for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc; 3625 total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) { 3626 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i]; 3627 if (nvfs[i] < 0) { 3628 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); 3629 err = -EINVAL; 3630 goto err_disable_pdev; 3631 } 3632 } 3633 for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc; 3634 i++) { 3635 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i]; 3636 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { 3637 dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n"); 3638 err = -EINVAL; 3639 goto err_disable_pdev; 3640 } 3641 } 3642 if (total_vfs > MLX4_MAX_NUM_VF) { 3643 dev_err(&pdev->dev, 3644 "Requested more VF's (%d) than allowed by hw (%d)\n", 3645 total_vfs, MLX4_MAX_NUM_VF); 3646 err = -EINVAL; 3647 goto err_disable_pdev; 3648 } 3649 3650 for (i = 0; i < MLX4_MAX_PORTS; i++) { 3651 if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) { 3652 dev_err(&pdev->dev, 3653 "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n", 3654 nvfs[i] + nvfs[2], i + 1, 3655 MLX4_MAX_NUM_VF_P_PORT); 3656 err = -EINVAL; 3657 goto err_disable_pdev; 3658 } 3659 } 3660 3661 /* Check for BARs. 
*/ 3662 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && 3663 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 3664 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", 3665 pci_dev_data, pci_resource_flags(pdev, 0)); 3666 err = -ENODEV; 3667 goto err_disable_pdev; 3668 } 3669 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { 3670 dev_err(&pdev->dev, "Missing UAR, aborting\n"); 3671 err = -ENODEV; 3672 goto err_disable_pdev; 3673 } 3674 3675 err = pci_request_regions(pdev, DRV_NAME); 3676 if (err) { 3677 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 3678 goto err_disable_pdev; 3679 } 3680 3681 pci_set_master(pdev); 3682 3683 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 3684 if (err) { 3685 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); 3686 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 3687 if (err) { 3688 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); 3689 goto err_release_regions; 3690 } 3691 } 3692 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 3693 if (err) { 3694 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n"); 3695 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 3696 if (err) { 3697 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n"); 3698 goto err_release_regions; 3699 } 3700 } 3701 3702 /* Allow large DMA segments, up to the firmware limit of 1 GB */ 3703 dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); 3704 /* Detect if this device is a virtual function */ 3705 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { 3706 /* When acting as pf, we normally skip vfs unless explicitly 3707 * requested to probe them. 
3708 */ 3709 if (total_vfs) { 3710 unsigned vfs_offset = 0; 3711 3712 for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && 3713 vfs_offset + nvfs[i] < extended_func_num(pdev); 3714 vfs_offset += nvfs[i], i++) 3715 ; 3716 if (i == sizeof(nvfs)/sizeof(nvfs[0])) { 3717 err = -ENODEV; 3718 goto err_release_regions; 3719 } 3720 if ((extended_func_num(pdev) - vfs_offset) 3721 > prb_vf[i]) { 3722 dev_warn(&pdev->dev, "Skipping virtual function:%d\n", 3723 extended_func_num(pdev)); 3724 err = -ENODEV; 3725 goto err_release_regions; 3726 } 3727 } 3728 } 3729 3730 err = mlx4_catas_init(&priv->dev); 3731 if (err) 3732 goto err_release_regions; 3733 3734 err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); 3735 if (err) 3736 goto err_catas; 3737 3738 return 0; 3739 3740 err_catas: 3741 mlx4_catas_end(&priv->dev); 3742 3743 err_release_regions: 3744 pci_release_regions(pdev); 3745 3746 err_disable_pdev: 3747 mlx4_pci_disable_device(&priv->dev); 3748 pci_set_drvdata(pdev, NULL); 3749 return err; 3750 } 3751 3752 static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, 3753 enum devlink_port_type port_type) 3754 { 3755 struct mlx4_port_info *info = container_of(devlink_port, 3756 struct mlx4_port_info, 3757 devlink_port); 3758 enum mlx4_port_type mlx4_port_type; 3759 3760 switch (port_type) { 3761 case DEVLINK_PORT_TYPE_AUTO: 3762 mlx4_port_type = MLX4_PORT_TYPE_AUTO; 3763 break; 3764 case DEVLINK_PORT_TYPE_ETH: 3765 mlx4_port_type = MLX4_PORT_TYPE_ETH; 3766 break; 3767 case DEVLINK_PORT_TYPE_IB: 3768 mlx4_port_type = MLX4_PORT_TYPE_IB; 3769 break; 3770 default: 3771 return -EOPNOTSUPP; 3772 } 3773 3774 return __set_port_type(info, mlx4_port_type); 3775 } 3776 3777 static const struct devlink_ops mlx4_devlink_ops = { 3778 .port_type_set = mlx4_devlink_port_type_set, 3779 }; 3780 3781 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) 3782 { 3783 struct devlink *devlink; 3784 struct mlx4_priv *priv; 3785 struct mlx4_dev *dev; 
3786 int ret; 3787 3788 printk_once(KERN_INFO "%s", mlx4_version); 3789 3790 devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); 3791 if (!devlink) 3792 return -ENOMEM; 3793 priv = devlink_priv(devlink); 3794 3795 dev = &priv->dev; 3796 dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); 3797 if (!dev->persist) { 3798 ret = -ENOMEM; 3799 goto err_devlink_free; 3800 } 3801 dev->persist->pdev = pdev; 3802 dev->persist->dev = dev; 3803 pci_set_drvdata(pdev, dev->persist); 3804 priv->pci_dev_data = id->driver_data; 3805 mutex_init(&dev->persist->device_state_mutex); 3806 mutex_init(&dev->persist->interface_state_mutex); 3807 mutex_init(&dev->persist->pci_status_mutex); 3808 3809 ret = devlink_register(devlink, &pdev->dev); 3810 if (ret) 3811 goto err_persist_free; 3812 3813 ret = __mlx4_init_one(pdev, id->driver_data, priv); 3814 if (ret) 3815 goto err_devlink_unregister; 3816 3817 pci_save_state(pdev); 3818 return 0; 3819 3820 err_devlink_unregister: 3821 devlink_unregister(devlink); 3822 err_persist_free: 3823 kfree(dev->persist); 3824 err_devlink_free: 3825 devlink_free(devlink); 3826 return ret; 3827 } 3828 3829 static void mlx4_clean_dev(struct mlx4_dev *dev) 3830 { 3831 struct mlx4_dev_persistent *persist = dev->persist; 3832 struct mlx4_priv *priv = mlx4_priv(dev); 3833 unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); 3834 3835 memset(priv, 0, sizeof(*priv)); 3836 priv->dev.persist = persist; 3837 priv->dev.flags = flags; 3838 } 3839 3840 static void mlx4_unload_one(struct pci_dev *pdev) 3841 { 3842 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 3843 struct mlx4_dev *dev = persist->dev; 3844 struct mlx4_priv *priv = mlx4_priv(dev); 3845 int pci_dev_data; 3846 int p, i; 3847 3848 if (priv->removed) 3849 return; 3850 3851 /* saving current ports type for further use */ 3852 for (i = 0; i < dev->caps.num_ports; i++) { 3853 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; 3854 
dev->persist->curr_port_poss_type[i] = dev->caps. 3855 possible_type[i + 1]; 3856 } 3857 3858 pci_dev_data = priv->pci_dev_data; 3859 3860 mlx4_stop_sense(dev); 3861 mlx4_unregister_device(dev); 3862 3863 for (p = 1; p <= dev->caps.num_ports; p++) { 3864 mlx4_cleanup_port_info(&priv->port[p]); 3865 mlx4_CLOSE_PORT(dev, p); 3866 } 3867 3868 if (mlx4_is_master(dev)) 3869 mlx4_free_resource_tracker(dev, 3870 RES_TR_FREE_SLAVES_ONLY); 3871 3872 mlx4_cleanup_default_counters(dev); 3873 if (!mlx4_is_slave(dev)) 3874 mlx4_cleanup_counters_table(dev); 3875 mlx4_cleanup_qp_table(dev); 3876 mlx4_cleanup_srq_table(dev); 3877 mlx4_cleanup_cq_table(dev); 3878 mlx4_cmd_use_polling(dev); 3879 mlx4_cleanup_eq_table(dev); 3880 mlx4_cleanup_mcg_table(dev); 3881 mlx4_cleanup_mr_table(dev); 3882 mlx4_cleanup_xrcd_table(dev); 3883 mlx4_cleanup_pd_table(dev); 3884 3885 if (mlx4_is_master(dev)) 3886 mlx4_free_resource_tracker(dev, 3887 RES_TR_FREE_STRUCTS_ONLY); 3888 3889 iounmap(priv->kar); 3890 mlx4_uar_free(dev, &priv->driver_uar); 3891 mlx4_cleanup_uar_table(dev); 3892 if (!mlx4_is_slave(dev)) 3893 mlx4_clear_steering(dev); 3894 mlx4_free_eq_table(dev); 3895 if (mlx4_is_master(dev)) 3896 mlx4_multi_func_cleanup(dev); 3897 mlx4_close_hca(dev); 3898 mlx4_close_fw(dev); 3899 if (mlx4_is_slave(dev)) 3900 mlx4_multi_func_cleanup(dev); 3901 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); 3902 3903 if (dev->flags & MLX4_FLAG_MSI_X) 3904 pci_disable_msix(pdev); 3905 3906 if (!mlx4_is_slave(dev)) 3907 mlx4_free_ownership(dev); 3908 3909 kfree(dev->caps.qp0_qkey); 3910 kfree(dev->caps.qp0_tunnel); 3911 kfree(dev->caps.qp0_proxy); 3912 kfree(dev->caps.qp1_tunnel); 3913 kfree(dev->caps.qp1_proxy); 3914 kfree(dev->dev_vfs); 3915 3916 mlx4_clean_dev(dev); 3917 priv->pci_dev_data = pci_dev_data; 3918 priv->removed = 1; 3919 } 3920 3921 static void mlx4_remove_one(struct pci_dev *pdev) 3922 { 3923 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 3924 struct mlx4_dev *dev = persist->dev; 
3925 struct mlx4_priv *priv = mlx4_priv(dev); 3926 struct devlink *devlink = priv_to_devlink(priv); 3927 int active_vfs = 0; 3928 3929 mutex_lock(&persist->interface_state_mutex); 3930 persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; 3931 mutex_unlock(&persist->interface_state_mutex); 3932 3933 /* Disabling SR-IOV is not allowed while there are active vf's */ 3934 if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { 3935 active_vfs = mlx4_how_many_lives_vf(dev); 3936 if (active_vfs) { 3937 pr_warn("Removing PF when there are active VF's !!\n"); 3938 pr_warn("Will not disable SR-IOV.\n"); 3939 } 3940 } 3941 3942 /* device marked to be under deletion running now without the lock 3943 * letting other tasks to be terminated 3944 */ 3945 if (persist->interface_state & MLX4_INTERFACE_STATE_UP) 3946 mlx4_unload_one(pdev); 3947 else 3948 mlx4_info(dev, "%s: interface is down\n", __func__); 3949 mlx4_catas_end(dev); 3950 if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { 3951 mlx4_warn(dev, "Disabling SR-IOV\n"); 3952 pci_disable_sriov(pdev); 3953 } 3954 3955 pci_release_regions(pdev); 3956 mlx4_pci_disable_device(dev); 3957 devlink_unregister(devlink); 3958 kfree(dev->persist); 3959 devlink_free(devlink); 3960 pci_set_drvdata(pdev, NULL); 3961 } 3962 3963 static int restore_current_port_types(struct mlx4_dev *dev, 3964 enum mlx4_port_type *types, 3965 enum mlx4_port_type *poss_types) 3966 { 3967 struct mlx4_priv *priv = mlx4_priv(dev); 3968 int err, i; 3969 3970 mlx4_stop_sense(dev); 3971 3972 mutex_lock(&priv->port_mutex); 3973 for (i = 0; i < dev->caps.num_ports; i++) 3974 dev->caps.possible_type[i + 1] = poss_types[i]; 3975 err = mlx4_change_port_types(dev, types); 3976 mlx4_start_sense(dev); 3977 mutex_unlock(&priv->port_mutex); 3978 3979 return err; 3980 } 3981 3982 int mlx4_restart_one(struct pci_dev *pdev) 3983 { 3984 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 3985 struct mlx4_dev *dev = persist->dev; 3986 struct mlx4_priv *priv = 
mlx4_priv(dev); 3987 int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; 3988 int pci_dev_data, err, total_vfs; 3989 3990 pci_dev_data = priv->pci_dev_data; 3991 total_vfs = dev->persist->num_vfs; 3992 memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); 3993 3994 mlx4_unload_one(pdev); 3995 err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); 3996 if (err) { 3997 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", 3998 __func__, pci_name(pdev), err); 3999 return err; 4000 } 4001 4002 err = restore_current_port_types(dev, dev->persist->curr_port_type, 4003 dev->persist->curr_port_poss_type); 4004 if (err) 4005 mlx4_err(dev, "could not restore original port types (%d)\n", 4006 err); 4007 4008 return err; 4009 } 4010 4011 static const struct pci_device_id mlx4_pci_table[] = { 4012 /* MT25408 "Hermon" SDR */ 4013 { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4014 /* MT25408 "Hermon" DDR */ 4015 { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4016 /* MT25408 "Hermon" QDR */ 4017 { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4018 /* MT25408 "Hermon" DDR PCIe gen2 */ 4019 { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4020 /* MT25408 "Hermon" QDR PCIe gen2 */ 4021 { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4022 /* MT25408 "Hermon" EN 10GigE */ 4023 { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4024 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 4025 { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4026 /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 4027 { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4028 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 4029 { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4030 /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 4031 { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4032 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 
4033 { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4034 /* MT26478 ConnectX2 40GigE PCIe gen2 */ 4035 { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, 4036 /* MT25400 Family [ConnectX-2 Virtual Function] */ 4037 { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, 4038 /* MT27500 Family [ConnectX-3] */ 4039 { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, 4040 /* MT27500 Family [ConnectX-3 Virtual Function] */ 4041 { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, 4042 { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ 4043 { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ 4044 { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ 4045 { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ 4046 { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ 4047 { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ 4048 { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ 4049 { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ 4050 { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ 4051 { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ 4052 { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ 4053 { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ 4054 { 0, } 4055 }; 4056 4057 MODULE_DEVICE_TABLE(pci, mlx4_pci_table); 4058 4059 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, 4060 pci_channel_state_t state) 4061 { 4062 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 4063 4064 mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); 4065 mlx4_enter_error_state(persist); 4066 4067 mutex_lock(&persist->interface_state_mutex); 4068 if (persist->interface_state & MLX4_INTERFACE_STATE_UP) 4069 mlx4_unload_one(pdev); 4070 4071 mutex_unlock(&persist->interface_state_mutex); 4072 if (state == pci_channel_io_perm_failure) 4073 return PCI_ERS_RESULT_DISCONNECT; 4074 4075 mlx4_pci_disable_device(persist->dev); 4076 return 
PCI_ERS_RESULT_NEED_RESET; 4077 } 4078 4079 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) 4080 { 4081 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 4082 struct mlx4_dev *dev = persist->dev; 4083 int err; 4084 4085 mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); 4086 err = mlx4_pci_enable_device(dev); 4087 if (err) { 4088 mlx4_err(dev, "Can not re-enable device, err=%d\n", err); 4089 return PCI_ERS_RESULT_DISCONNECT; 4090 } 4091 4092 pci_set_master(pdev); 4093 pci_restore_state(pdev); 4094 pci_save_state(pdev); 4095 return PCI_ERS_RESULT_RECOVERED; 4096 } 4097 4098 static void mlx4_pci_resume(struct pci_dev *pdev) 4099 { 4100 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 4101 struct mlx4_dev *dev = persist->dev; 4102 struct mlx4_priv *priv = mlx4_priv(dev); 4103 int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; 4104 int total_vfs; 4105 int err; 4106 4107 mlx4_err(dev, "%s was called\n", __func__); 4108 total_vfs = dev->persist->num_vfs; 4109 memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); 4110 4111 mutex_lock(&persist->interface_state_mutex); 4112 if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { 4113 err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, 4114 priv, 1); 4115 if (err) { 4116 mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n", 4117 __func__, err); 4118 goto end; 4119 } 4120 4121 err = restore_current_port_types(dev, dev->persist-> 4122 curr_port_type, dev->persist-> 4123 curr_port_poss_type); 4124 if (err) 4125 mlx4_err(dev, "could not restore original port types (%d)\n", err); 4126 } 4127 end: 4128 mutex_unlock(&persist->interface_state_mutex); 4129 4130 } 4131 4132 static void mlx4_shutdown(struct pci_dev *pdev) 4133 { 4134 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); 4135 4136 mlx4_info(persist->dev, "mlx4_shutdown was called\n"); 4137 mutex_lock(&persist->interface_state_mutex); 4138 if (persist->interface_state & MLX4_INTERFACE_STATE_UP) { 4139 /* Notify 
mlx4 clients that the kernel is being shut down */ 4140 persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN; 4141 mlx4_unload_one(pdev); 4142 } 4143 mutex_unlock(&persist->interface_state_mutex); 4144 } 4145 4146 static const struct pci_error_handlers mlx4_err_handler = { 4147 .error_detected = mlx4_pci_err_detected, 4148 .slot_reset = mlx4_pci_slot_reset, 4149 .resume = mlx4_pci_resume, 4150 }; 4151 4152 static struct pci_driver mlx4_driver = { 4153 .name = DRV_NAME, 4154 .id_table = mlx4_pci_table, 4155 .probe = mlx4_init_one, 4156 .shutdown = mlx4_shutdown, 4157 .remove = mlx4_remove_one, 4158 .err_handler = &mlx4_err_handler, 4159 }; 4160 4161 static int __init mlx4_verify_params(void) 4162 { 4163 if ((log_num_mac < 0) || (log_num_mac > 7)) { 4164 pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac); 4165 return -1; 4166 } 4167 4168 if (log_num_vlan != 0) 4169 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n", 4170 MLX4_LOG_NUM_VLANS); 4171 4172 if (use_prio != 0) 4173 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n"); 4174 4175 if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) { 4176 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n", 4177 log_mtts_per_seg); 4178 return -1; 4179 } 4180 4181 /* Check if module param for ports type has legal combination */ 4182 if (port_type_array[0] == false && port_type_array[1] == true) { 4183 pr_warn("Module parameter configuration ETH/IB is not supported. 
Switching to default configuration IB/IB\n"); 4184 port_type_array[0] = true; 4185 } 4186 4187 if (mlx4_log_num_mgm_entry_size < -7 || 4188 (mlx4_log_num_mgm_entry_size > 0 && 4189 (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || 4190 mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) { 4191 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n", 4192 mlx4_log_num_mgm_entry_size, 4193 MLX4_MIN_MGM_LOG_ENTRY_SIZE, 4194 MLX4_MAX_MGM_LOG_ENTRY_SIZE); 4195 return -1; 4196 } 4197 4198 return 0; 4199 } 4200 4201 static int __init mlx4_init(void) 4202 { 4203 int ret; 4204 4205 if (mlx4_verify_params()) 4206 return -EINVAL; 4207 4208 4209 mlx4_wq = create_singlethread_workqueue("mlx4"); 4210 if (!mlx4_wq) 4211 return -ENOMEM; 4212 4213 ret = pci_register_driver(&mlx4_driver); 4214 if (ret < 0) 4215 destroy_workqueue(mlx4_wq); 4216 return ret < 0 ? ret : 0; 4217 } 4218 4219 static void __exit mlx4_cleanup(void) 4220 { 4221 pci_unregister_driver(&mlx4_driver); 4222 destroy_workqueue(mlx4_wq); 4223 } 4224 4225 module_init(mlx4_init); 4226 module_exit(mlx4_cleanup); 4227