1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/module.h>
37 #include <linux/kernel.h>
38 #include <linux/init.h>
39 #include <linux/errno.h>
40 #include <linux/pci.h>
41 #include <linux/dma-mapping.h>
42 #include <linux/slab.h>
43 #include <linux/io-mapping.h>
44 #include <linux/delay.h>
45 #include <linux/kmod.h>
46 #include <linux/etherdevice.h>
47 #include <net/devlink.h>
48 
49 #include <uapi/rdma/mlx4-abi.h>
50 #include <linux/mlx4/device.h>
51 #include <linux/mlx4/doorbell.h>
52 
53 #include "mlx4.h"
54 #include "fw.h"
55 #include "icm.h"
56 
/* Module metadata: author, description, license and version strings. */
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

/*
 * Driver-wide workqueue pointer. It has external linkage (not static);
 * it is only defined here - its creation is not part of this section.
 */
struct workqueue_struct *mlx4_wq;
63 
#ifdef CONFIG_MLX4_DEBUG

/*
 * Debug tracing level, only built when CONFIG_MLX4_DEBUG is set.
 * Exposed as module parameter "debug_level" (mode 0644); per the parameter
 * description, tracing is enabled for values > 0.
 */
int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */
71 
#ifdef CONFIG_PCI_MSI

/*
 * MSI-X policy (read-only module parameter, mode 0444), default 1:
 *   0  - do not use MSI-X
 *   1  - use MSI-X
 *   >1 - use MSI-X, limited to msi_x interrupt vectors
 */
static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");

#else /* CONFIG_PCI_MSI */

/* Without CONFIG_PCI_MSI the knob collapses to the constant 0 (no MSI-X). */
#define msi_x (0)

#endif /* CONFIG_PCI_MSI */
83 
/*
 * Number of virtual functions to enable. Up to three byte values may be
 * given; per the parameter description they are ordered
 * port1, port2, port1+2. num_vfs_argc receives the number of values
 * actually supplied.
 */
static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte , &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

/*
 * Number of VFs the PF driver itself should probe, in the same
 * port1, port2, port1+2 layout as num_vfs. probe_vfs_argc receives the
 * number of values actually supplied.
 */
static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");
95 
/*
 * Log2 of the MGM entry size, exposed as read-only module parameter
 * "log_num_mgm_entry_size". Per the parameter description the valid range
 * is 7..12 and it determines the number of QPs per multicast group, while
 * -1 requests device-managed flow steering when available.
 * Defaults to MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE.
 */
static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248.range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");
105 
/*
 * Allow 64-byte CQEs/EQEs when the firmware supports them (default: true).
 * Read-only module parameter; also used as the initial value of the
 * matching devlink driverinit parameter.
 */
static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

/*
 * Use 4K UAR pages (default: false). As the description warns, this should
 * stay off when VFs that do not support 4K UARs are present.
 */
static bool enable_4k_uar;
module_param(enable_4k_uar, bool, 0444);
MODULE_PARM_DESC(enable_4k_uar,
		 "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)");
115 
/*
 * Union of the MLX4_FUNC_CAP_* bits grouped under the "PF context
 * behaviour" name: 64B EQE/CQE, EQE/CQE stride and DMFS A0 static.
 * NOTE(review): the users of this mask are outside this section.
 */
#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

/* Device flags named as persisting across a reset (currently SR-IOV only). */
#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

/* Version banner string built from DRV_NAME and DRV_VERSION. */
static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION "\n";
125 
/* Default resource profile: object counts, all powers of two. */
static const struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really the number of MTT segments */
};
135 
/*
 * Reduced resource profile with far smaller object counts than
 * default_profile (only rdmarc_per_qp is unchanged).
 * NOTE(review): presumably selected when mlx4_low_memory_profile() is
 * true - the selection code is not in this section.
 */
static const struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};
145 
/*
 * Log2 of the maximum number of MACs per Ethernet port (documented range
 * 1-7, default 7). Also seeds the devlink MAX_MACS driverinit value as
 * 1 << log_num_mac.
 */
static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

/*
 * Log2 of the maximum number of VLANs per Ethernet port (documented range
 * 0-7).
 * NOTE(review): mlx4_dev_cap() in this file uses MLX4_LOG_NUM_VLANS rather
 * than this variable; confirm whether the parameter is still consumed
 * anywhere.
 */
static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
/* Values used for log_num_vlans / log_num_macs under the low-memory profile */
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1
157 
/* Deprecated knob: steering by VLAN priority on Ethernet ports. */
static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

/*
 * Log2 of the number of MTT entries per segment (documented range 1-7).
 * Not static, so it is visible to other translation units.
 */
int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

/*
 * User-requested type for each of the two ports; index 0 is port 1.
 * MLX4_PORT_TYPE_NONE means "no user selection", in which case
 * mlx4_dev_cap() falls back to the firmware-suggested type.
 */
static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
				"1 for IB, 2 for Ethernet");
171 
/*
 * Per-PCI-device port type configuration record.
 * NOTE(review): users of this struct are outside this section; the field
 * descriptions below follow from the declarations only.
 */
struct mlx4_port_config {
	struct list_head list;	/* list linkage */
	/* One slot per port, sized MLX4_MAX_PORTS + 1 */
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;	/* PCI device this configuration refers to */
};
177 
/*
 * Atomic counter, initially 0.
 * NOTE(review): the name suggests it tracks physical functions that are
 * currently loading; its users are outside this section - confirm.
 */
static atomic_t pf_loading = ATOMIC_INIT(0);
179 
180 static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
181 				       struct devlink_param_gset_ctx *ctx)
182 {
183 	ctx->val.vbool = !!mlx4_internal_err_reset;
184 	return 0;
185 }
186 
187 static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
188 				       struct devlink_param_gset_ctx *ctx)
189 {
190 	mlx4_internal_err_reset = ctx->val.vbool;
191 	return 0;
192 }
193 
194 static int
195 mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
196 			       union devlink_param_value val,
197 			       struct netlink_ext_ack *extack)
198 {
199 	u32 value = val.vu32;
200 
201 	if (value < 1 || value > 128)
202 		return -ERANGE;
203 
204 	if (!is_power_of_2(value)) {
205 		NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2");
206 		return -EINVAL;
207 	}
208 
209 	return 0;
210 }
211 
/*
 * Driver-specific devlink parameter ids. BASE is pinned to
 * DEVLINK_PARAM_GENERIC_ID_MAX, so the ids that follow are numbered
 * after the generic range and cannot collide with it.
 */
enum mlx4_devlink_param_id {
	MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
	MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
	MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
};
217 
/*
 * devlink parameters offered by the driver:
 *  - INT_ERR_RESET (generic): runtime and driverinit modes, with get/set
 *    handlers backed by mlx4_internal_err_reset.
 *  - MAX_MACS (generic): driverinit only, validated by
 *    mlx4_devlink_max_macs_validate().
 *  - enable_64b_cqe_eqe, enable_4k_uar (driver-specific booleans):
 *    driverinit only, no callbacks.
 */
static const struct devlink_param mlx4_devlink_params[] = {
	DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      mlx4_devlink_ierr_reset_get,
			      mlx4_devlink_ierr_reset_set, NULL),
	DEVLINK_PARAM_GENERIC(MAX_MACS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, mlx4_devlink_max_macs_validate),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
			     "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			     NULL, NULL, NULL),
};
236 
237 static void mlx4_devlink_set_init_value(struct devlink *devlink, u32 param_id,
238 					union devlink_param_value init_val)
239 {
240 	struct mlx4_priv *priv = devlink_priv(devlink);
241 	struct mlx4_dev *dev = &priv->dev;
242 	int err;
243 
244 	err = devlink_param_driverinit_value_set(devlink, param_id, init_val);
245 	if (err)
246 		mlx4_warn(dev,
247 			  "devlink set parameter %u value failed (err = %d)",
248 			  param_id, err);
249 }
250 
251 static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
252 {
253 	union devlink_param_value value;
254 
255 	value.vbool = !!mlx4_internal_err_reset;
256 	mlx4_devlink_set_init_value(devlink,
257 				    DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
258 				    value);
259 
260 	value.vu32 = 1UL << log_num_mac;
261 	mlx4_devlink_set_init_value(devlink,
262 				    DEVLINK_PARAM_GENERIC_ID_MAX_MACS, value);
263 
264 	value.vbool = enable_64b_cqe_eqe;
265 	mlx4_devlink_set_init_value(devlink,
266 				    MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
267 				    value);
268 
269 	value.vbool = enable_4k_uar;
270 	mlx4_devlink_set_init_value(devlink,
271 				    MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
272 				    value);
273 }
274 
275 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
276 					      struct mlx4_dev_cap *dev_cap)
277 {
278 	/* The reserved_uars is calculated by system page size unit.
279 	 * Therefore, adjustment is added when the uar page size is less
280 	 * than the system page size
281 	 */
282 	dev->caps.reserved_uars	=
283 		max_t(int,
284 		      mlx4_get_num_reserved_uar(dev),
285 		      dev_cap->reserved_uars /
286 			(1 << (PAGE_SHIFT - dev->uar_page_shift)));
287 }
288 
289 int mlx4_check_port_params(struct mlx4_dev *dev,
290 			   enum mlx4_port_type *port_type)
291 {
292 	int i;
293 
294 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
295 		for (i = 0; i < dev->caps.num_ports - 1; i++) {
296 			if (port_type[i] != port_type[i + 1]) {
297 				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
298 				return -EINVAL;
299 			}
300 		}
301 	}
302 
303 	for (i = 0; i < dev->caps.num_ports; i++) {
304 		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
305 			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
306 				 i + 1);
307 			return -EINVAL;
308 		}
309 	}
310 	return 0;
311 }
312 
313 static void mlx4_set_port_mask(struct mlx4_dev *dev)
314 {
315 	int i;
316 
317 	for (i = 1; i <= dev->caps.num_ports; ++i)
318 		dev->caps.port_mask[i] = dev->caps.port_type[i];
319 }
320 
/*
 * Positive flag returned by mlx4_query_func() when the EQ/UAR figures in
 * dev_cap were refreshed from QUERY_FUNC.
 */
enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};
324 
325 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
326 {
327 	int err = 0;
328 	struct mlx4_func func;
329 
330 	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
331 		err = mlx4_QUERY_FUNC(dev, &func, 0);
332 		if (err) {
333 			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
334 			return err;
335 		}
336 		dev_cap->max_eqs = func.max_eq;
337 		dev_cap->reserved_eqs = func.rsvd_eqs;
338 		dev_cap->reserved_uars = func.rsvd_uars;
339 		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
340 	}
341 	return err;
342 }
343 
344 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
345 {
346 	struct mlx4_caps *dev_cap = &dev->caps;
347 
348 	/* FW not supporting or cancelled by user */
349 	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
350 	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
351 		return;
352 
353 	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
354 	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
355 	 */
356 	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
357 	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
358 		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
359 		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
360 		return;
361 	}
362 
363 	if (cache_line_size() == 128 || cache_line_size() == 256) {
364 		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
365 		/* Changing the real data inside CQE size to 32B */
366 		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
367 		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
368 
369 		if (mlx4_is_master(dev))
370 			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
371 	} else {
372 		if (cache_line_size() != 32  && cache_line_size() != 64)
373 			mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
374 		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
375 		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
376 	}
377 }
378 
379 static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
380 			  struct mlx4_port_cap *port_cap)
381 {
382 	dev->caps.vl_cap[port]	    = port_cap->max_vl;
383 	dev->caps.ib_mtu_cap[port]	    = port_cap->ib_mtu;
384 	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
385 	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
386 	/* set gid and pkey table operating lengths by default
387 	 * to non-sriov values
388 	 */
389 	dev->caps.gid_table_len[port]  = port_cap->max_gids;
390 	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
391 	dev->caps.port_width_cap[port] = port_cap->max_port_width;
392 	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
393 	dev->caps.max_tc_eth	       = port_cap->max_tc_eth;
394 	dev->caps.def_mac[port]        = port_cap->def_mac;
395 	dev->caps.supported_type[port] = port_cap->supported_port_types;
396 	dev->caps.suggested_type[port] = port_cap->suggested_type;
397 	dev->caps.default_sense[port] = port_cap->default_sense;
398 	dev->caps.trans_type[port]	    = port_cap->trans_type;
399 	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
400 	dev->caps.wavelength[port]     = port_cap->wavelength;
401 	dev->caps.trans_code[port]     = port_cap->trans_code;
402 
403 	return 0;
404 }
405 
/*
 * Query the capabilities of @port into @port_cap via QUERY_PORT.
 *
 * Returns the result of mlx4_QUERY_PORT(); a failure is also logged.
 */
static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int rc = mlx4_QUERY_PORT(dev, port, port_cap);

	if (!rc)
		return 0;

	mlx4_err(dev, "QUERY_PORT command failed.\n");
	return rc;
}
418 
419 static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
420 {
421 	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
422 		return;
423 
424 	if (mlx4_is_mfunc(dev)) {
425 		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
426 		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
427 		return;
428 	}
429 
430 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
431 		mlx4_dbg(dev,
432 			 "Keep FCS is not supported - Disabling Ignore FCS");
433 		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
434 		return;
435 	}
436 }
437 
/* QP range reserved for A0 steering when FW does not report one */
#define MLX4_A0_STEERING_TABLE_SIZE	256
/*
 * Query the device capabilities and derive the driver's working
 * capability set in dev->caps.
 *
 * @dev:     device being initialised
 * @dev_cap: filled by mlx4_QUERY_DEV_CAP(); a few of its flags2 bits may
 *           also be cleared here (see the 64B CQE/EQE handling below)
 *
 * Steps, in order:
 *  1. Run QUERY_DEV_CAP and reject devices whose minimum page size,
 *     port count or UAR size cannot be handled.
 *  2. Copy per-port and global limits into dev->caps / dev->phys_caps.
 *  3. Pick the UAR page shift (non-slave functions only).
 *  4. Apply policy: PHV, port sensing, MAC/VLAN table sizes, port types,
 *     reserved QP regions, DMFS A0 steering, 64B CQE/EQE and stride,
 *     ETS, ignore-FCS.
 *
 * Returns 0 on success, -ENODEV for an unsupported device, or the error
 * returned by mlx4_QUERY_DEV_CAP().
 */
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	/* Sanity checks on what the device reported */
	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	/* The UAR region must fit inside PCI resource 2 */
	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	/* Without the SYS_EQS capability fall back to MLX4_MAX_EQ_NUM */
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	/*
	 * Copy the per-port capabilities (ports are numbered from 1).
	 * _mlx4_dev_port() as defined in this file only copies fields and
	 * always returns 0.
	 */
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	/* SRQ scatter entries are one less than the RQ limit */
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	/*
	 * NOTE(review): dev->caps.flags is tested here but is only assigned
	 * from dev_cap->flags a few lines below, so these two checks see the
	 * value dev->caps.flags had on entry to this function. Confirm this
	 * ordering is intended.
	 */
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	/* Mask with every bit at or above the minimum page size set */
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
	dev->caps.wol_port[2]          = dev_cap->wol_port[2];

	/* Save uar page shift */
	if (!mlx4_is_slave(dev)) {
		/* Virtual PCI function needs to determine UAR page size from
		 * firmware. Only master PCI function can set the uar page size
		 */
		if (enable_4k_uar || !dev->persist->num_vfs)
			dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
		else
			dev->uar_page_shift = PAGE_SHIFT;

		/* Depends on the uar_page_shift chosen just above */
		mlx4_set_num_reserved_uars(dev, dev_cap);
	}

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
		struct mlx4_init_hca_param hca_param;

		memset(&hca_param, 0, sizeof(hca_param));
		err = mlx4_QUERY_HCA(dev, &hca_param);
		/* Turn off the PHV_EN flag when phv_check_en is set or the
		 * query failed. phv_check_en is a HW check that parses the
		 * packet and verifies the phv bit was reported correctly in
		 * the wqe. To allow QinQ, the PHV_EN flag must be set and
		 * phv_check_en must be cleared, otherwise QinQ packets will
		 * be dropped by the HW. The query error itself is not
		 * propagated.
		 */
		if (err || hca_param.phv_check_en)
			dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
	}

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	/* Requested MAC/VLAN table sizes; capped per port further below */
	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs  = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint.
				 * port_type_array has two entries and is
				 * indexed from 0, hence i - 1.
				 */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port
		 *    (supported_type is MLX4_PORT_TYPE_AUTO).
		 * 2. The device has the DPDP flag, i.e. ports may have
		 *    different types.
		 * 3. FW declared that it supports link sensing
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning. The command's return value is not
		 * checked; a result of NONE leaves port_type untouched.
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		/*
		 * log_num_macs / log_num_vlans are device-wide, so they end up
		 * at the smallest limit reported by any port.
		 */
		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	/* Per-VF QoS is dropped for a user-requested port1=IB, port2=ETH setup */
	if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
	    (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
	    (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
		mlx4_warn(dev,
			  "Granular QoS per VF not supported with IB/Eth configuration\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
	}

	dev->caps.max_counters = dev_cap->max_counters;

	/* Reserved QP regions */
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	/* ETH and FC address regions: one QP per MAC x VLAN x port */
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	/* DMFS high-rate QPN base: FW value only when flow steering is enabled */
	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	/*
	 * A FW-reported range enables A0 steering; otherwise mark it as not
	 * supported and fall back to the default base and table size.
	 */
	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.rl_caps = dev_cap->rl_caps;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	/* Total covers the FW, ETH_ADDR, FC_ADDR and FC_EXCH regions only */
	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	/* User disabled 64B CQE/EQE: strip the related capabilities */
	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		/*
		 * NOTE(review): the stride bits are cleared in dev_cap->flags2
		 * (the query result), not in dev->caps.flags2, which was
		 * copied earlier. With the 64B flags cleared above,
		 * mlx4_enable_cqe_eqe_stride() below still clears the stride
		 * bits in dev->caps.flags2 for non-slave functions.
		 */
		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	/* The master advertises 64B EQE/CQE in its function capabilities */
	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		/* Reserve BF QPs only when BlueFlame registers exist */
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;

		/* Older FW: SET_ETH_SCHED present without ETS_CFG */
		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
			mlx4_warn(dev, "Old device ETS support detected\n");
			mlx4_warn(dev, "Consider upgrading device FW.\n");
			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
		}

	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	mlx4_enable_ignore_fcs(dev);

	return 0;
}
721 
722 /*The function checks if there are live vf, return the num of them*/
723 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
724 {
725 	struct mlx4_priv *priv = mlx4_priv(dev);
726 	struct mlx4_slave_state *s_state;
727 	int i;
728 	int ret = 0;
729 
730 	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
731 		s_state = &priv->mfunc.master.slave_state[i];
732 		if (s_state->active && s_state->last_cmd !=
733 		    MLX4_COMM_CMD_RESET) {
734 			mlx4_warn(dev, "%s: slave: %d is still active\n",
735 				  __func__, i);
736 			ret++;
737 		}
738 	}
739 	return ret;
740 }
741 
742 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
743 {
744 	u32 qk = MLX4_RESERVED_QKEY_BASE;
745 
746 	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
747 	    qpn < dev->phys_caps.base_proxy_sqpn)
748 		return -EINVAL;
749 
750 	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
751 		/* tunnel qp */
752 		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
753 	else
754 		qk += qpn - dev->phys_caps.base_proxy_sqpn;
755 	*qkey = qk;
756 	return 0;
757 }
758 EXPORT_SYMBOL(mlx4_get_parav_qkey);
759 
760 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
761 {
762 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
763 
764 	if (!mlx4_is_master(dev))
765 		return;
766 
767 	priv->virt2phys_pkey[slave][port - 1][i] = val;
768 }
769 EXPORT_SYMBOL(mlx4_sync_pkey_table);
770 
771 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
772 {
773 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
774 
775 	if (!mlx4_is_master(dev))
776 		return;
777 
778 	priv->slave_node_guids[slave] = guid;
779 }
780 EXPORT_SYMBOL(mlx4_put_slave_node_guid);
781 
782 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
783 {
784 	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
785 
786 	if (!mlx4_is_master(dev))
787 		return 0;
788 
789 	return priv->slave_node_guids[slave];
790 }
791 EXPORT_SYMBOL(mlx4_get_slave_node_guid);
792 
793 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
794 {
795 	struct mlx4_priv *priv = mlx4_priv(dev);
796 	struct mlx4_slave_state *s_slave;
797 
798 	if (!mlx4_is_master(dev))
799 		return 0;
800 
801 	s_slave = &priv->mfunc.master.slave_state[slave];
802 	return !!s_slave->active;
803 }
804 EXPORT_SYMBOL(mlx4_is_slave_active);
805 
806 void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
807 				       struct _rule_hw *eth_header)
808 {
809 	if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
810 	    is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
811 		struct mlx4_net_trans_rule_hw_eth *eth =
812 			(struct mlx4_net_trans_rule_hw_eth *)eth_header;
813 		struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
814 		bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
815 			next_rule->rsvd == 0;
816 
817 		if (last_rule)
818 			ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
819 	}
820 }
821 EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);
822 
823 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
824 				       struct mlx4_dev_cap *dev_cap,
825 				       struct mlx4_init_hca_param *hca_param)
826 {
827 	dev->caps.steering_mode = hca_param->steering_mode;
828 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
829 		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
830 		dev->caps.fs_log_max_ucast_qp_range_size =
831 			dev_cap->fs_log_max_ucast_qp_range_size;
832 	} else
833 		dev->caps.num_qp_per_mgm =
834 			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
835 
836 	mlx4_dbg(dev, "Steering mode is: %s\n",
837 		 mlx4_steering_mode_str(dev->caps.steering_mode));
838 }
839 
840 static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
841 {
842 	kfree(dev->caps.spec_qps);
843 	dev->caps.spec_qps = NULL;
844 }
845 
846 static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
847 {
848 	struct mlx4_func_cap *func_cap = NULL;
849 	struct mlx4_caps *caps = &dev->caps;
850 	int i, err = 0;
851 
852 	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
853 	caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);
854 
855 	if (!func_cap || !caps->spec_qps) {
856 		mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
857 		err = -ENOMEM;
858 		goto err_mem;
859 	}
860 
861 	for (i = 1; i <= caps->num_ports; ++i) {
862 		err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
863 		if (err) {
864 			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
865 				 i, err);
866 			goto err_mem;
867 		}
868 		caps->spec_qps[i - 1] = func_cap->spec_qps;
869 		caps->port_mask[i] = caps->port_type[i];
870 		caps->phys_port_id[i] = func_cap->phys_port_id;
871 		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
872 						      &caps->gid_table_len[i],
873 						      &caps->pkey_table_len[i]);
874 		if (err) {
875 			mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
876 				 i, err);
877 			goto err_mem;
878 		}
879 	}
880 
881 err_mem:
882 	if (err)
883 		mlx4_slave_destroy_special_qp_cap(dev);
884 	kfree(func_cap);
885 	return err;
886 }
887 
888 static int mlx4_slave_cap(struct mlx4_dev *dev)
889 {
890 	int			   err;
891 	u32			   page_size;
892 	struct mlx4_dev_cap	   *dev_cap = NULL;
893 	struct mlx4_func_cap	   *func_cap = NULL;
894 	struct mlx4_init_hca_param *hca_param = NULL;
895 
896 	hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
897 	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
898 	dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
899 	if (!hca_param || !func_cap || !dev_cap) {
900 		mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
901 		err = -ENOMEM;
902 		goto free_mem;
903 	}
904 
905 	err = mlx4_QUERY_HCA(dev, hca_param);
906 	if (err) {
907 		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
908 		goto free_mem;
909 	}
910 
911 	/* fail if the hca has an unknown global capability
912 	 * at this time global_caps should be always zeroed
913 	 */
914 	if (hca_param->global_caps) {
915 		mlx4_err(dev, "Unknown hca global capabilities\n");
916 		err = -EINVAL;
917 		goto free_mem;
918 	}
919 
920 	dev->caps.hca_core_clock = hca_param->hca_core_clock;
921 
922 	dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
923 	err = mlx4_dev_cap(dev, dev_cap);
924 	if (err) {
925 		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
926 		goto free_mem;
927 	}
928 
929 	err = mlx4_QUERY_FW(dev);
930 	if (err)
931 		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
932 
933 	page_size = ~dev->caps.page_size_cap + 1;
934 	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
935 	if (page_size > PAGE_SIZE) {
936 		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
937 			 page_size, PAGE_SIZE);
938 		err = -ENODEV;
939 		goto free_mem;
940 	}
941 
942 	/* Set uar_page_shift for VF */
943 	dev->uar_page_shift = hca_param->uar_page_sz + 12;
944 
945 	/* Make sure the master uar page size is valid */
946 	if (dev->uar_page_shift > PAGE_SHIFT) {
947 		mlx4_err(dev,
948 			 "Invalid configuration: uar page size is larger than system page size\n");
949 		err = -ENODEV;
950 		goto free_mem;
951 	}
952 
953 	/* Set reserved_uars based on the uar_page_shift */
954 	mlx4_set_num_reserved_uars(dev, dev_cap);
955 
956 	/* Although uar page size in FW differs from system page size,
957 	 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
958 	 * still works with assumption that uar page size == system page size
959 	 */
960 	dev->caps.uar_page_size = PAGE_SIZE;
961 
962 	err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
963 	if (err) {
964 		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
965 			 err);
966 		goto free_mem;
967 	}
968 
969 	if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
970 	    PF_CONTEXT_BEHAVIOUR_MASK) {
971 		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
972 			 func_cap->pf_context_behaviour,
973 			 PF_CONTEXT_BEHAVIOUR_MASK);
974 		err = -EINVAL;
975 		goto free_mem;
976 	}
977 
978 	dev->caps.num_ports		= func_cap->num_ports;
979 	dev->quotas.qp			= func_cap->qp_quota;
980 	dev->quotas.srq			= func_cap->srq_quota;
981 	dev->quotas.cq			= func_cap->cq_quota;
982 	dev->quotas.mpt			= func_cap->mpt_quota;
983 	dev->quotas.mtt			= func_cap->mtt_quota;
984 	dev->caps.num_qps		= 1 << hca_param->log_num_qps;
985 	dev->caps.num_srqs		= 1 << hca_param->log_num_srqs;
986 	dev->caps.num_cqs		= 1 << hca_param->log_num_cqs;
987 	dev->caps.num_mpts		= 1 << hca_param->log_mpt_sz;
988 	dev->caps.num_eqs		= func_cap->max_eq;
989 	dev->caps.reserved_eqs		= func_cap->reserved_eq;
990 	dev->caps.reserved_lkey		= func_cap->reserved_lkey;
991 	dev->caps.num_pds               = MLX4_NUM_PDS;
992 	dev->caps.num_mgms              = 0;
993 	dev->caps.num_amgms             = 0;
994 
995 	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
996 		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
997 			 dev->caps.num_ports, MLX4_MAX_PORTS);
998 		err = -ENODEV;
999 		goto free_mem;
1000 	}
1001 
1002 	mlx4_replace_zero_macs(dev);
1003 
1004 	err = mlx4_slave_special_qp_cap(dev);
1005 	if (err) {
1006 		mlx4_err(dev, "Set special QP caps failed. aborting\n");
1007 		goto free_mem;
1008 	}
1009 
1010 	if (dev->caps.uar_page_size * (dev->caps.num_uars -
1011 				       dev->caps.reserved_uars) >
1012 				       pci_resource_len(dev->persist->pdev,
1013 							2)) {
1014 		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
1015 			 dev->caps.uar_page_size * dev->caps.num_uars,
1016 			 (unsigned long long)
1017 			 pci_resource_len(dev->persist->pdev, 2));
1018 		err = -ENOMEM;
1019 		goto err_mem;
1020 	}
1021 
1022 	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
1023 		dev->caps.eqe_size   = 64;
1024 		dev->caps.eqe_factor = 1;
1025 	} else {
1026 		dev->caps.eqe_size   = 32;
1027 		dev->caps.eqe_factor = 0;
1028 	}
1029 
1030 	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
1031 		dev->caps.cqe_size   = 64;
1032 		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
1033 	} else {
1034 		dev->caps.cqe_size   = 32;
1035 	}
1036 
1037 	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
1038 		dev->caps.eqe_size = hca_param->eqe_size;
1039 		dev->caps.eqe_factor = 0;
1040 	}
1041 
1042 	if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
1043 		dev->caps.cqe_size = hca_param->cqe_size;
1044 		/* User still need to know when CQE > 32B */
1045 		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
1046 	}
1047 
1048 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1049 	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
1050 
1051 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
1052 	mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");
1053 
1054 	slave_adjust_steering_mode(dev, dev_cap, hca_param);
1055 	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
1056 		 hca_param->rss_ip_frags ? "on" : "off");
1057 
1058 	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
1059 	    dev->caps.bf_reg_size)
1060 		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
1061 
1062 	if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
1063 		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
1064 
1065 err_mem:
1066 	if (err)
1067 		mlx4_slave_destroy_special_qp_cap(dev);
1068 free_mem:
1069 	kfree(hca_param);
1070 	kfree(func_cap);
1071 	kfree(dev_cap);
1072 	return err;
1073 }
1074 
1075 static void mlx4_request_modules(struct mlx4_dev *dev)
1076 {
1077 	int port;
1078 	int has_ib_port = false;
1079 	int has_eth_port = false;
1080 #define EN_DRV_NAME	"mlx4_en"
1081 #define IB_DRV_NAME	"mlx4_ib"
1082 
1083 	for (port = 1; port <= dev->caps.num_ports; port++) {
1084 		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
1085 			has_ib_port = true;
1086 		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
1087 			has_eth_port = true;
1088 	}
1089 
1090 	if (has_eth_port)
1091 		request_module_nowait(EN_DRV_NAME);
1092 	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
1093 		request_module_nowait(IB_DRV_NAME);
1094 }
1095 
1096 /*
1097  * Change the port configuration of the device.
1098  * Every user of this function must hold the port mutex.
1099  */
1100 int mlx4_change_port_types(struct mlx4_dev *dev,
1101 			   enum mlx4_port_type *port_types)
1102 {
1103 	int err = 0;
1104 	int change = 0;
1105 	int port;
1106 
1107 	for (port = 0; port <  dev->caps.num_ports; port++) {
1108 		/* Change the port type only if the new type is different
1109 		 * from the current, and not set to Auto */
1110 		if (port_types[port] != dev->caps.port_type[port + 1])
1111 			change = 1;
1112 	}
1113 	if (change) {
1114 		mlx4_unregister_device(dev);
1115 		for (port = 1; port <= dev->caps.num_ports; port++) {
1116 			mlx4_CLOSE_PORT(dev, port);
1117 			dev->caps.port_type[port] = port_types[port - 1];
1118 			err = mlx4_SET_PORT(dev, port, -1);
1119 			if (err) {
1120 				mlx4_err(dev, "Failed to set port %d, aborting\n",
1121 					 port);
1122 				goto out;
1123 			}
1124 		}
1125 		mlx4_set_port_mask(dev);
1126 		err = mlx4_register_device(dev);
1127 		if (err) {
1128 			mlx4_err(dev, "Failed to register device\n");
1129 			goto out;
1130 		}
1131 		mlx4_request_modules(dev);
1132 	}
1133 
1134 out:
1135 	return err;
1136 }
1137 
1138 static ssize_t show_port_type(struct device *dev,
1139 			      struct device_attribute *attr,
1140 			      char *buf)
1141 {
1142 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1143 						   port_attr);
1144 	struct mlx4_dev *mdev = info->dev;
1145 	char type[8];
1146 
1147 	sprintf(type, "%s",
1148 		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
1149 		"ib" : "eth");
1150 	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
1151 		sprintf(buf, "auto (%s)\n", type);
1152 	else
1153 		sprintf(buf, "%s\n", type);
1154 
1155 	return strlen(buf);
1156 }
1157 
1158 static int __set_port_type(struct mlx4_port_info *info,
1159 			   enum mlx4_port_type port_type)
1160 {
1161 	struct mlx4_dev *mdev = info->dev;
1162 	struct mlx4_priv *priv = mlx4_priv(mdev);
1163 	enum mlx4_port_type types[MLX4_MAX_PORTS];
1164 	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
1165 	int i;
1166 	int err = 0;
1167 
1168 	if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
1169 		mlx4_err(mdev,
1170 			 "Requested port type for port %d is not supported on this HCA\n",
1171 			 info->port);
1172 		err = -EINVAL;
1173 		goto err_sup;
1174 	}
1175 
1176 	mlx4_stop_sense(mdev);
1177 	mutex_lock(&priv->port_mutex);
1178 	info->tmp_type = port_type;
1179 
1180 	/* Possible type is always the one that was delivered */
1181 	mdev->caps.possible_type[info->port] = info->tmp_type;
1182 
1183 	for (i = 0; i < mdev->caps.num_ports; i++) {
1184 		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
1185 					mdev->caps.possible_type[i+1];
1186 		if (types[i] == MLX4_PORT_TYPE_AUTO)
1187 			types[i] = mdev->caps.port_type[i+1];
1188 	}
1189 
1190 	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
1191 	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
1192 		for (i = 1; i <= mdev->caps.num_ports; i++) {
1193 			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
1194 				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
1195 				err = -EINVAL;
1196 			}
1197 		}
1198 	}
1199 	if (err) {
1200 		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
1201 		goto out;
1202 	}
1203 
1204 	mlx4_do_sense_ports(mdev, new_types, types);
1205 
1206 	err = mlx4_check_port_params(mdev, new_types);
1207 	if (err)
1208 		goto out;
1209 
1210 	/* We are about to apply the changes after the configuration
1211 	 * was verified, no need to remember the temporary types
1212 	 * any more */
1213 	for (i = 0; i < mdev->caps.num_ports; i++)
1214 		priv->port[i + 1].tmp_type = 0;
1215 
1216 	err = mlx4_change_port_types(mdev, new_types);
1217 
1218 out:
1219 	mlx4_start_sense(mdev);
1220 	mutex_unlock(&priv->port_mutex);
1221 err_sup:
1222 	return err;
1223 }
1224 
1225 static ssize_t set_port_type(struct device *dev,
1226 			     struct device_attribute *attr,
1227 			     const char *buf, size_t count)
1228 {
1229 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1230 						   port_attr);
1231 	struct mlx4_dev *mdev = info->dev;
1232 	enum mlx4_port_type port_type;
1233 	static DEFINE_MUTEX(set_port_type_mutex);
1234 	int err;
1235 
1236 	mutex_lock(&set_port_type_mutex);
1237 
1238 	if (!strcmp(buf, "ib\n")) {
1239 		port_type = MLX4_PORT_TYPE_IB;
1240 	} else if (!strcmp(buf, "eth\n")) {
1241 		port_type = MLX4_PORT_TYPE_ETH;
1242 	} else if (!strcmp(buf, "auto\n")) {
1243 		port_type = MLX4_PORT_TYPE_AUTO;
1244 	} else {
1245 		mlx4_err(mdev, "%s is not supported port type\n", buf);
1246 		err = -EINVAL;
1247 		goto err_out;
1248 	}
1249 
1250 	err = __set_port_type(info, port_type);
1251 
1252 err_out:
1253 	mutex_unlock(&set_port_type_mutex);
1254 
1255 	return err ? err : count;
1256 }
1257 
/* MTU codes: code N corresponds to an MTU of 128 << N bytes. */
enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

/*
 * Convert an MTU in bytes to its enum ibta_mtu code.
 * Returns -1 when @mtu is not one of 256, 512, 1024, 2048 or 4096.
 */
static inline int int_to_ibta_mtu(int mtu)
{
	int code;

	for (code = IB_MTU_256; code <= IB_MTU_4096; code++) {
		if (mtu == (128 << code))
			return code;
	}

	return -1;
}

/*
 * Convert an enum ibta_mtu code to the MTU in bytes.
 * Returns -1 when @mtu is not one of the five defined codes.
 */
static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	if (mtu < IB_MTU_256 || mtu > IB_MTU_4096)
		return -1;

	return 128 << mtu;
}
1289 
1290 static ssize_t show_port_ib_mtu(struct device *dev,
1291 			     struct device_attribute *attr,
1292 			     char *buf)
1293 {
1294 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1295 						   port_mtu_attr);
1296 	struct mlx4_dev *mdev = info->dev;
1297 
1298 	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1299 		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1300 
1301 	sprintf(buf, "%d\n",
1302 			ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1303 	return strlen(buf);
1304 }
1305 
1306 static ssize_t set_port_ib_mtu(struct device *dev,
1307 			     struct device_attribute *attr,
1308 			     const char *buf, size_t count)
1309 {
1310 	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1311 						   port_mtu_attr);
1312 	struct mlx4_dev *mdev = info->dev;
1313 	struct mlx4_priv *priv = mlx4_priv(mdev);
1314 	int err, port, mtu, ibta_mtu = -1;
1315 
1316 	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
1317 		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1318 		return -EINVAL;
1319 	}
1320 
1321 	err = kstrtoint(buf, 0, &mtu);
1322 	if (!err)
1323 		ibta_mtu = int_to_ibta_mtu(mtu);
1324 
1325 	if (err || ibta_mtu < 0) {
1326 		mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
1327 		return -EINVAL;
1328 	}
1329 
1330 	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1331 
1332 	mlx4_stop_sense(mdev);
1333 	mutex_lock(&priv->port_mutex);
1334 	mlx4_unregister_device(mdev);
1335 	for (port = 1; port <= mdev->caps.num_ports; port++) {
1336 		mlx4_CLOSE_PORT(mdev, port);
1337 		err = mlx4_SET_PORT(mdev, port, -1);
1338 		if (err) {
1339 			mlx4_err(mdev, "Failed to set port %d, aborting\n",
1340 				 port);
1341 			goto err_set_port;
1342 		}
1343 	}
1344 	err = mlx4_register_device(mdev);
1345 err_set_port:
1346 	mutex_unlock(&priv->port_mutex);
1347 	mlx4_start_sense(mdev);
1348 	return err ? err : count;
1349 }
1350 
1351 /* bond for multi-function device */
1352 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1353 static int mlx4_mf_bond(struct mlx4_dev *dev)
1354 {
1355 	int err = 0;
1356 	int nvfs;
1357 	struct mlx4_slaves_pport slaves_port1;
1358 	struct mlx4_slaves_pport slaves_port2;
1359 	DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1360 
1361 	slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1362 	slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1363 	bitmap_and(slaves_port_1_2,
1364 		   slaves_port1.slaves, slaves_port2.slaves,
1365 		   dev->persist->num_vfs + 1);
1366 
1367 	/* only single port vfs are allowed */
1368 	if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1369 		mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
1370 		return -EINVAL;
1371 	}
1372 
1373 	/* number of virtual functions is number of total functions minus one
1374 	 * physical function for each port.
1375 	 */
1376 	nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1377 		bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
1378 
1379 	/* limit on maximum allowed VFs */
1380 	if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
1381 		mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
1382 			  nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
1383 		return -EINVAL;
1384 	}
1385 
1386 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1387 		mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
1388 		return -EINVAL;
1389 	}
1390 
1391 	err = mlx4_bond_mac_table(dev);
1392 	if (err)
1393 		return err;
1394 	err = mlx4_bond_vlan_table(dev);
1395 	if (err)
1396 		goto err1;
1397 	err = mlx4_bond_fs_rules(dev);
1398 	if (err)
1399 		goto err2;
1400 
1401 	return 0;
1402 err2:
1403 	(void)mlx4_unbond_vlan_table(dev);
1404 err1:
1405 	(void)mlx4_unbond_mac_table(dev);
1406 	return err;
1407 }
1408 
/*
 * Undo multi-function bonding: flow steering rules, then the MAC table,
 * then the VLAN table. Every step is attempted even if an earlier one
 * failed, and each failure is logged.
 *
 * Returns 0 when all steps succeed, otherwise the error of the last step
 * that failed.
 */
static int mlx4_mf_unbond(struct mlx4_dev *dev)
{
	int fs_err, mac_err, vlan_err;

	fs_err = mlx4_unbond_fs_rules(dev);
	if (fs_err)
		mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", fs_err);

	mac_err = mlx4_unbond_mac_table(dev);
	if (mac_err)
		mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", mac_err);

	vlan_err = mlx4_unbond_vlan_table(dev);
	if (vlan_err)
		mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", vlan_err);

	if (vlan_err)
		return vlan_err;
	if (mac_err)
		return mac_err;
	return fs_err;
}
1428 
1429 int mlx4_bond(struct mlx4_dev *dev)
1430 {
1431 	int ret = 0;
1432 	struct mlx4_priv *priv = mlx4_priv(dev);
1433 
1434 	mutex_lock(&priv->bond_mutex);
1435 
1436 	if (!mlx4_is_bonded(dev)) {
1437 		ret = mlx4_do_bond(dev, true);
1438 		if (ret)
1439 			mlx4_err(dev, "Failed to bond device: %d\n", ret);
1440 		if (!ret && mlx4_is_master(dev)) {
1441 			ret = mlx4_mf_bond(dev);
1442 			if (ret) {
1443 				mlx4_err(dev, "bond for multifunction failed\n");
1444 				mlx4_do_bond(dev, false);
1445 			}
1446 		}
1447 	}
1448 
1449 	mutex_unlock(&priv->bond_mutex);
1450 	if (!ret)
1451 		mlx4_dbg(dev, "Device is bonded\n");
1452 
1453 	return ret;
1454 }
1455 EXPORT_SYMBOL_GPL(mlx4_bond);
1456 
1457 int mlx4_unbond(struct mlx4_dev *dev)
1458 {
1459 	int ret = 0;
1460 	struct mlx4_priv *priv = mlx4_priv(dev);
1461 
1462 	mutex_lock(&priv->bond_mutex);
1463 
1464 	if (mlx4_is_bonded(dev)) {
1465 		int ret2 = 0;
1466 
1467 		ret = mlx4_do_bond(dev, false);
1468 		if (ret)
1469 			mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1470 		if (mlx4_is_master(dev))
1471 			ret2 = mlx4_mf_unbond(dev);
1472 		if (ret2) {
1473 			mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1474 			ret = ret2;
1475 		}
1476 	}
1477 
1478 	mutex_unlock(&priv->bond_mutex);
1479 	if (!ret)
1480 		mlx4_dbg(dev, "Device is unbonded\n");
1481 
1482 	return ret;
1483 }
1484 EXPORT_SYMBOL_GPL(mlx4_unbond);
1485 
1486 
1487 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1488 {
1489 	u8 port1 = v2p->port1;
1490 	u8 port2 = v2p->port2;
1491 	struct mlx4_priv *priv = mlx4_priv(dev);
1492 	int err;
1493 
1494 	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1495 		return -EOPNOTSUPP;
1496 
1497 	mutex_lock(&priv->bond_mutex);
1498 
1499 	/* zero means keep current mapping for this port */
1500 	if (port1 == 0)
1501 		port1 = priv->v2p.port1;
1502 	if (port2 == 0)
1503 		port2 = priv->v2p.port2;
1504 
1505 	if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1506 	    (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1507 	    (port1 == 2 && port2 == 1)) {
1508 		/* besides boundary checks cross mapping makes
1509 		 * no sense and therefore not allowed */
1510 		err = -EINVAL;
1511 	} else if ((port1 == priv->v2p.port1) &&
1512 		 (port2 == priv->v2p.port2)) {
1513 		err = 0;
1514 	} else {
1515 		err = mlx4_virt2phy_port_map(dev, port1, port2);
1516 		if (!err) {
1517 			mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1518 				 port1, port2);
1519 			priv->v2p.port1 = port1;
1520 			priv->v2p.port2 = port2;
1521 		} else {
1522 			mlx4_err(dev, "Failed to change port mape: %d\n", err);
1523 		}
1524 	}
1525 
1526 	mutex_unlock(&priv->bond_mutex);
1527 	return err;
1528 }
1529 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
1530 
1531 static int mlx4_load_fw(struct mlx4_dev *dev)
1532 {
1533 	struct mlx4_priv *priv = mlx4_priv(dev);
1534 	int err;
1535 
1536 	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1537 					 GFP_HIGHUSER | __GFP_NOWARN, 0);
1538 	if (!priv->fw.fw_icm) {
1539 		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1540 		return -ENOMEM;
1541 	}
1542 
1543 	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1544 	if (err) {
1545 		mlx4_err(dev, "MAP_FA command failed, aborting\n");
1546 		goto err_free;
1547 	}
1548 
1549 	err = mlx4_RUN_FW(dev);
1550 	if (err) {
1551 		mlx4_err(dev, "RUN_FW command failed, aborting\n");
1552 		goto err_unmap_fa;
1553 	}
1554 
1555 	return 0;
1556 
1557 err_unmap_fa:
1558 	mlx4_UNMAP_FA(dev);
1559 
1560 err_free:
1561 	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1562 	return err;
1563 }
1564 
1565 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1566 				int cmpt_entry_sz)
1567 {
1568 	struct mlx4_priv *priv = mlx4_priv(dev);
1569 	int err;
1570 	int num_eqs;
1571 
1572 	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1573 				  cmpt_base +
1574 				  ((u64) (MLX4_CMPT_TYPE_QP *
1575 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1576 				  cmpt_entry_sz, dev->caps.num_qps,
1577 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1578 				  0, 0);
1579 	if (err)
1580 		goto err;
1581 
1582 	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1583 				  cmpt_base +
1584 				  ((u64) (MLX4_CMPT_TYPE_SRQ *
1585 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1586 				  cmpt_entry_sz, dev->caps.num_srqs,
1587 				  dev->caps.reserved_srqs, 0, 0);
1588 	if (err)
1589 		goto err_qp;
1590 
1591 	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1592 				  cmpt_base +
1593 				  ((u64) (MLX4_CMPT_TYPE_CQ *
1594 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1595 				  cmpt_entry_sz, dev->caps.num_cqs,
1596 				  dev->caps.reserved_cqs, 0, 0);
1597 	if (err)
1598 		goto err_srq;
1599 
1600 	num_eqs = dev->phys_caps.num_phys_eqs;
1601 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1602 				  cmpt_base +
1603 				  ((u64) (MLX4_CMPT_TYPE_EQ *
1604 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1605 				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1606 	if (err)
1607 		goto err_cq;
1608 
1609 	return 0;
1610 
1611 err_cq:
1612 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1613 
1614 err_srq:
1615 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1616 
1617 err_qp:
1618 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1619 
1620 err:
1621 	return err;
1622 }
1623 
1624 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1625 			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
1626 {
1627 	struct mlx4_priv *priv = mlx4_priv(dev);
1628 	u64 aux_pages;
1629 	int num_eqs;
1630 	int err;
1631 
1632 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1633 	if (err) {
1634 		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1635 		return err;
1636 	}
1637 
1638 	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1639 		 (unsigned long long) icm_size >> 10,
1640 		 (unsigned long long) aux_pages << 2);
1641 
1642 	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1643 					  GFP_HIGHUSER | __GFP_NOWARN, 0);
1644 	if (!priv->fw.aux_icm) {
1645 		mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1646 		return -ENOMEM;
1647 	}
1648 
1649 	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1650 	if (err) {
1651 		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1652 		goto err_free_aux;
1653 	}
1654 
1655 	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1656 	if (err) {
1657 		mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1658 		goto err_unmap_aux;
1659 	}
1660 
1661 
1662 	num_eqs = dev->phys_caps.num_phys_eqs;
1663 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1664 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
1665 				  num_eqs, num_eqs, 0, 0);
1666 	if (err) {
1667 		mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1668 		goto err_unmap_cmpt;
1669 	}
1670 
1671 	/*
1672 	 * Reserved MTT entries must be aligned up to a cacheline
1673 	 * boundary, since the FW will write to them, while the driver
1674 	 * writes to all other MTT entries. (The variable
1675 	 * dev->caps.mtt_entry_sz below is really the MTT segment
1676 	 * size, not the raw entry size)
1677 	 */
1678 	dev->caps.reserved_mtts =
1679 		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1680 		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
1681 
1682 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1683 				  init_hca->mtt_base,
1684 				  dev->caps.mtt_entry_sz,
1685 				  dev->caps.num_mtts,
1686 				  dev->caps.reserved_mtts, 1, 0);
1687 	if (err) {
1688 		mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1689 		goto err_unmap_eq;
1690 	}
1691 
1692 	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1693 				  init_hca->dmpt_base,
1694 				  dev_cap->dmpt_entry_sz,
1695 				  dev->caps.num_mpts,
1696 				  dev->caps.reserved_mrws, 1, 1);
1697 	if (err) {
1698 		mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1699 		goto err_unmap_mtt;
1700 	}
1701 
1702 	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1703 				  init_hca->qpc_base,
1704 				  dev_cap->qpc_entry_sz,
1705 				  dev->caps.num_qps,
1706 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1707 				  0, 0);
1708 	if (err) {
1709 		mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1710 		goto err_unmap_dmpt;
1711 	}
1712 
1713 	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1714 				  init_hca->auxc_base,
1715 				  dev_cap->aux_entry_sz,
1716 				  dev->caps.num_qps,
1717 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1718 				  0, 0);
1719 	if (err) {
1720 		mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1721 		goto err_unmap_qp;
1722 	}
1723 
1724 	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1725 				  init_hca->altc_base,
1726 				  dev_cap->altc_entry_sz,
1727 				  dev->caps.num_qps,
1728 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1729 				  0, 0);
1730 	if (err) {
1731 		mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1732 		goto err_unmap_auxc;
1733 	}
1734 
1735 	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1736 				  init_hca->rdmarc_base,
1737 				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1738 				  dev->caps.num_qps,
1739 				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1740 				  0, 0);
1741 	if (err) {
1742 		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1743 		goto err_unmap_altc;
1744 	}
1745 
1746 	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1747 				  init_hca->cqc_base,
1748 				  dev_cap->cqc_entry_sz,
1749 				  dev->caps.num_cqs,
1750 				  dev->caps.reserved_cqs, 0, 0);
1751 	if (err) {
1752 		mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1753 		goto err_unmap_rdmarc;
1754 	}
1755 
1756 	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1757 				  init_hca->srqc_base,
1758 				  dev_cap->srq_entry_sz,
1759 				  dev->caps.num_srqs,
1760 				  dev->caps.reserved_srqs, 0, 0);
1761 	if (err) {
1762 		mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1763 		goto err_unmap_cq;
1764 	}
1765 
1766 	/*
1767 	 * For flow steering device managed mode it is required to use
1768 	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
1769 	 * required, but for simplicity just map the whole multicast
1770 	 * group table now.  The table isn't very big and it's a lot
1771 	 * easier than trying to track ref counts.
1772 	 */
1773 	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1774 				  init_hca->mc_base,
1775 				  mlx4_get_mgm_entry_size(dev),
1776 				  dev->caps.num_mgms + dev->caps.num_amgms,
1777 				  dev->caps.num_mgms + dev->caps.num_amgms,
1778 				  0, 0);
1779 	if (err) {
1780 		mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1781 		goto err_unmap_srq;
1782 	}
1783 
1784 	return 0;
1785 
1786 err_unmap_srq:
1787 	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1788 
1789 err_unmap_cq:
1790 	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1791 
1792 err_unmap_rdmarc:
1793 	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1794 
1795 err_unmap_altc:
1796 	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1797 
1798 err_unmap_auxc:
1799 	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1800 
1801 err_unmap_qp:
1802 	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1803 
1804 err_unmap_dmpt:
1805 	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1806 
1807 err_unmap_mtt:
1808 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1809 
1810 err_unmap_eq:
1811 	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1812 
1813 err_unmap_cmpt:
1814 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1815 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1816 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1817 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1818 
1819 err_unmap_aux:
1820 	mlx4_UNMAP_ICM_AUX(dev);
1821 
1822 err_free_aux:
1823 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1824 
1825 	return err;
1826 }
1827 
1828 static void mlx4_free_icms(struct mlx4_dev *dev)
1829 {
1830 	struct mlx4_priv *priv = mlx4_priv(dev);
1831 
1832 	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1833 	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1834 	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1835 	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1836 	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1837 	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1838 	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1839 	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1840 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1841 	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1842 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1843 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1844 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1845 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1846 
1847 	mlx4_UNMAP_ICM_AUX(dev);
1848 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1849 }
1850 
1851 static void mlx4_slave_exit(struct mlx4_dev *dev)
1852 {
1853 	struct mlx4_priv *priv = mlx4_priv(dev);
1854 
1855 	mutex_lock(&priv->cmd.slave_cmd_mutex);
1856 	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1857 			  MLX4_COMM_TIME))
1858 		mlx4_warn(dev, "Failed to close slave function\n");
1859 	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1860 }
1861 
1862 static int map_bf_area(struct mlx4_dev *dev)
1863 {
1864 	struct mlx4_priv *priv = mlx4_priv(dev);
1865 	resource_size_t bf_start;
1866 	resource_size_t bf_len;
1867 	int err = 0;
1868 
1869 	if (!dev->caps.bf_reg_size)
1870 		return -ENXIO;
1871 
1872 	bf_start = pci_resource_start(dev->persist->pdev, 2) +
1873 			(dev->caps.num_uars << PAGE_SHIFT);
1874 	bf_len = pci_resource_len(dev->persist->pdev, 2) -
1875 			(dev->caps.num_uars << PAGE_SHIFT);
1876 	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1877 	if (!priv->bf_mapping)
1878 		err = -ENOMEM;
1879 
1880 	return err;
1881 }
1882 
1883 static void unmap_bf_area(struct mlx4_dev *dev)
1884 {
1885 	if (mlx4_priv(dev)->bf_mapping)
1886 		io_mapping_free(mlx4_priv(dev)->bf_mapping);
1887 }
1888 
1889 u64 mlx4_read_clock(struct mlx4_dev *dev)
1890 {
1891 	u32 clockhi, clocklo, clockhi1;
1892 	u64 cycles;
1893 	int i;
1894 	struct mlx4_priv *priv = mlx4_priv(dev);
1895 
1896 	for (i = 0; i < 10; i++) {
1897 		clockhi = swab32(readl(priv->clock_mapping));
1898 		clocklo = swab32(readl(priv->clock_mapping + 4));
1899 		clockhi1 = swab32(readl(priv->clock_mapping));
1900 		if (clockhi == clockhi1)
1901 			break;
1902 	}
1903 
1904 	cycles = (u64) clockhi << 32 | (u64) clocklo;
1905 
1906 	return cycles;
1907 }
1908 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1909 
1910 
1911 static int map_internal_clock(struct mlx4_dev *dev)
1912 {
1913 	struct mlx4_priv *priv = mlx4_priv(dev);
1914 
1915 	priv->clock_mapping =
1916 		ioremap(pci_resource_start(dev->persist->pdev,
1917 					   priv->fw.clock_bar) +
1918 			priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1919 
1920 	if (!priv->clock_mapping)
1921 		return -ENOMEM;
1922 
1923 	return 0;
1924 }
1925 
1926 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1927 				   struct mlx4_clock_params *params)
1928 {
1929 	struct mlx4_priv *priv = mlx4_priv(dev);
1930 
1931 	if (mlx4_is_slave(dev))
1932 		return -EOPNOTSUPP;
1933 
1934 	if (!params)
1935 		return -EINVAL;
1936 
1937 	params->bar = priv->fw.clock_bar;
1938 	params->offset = priv->fw.clock_offset;
1939 	params->size = MLX4_CLOCK_SIZE;
1940 
1941 	return 0;
1942 }
1943 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1944 
1945 static void unmap_internal_clock(struct mlx4_dev *dev)
1946 {
1947 	struct mlx4_priv *priv = mlx4_priv(dev);
1948 
1949 	if (priv->clock_mapping)
1950 		iounmap(priv->clock_mapping);
1951 }
1952 
/*
 * Close the HCA: drop the clock and blue flame mappings, then either
 * reset the slave channel (slave) or issue CLOSE_HCA and release all ICM
 * (non-slave).
 */
static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_internal_clock(dev);
	unmap_bf_area(dev);

	if (mlx4_is_slave(dev)) {
		mlx4_slave_exit(dev);
		return;
	}

	mlx4_CLOSE_HCA(dev, 0);
	mlx4_free_icms(dev);
}
1964 
1965 static void mlx4_close_fw(struct mlx4_dev *dev)
1966 {
1967 	if (!mlx4_is_slave(dev)) {
1968 		mlx4_UNMAP_FA(dev);
1969 		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1970 	}
1971 }
1972 
1973 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1974 {
1975 #define COMM_CHAN_OFFLINE_OFFSET 0x09
1976 
1977 	u32 comm_flags;
1978 	u32 offline_bit;
1979 	unsigned long end;
1980 	struct mlx4_priv *priv = mlx4_priv(dev);
1981 
1982 	end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
1983 	while (time_before(jiffies, end)) {
1984 		comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
1985 					  MLX4_COMM_CHAN_FLAGS));
1986 		offline_bit = (comm_flags &
1987 			       (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
1988 		if (!offline_bit)
1989 			return 0;
1990 
1991 		/* If device removal has been requested,
1992 		 * do not continue retrying.
1993 		 */
1994 		if (dev->persist->interface_state &
1995 		    MLX4_INTERFACE_STATE_NOWAIT)
1996 			break;
1997 
1998 		/* There are cases as part of AER/Reset flow that PF needs
1999 		 * around 100 msec to load. We therefore sleep for 100 msec
2000 		 * to allow other tasks to make use of that CPU during this
2001 		 * time interval.
2002 		 */
2003 		msleep(100);
2004 	}
2005 	mlx4_err(dev, "Communication channel is offline.\n");
2006 	return -EIO;
2007 }
2008 
2009 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
2010 {
2011 #define COMM_CHAN_RST_OFFSET 0x1e
2012 
2013 	struct mlx4_priv *priv = mlx4_priv(dev);
2014 	u32 comm_rst;
2015 	u32 comm_caps;
2016 
2017 	comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
2018 				 MLX4_COMM_CHAN_CAPS));
2019 	comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
2020 
2021 	if (comm_rst)
2022 		dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
2023 }
2024 
/*
 * Bring up the slave side of the comm channel.
 *
 * Sequence, all under cmd.slave_cmd_mutex:
 *   1. wait for the channel to come online (mlx4_comm_check_offline),
 *   2. record VF reset support and send a RESET,
 *   3. compare the interface revision read from comm->slave_read with
 *      the one reported by mlx4_comm_get_version(),
 *   4. hand the VHCR DMA address over in four commands
 *      (VHCR0, VHCR1, VHCR2, VHCR_EN), passing dma >> 48, dma >> 32,
 *      dma >> 16 and dma respectively.
 *
 * Returns 0 on success, -EPROBE_DEFER when pf_loading is non-zero or the
 * RESET reports MLX4_DELAY_RESET_SLAVE, and -EIO on any other failure.
 */
static int mlx4_init_slave(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 dma = (u64) priv->mfunc.vhcr_dma;
	int ret_from_reset = 0;
	u32 slave_read;
	u32 cmd_channel_ver;

	/* Checked before taking the mutex, so nothing to unlock here */
	if (atomic_read(&pf_loading)) {
		mlx4_warn(dev, "PF is not ready - Deferring probe\n");
		return -EPROBE_DEFER;
	}

	mutex_lock(&priv->cmd.slave_cmd_mutex);
	priv->cmd.max_cmds = 1;
	if (mlx4_comm_check_offline(dev)) {
		mlx4_err(dev, "PF is not responsive, skipping initialization\n");
		/* No RESET is sent on this path, only the unlock */
		goto err_offline;
	}

	mlx4_reset_vf_support(dev);
	mlx4_warn(dev, "Sending reset\n");
	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
				       MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
	/* if we are in the middle of flr the slave will try
	 * NUM_OF_RESET_RETRIES times before leaving.*/
	if (ret_from_reset) {
		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
			mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
			mutex_unlock(&priv->cmd.slave_cmd_mutex);
			return -EPROBE_DEFER;
		} else
			goto err;
	}

	/* check the driver version - the slave I/F revision
	 * must match the master's */
	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
	cmd_channel_ver = mlx4_comm_get_version();

	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
		MLX4_COMM_GET_IF_REV(slave_read)) {
		mlx4_err(dev, "slave driver version is not supported by the master\n");
		goto err;
	}

	/* Pass the VHCR DMA address piecewise, most significant part first.
	 * NOTE(review): each piece is presumably truncated to the command's
	 * parameter width inside mlx4_comm_cmd() - confirm.
	 */
	mlx4_warn(dev, "Sending vhcr0\n");
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
			  MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
		goto err;

	mutex_unlock(&priv->cmd.slave_cmd_mutex);
	return 0;

err:
	/* Best-effort RESET with a zero timeout; its result is ignored */
	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
err_offline:
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
	return -EIO;
}
2094 
2095 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
2096 {
2097 	int i;
2098 
2099 	for (i = 1; i <= dev->caps.num_ports; i++) {
2100 		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
2101 			dev->caps.gid_table_len[i] =
2102 				mlx4_get_slave_num_gids(dev, 0, i);
2103 		else
2104 			dev->caps.gid_table_len[i] = 1;
2105 		dev->caps.pkey_table_len[i] =
2106 			dev->phys_caps.pkey_phys_table_len[i] - 1;
2107 	}
2108 }
2109 
2110 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
2111 {
2112 	int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
2113 
2114 	for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2115 	      i++) {
2116 		if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2117 			break;
2118 	}
2119 
2120 	return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2121 }
2122 
2123 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2124 {
2125 	switch (dmfs_high_steer_mode) {
2126 	case MLX4_STEERING_DMFS_A0_DEFAULT:
2127 		return "default performance";
2128 
2129 	case MLX4_STEERING_DMFS_A0_DYNAMIC:
2130 		return "dynamic hybrid mode";
2131 
2132 	case MLX4_STEERING_DMFS_A0_STATIC:
2133 		return "performance optimized for limited rule configuration (static)";
2134 
2135 	case MLX4_STEERING_DMFS_A0_DISABLE:
2136 		return "disabled performance optimized steering";
2137 
2138 	case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2139 		return "performance optimized steering not supported";
2140 
2141 	default:
2142 		return "Unrecognized mode";
2143 	}
2144 }
2145 
/* Bit tested in the negated log_num_mgm_entry_size module parameter to
 * request the static DMFS A0 (high rate) steering mode.
 */
#define MLX4_DMFS_A0_STEERING			(1UL << 2)

/*
 * Select the steering mode and the operational MGM entry size.
 *
 * A non-positive mlx4_log_num_mgm_entry_size is treated as a request for
 * device-managed flow steering, with its negated value carrying option
 * bits.  Device-managed mode is chosen only if, in addition, the device
 * reports MLX4_DEV_CAP_FLAG2_FS_EN, a multi-function device supports at
 * least (num_vfs + 1) QPs per entry, and a suitable log entry size exists.
 * Otherwise the function falls back to B0 steering (both VEP UC and MC
 * steer flags set) or A0 steering, and DMFS high rate steering is marked
 * disabled unless it is unsupported altogether.
 *
 * Writes dev->caps.steering_mode, dev->caps.num_qp_per_mgm,
 * dev->oper_log_mgm_entry_size and possibly dev->caps.dmfs_high_steer_mode
 * and dev->caps.fs_log_max_ucast_qp_range_size.
 */
static void choose_steering_mode(struct mlx4_dev *dev,
				 struct mlx4_dev_cap *dev_cap)
{
	/* Optional request for static DMFS A0 mode via the module param */
	if (mlx4_log_num_mgm_entry_size <= 0) {
		if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
			if (dev->caps.dmfs_high_steer_mode ==
			    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
				mlx4_err(dev, "DMFS high rate mode not supported\n");
			else
				dev->caps.dmfs_high_steer_mode =
					MLX4_STEERING_DMFS_A0_STATIC;
		}
	}

	if (mlx4_log_num_mgm_entry_size <= 0 &&
	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
	    (!mlx4_is_mfunc(dev) ||
	     (dev_cap->fs_max_num_qp_per_entry >=
	     (dev->persist->num_vfs + 1))) &&
	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
		/* Device-managed flow steering */
		dev->oper_log_mgm_entry_size =
			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else {
		/* Without device-managed steering, high rate DMFS is off */
		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
			dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
		else {
			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;

			/* Exactly one of the two flags set: tell the user why
			 * B0 was not selected.
			 */
			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
		}
		/* A positive module param is used as is, else the default */
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
			mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size,
		 mlx4_log_num_mgm_entry_size);
}
2200 
2201 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2202 				       struct mlx4_dev_cap *dev_cap)
2203 {
2204 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2205 	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2206 		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2207 	else
2208 		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2209 
2210 	mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
2211 		 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2212 }
2213 
2214 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2215 {
2216 	int i;
2217 	struct mlx4_port_cap port_cap;
2218 
2219 	if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2220 		return -EINVAL;
2221 
2222 	for (i = 1; i <= dev->caps.num_ports; i++) {
2223 		if (mlx4_dev_port(dev, i, &port_cap)) {
2224 			mlx4_err(dev,
2225 				 "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
2226 		} else if ((dev->caps.dmfs_high_steer_mode !=
2227 			    MLX4_STEERING_DMFS_A0_DEFAULT) &&
2228 			   (port_cap.dmfs_optimized_state ==
2229 			    !!(dev->caps.dmfs_high_steer_mode ==
2230 			    MLX4_STEERING_DMFS_A0_DISABLE))) {
2231 			mlx4_err(dev,
2232 				 "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
2233 				 dmfs_high_rate_steering_mode_str(
2234 					dev->caps.dmfs_high_steer_mode),
2235 				 (port_cap.dmfs_optimized_state ?
2236 					"enabled" : "disabled"));
2237 		}
2238 	}
2239 
2240 	return 0;
2241 }
2242 
2243 static int mlx4_init_fw(struct mlx4_dev *dev)
2244 {
2245 	struct mlx4_mod_stat_cfg   mlx4_cfg;
2246 	int err = 0;
2247 
2248 	if (!mlx4_is_slave(dev)) {
2249 		err = mlx4_QUERY_FW(dev);
2250 		if (err) {
2251 			if (err == -EACCES)
2252 				mlx4_info(dev, "non-primary physical function, skipping\n");
2253 			else
2254 				mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2255 			return err;
2256 		}
2257 
2258 		err = mlx4_load_fw(dev);
2259 		if (err) {
2260 			mlx4_err(dev, "Failed to start FW, aborting\n");
2261 			return err;
2262 		}
2263 
2264 		mlx4_cfg.log_pg_sz_m = 1;
2265 		mlx4_cfg.log_pg_sz = 0;
2266 		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2267 		if (err)
2268 			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2269 	}
2270 
2271 	return err;
2272 }
2273 
/*
 * Initialize the HCA.
 *
 * Non-slave path: query device capabilities, choose steering and tunnel
 * offload modes, build the resource profile, set up ICM, issue INIT_HCA,
 * optionally refresh EQ/UAR limits via QUERY_FUNC, set up timestamping
 * (QUERY_HCA + internal clock mapping) and validate DMFS high rate
 * steering.
 *
 * Slave path: initialize the comm channel (mlx4_init_slave) and fetch the
 * slave capabilities.
 *
 * Common tail: map the blue flame area (failure is only logged), set the
 * port mask (non-slave only), query the adapter and the CONFIG_DEV
 * parameters, and record the INTA pin and board id.
 *
 * Returns 0 on success or a negative error code; on failure everything
 * set up so far is unwound through the labels at the end.
 */
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_adapter	   adapter;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	struct mlx4_config_dev_params params;
	int err;

	if (!mlx4_is_slave(dev)) {
		err = mlx4_dev_cap(dev, &dev_cap);
		if (err) {
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
			return err;
		}

		choose_steering_mode(dev, &dev_cap);
		choose_tunnel_offload_mode(dev, &dev_cap);

		if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
		    mlx4_is_master(dev))
			dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;

		/* Not fatal: the error is logged and initialization goes on */
		err = mlx4_get_phys_port_id(dev);
		if (err)
			mlx4_err(dev, "Fail to get physical port id\n");

		if (mlx4_is_master(dev))
			mlx4_parav_master_pf_caps(dev);

		if (mlx4_low_memory_profile()) {
			mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
			profile = low_mem_profile;
		} else {
			profile = default_profile;
		}
		if (dev->caps.steering_mode ==
		    MLX4_STEERING_MODE_DEVICE_MANAGED)
			profile.num_mcg = MLX4_FS_NUM_MCG;

		/* A negative value (when viewed as signed) is an error code */
		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
					     &init_hca);
		if ((long long) icm_size < 0) {
			err = icm_size;
			return err;
		}

		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

		/* UAR page size: DEFAULT_UAR_PAGE_SHIFT when 4K UARs are
		 * enabled or there are no VFs, the system page size otherwise.
		 */
		if (enable_4k_uar || !dev->persist->num_vfs) {
			init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
						    PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
			init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
		} else {
			init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
			init_hca.uar_page_sz = PAGE_SHIFT - 12;
		}

		init_hca.mw_enabled = 0;
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;

		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
		if (err)
			return err;

		err = mlx4_INIT_HCA(dev, &init_hca);
		if (err) {
			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
			goto err_free_icm;
		}

		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
			/* Negative is an error; a non-negative result is
			 * tested as a bit mask.
			 */
			err = mlx4_query_func(dev, &dev_cap);
			if (err < 0) {
				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
				goto err_close;
			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
				dev->caps.num_eqs = dev_cap.max_eqs;
				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
				dev->caps.reserved_uars = dev_cap.reserved_uars;
			}
		}

		/*
		 * If TS is supported by FW
		 * read HCA frequency by QUERY_HCA command
		 */
		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
			/* init_hca is reused here as the QUERY_HCA output */
			memset(&init_hca, 0, sizeof(init_hca));
			err = mlx4_QUERY_HCA(dev, &init_hca);
			if (err) {
				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
			} else {
				dev->caps.hca_core_clock =
					init_hca.hca_core_clock;
			}

			/* In case we got HCA frequency 0 - disable timestamping
			 * to avoid dividing by zero
			 */
			if (!dev->caps.hca_core_clock) {
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev,
					 "HCA frequency is 0 - timestamping is not supported\n");
			} else if (map_internal_clock(dev)) {
				/*
				 * Map internal clock,
				 * in case of failure disable timestamping
				 */
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
			}
		}

		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
			if (mlx4_validate_optimized_steering(dev))
				mlx4_warn(dev, "Optimized steering validation failed\n");

			if (dev->caps.dmfs_high_steer_mode ==
			    MLX4_STEERING_DMFS_A0_DISABLE) {
				dev->caps.dmfs_high_rate_qpn_base =
					dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
				dev->caps.dmfs_high_rate_qpn_range =
					MLX4_A0_STEERING_TABLE_SIZE;
			}

			mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
				  dmfs_high_rate_steering_mode_str(
					dev->caps.dmfs_high_steer_mode));
		}
	} else {
		err = mlx4_init_slave(dev);
		if (err) {
			/* A deferred probe is not reported as an error */
			if (err != -EPROBE_DEFER)
				mlx4_err(dev, "Failed to initialize slave\n");
			return err;
		}

		err = mlx4_slave_cap(dev);
		if (err) {
			mlx4_err(dev, "Failed to obtain slave caps\n");
			goto err_close;
		}
	}

	/* Blue flame is optional: a mapping failure is only logged */
	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	/* Only the master sets the ports, all the rest get it from it. */
	if (!mlx4_is_slave(dev))
		mlx4_set_port_mask(dev);

	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
		goto unmap_bf;
	}

	/* Query CONFIG_DEV parameters */
	err = mlx4_config_dev_retrieval(dev, &params);
	if (err && err != -EOPNOTSUPP) {
		mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
	} else if (!err) {
		dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
		dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
	}
	priv->eq_table.inta_pin = adapter.inta_pin;
	memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));

	/* A CONFIG_DEV failure does not fail the initialization */
	return 0;

	/* Error unwinding: each label falls through to the ones below it */
unmap_bf:
	unmap_internal_clock(dev);
	unmap_bf_area(dev);

	if (mlx4_is_slave(dev))
		mlx4_slave_destroy_special_qp_cap(dev);

err_close:
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else
		mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	if (!mlx4_is_slave(dev))
		mlx4_free_icms(dev);

	return err;
}
2470 
2471 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2472 {
2473 	struct mlx4_priv *priv = mlx4_priv(dev);
2474 	int nent_pow2;
2475 
2476 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2477 		return -ENOENT;
2478 
2479 	if (!dev->caps.max_counters)
2480 		return -ENOSPC;
2481 
2482 	nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2483 	/* reserve last counter index for sink counter */
2484 	return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2485 				nent_pow2 - 1, 0,
2486 				nent_pow2 - dev->caps.max_counters + 1);
2487 }
2488 
2489 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2490 {
2491 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2492 		return;
2493 
2494 	if (!dev->caps.max_counters)
2495 		return;
2496 
2497 	mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2498 }
2499 
2500 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2501 {
2502 	struct mlx4_priv *priv = mlx4_priv(dev);
2503 	int port;
2504 
2505 	for (port = 0; port < dev->caps.num_ports; port++)
2506 		if (priv->def_counter[port] != -1)
2507 			mlx4_counter_free(dev,  priv->def_counter[port]);
2508 }
2509 
2510 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2511 {
2512 	struct mlx4_priv *priv = mlx4_priv(dev);
2513 	int port, err = 0;
2514 	u32 idx;
2515 
2516 	for (port = 0; port < dev->caps.num_ports; port++)
2517 		priv->def_counter[port] = -1;
2518 
2519 	for (port = 0; port < dev->caps.num_ports; port++) {
2520 		err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
2521 
2522 		if (!err || err == -ENOSPC) {
2523 			priv->def_counter[port] = idx;
2524 		} else if (err == -ENOENT) {
2525 			err = 0;
2526 			continue;
2527 		} else if (mlx4_is_slave(dev) && err == -EINVAL) {
2528 			priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2529 			mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2530 				  MLX4_SINK_COUNTER_INDEX(dev));
2531 			err = 0;
2532 		} else {
2533 			mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2534 				 __func__, port + 1, err);
2535 			mlx4_cleanup_default_counters(dev);
2536 			return err;
2537 		}
2538 
2539 		mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2540 			 __func__, priv->def_counter[port], port + 1);
2541 	}
2542 
2543 	return err;
2544 }
2545 
2546 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2547 {
2548 	struct mlx4_priv *priv = mlx4_priv(dev);
2549 
2550 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2551 		return -ENOENT;
2552 
2553 	*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2554 	if (*idx == -1) {
2555 		*idx = MLX4_SINK_COUNTER_INDEX(dev);
2556 		return -ENOSPC;
2557 	}
2558 
2559 	return 0;
2560 }
2561 
2562 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
2563 {
2564 	u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
2565 	u64 out_param;
2566 	int err;
2567 
2568 	if (mlx4_is_mfunc(dev)) {
2569 		err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
2570 				   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2571 				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2572 		if (!err)
2573 			*idx = get_param_l(&out_param);
2574 
2575 		return err;
2576 	}
2577 	return __mlx4_counter_alloc(dev, idx);
2578 }
2579 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2580 
2581 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2582 				u8 counter_index)
2583 {
2584 	struct mlx4_cmd_mailbox *if_stat_mailbox;
2585 	int err;
2586 	u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2587 
2588 	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2589 	if (IS_ERR(if_stat_mailbox))
2590 		return PTR_ERR(if_stat_mailbox);
2591 
2592 	err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2593 			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2594 			   MLX4_CMD_NATIVE);
2595 
2596 	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2597 	return err;
2598 }
2599 
2600 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2601 {
2602 	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2603 		return;
2604 
2605 	if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2606 		return;
2607 
2608 	__mlx4_clear_if_stat(dev, idx);
2609 
2610 	mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2611 	return;
2612 }
2613 
2614 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2615 {
2616 	u64 in_param = 0;
2617 
2618 	if (mlx4_is_mfunc(dev)) {
2619 		set_param_l(&in_param, idx);
2620 		mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2621 			 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2622 			 MLX4_CMD_WRAPPED);
2623 		return;
2624 	}
2625 	__mlx4_counter_free(dev, idx);
2626 }
2627 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2628 
2629 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2630 {
2631 	struct mlx4_priv *priv = mlx4_priv(dev);
2632 
2633 	return priv->def_counter[port - 1];
2634 }
2635 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2636 
2637 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2638 {
2639 	struct mlx4_priv *priv = mlx4_priv(dev);
2640 
2641 	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2642 }
2643 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2644 
2645 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2646 {
2647 	struct mlx4_priv *priv = mlx4_priv(dev);
2648 
2649 	return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2650 }
2651 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2652 
2653 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2654 {
2655 	struct mlx4_priv *priv = mlx4_priv(dev);
2656 	__be64 guid;
2657 
2658 	/* hw GUID */
2659 	if (entry == 0)
2660 		return;
2661 
2662 	get_random_bytes((char *)&guid, sizeof(guid));
2663 	guid &= ~(cpu_to_be64(1ULL << 56));
2664 	guid |= cpu_to_be64(1ULL << 57);
2665 	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2666 }
2667 
2668 static int mlx4_setup_hca(struct mlx4_dev *dev)
2669 {
2670 	struct mlx4_priv *priv = mlx4_priv(dev);
2671 	int err;
2672 	int port;
2673 	__be32 ib_port_default_caps;
2674 
2675 	err = mlx4_init_uar_table(dev);
2676 	if (err) {
2677 		mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2678 		return err;
2679 	}
2680 
2681 	err = mlx4_uar_alloc(dev, &priv->driver_uar);
2682 	if (err) {
2683 		mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2684 		goto err_uar_table_free;
2685 	}
2686 
2687 	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2688 	if (!priv->kar) {
2689 		mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2690 		err = -ENOMEM;
2691 		goto err_uar_free;
2692 	}
2693 
2694 	err = mlx4_init_pd_table(dev);
2695 	if (err) {
2696 		mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2697 		goto err_kar_unmap;
2698 	}
2699 
2700 	err = mlx4_init_xrcd_table(dev);
2701 	if (err) {
2702 		mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2703 		goto err_pd_table_free;
2704 	}
2705 
2706 	err = mlx4_init_mr_table(dev);
2707 	if (err) {
2708 		mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2709 		goto err_xrcd_table_free;
2710 	}
2711 
2712 	if (!mlx4_is_slave(dev)) {
2713 		err = mlx4_init_mcg_table(dev);
2714 		if (err) {
2715 			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2716 			goto err_mr_table_free;
2717 		}
2718 		err = mlx4_config_mad_demux(dev);
2719 		if (err) {
2720 			mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2721 			goto err_mcg_table_free;
2722 		}
2723 	}
2724 
2725 	err = mlx4_init_eq_table(dev);
2726 	if (err) {
2727 		mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2728 		goto err_mcg_table_free;
2729 	}
2730 
2731 	err = mlx4_cmd_use_events(dev);
2732 	if (err) {
2733 		mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2734 		goto err_eq_table_free;
2735 	}
2736 
2737 	err = mlx4_NOP(dev);
2738 	if (err) {
2739 		if (dev->flags & MLX4_FLAG_MSI_X) {
2740 			mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
2741 				  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2742 			mlx4_warn(dev, "Trying again without MSI-X\n");
2743 		} else {
2744 			mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2745 				 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2746 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2747 		}
2748 
2749 		goto err_cmd_poll;
2750 	}
2751 
2752 	mlx4_dbg(dev, "NOP command IRQ test passed\n");
2753 
2754 	err = mlx4_init_cq_table(dev);
2755 	if (err) {
2756 		mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2757 		goto err_cmd_poll;
2758 	}
2759 
2760 	err = mlx4_init_srq_table(dev);
2761 	if (err) {
2762 		mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2763 		goto err_cq_table_free;
2764 	}
2765 
2766 	err = mlx4_init_qp_table(dev);
2767 	if (err) {
2768 		mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2769 		goto err_srq_table_free;
2770 	}
2771 
2772 	if (!mlx4_is_slave(dev)) {
2773 		err = mlx4_init_counters_table(dev);
2774 		if (err && err != -ENOENT) {
2775 			mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2776 			goto err_qp_table_free;
2777 		}
2778 	}
2779 
2780 	err = mlx4_allocate_default_counters(dev);
2781 	if (err) {
2782 		mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2783 		goto err_counters_table_free;
2784 	}
2785 
2786 	if (!mlx4_is_slave(dev)) {
2787 		for (port = 1; port <= dev->caps.num_ports; port++) {
2788 			ib_port_default_caps = 0;
2789 			err = mlx4_get_port_ib_caps(dev, port,
2790 						    &ib_port_default_caps);
2791 			if (err)
2792 				mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2793 					  port, err);
2794 			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2795 
2796 			/* initialize per-slave default ib port capabilities */
2797 			if (mlx4_is_master(dev)) {
2798 				int i;
2799 				for (i = 0; i < dev->num_slaves; i++) {
2800 					if (i == mlx4_master_func_num(dev))
2801 						continue;
2802 					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2803 						ib_port_default_caps;
2804 				}
2805 			}
2806 
2807 			if (mlx4_is_mfunc(dev))
2808 				dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2809 			else
2810 				dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2811 
2812 			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2813 					    dev->caps.pkey_table_len[port] : -1);
2814 			if (err) {
2815 				mlx4_err(dev, "Failed to set port %d, aborting\n",
2816 					 port);
2817 				goto err_default_countes_free;
2818 			}
2819 		}
2820 	}
2821 
2822 	return 0;
2823 
2824 err_default_countes_free:
2825 	mlx4_cleanup_default_counters(dev);
2826 
2827 err_counters_table_free:
2828 	if (!mlx4_is_slave(dev))
2829 		mlx4_cleanup_counters_table(dev);
2830 
2831 err_qp_table_free:
2832 	mlx4_cleanup_qp_table(dev);
2833 
2834 err_srq_table_free:
2835 	mlx4_cleanup_srq_table(dev);
2836 
2837 err_cq_table_free:
2838 	mlx4_cleanup_cq_table(dev);
2839 
2840 err_cmd_poll:
2841 	mlx4_cmd_use_polling(dev);
2842 
2843 err_eq_table_free:
2844 	mlx4_cleanup_eq_table(dev);
2845 
2846 err_mcg_table_free:
2847 	if (!mlx4_is_slave(dev))
2848 		mlx4_cleanup_mcg_table(dev);
2849 
2850 err_mr_table_free:
2851 	mlx4_cleanup_mr_table(dev);
2852 
2853 err_xrcd_table_free:
2854 	mlx4_cleanup_xrcd_table(dev);
2855 
2856 err_pd_table_free:
2857 	mlx4_cleanup_pd_table(dev);
2858 
2859 err_kar_unmap:
2860 	iounmap(priv->kar);
2861 
2862 err_uar_free:
2863 	mlx4_uar_free(dev, &priv->driver_uar);
2864 
2865 err_uar_table_free:
2866 	mlx4_cleanup_uar_table(dev);
2867 	return err;
2868 }
2869 
2870 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2871 {
2872 	int requested_cpu = 0;
2873 	struct mlx4_priv *priv = mlx4_priv(dev);
2874 	struct mlx4_eq *eq;
2875 	int off = 0;
2876 	int i;
2877 
2878 	if (eqn > dev->caps.num_comp_vectors)
2879 		return -EINVAL;
2880 
2881 	for (i = 1; i < port; i++)
2882 		off += mlx4_get_eqs_per_port(dev, i);
2883 
2884 	requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2885 
2886 	/* Meaning EQs are shared, and this call comes from the second port */
2887 	if (requested_cpu < 0)
2888 		return 0;
2889 
2890 	eq = &priv->eq_table.eq[eqn];
2891 
2892 	if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2893 		return -ENOMEM;
2894 
2895 	cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2896 
2897 	return 0;
2898 }
2899 
/*
 * Set up the interrupt vectors used by the device's event queues.
 *
 * When the msi_x module parameter is non-zero, try to enable MSI-X with up to
 * min(num_ports * online CPUs + 1, num_eqs - reserved_eqs, MAX_MSIX) vectors,
 * further capped by msi_x itself when it is greater than 1.  On success the
 * first vector goes to the asynchronous EQ, the remaining ones to the other
 * EQs, dev->caps.num_comp_vectors is set to the number of granted vectors
 * minus one and MLX4_FLAG_MSI_X is set in dev->flags.
 *
 * When msi_x is zero, the entries array cannot be allocated, or MSI-X cannot
 * be enabled, fall back to one completion vector with EQ 0 and EQ 1 both
 * using the PCI device's own irq.
 */
2900 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2901 {
2902 	struct mlx4_priv *priv = mlx4_priv(dev);
2903 	struct msix_entry *entries;
2904 	int i;
2905 	int port = 0;
2906
2907 	if (msi_x) {
2908 		int nreq = min3(dev->caps.num_ports *
2909 				(int)num_online_cpus() + 1,
2910 				dev->caps.num_eqs - dev->caps.reserved_eqs,
2911 				MAX_MSIX);
2912
2913 		if (msi_x > 1)
2914 			nreq = min_t(int, nreq, msi_x);
2915
2916 		entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2917 		if (!entries)
2918 			goto no_msi;
2919
2920 		for (i = 0; i < nreq; ++i)
2921 			entries[i].entry = i;
2922
		/* Ask for at least 2 and at most nreq vectors; nreq becomes
		 * the number actually granted, or a negative errno.
		 */
2923 		nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2924 					     nreq);
2925
2926 		if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2927 			kfree(entries);
2928 			goto no_msi;
2929 		}
2930 		/* 1 is reserved for events (asynchronous EQ) */
2931 		dev->caps.num_comp_vectors = nreq - 1;
2932
2933 		priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2934 		bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2935 			    dev->caps.num_ports);
2936
2937 		for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2938 			if (i == MLX4_EQ_ASYNC)
2939 				continue;
2940
			/* entries[0] was given to the async EQ above, so the
			 * other EQs take their vectors from entries[1] onwards.
			 */
2941 			priv->eq_table.eq[i].irq =
2942 				entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2943
2944 			if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2945 				bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2946 					    dev->caps.num_ports);
2947 				/* We don't set affinity hint when there
2948 				 * aren't enough EQs
2949 				 */
2950 			} else {
2951 				set_bit(port,
2952 					priv->eq_table.eq[i].actv_ports.ports);
				/* A failed hint is only warned about; setup
				 * continues without it.
				 */
2953 				if (mlx4_init_affinity_hint(dev, port + 1, i))
2954 					mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2955 						  i);
2956 			}
2957 			/* We divide the Eqs evenly between the two ports.
2958 			 * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2959 			 * refers to the number of Eqs per port
2960 			 * (i.e eqs_per_port). Theoretically, we would like to
2961 			 * write something like (i + 1) % eqs_per_port == 0.
2962 			 * However, since there's an asynchronous Eq, we have
2963 			 * to skip over it by comparing this condition to
2964 			 * !!((i + 1) > MLX4_EQ_ASYNC).
2965 			 */
2966 			if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2967 			    ((i + 1) %
2968 			     (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2969 			    !!((i + 1) > MLX4_EQ_ASYNC))
2970 				/* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2971 				 * everything is shared anyway.
2972 				 */
2973 				port++;
2974 		}
2975
2976 		dev->flags |= MLX4_FLAG_MSI_X;
2977
2978 		kfree(entries);
2979 		return;
2980 	}
2981
	/* Fallback: a single completion vector on the device's own irq */
2982 no_msi:
2983 	dev->caps.num_comp_vectors = 1;
2984
	/* The loop below only initialises EQ 0 and EQ 1 */
2985 	BUG_ON(MLX4_EQ_ASYNC >= 2);
2986 	for (i = 0; i < 2; ++i) {
2987 		priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2988 		if (i != MLX4_EQ_ASYNC) {
2989 			bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2990 				    dev->caps.num_ports);
2991 		}
2992 	}
2993 }
2994 
2995 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2996 {
2997 	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
2998 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2999 	int err;
3000 
3001 	err = devlink_port_register(devlink, &info->devlink_port, port);
3002 	if (err)
3003 		return err;
3004 
3005 	info->dev = dev;
3006 	info->port = port;
3007 	if (!mlx4_is_slave(dev)) {
3008 		mlx4_init_mac_table(dev, &info->mac_table);
3009 		mlx4_init_vlan_table(dev, &info->vlan_table);
3010 		mlx4_init_roce_gid_table(dev, &info->gid_table);
3011 		info->base_qpn = mlx4_get_base_qpn(dev, port);
3012 	}
3013 
3014 	sprintf(info->dev_name, "mlx4_port%d", port);
3015 	info->port_attr.attr.name = info->dev_name;
3016 	if (mlx4_is_mfunc(dev)) {
3017 		info->port_attr.attr.mode = 0444;
3018 	} else {
3019 		info->port_attr.attr.mode = 0644;
3020 		info->port_attr.store     = set_port_type;
3021 	}
3022 	info->port_attr.show      = show_port_type;
3023 	sysfs_attr_init(&info->port_attr.attr);
3024 
3025 	err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
3026 	if (err) {
3027 		mlx4_err(dev, "Failed to create file for port %d\n", port);
3028 		devlink_port_unregister(&info->devlink_port);
3029 		info->port = -1;
3030 		return err;
3031 	}
3032 
3033 	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3034 	info->port_mtu_attr.attr.name = info->dev_mtu_name;
3035 	if (mlx4_is_mfunc(dev)) {
3036 		info->port_mtu_attr.attr.mode = 0444;
3037 	} else {
3038 		info->port_mtu_attr.attr.mode = 0644;
3039 		info->port_mtu_attr.store     = set_port_ib_mtu;
3040 	}
3041 	info->port_mtu_attr.show      = show_port_ib_mtu;
3042 	sysfs_attr_init(&info->port_mtu_attr.attr);
3043 
3044 	err = device_create_file(&dev->persist->pdev->dev,
3045 				 &info->port_mtu_attr);
3046 	if (err) {
3047 		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3048 		device_remove_file(&info->dev->persist->pdev->dev,
3049 				   &info->port_attr);
3050 		devlink_port_unregister(&info->devlink_port);
3051 		info->port = -1;
3052 		return err;
3053 	}
3054 
3055 	return 0;
3056 }
3057 
3058 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3059 {
3060 	if (info->port < 0)
3061 		return;
3062 
3063 	device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
3064 	device_remove_file(&info->dev->persist->pdev->dev,
3065 			   &info->port_mtu_attr);
3066 	devlink_port_unregister(&info->devlink_port);
3067 
3068 #ifdef CONFIG_RFS_ACCEL
3069 	free_irq_cpu_rmap(info->rmap);
3070 	info->rmap = NULL;
3071 #endif
3072 }
3073 
3074 static int mlx4_init_steering(struct mlx4_dev *dev)
3075 {
3076 	struct mlx4_priv *priv = mlx4_priv(dev);
3077 	int num_entries = dev->caps.num_ports;
3078 	int i, j;
3079 
3080 	priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer),
3081 			      GFP_KERNEL);
3082 	if (!priv->steer)
3083 		return -ENOMEM;
3084 
3085 	for (i = 0; i < num_entries; i++)
3086 		for (j = 0; j < MLX4_NUM_STEERS; j++) {
3087 			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3088 			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3089 		}
3090 	return 0;
3091 }
3092 
3093 static void mlx4_clear_steering(struct mlx4_dev *dev)
3094 {
3095 	struct mlx4_priv *priv = mlx4_priv(dev);
3096 	struct mlx4_steer_index *entry, *tmp_entry;
3097 	struct mlx4_promisc_qp *pqp, *tmp_pqp;
3098 	int num_entries = dev->caps.num_ports;
3099 	int i, j;
3100 
3101 	for (i = 0; i < num_entries; i++) {
3102 		for (j = 0; j < MLX4_NUM_STEERS; j++) {
3103 			list_for_each_entry_safe(pqp, tmp_pqp,
3104 						 &priv->steer[i].promisc_qps[j],
3105 						 list) {
3106 				list_del(&pqp->list);
3107 				kfree(pqp);
3108 			}
3109 			list_for_each_entry_safe(entry, tmp_entry,
3110 						 &priv->steer[i].steer_entries[j],
3111 						 list) {
3112 				list_del(&entry->list);
3113 				list_for_each_entry_safe(pqp, tmp_pqp,
3114 							 &entry->duplicates,
3115 							 list) {
3116 					list_del(&pqp->list);
3117 					kfree(pqp);
3118 				}
3119 				kfree(entry);
3120 			}
3121 		}
3122 	}
3123 	kfree(priv->steer);
3124 }
3125 
3126 static int extended_func_num(struct pci_dev *pdev)
3127 {
3128 	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3129 }
3130 
/*
 * Offset (from the start of PCI resource 0) and size of the region that
 * mlx4_get_ownership() and mlx4_free_ownership() map to read and clear the
 * ownership value.
 */
3131 #define MLX4_OWNER_BASE	0x8069c
3132 #define MLX4_OWNER_SIZE	4
3133 
3134 static int mlx4_get_ownership(struct mlx4_dev *dev)
3135 {
3136 	void __iomem *owner;
3137 	u32 ret;
3138 
3139 	if (pci_channel_offline(dev->persist->pdev))
3140 		return -EIO;
3141 
3142 	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3143 			MLX4_OWNER_BASE,
3144 			MLX4_OWNER_SIZE);
3145 	if (!owner) {
3146 		mlx4_err(dev, "Failed to obtain ownership bit\n");
3147 		return -ENOMEM;
3148 	}
3149 
3150 	ret = readl(owner);
3151 	iounmap(owner);
3152 	return (int) !!ret;
3153 }
3154 
3155 static void mlx4_free_ownership(struct mlx4_dev *dev)
3156 {
3157 	void __iomem *owner;
3158 
3159 	if (pci_channel_offline(dev->persist->pdev))
3160 		return;
3161 
3162 	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3163 			MLX4_OWNER_BASE,
3164 			MLX4_OWNER_SIZE);
3165 	if (!owner) {
3166 		mlx4_err(dev, "Failed to obtain ownership bit\n");
3167 		return;
3168 	}
3169 	writel(0, owner);
3170 	msleep(1000);
3171 	iounmap(owner);
3172 }
3173 
/*
 * True when MLX4_FLAG_SRIOV and MLX4_FLAG_MASTER are either both set or both
 * clear in @flags.
 */
3174 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV)	==\
3175 				  !!((flags) & MLX4_FLAG_MASTER))
3176 
3177 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
3178 			     u8 total_vfs, int existing_vfs, int reset_flow)
3179 {
3180 	u64 dev_flags = dev->flags;
3181 	int err = 0;
3182 	int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
3183 					MLX4_MAX_NUM_VF);
3184 
3185 	if (reset_flow) {
3186 		dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
3187 				       GFP_KERNEL);
3188 		if (!dev->dev_vfs)
3189 			goto free_mem;
3190 		return dev_flags;
3191 	}
3192 
3193 	atomic_inc(&pf_loading);
3194 	if (dev->flags &  MLX4_FLAG_SRIOV) {
3195 		if (existing_vfs != total_vfs) {
3196 			mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
3197 				 existing_vfs, total_vfs);
3198 			total_vfs = existing_vfs;
3199 		}
3200 	}
3201 
3202 	dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
3203 	if (NULL == dev->dev_vfs) {
3204 		mlx4_err(dev, "Failed to allocate memory for VFs\n");
3205 		goto disable_sriov;
3206 	}
3207 
3208 	if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
3209 		if (total_vfs > fw_enabled_sriov_vfs) {
3210 			mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n",
3211 				 total_vfs, fw_enabled_sriov_vfs);
3212 			err = -ENOMEM;
3213 			goto disable_sriov;
3214 		}
3215 		mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
3216 		err = pci_enable_sriov(pdev, total_vfs);
3217 	}
3218 	if (err) {
3219 		mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
3220 			 err);
3221 		goto disable_sriov;
3222 	} else {
3223 		mlx4_warn(dev, "Running in master mode\n");
3224 		dev_flags |= MLX4_FLAG_SRIOV |
3225 			MLX4_FLAG_MASTER;
3226 		dev_flags &= ~MLX4_FLAG_SLAVE;
3227 		dev->persist->num_vfs = total_vfs;
3228 	}
3229 	return dev_flags;
3230 
3231 disable_sriov:
3232 	atomic_dec(&pf_loading);
3233 free_mem:
3234 	dev->persist->num_vfs = 0;
3235 	kfree(dev->dev_vfs);
3236         dev->dev_vfs = NULL;
3237 	return dev_flags & ~MLX4_FLAG_MASTER;
3238 }
3239 
/* Non-zero result of mlx4_check_dev_cap() when its VF-count check fails */
3240 enum {
3241 	MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
3242 };
3243 
3244 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
3245 			      int *nvfs)
3246 {
3247 	int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
3248 	/* Checking for 64 VFs as a limitation of CX2 */
3249 	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
3250 	    requested_vfs >= 64) {
3251 		mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
3252 			 requested_vfs);
3253 		return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
3254 	}
3255 	return 0;
3256 }
3257 
3258 static int mlx4_pci_enable_device(struct mlx4_dev *dev)
3259 {
3260 	struct pci_dev *pdev = dev->persist->pdev;
3261 	int err = 0;
3262 
3263 	mutex_lock(&dev->persist->pci_status_mutex);
3264 	if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) {
3265 		err = pci_enable_device(pdev);
3266 		if (!err)
3267 			dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED;
3268 	}
3269 	mutex_unlock(&dev->persist->pci_status_mutex);
3270 
3271 	return err;
3272 }
3273 
3274 static void mlx4_pci_disable_device(struct mlx4_dev *dev)
3275 {
3276 	struct pci_dev *pdev = dev->persist->pdev;
3277 
3278 	mutex_lock(&dev->persist->pci_status_mutex);
3279 	if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) {
3280 		pci_disable_device(pdev);
3281 		dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED;
3282 	}
3283 	mutex_unlock(&dev->persist->pci_status_mutex);
3284 }
3285 
3286 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
3287 			 int total_vfs, int *nvfs, struct mlx4_priv *priv,
3288 			 int reset_flow)
3289 {
3290 	struct mlx4_dev *dev;
3291 	unsigned sum = 0;
3292 	int err;
3293 	int port;
3294 	int i;
3295 	struct mlx4_dev_cap *dev_cap = NULL;
3296 	int existing_vfs = 0;
3297 
3298 	dev = &priv->dev;
3299 
3300 	INIT_LIST_HEAD(&priv->ctx_list);
3301 	spin_lock_init(&priv->ctx_lock);
3302 
3303 	mutex_init(&priv->port_mutex);
3304 	mutex_init(&priv->bond_mutex);
3305 
3306 	INIT_LIST_HEAD(&priv->pgdir_list);
3307 	mutex_init(&priv->pgdir_mutex);
3308 	spin_lock_init(&priv->cmd.context_lock);
3309 
3310 	INIT_LIST_HEAD(&priv->bf_list);
3311 	mutex_init(&priv->bf_mutex);
3312 
3313 	dev->rev_id = pdev->revision;
3314 	dev->numa_node = dev_to_node(&pdev->dev);
3315 
3316 	/* Detect if this device is a virtual function */
3317 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3318 		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3319 		dev->flags |= MLX4_FLAG_SLAVE;
3320 	} else {
3321 		/* We reset the device and enable SRIOV only for physical
3322 		 * devices.  Try to claim ownership on the device;
3323 		 * if already taken, skip -- do not allow multiple PFs */
3324 		err = mlx4_get_ownership(dev);
3325 		if (err) {
3326 			if (err < 0)
3327 				return err;
3328 			else {
3329 				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3330 				return -EINVAL;
3331 			}
3332 		}
3333 
3334 		atomic_set(&priv->opreq_count, 0);
3335 		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3336 
3337 		/*
3338 		 * Now reset the HCA before we touch the PCI capabilities or
3339 		 * attempt a firmware command, since a boot ROM may have left
3340 		 * the HCA in an undefined state.
3341 		 */
3342 		err = mlx4_reset(dev);
3343 		if (err) {
3344 			mlx4_err(dev, "Failed to reset HCA, aborting\n");
3345 			goto err_sriov;
3346 		}
3347 
3348 		if (total_vfs) {
3349 			dev->flags = MLX4_FLAG_MASTER;
3350 			existing_vfs = pci_num_vf(pdev);
3351 			if (existing_vfs)
3352 				dev->flags |= MLX4_FLAG_SRIOV;
3353 			dev->persist->num_vfs = total_vfs;
3354 		}
3355 	}
3356 
3357 	/* on load remove any previous indication of internal error,
3358 	 * device is up.
3359 	 */
3360 	dev->persist->state = MLX4_DEVICE_STATE_UP;
3361 
3362 slave_start:
3363 	err = mlx4_cmd_init(dev);
3364 	if (err) {
3365 		mlx4_err(dev, "Failed to init command interface, aborting\n");
3366 		goto err_sriov;
3367 	}
3368 
3369 	/* In slave functions, the communication channel must be initialized
3370 	 * before posting commands. Also, init num_slaves before calling
3371 	 * mlx4_init_hca */
3372 	if (mlx4_is_mfunc(dev)) {
3373 		if (mlx4_is_master(dev)) {
3374 			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3375 
3376 		} else {
3377 			dev->num_slaves = 0;
3378 			err = mlx4_multi_func_init(dev);
3379 			if (err) {
3380 				mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3381 				goto err_cmd;
3382 			}
3383 		}
3384 	}
3385 
3386 	err = mlx4_init_fw(dev);
3387 	if (err) {
3388 		mlx4_err(dev, "Failed to init fw, aborting.\n");
3389 		goto err_mfunc;
3390 	}
3391 
3392 	if (mlx4_is_master(dev)) {
3393 		/* when we hit the goto slave_start below, dev_cap already initialized */
3394 		if (!dev_cap) {
3395 			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3396 
3397 			if (!dev_cap) {
3398 				err = -ENOMEM;
3399 				goto err_fw;
3400 			}
3401 
3402 			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3403 			if (err) {
3404 				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3405 				goto err_fw;
3406 			}
3407 
3408 			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3409 				goto err_fw;
3410 
3411 			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3412 				u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3413 								  total_vfs,
3414 								  existing_vfs,
3415 								  reset_flow);
3416 
3417 				mlx4_close_fw(dev);
3418 				mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3419 				dev->flags = dev_flags;
3420 				if (!SRIOV_VALID_STATE(dev->flags)) {
3421 					mlx4_err(dev, "Invalid SRIOV state\n");
3422 					goto err_sriov;
3423 				}
3424 				err = mlx4_reset(dev);
3425 				if (err) {
3426 					mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3427 					goto err_sriov;
3428 				}
3429 				goto slave_start;
3430 			}
3431 		} else {
3432 			/* Legacy mode FW requires SRIOV to be enabled before
3433 			 * doing QUERY_DEV_CAP, since max_eq's value is different if
3434 			 * SRIOV is enabled.
3435 			 */
3436 			memset(dev_cap, 0, sizeof(*dev_cap));
3437 			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3438 			if (err) {
3439 				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3440 				goto err_fw;
3441 			}
3442 
3443 			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3444 				goto err_fw;
3445 		}
3446 	}
3447 
3448 	err = mlx4_init_hca(dev);
3449 	if (err) {
3450 		if (err == -EACCES) {
3451 			/* Not primary Physical function
3452 			 * Running in slave mode */
3453 			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3454 			/* We're not a PF */
3455 			if (dev->flags & MLX4_FLAG_SRIOV) {
3456 				if (!existing_vfs)
3457 					pci_disable_sriov(pdev);
3458 				if (mlx4_is_master(dev) && !reset_flow)
3459 					atomic_dec(&pf_loading);
3460 				dev->flags &= ~MLX4_FLAG_SRIOV;
3461 			}
3462 			if (!mlx4_is_slave(dev))
3463 				mlx4_free_ownership(dev);
3464 			dev->flags |= MLX4_FLAG_SLAVE;
3465 			dev->flags &= ~MLX4_FLAG_MASTER;
3466 			goto slave_start;
3467 		} else
3468 			goto err_fw;
3469 	}
3470 
3471 	if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3472 		u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3473 						  existing_vfs, reset_flow);
3474 
3475 		if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3476 			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3477 			dev->flags = dev_flags;
3478 			err = mlx4_cmd_init(dev);
3479 			if (err) {
3480 				/* Only VHCR is cleaned up, so could still
3481 				 * send FW commands
3482 				 */
3483 				mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3484 				goto err_close;
3485 			}
3486 		} else {
3487 			dev->flags = dev_flags;
3488 		}
3489 
3490 		if (!SRIOV_VALID_STATE(dev->flags)) {
3491 			mlx4_err(dev, "Invalid SRIOV state\n");
3492 			goto err_close;
3493 		}
3494 	}
3495 
3496 	/* check if the device is functioning at its maximum possible speed.
3497 	 * No return code for this call, just warn the user in case of PCI
3498 	 * express device capabilities are under-satisfied by the bus.
3499 	 */
3500 	if (!mlx4_is_slave(dev))
3501 		pcie_print_link_status(dev->persist->pdev);
3502 
3503 	/* In master functions, the communication channel must be initialized
3504 	 * after obtaining its address from fw */
3505 	if (mlx4_is_master(dev)) {
3506 		if (dev->caps.num_ports < 2 &&
3507 		    num_vfs_argc > 1) {
3508 			err = -EINVAL;
3509 			mlx4_err(dev,
3510 				 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3511 				 dev->caps.num_ports);
3512 			goto err_close;
3513 		}
3514 		memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3515 
3516 		for (i = 0;
3517 		     i < sizeof(dev->persist->nvfs)/
3518 		     sizeof(dev->persist->nvfs[0]); i++) {
3519 			unsigned j;
3520 
3521 			for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3522 				dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3523 				dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3524 					dev->caps.num_ports;
3525 			}
3526 		}
3527 
3528 		/* In master functions, the communication channel
3529 		 * must be initialized after obtaining its address from fw
3530 		 */
3531 		err = mlx4_multi_func_init(dev);
3532 		if (err) {
3533 			mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3534 			goto err_close;
3535 		}
3536 	}
3537 
3538 	err = mlx4_alloc_eq_table(dev);
3539 	if (err)
3540 		goto err_master_mfunc;
3541 
3542 	bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3543 	mutex_init(&priv->msix_ctl.pool_lock);
3544 
3545 	mlx4_enable_msi_x(dev);
3546 	if ((mlx4_is_mfunc(dev)) &&
3547 	    !(dev->flags & MLX4_FLAG_MSI_X)) {
3548 		err = -EOPNOTSUPP;
3549 		mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3550 		goto err_free_eq;
3551 	}
3552 
3553 	if (!mlx4_is_slave(dev)) {
3554 		err = mlx4_init_steering(dev);
3555 		if (err)
3556 			goto err_disable_msix;
3557 	}
3558 
3559 	mlx4_init_quotas(dev);
3560 
3561 	err = mlx4_setup_hca(dev);
3562 	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3563 	    !mlx4_is_mfunc(dev)) {
3564 		dev->flags &= ~MLX4_FLAG_MSI_X;
3565 		dev->caps.num_comp_vectors = 1;
3566 		pci_disable_msix(pdev);
3567 		err = mlx4_setup_hca(dev);
3568 	}
3569 
3570 	if (err)
3571 		goto err_steer;
3572 
3573 	/* When PF resources are ready arm its comm channel to enable
3574 	 * getting commands
3575 	 */
3576 	if (mlx4_is_master(dev)) {
3577 		err = mlx4_ARM_COMM_CHANNEL(dev);
3578 		if (err) {
3579 			mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
3580 				 err);
3581 			goto err_steer;
3582 		}
3583 	}
3584 
3585 	for (port = 1; port <= dev->caps.num_ports; port++) {
3586 		err = mlx4_init_port_info(dev, port);
3587 		if (err)
3588 			goto err_port;
3589 	}
3590 
3591 	priv->v2p.port1 = 1;
3592 	priv->v2p.port2 = 2;
3593 
3594 	err = mlx4_register_device(dev);
3595 	if (err)
3596 		goto err_port;
3597 
3598 	mlx4_request_modules(dev);
3599 
3600 	mlx4_sense_init(dev);
3601 	mlx4_start_sense(dev);
3602 
3603 	priv->removed = 0;
3604 
3605 	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3606 		atomic_dec(&pf_loading);
3607 
3608 	kfree(dev_cap);
3609 	return 0;
3610 
3611 err_port:
3612 	for (--port; port >= 1; --port)
3613 		mlx4_cleanup_port_info(&priv->port[port]);
3614 
3615 	mlx4_cleanup_default_counters(dev);
3616 	if (!mlx4_is_slave(dev))
3617 		mlx4_cleanup_counters_table(dev);
3618 	mlx4_cleanup_qp_table(dev);
3619 	mlx4_cleanup_srq_table(dev);
3620 	mlx4_cleanup_cq_table(dev);
3621 	mlx4_cmd_use_polling(dev);
3622 	mlx4_cleanup_eq_table(dev);
3623 	mlx4_cleanup_mcg_table(dev);
3624 	mlx4_cleanup_mr_table(dev);
3625 	mlx4_cleanup_xrcd_table(dev);
3626 	mlx4_cleanup_pd_table(dev);
3627 	mlx4_cleanup_uar_table(dev);
3628 
3629 err_steer:
3630 	if (!mlx4_is_slave(dev))
3631 		mlx4_clear_steering(dev);
3632 
3633 err_disable_msix:
3634 	if (dev->flags & MLX4_FLAG_MSI_X)
3635 		pci_disable_msix(pdev);
3636 
3637 err_free_eq:
3638 	mlx4_free_eq_table(dev);
3639 
3640 err_master_mfunc:
3641 	if (mlx4_is_master(dev)) {
3642 		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3643 		mlx4_multi_func_cleanup(dev);
3644 	}
3645 
3646 	if (mlx4_is_slave(dev))
3647 		mlx4_slave_destroy_special_qp_cap(dev);
3648 
3649 err_close:
3650 	mlx4_close_hca(dev);
3651 
3652 err_fw:
3653 	mlx4_close_fw(dev);
3654 
3655 err_mfunc:
3656 	if (mlx4_is_slave(dev))
3657 		mlx4_multi_func_cleanup(dev);
3658 
3659 err_cmd:
3660 	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3661 
3662 err_sriov:
3663 	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3664 		pci_disable_sriov(pdev);
3665 		dev->flags &= ~MLX4_FLAG_SRIOV;
3666 	}
3667 
3668 	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3669 		atomic_dec(&pf_loading);
3670 
3671 	kfree(priv->dev.dev_vfs);
3672 
3673 	if (!mlx4_is_slave(dev))
3674 		mlx4_free_ownership(dev);
3675 
3676 	kfree(dev_cap);
3677 	return err;
3678 }
3679 
3680 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3681 			   struct mlx4_priv *priv)
3682 {
3683 	int err;
3684 	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3685 	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3686 	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3687 		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3688 	unsigned total_vfs = 0;
3689 	unsigned int i;
3690 
3691 	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3692 
3693 	err = mlx4_pci_enable_device(&priv->dev);
3694 	if (err) {
3695 		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3696 		return err;
3697 	}
3698 
3699 	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
3700 	 * per port, we must limit the number of VFs to 63 (since their are
3701 	 * 128 MACs)
3702 	 */
3703 	for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
3704 	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3705 		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3706 		if (nvfs[i] < 0) {
3707 			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3708 			err = -EINVAL;
3709 			goto err_disable_pdev;
3710 		}
3711 	}
3712 	for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc;
3713 	     i++) {
3714 		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3715 		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3716 			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3717 			err = -EINVAL;
3718 			goto err_disable_pdev;
3719 		}
3720 	}
3721 	if (total_vfs > MLX4_MAX_NUM_VF) {
3722 		dev_err(&pdev->dev,
3723 			"Requested more VF's (%d) than allowed by hw (%d)\n",
3724 			total_vfs, MLX4_MAX_NUM_VF);
3725 		err = -EINVAL;
3726 		goto err_disable_pdev;
3727 	}
3728 
3729 	for (i = 0; i < MLX4_MAX_PORTS; i++) {
3730 		if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
3731 			dev_err(&pdev->dev,
3732 				"Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n",
3733 				nvfs[i] + nvfs[2], i + 1,
3734 				MLX4_MAX_NUM_VF_P_PORT);
3735 			err = -EINVAL;
3736 			goto err_disable_pdev;
3737 		}
3738 	}
3739 
3740 	/* Check for BARs. */
3741 	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3742 	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3743 		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3744 			pci_dev_data, pci_resource_flags(pdev, 0));
3745 		err = -ENODEV;
3746 		goto err_disable_pdev;
3747 	}
3748 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3749 		dev_err(&pdev->dev, "Missing UAR, aborting\n");
3750 		err = -ENODEV;
3751 		goto err_disable_pdev;
3752 	}
3753 
3754 	err = pci_request_regions(pdev, DRV_NAME);
3755 	if (err) {
3756 		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3757 		goto err_disable_pdev;
3758 	}
3759 
3760 	pci_set_master(pdev);
3761 
3762 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3763 	if (err) {
3764 		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3765 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3766 		if (err) {
3767 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3768 			goto err_release_regions;
3769 		}
3770 	}
3771 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3772 	if (err) {
3773 		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3774 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3775 		if (err) {
3776 			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3777 			goto err_release_regions;
3778 		}
3779 	}
3780 
3781 	/* Allow large DMA segments, up to the firmware limit of 1 GB */
3782 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3783 	/* Detect if this device is a virtual function */
3784 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3785 		/* When acting as pf, we normally skip vfs unless explicitly
3786 		 * requested to probe them.
3787 		 */
3788 		if (total_vfs) {
3789 			unsigned vfs_offset = 0;
3790 
3791 			for (i = 0; i < ARRAY_SIZE(nvfs) &&
3792 			     vfs_offset + nvfs[i] < extended_func_num(pdev);
3793 			     vfs_offset += nvfs[i], i++)
3794 				;
3795 			if (i == ARRAY_SIZE(nvfs)) {
3796 				err = -ENODEV;
3797 				goto err_release_regions;
3798 			}
3799 			if ((extended_func_num(pdev) - vfs_offset)
3800 			    > prb_vf[i]) {
3801 				dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3802 					 extended_func_num(pdev));
3803 				err = -ENODEV;
3804 				goto err_release_regions;
3805 			}
3806 		}
3807 	}
3808 
3809 	err = mlx4_catas_init(&priv->dev);
3810 	if (err)
3811 		goto err_release_regions;
3812 
3813 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3814 	if (err)
3815 		goto err_catas;
3816 
3817 	return 0;
3818 
3819 err_catas:
3820 	mlx4_catas_end(&priv->dev);
3821 
3822 err_release_regions:
3823 	pci_release_regions(pdev);
3824 
3825 err_disable_pdev:
3826 	mlx4_pci_disable_device(&priv->dev);
3827 	return err;
3828 }
3829 
3830 static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
3831 				      enum devlink_port_type port_type)
3832 {
3833 	struct mlx4_port_info *info = container_of(devlink_port,
3834 						   struct mlx4_port_info,
3835 						   devlink_port);
3836 	enum mlx4_port_type mlx4_port_type;
3837 
3838 	switch (port_type) {
3839 	case DEVLINK_PORT_TYPE_AUTO:
3840 		mlx4_port_type = MLX4_PORT_TYPE_AUTO;
3841 		break;
3842 	case DEVLINK_PORT_TYPE_ETH:
3843 		mlx4_port_type = MLX4_PORT_TYPE_ETH;
3844 		break;
3845 	case DEVLINK_PORT_TYPE_IB:
3846 		mlx4_port_type = MLX4_PORT_TYPE_IB;
3847 		break;
3848 	default:
3849 		return -EOPNOTSUPP;
3850 	}
3851 
3852 	return __set_port_type(info, mlx4_port_type);
3853 }
3854 
/* devlink operations of the driver; passed to devlink_alloc() in mlx4_init_one() */
3855 static const struct devlink_ops mlx4_devlink_ops = {
3856 	.port_type_set	= mlx4_devlink_port_type_set,
3857 };
3858 
3859 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3860 {
3861 	struct devlink *devlink;
3862 	struct mlx4_priv *priv;
3863 	struct mlx4_dev *dev;
3864 	int ret;
3865 
3866 	printk_once(KERN_INFO "%s", mlx4_version);
3867 
3868 	devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
3869 	if (!devlink)
3870 		return -ENOMEM;
3871 	priv = devlink_priv(devlink);
3872 
3873 	dev       = &priv->dev;
3874 	dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3875 	if (!dev->persist) {
3876 		ret = -ENOMEM;
3877 		goto err_devlink_free;
3878 	}
3879 	dev->persist->pdev = pdev;
3880 	dev->persist->dev = dev;
3881 	pci_set_drvdata(pdev, dev->persist);
3882 	priv->pci_dev_data = id->driver_data;
3883 	mutex_init(&dev->persist->device_state_mutex);
3884 	mutex_init(&dev->persist->interface_state_mutex);
3885 	mutex_init(&dev->persist->pci_status_mutex);
3886 
3887 	ret = devlink_register(devlink, &pdev->dev);
3888 	if (ret)
3889 		goto err_persist_free;
3890 	ret = devlink_params_register(devlink, mlx4_devlink_params,
3891 				      ARRAY_SIZE(mlx4_devlink_params));
3892 	if (ret)
3893 		goto err_devlink_unregister;
3894 	mlx4_devlink_set_params_init_values(devlink);
3895 	ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3896 	if (ret)
3897 		goto err_params_unregister;
3898 
3899 	pci_save_state(pdev);
3900 	return 0;
3901 
3902 err_params_unregister:
3903 	devlink_params_unregister(devlink, mlx4_devlink_params,
3904 				  ARRAY_SIZE(mlx4_devlink_params));
3905 err_devlink_unregister:
3906 	devlink_unregister(devlink);
3907 err_persist_free:
3908 	kfree(dev->persist);
3909 err_devlink_free:
3910 	devlink_free(devlink);
3911 	return ret;
3912 }
3913 
3914 static void mlx4_clean_dev(struct mlx4_dev *dev)
3915 {
3916 	struct mlx4_dev_persistent *persist = dev->persist;
3917 	struct mlx4_priv *priv = mlx4_priv(dev);
3918 	unsigned long	flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3919 
3920 	memset(priv, 0, sizeof(*priv));
3921 	priv->dev.persist = persist;
3922 	priv->dev.flags = flags;
3923 }
3924 
/*
 * Tear down the device state of @pdev while keeping its persistent state
 * (dev->persist) in place.
 *
 * Does nothing if the device is already marked as removed.  Otherwise the
 * current and possible port types are saved in dev->persist, sensing is
 * stopped, the device is unregistered, the ports are cleaned up and closed,
 * and the resource tables, EQs, HCA, firmware and command interface are
 * released.  Finally the private structure is wiped by mlx4_clean_dev(),
 * its pci_dev_data is restored and it is marked as removed.
 */
3925 static void mlx4_unload_one(struct pci_dev *pdev)
3926 {
3927 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3928 	struct mlx4_dev  *dev  = persist->dev;
3929 	struct mlx4_priv *priv = mlx4_priv(dev);
3930 	int               pci_dev_data;
3931 	int p, i;
3932
3933 	if (priv->removed)
3934 		return;
3935
3936 	/* saving current ports type for further use */
3937 	for (i = 0; i < dev->caps.num_ports; i++) {
3938 		dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3939 		dev->persist->curr_port_poss_type[i] = dev->caps.
3940 						       possible_type[i + 1];
3941 	}
3942
	/* Saved here because mlx4_clean_dev() below zeroes priv */
3943 	pci_dev_data = priv->pci_dev_data;
3944
3945 	mlx4_stop_sense(dev);
3946 	mlx4_unregister_device(dev);
3947
3948 	for (p = 1; p <= dev->caps.num_ports; p++) {
3949 		mlx4_cleanup_port_info(&priv->port[p]);
3950 		mlx4_CLOSE_PORT(dev, p);
3951 	}
3952
	/* The resource tracker is released in two steps: the
	 * RES_TR_FREE_SLAVES_ONLY pass before the resource tables are cleaned
	 * up, the RES_TR_FREE_STRUCTS_ONLY pass after them.
	 */
3953 	if (mlx4_is_master(dev))
3954 		mlx4_free_resource_tracker(dev,
3955 					   RES_TR_FREE_SLAVES_ONLY);
3956
3957 	mlx4_cleanup_default_counters(dev);
3958 	if (!mlx4_is_slave(dev))
3959 		mlx4_cleanup_counters_table(dev);
3960 	mlx4_cleanup_qp_table(dev);
3961 	mlx4_cleanup_srq_table(dev);
3962 	mlx4_cleanup_cq_table(dev);
3963 	mlx4_cmd_use_polling(dev);
3964 	mlx4_cleanup_eq_table(dev);
3965 	mlx4_cleanup_mcg_table(dev);
3966 	mlx4_cleanup_mr_table(dev);
3967 	mlx4_cleanup_xrcd_table(dev);
3968 	mlx4_cleanup_pd_table(dev);
3969
3970 	if (mlx4_is_master(dev))
3971 		mlx4_free_resource_tracker(dev,
3972 					   RES_TR_FREE_STRUCTS_ONLY);
3973
3974 	iounmap(priv->kar);
3975 	mlx4_uar_free(dev, &priv->driver_uar);
3976 	mlx4_cleanup_uar_table(dev);
3977 	if (!mlx4_is_slave(dev))
3978 		mlx4_clear_steering(dev);
3979 	mlx4_free_eq_table(dev);
	/* Master functions release the multi-function state before closing
	 * the HCA and firmware, slave functions after.
	 */
3980 	if (mlx4_is_master(dev))
3981 		mlx4_multi_func_cleanup(dev);
3982 	mlx4_close_hca(dev);
3983 	mlx4_close_fw(dev);
3984 	if (mlx4_is_slave(dev))
3985 		mlx4_multi_func_cleanup(dev);
3986 	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3987
3988 	if (dev->flags & MLX4_FLAG_MSI_X)
3989 		pci_disable_msix(pdev);
3990
3991 	if (!mlx4_is_slave(dev))
3992 		mlx4_free_ownership(dev);
3993
3994 	mlx4_slave_destroy_special_qp_cap(dev);
3995 	kfree(dev->dev_vfs);
3996
	/* Wipe priv, then restore the fields that must survive a reload */
3997 	mlx4_clean_dev(dev);
3998 	priv->pci_dev_data = pci_dev_data;
3999 	priv->removed = 1;
4000 }
4001 
/* PCI .remove callback (see mlx4_driver): mark the interface as being
 * deleted, unload the device if it is up, optionally disable SR-IOV,
 * release the PCI resources and free the devlink instance together with
 * the persistent data.
 *
 * SR-IOV is left enabled when a master still has active VFs; only a
 * warning is printed in that case.
 */
static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mlx4_dev  *dev  = persist->dev;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct devlink *devlink = priv_to_devlink(priv);
	int active_vfs = 0;

	/* Note: this flag is set without taking interface_state_mutex,
	 * unlike the DELETION flag just below.
	 * NOTE(review): presumably intentional so that it takes effect even
	 * while another task holds the mutex - confirm.
	 */
	if (mlx4_is_slave(dev))
		persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;

	mutex_lock(&persist->interface_state_mutex);
	persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
	mutex_unlock(&persist->interface_state_mutex);

	/* Disabling SR-IOV is not allowed while there are active VFs */
	if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
		active_vfs = mlx4_how_many_lives_vf(dev);
		if (active_vfs) {
			pr_warn("Removing PF when there are active VF's !!\n");
			pr_warn("Will not disable SR-IOV.\n");
		}
	}

	/* The device is now marked as being deleted; continue without the
	 * lock so that other tasks can terminate.
	 */
	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
		mlx4_unload_one(pdev);
	else
		mlx4_info(dev, "%s: interface is down\n", __func__);
	mlx4_catas_end(dev);
	/* active_vfs stays 0 unless the master check above found live VFs. */
	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
		mlx4_warn(dev, "Disabling SR-IOV\n");
		pci_disable_sriov(pdev);
	}

	pci_release_regions(pdev);
	mlx4_pci_disable_device(dev);
	/* Same release order as the error unwinding at the end of
	 * mlx4_init_one(): params, devlink registration, persistent data,
	 * then the devlink instance itself (priv was derived from it via
	 * priv_to_devlink(), so it is presumably freed along with it).
	 */
	devlink_params_unregister(devlink, mlx4_devlink_params,
				  ARRAY_SIZE(mlx4_devlink_params));
	devlink_unregister(devlink);
	kfree(dev->persist);
	devlink_free(devlink);
}
4047 
4048 static int restore_current_port_types(struct mlx4_dev *dev,
4049 				      enum mlx4_port_type *types,
4050 				      enum mlx4_port_type *poss_types)
4051 {
4052 	struct mlx4_priv *priv = mlx4_priv(dev);
4053 	int err, i;
4054 
4055 	mlx4_stop_sense(dev);
4056 
4057 	mutex_lock(&priv->port_mutex);
4058 	for (i = 0; i < dev->caps.num_ports; i++)
4059 		dev->caps.possible_type[i + 1] = poss_types[i];
4060 	err = mlx4_change_port_types(dev, types);
4061 	mlx4_start_sense(dev);
4062 	mutex_unlock(&priv->port_mutex);
4063 
4064 	return err;
4065 }
4066 
4067 int mlx4_restart_one(struct pci_dev *pdev)
4068 {
4069 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4070 	struct mlx4_dev	 *dev  = persist->dev;
4071 	struct mlx4_priv *priv = mlx4_priv(dev);
4072 	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4073 	int pci_dev_data, err, total_vfs;
4074 
4075 	pci_dev_data = priv->pci_dev_data;
4076 	total_vfs = dev->persist->num_vfs;
4077 	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4078 
4079 	mlx4_unload_one(pdev);
4080 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
4081 	if (err) {
4082 		mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
4083 			 __func__, pci_name(pdev), err);
4084 		return err;
4085 	}
4086 
4087 	err = restore_current_port_types(dev, dev->persist->curr_port_type,
4088 					 dev->persist->curr_port_poss_type);
4089 	if (err)
4090 		mlx4_err(dev, "could not restore original port types (%d)\n",
4091 			 err);
4092 
4093 	return err;
4094 }
4095 
/*
 * Helpers for building mlx4_pci_table entries. Each expands to a Mellanox
 * vendor/device match followed by one extra initializer:
 *   MLX_SP - MLX4_PCI_DEV_FORCE_SENSE_PORT
 *   MLX_VF - MLX4_PCI_DEV_IS_VF
 *   MLX_GN - 0 (no flags)
 * NOTE(review): that trailing value is expected to land in driver_data,
 * which the probe path reads as id->driver_data - confirm against the
 * PCI_VDEVICE() definition.
 */
#define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
#define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
#define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }

/*
 * PCI IDs handled by this driver. The first group of entries is only
 * compiled in when CONFIG_MLX4_CORE_GEN2 is defined. The table ends with
 * an all-zero sentinel entry.
 */
static const struct pci_device_id mlx4_pci_table[] = {
#ifdef CONFIG_MLX4_CORE_GEN2
	/* MT25408 "Hermon" */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR),	/* SDR */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR),	/* DDR */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR),	/* QDR */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2), /* DDR Gen2 */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2),	/* QDR Gen2 */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN),	/* EN 10GigE */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2),  /* EN 10GigE Gen2 */
	/* MT25458 ConnectX EN 10GBASE-T */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN),
	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2),	/* Gen2 */
	/* MT26468 ConnectX EN 10GigE PCIe Gen2 */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2),
	/* MT26438 ConnectX EN 40GigE PCIe Gen2 5GT/s */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2),
	/* MT26478 ConnectX2 40GigE PCIe Gen2 */
	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2),
	/* MT25400 Family [ConnectX-2] */
	MLX_VF(0x1002),					/* Virtual Function */
#endif /* CONFIG_MLX4_CORE_GEN2 */
	/* MT27500 Family [ConnectX-3] */
	MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3),
	MLX_VF(0x1004),					/* Virtual Function */
	MLX_GN(0x1005),					/* MT27510 Family */
	MLX_GN(0x1006),					/* MT27511 Family */
	MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO),	/* MT27520 Family */
	MLX_GN(0x1008),					/* MT27521 Family */
	MLX_GN(0x1009),					/* MT27530 Family */
	MLX_GN(0x100a),					/* MT27531 Family */
	MLX_GN(0x100b),					/* MT27540 Family */
	MLX_GN(0x100c),					/* MT27541 Family */
	MLX_GN(0x100d),					/* MT27550 Family */
	MLX_GN(0x100e),					/* MT27551 Family */
	MLX_GN(0x100f),					/* MT27560 Family */
	MLX_GN(0x1010),					/* MT27561 Family */

	/*
	 * See the mellanox_check_broken_intx_masking() quirk when
	 * adding devices
	 */

	{ 0, }
};

/* Export the ID table in the module's PCI device table. */
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
4147 
4148 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
4149 					      pci_channel_state_t state)
4150 {
4151 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4152 
4153 	mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
4154 	mlx4_enter_error_state(persist);
4155 
4156 	mutex_lock(&persist->interface_state_mutex);
4157 	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4158 		mlx4_unload_one(pdev);
4159 
4160 	mutex_unlock(&persist->interface_state_mutex);
4161 	if (state == pci_channel_io_perm_failure)
4162 		return PCI_ERS_RESULT_DISCONNECT;
4163 
4164 	mlx4_pci_disable_device(persist->dev);
4165 	return PCI_ERS_RESULT_NEED_RESET;
4166 }
4167 
4168 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
4169 {
4170 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4171 	struct mlx4_dev	 *dev  = persist->dev;
4172 	int err;
4173 
4174 	mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
4175 	err = mlx4_pci_enable_device(dev);
4176 	if (err) {
4177 		mlx4_err(dev, "Can not re-enable device, err=%d\n", err);
4178 		return PCI_ERS_RESULT_DISCONNECT;
4179 	}
4180 
4181 	pci_set_master(pdev);
4182 	pci_restore_state(pdev);
4183 	pci_save_state(pdev);
4184 	return PCI_ERS_RESULT_RECOVERED;
4185 }
4186 
4187 static void mlx4_pci_resume(struct pci_dev *pdev)
4188 {
4189 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4190 	struct mlx4_dev	 *dev  = persist->dev;
4191 	struct mlx4_priv *priv = mlx4_priv(dev);
4192 	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4193 	int total_vfs;
4194 	int err;
4195 
4196 	mlx4_err(dev, "%s was called\n", __func__);
4197 	total_vfs = dev->persist->num_vfs;
4198 	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4199 
4200 	mutex_lock(&persist->interface_state_mutex);
4201 	if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
4202 		err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
4203 				    priv, 1);
4204 		if (err) {
4205 			mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n",
4206 				 __func__,  err);
4207 			goto end;
4208 		}
4209 
4210 		err = restore_current_port_types(dev, dev->persist->
4211 						 curr_port_type, dev->persist->
4212 						 curr_port_poss_type);
4213 		if (err)
4214 			mlx4_err(dev, "could not restore original port types (%d)\n", err);
4215 	}
4216 end:
4217 	mutex_unlock(&persist->interface_state_mutex);
4218 
4219 }
4220 
4221 static void mlx4_shutdown(struct pci_dev *pdev)
4222 {
4223 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4224 
4225 	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
4226 	mutex_lock(&persist->interface_state_mutex);
4227 	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4228 		mlx4_unload_one(pdev);
4229 	mutex_unlock(&persist->interface_state_mutex);
4230 }
4231 
/* PCI error-recovery callbacks, hooked up through mlx4_driver.err_handler:
 * error_detected unloads the device, slot_reset re-enables the PCI
 * function, and resume reloads the device.
 */
static const struct pci_error_handlers mlx4_err_handler = {
	.error_detected = mlx4_pci_err_detected,
	.slot_reset     = mlx4_pci_slot_reset,
	.resume		= mlx4_pci_resume,
};
4237 
4238 static int mlx4_suspend(struct pci_dev *pdev, pm_message_t state)
4239 {
4240 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4241 	struct mlx4_dev	*dev = persist->dev;
4242 
4243 	mlx4_err(dev, "suspend was called\n");
4244 	mutex_lock(&persist->interface_state_mutex);
4245 	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4246 		mlx4_unload_one(pdev);
4247 	mutex_unlock(&persist->interface_state_mutex);
4248 
4249 	return 0;
4250 }
4251 
4252 static int mlx4_resume(struct pci_dev *pdev)
4253 {
4254 	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4255 	struct mlx4_dev	*dev = persist->dev;
4256 	struct mlx4_priv *priv = mlx4_priv(dev);
4257 	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4258 	int total_vfs;
4259 	int ret = 0;
4260 
4261 	mlx4_err(dev, "resume was called\n");
4262 	total_vfs = dev->persist->num_vfs;
4263 	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4264 
4265 	mutex_lock(&persist->interface_state_mutex);
4266 	if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
4267 		ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs,
4268 				    nvfs, priv, 1);
4269 		if (!ret) {
4270 			ret = restore_current_port_types(dev,
4271 					dev->persist->curr_port_type,
4272 					dev->persist->curr_port_poss_type);
4273 			if (ret)
4274 				mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret);
4275 		}
4276 	}
4277 	mutex_unlock(&persist->interface_state_mutex);
4278 
4279 	return ret;
4280 }
4281 
4282 static struct pci_driver mlx4_driver = {
4283 	.name		= DRV_NAME,
4284 	.id_table	= mlx4_pci_table,
4285 	.probe		= mlx4_init_one,
4286 	.shutdown	= mlx4_shutdown,
4287 	.remove		= mlx4_remove_one,
4288 	.suspend	= mlx4_suspend,
4289 	.resume		= mlx4_resume,
4290 	.err_handler    = &mlx4_err_handler,
4291 };
4292 
4293 static int __init mlx4_verify_params(void)
4294 {
4295 	if (msi_x < 0) {
4296 		pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
4297 		return -1;
4298 	}
4299 
4300 	if ((log_num_mac < 0) || (log_num_mac > 7)) {
4301 		pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
4302 		return -1;
4303 	}
4304 
4305 	if (log_num_vlan != 0)
4306 		pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
4307 			MLX4_LOG_NUM_VLANS);
4308 
4309 	if (use_prio != 0)
4310 		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
4311 
4312 	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
4313 		pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
4314 			log_mtts_per_seg);
4315 		return -1;
4316 	}
4317 
4318 	/* Check if module param for ports type has legal combination */
4319 	if (port_type_array[0] == false && port_type_array[1] == true) {
4320 		pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
4321 		port_type_array[0] = true;
4322 	}
4323 
4324 	if (mlx4_log_num_mgm_entry_size < -7 ||
4325 	    (mlx4_log_num_mgm_entry_size > 0 &&
4326 	     (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
4327 	      mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
4328 		pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
4329 			mlx4_log_num_mgm_entry_size,
4330 			MLX4_MIN_MGM_LOG_ENTRY_SIZE,
4331 			MLX4_MAX_MGM_LOG_ENTRY_SIZE);
4332 		return -1;
4333 	}
4334 
4335 	return 0;
4336 }
4337 
4338 static int __init mlx4_init(void)
4339 {
4340 	int ret;
4341 
4342 	if (mlx4_verify_params())
4343 		return -EINVAL;
4344 
4345 
4346 	mlx4_wq = create_singlethread_workqueue("mlx4");
4347 	if (!mlx4_wq)
4348 		return -ENOMEM;
4349 
4350 	ret = pci_register_driver(&mlx4_driver);
4351 	if (ret < 0)
4352 		destroy_workqueue(mlx4_wq);
4353 	return ret < 0 ? ret : 0;
4354 }
4355 
/* Module exit point: undo mlx4_init() in reverse order - unregister the
 * PCI driver first, then destroy the mlx4 workqueue.
 */
static void __exit mlx4_cleanup(void)
{
	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}
4361 
/* Register the module load and unload entry points. */
module_init(mlx4_init);
module_exit(mlx4_cleanup);
4364