/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;
#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static char mlx4_version[] __devinitdata =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

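/*
 * Default resource-table sizes fed to mlx4_make_profile(); they size
 * the ICM areas reserved for each object type at INIT_HCA time.
 */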
static struct mlx4_profile default_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 17,
	.num_mtt	= 1 << 20,
};

static int log_num_mac = 2;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
		  "(0/1, default 0)");

static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
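/*
 * Validate a requested port-type layout: mixed IB/ETH configurations
 * need the DPDP (dual port, different protocol) capability, an ETH
 * port may not precede an IB port, and each requested type must be in
 * the port's supported_type mask reported by QUERY_DEV_CAP.
 */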
int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	for (i = 0; i < dev->caps.num_ports - 1; i++) {
		if (port_type[i] != port_type[i + 1]) {
			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
				mlx4_err(dev, "Only same port types supported "
					 "on this HCA, aborting.\n");
				return -EINVAL;
			}
			if (port_type[i] == MLX4_PORT_TYPE_ETH &&
			    port_type[i + 1] == MLX4_PORT_TYPE_IB)
				return -EINVAL;
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not "
				      "supported on this HCA\n", i + 1);
			return -EINVAL;
		}
	}
	return 0;
}

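/* Set bit (port - 1) in port_mask for every port configured as IB */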
static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	dev->caps.port_mask = 0;
	for (i = 1; i <= dev->caps.num_ports; ++i)
		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
			dev->caps.port_mask |= 1 << (i - 1);
}

static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		return err;
	}

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than "
			 "kernel PAGE_SIZE of %ld, aborting.\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
			 "aborting.\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
			 "PCI resource 2 size of 0x%llx, aborting.\n",
			 dev_cap->uar_size,
			 (unsigned long long) pci_resource_len(dev->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
	}

	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	dev->caps.num_qp_per_mgm     = MLX4_QP_PER_MGM;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
						    dev->caps.mtts_per_seg);
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	/* Test dev_cap->flags here: dev->caps.flags is not assigned yet */
	dev->caps.reserved_xrcds     = (dev_cap->flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev_cap->flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;

	dev->caps.log_num_macs  = log_num_mac;
	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	dev->caps.log_num_prios = use_prio ? 3 : 0;

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
		else
			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
		dev->caps.possible_type[i] = dev->caps.port_type[i];
		mlx4_priv(dev)->sense.sense_allowed[i] =
			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;

		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
			mlx4_warn(dev, "Requested number of MACs is too high "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
			mlx4_warn(dev, "Requested number of VLANs is too high "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	mlx4_set_port_mask(dev);

	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

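	/*
	 * Each ETH port needs one steering QP per (MAC, VLAN, priority)
	 * combination, so the ETH_ADDR and FC_ADDR regions are sized
	 * accordingly.
	 */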
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		(1 << dev->caps.log_num_prios) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	return 0;
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1]) {
			change = 1;
			dev->caps.port_type[port + 1] = port_types[port];
		}
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			err = mlx4_SET_PORT(dev, port);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, "
					      "aborting\n", port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

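/*
 * sysfs store hook for the per-port "mlx4_portN" attribute: accepts
 * "ib", "eth" or "auto" and, under the port mutex, re-validates and
 * applies the new configuration for all ports.
 */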
static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		return -EINVAL;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
			       "Set only 'eth' or 'ib' for both ports "
			       "(should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
	return err ? err : count;
}

static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	mlx4_UNMAP_FA(dev);

err_free:
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}

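/*
 * The cMPT table holds one region per control object type (QP, SRQ,
 * CQ, EQ); each region starts at
 * cmpt_base + ((u64) (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT).
 */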
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz,
				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}

static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int err;

	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
		goto err_unmap_aux;
	}

	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  dev->caps.num_eqs, dev->caps.num_eqs,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtt_segs,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
		goto err_unmap_mtt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
		goto err_unmap_auxc;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
		goto err_unmap_cq;
	}

	/*
	 * It's not strictly required, but for simplicity just map the
	 * whole multicast group table now.  The table isn't very big
	 * and it's a lot easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
		goto err_unmap_srq;
	}

	return 0;

err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	mlx4_UNMAP_ICM_AUX(dev);
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

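/*
 * The BlueFlame region sits in BAR 2 directly above the UAR pages, so
 * it is mapped write-combining to let doorbell data be posted with
 * streaming stores.
 */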
static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_bf_area(dev);
	mlx4_CLOSE_HCA(dev, 0);
	mlx4_free_icms(dev);
	mlx4_UNMAP_FA(dev);
	mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
}

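/*
 * Bring the HCA to the point where general firmware commands work:
 * query and start the firmware, size and map ICM, then INIT_HCA and
 * QUERY_ADAPTER.
 */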
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_adapter	   adapter;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_mod_stat_cfg   mlx4_cfg;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	int err;

	err = mlx4_QUERY_FW(dev);
	if (err) {
		if (err == -EACCES)
			mlx4_info(dev, "non-primary physical function, skipping.\n");
		else
			mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
		return err;
	}

	err = mlx4_load_fw(dev);
	if (err) {
		mlx4_err(dev, "Failed to start FW, aborting.\n");
		return err;
	}

	mlx4_cfg.log_pg_sz_m = 1;
	mlx4_cfg.log_pg_sz = 0;
	err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
	if (err)
		mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		goto err_stop_fw;
	}

	profile = default_profile;

	icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
	if ((long long) icm_size < 0) {
		err = icm_size;
		goto err_stop_fw;
	}

	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);

	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
	if (err)
		goto err_stop_fw;

	err = mlx4_INIT_HCA(dev, &init_hca);
	if (err) {
		mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
		goto err_free_icm;
	}

	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
		goto err_close;
	}

	priv->eq_table.inta_pin = adapter.inta_pin;
	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);

	return 0;

err_close:
	mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	mlx4_free_icms(dev);

err_stop_fw:
	unmap_bf_area(dev);
	mlx4_UNMAP_FA(dev);
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);

	return err;
}

static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int nent;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

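	/* max_counters is a power of two, so nent - 1 is the index mask */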
	nent = dev->caps.max_counters;
	return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
}

static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
	mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
}

int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
	if (*idx == -1)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);

void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
	mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);

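/*
 * One-time software resource setup after INIT_HCA: UARs, PDs, XRCDs,
 * MRs, EQs, CQs, SRQs, QPs, multicast groups and counters, followed by
 * per-port configuration.
 */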
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int port;
	__be32 ib_port_default_caps;

	err = mlx4_init_uar_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "user access region table, aborting.\n");
		return err;
	}

	err = mlx4_uar_alloc(dev, &priv->driver_uar);
	if (err) {
		mlx4_err(dev, "Failed to allocate driver access region, "
			 "aborting.\n");
		goto err_uar_table_free;
	}

	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!priv->kar) {
		mlx4_err(dev, "Couldn't map kernel access region, "
			 "aborting.\n");
		err = -ENOMEM;
		goto err_uar_free;
	}

	err = mlx4_init_pd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "protection domain table, aborting.\n");
		goto err_kar_unmap;
	}

	err = mlx4_init_xrcd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "reliable connection domain table, aborting.\n");
		goto err_pd_table_free;
	}

	err = mlx4_init_mr_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "memory region table, aborting.\n");
		goto err_xrcd_table_free;
	}

	err = mlx4_init_eq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "event queue table, aborting.\n");
		goto err_mr_table_free;
	}

	err = mlx4_cmd_use_events(dev);
	if (err) {
		mlx4_err(dev, "Failed to switch to event-driven "
			 "firmware commands, aborting.\n");
		goto err_eq_table_free;
	}

	err = mlx4_NOP(dev);
	if (err) {
		if (dev->flags & MLX4_FLAG_MSI_X) {
			mlx4_warn(dev, "NOP command failed to generate MSI-X "
				  "interrupt (IRQ %d).\n",
				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_warn(dev, "Trying again without MSI-X.\n");
		} else {
			mlx4_err(dev, "NOP command failed to generate interrupt "
				 "(IRQ %d), aborting.\n",
				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
		}

		goto err_cmd_poll;
	}

	mlx4_dbg(dev, "NOP command IRQ test passed\n");

	err = mlx4_init_cq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "completion queue table, aborting.\n");
		goto err_cmd_poll;
	}

	err = mlx4_init_srq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "shared receive queue table, aborting.\n");
		goto err_cq_table_free;
	}

	err = mlx4_init_qp_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "queue pair table, aborting.\n");
		goto err_srq_table_free;
	}

	err = mlx4_init_mcg_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "multicast group table, aborting.\n");
		goto err_qp_table_free;
	}

	err = mlx4_init_counters_table(dev);
	if (err && err != -ENOENT) {
		mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
		goto err_mcg_table_free;
	}

	for (port = 1; port <= dev->caps.num_ports; port++) {
		enum mlx4_port_type port_type = 0;
		mlx4_SENSE_PORT(dev, port, &port_type);
		if (port_type)
			dev->caps.port_type[port] = port_type;
		ib_port_default_caps = 0;
		err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
		if (err)
			mlx4_warn(dev, "failed to get port %d default "
				  "ib capabilities (%d). Continuing with "
				  "caps = 0\n", port, err);
		dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

		err = mlx4_check_ext_port_caps(dev, port);
		if (err)
			mlx4_warn(dev, "failed to get port %d extended "
				  "port capabilities support info (%d)."
				  " Assuming not supported\n", port, err);

		err = mlx4_SET_PORT(dev, port);
		if (err) {
			mlx4_err(dev, "Failed to set port %d, aborting\n",
				port);
			goto err_counters_table_free;
		}
	}
	mlx4_set_port_mask(dev);

	return 0;

err_counters_table_free:
	mlx4_cleanup_counters_table(dev);

err_mcg_table_free:
	mlx4_cleanup_mcg_table(dev);

err_qp_table_free:
	mlx4_cleanup_qp_table(dev);

err_srq_table_free:
	mlx4_cleanup_srq_table(dev);

err_cq_table_free:
	mlx4_cleanup_cq_table(dev);

err_cmd_poll:
	mlx4_cmd_use_polling(dev);

err_eq_table_free:
	mlx4_cleanup_eq_table(dev);

err_mr_table_free:
	mlx4_cleanup_mr_table(dev);

err_xrcd_table_free:
	mlx4_cleanup_xrcd_table(dev);

err_pd_table_free:
	mlx4_cleanup_pd_table(dev);

err_kar_unmap:
	iounmap(priv->kar);

err_uar_free:
	mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
	mlx4_cleanup_uar_table(dev);
	return err;
}

static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct msix_entry *entries;
	int nreq = min_t(int, dev->caps.num_ports *
			 min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT)
				+ MSIX_LEGACY_SZ, MAX_MSIX);
	int err;
	int i;

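	/*
	 * Request roughly one completion vector per online CPU per port
	 * (capped at MAX_MSIX_P_PORT) plus MSIX_LEGACY_SZ legacy vectors,
	 * bounded by MAX_MSIX and by the EQs the device can provide.
	 */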
	if (msi_x) {
		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
			     nreq);
		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
		if (!entries)
			goto no_msi;

		for (i = 0; i < nreq; ++i)
			entries[i].entry = i;

	retry:
		err = pci_enable_msix(dev->pdev, entries, nreq);
		if (err) {
			/* Try again if at least 2 vectors are available */
			if (err > 1) {
				mlx4_info(dev, "Requested %d vectors, "
					  "but only %d MSI-X vectors available, "
					  "trying again\n", nreq, err);
				nreq = err;
				goto retry;
			}
			kfree(entries);
			goto no_msi;
		}

		if (nreq <
		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
			/* Working in legacy mode, all EQs shared */
			dev->caps.comp_pool	   = 0;
			dev->caps.num_comp_vectors = nreq - 1;
		} else {
			dev->caps.comp_pool	   = nreq - MSIX_LEGACY_SZ;
			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
		}
		for (i = 0; i < nreq; ++i)
			priv->eq_table.eq[i].irq = entries[i].vector;

		dev->flags |= MLX4_FLAG_MSI_X;

		kfree(entries);
		return;
	}

no_msi:
	dev->caps.num_comp_vectors = 1;
	dev->caps.comp_pool	   = 0;

	for (i = 0; i < 2; ++i)
		priv->eq_table.eq[i].irq = dev->pdev->irq;
}

static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
	int err = 0;

	info->dev = dev;
	info->port = port;
	mlx4_init_mac_table(dev, &info->mac_table);
	mlx4_init_vlan_table(dev, &info->vlan_table);
	info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
			(port - 1) * (1 << log_num_mac);

	sprintf(info->dev_name, "mlx4_port%d", port);
	info->port_attr.attr.name = info->dev_name;
	info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
	info->port_attr.show      = show_port_type;
	info->port_attr.store     = set_port_type;
	sysfs_attr_init(&info->port_attr.attr);

	err = device_create_file(&dev->pdev->dev, &info->port_attr);
	if (err) {
		mlx4_err(dev, "Failed to create file for port %d\n", port);
		info->port = -1;
	}

	return err;
}

static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
	if (info->port < 0)
		return;

	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
}

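/*
 * Per-port steering state: lists of promiscuous QPs and steering
 * entries for each steer type, consumed by the multicast/unicast
 * steering code.
 */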
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
		INIT_LIST_HEAD(&priv->steer[i].high_prios);
	}
	return 0;
}

static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *entry, *tmp_entry;
	struct mlx4_promisc_qp *pqp, *tmp_pqp;
	int num_entries = dev->caps.num_ports;
	int i, j;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			list_for_each_entry_safe(pqp, tmp_pqp,
						 &priv->steer[i].promisc_qps[j],
						 list) {
				list_del(&pqp->list);
				kfree(pqp);
			}
			list_for_each_entry_safe(entry, tmp_entry,
						 &priv->steer[i].steer_entries[j],
						 list) {
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
							 &entry->duplicates,
							 list) {
					list_del(&pqp->list);
					kfree(pqp);
				}
				kfree(entry);
			}
		}
	}
	kfree(priv->steer);
}

static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
	int err;
	int port;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, "
			"aborting.\n");
		return err;
	}

	/*
	 * Check for BARs.  We expect 0: 1MB
	 */
	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    pci_resource_len(pdev, 0) != 1 << 20) {
		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
			 "consistent PCI DMA mask.\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
				"aborting.\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);

	priv = kzalloc(sizeof *priv, GFP_KERNEL);
	if (!priv) {
		dev_err(&pdev->dev, "Device struct alloc failed, "
			"aborting.\n");
		err = -ENOMEM;
		goto err_release_regions;
	}

	dev       = &priv->dev;
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

	mutex_init(&priv->port_mutex);

	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

	dev->rev_id = pdev->revision;

	/*
	 * Now reset the HCA before we touch the PCI capabilities or
	 * attempt a firmware command, since a boot ROM may have left
	 * the HCA in an undefined state.
	 */
	err = mlx4_reset(dev);
	if (err) {
		mlx4_err(dev, "Failed to reset HCA, aborting.\n");
		goto err_free_dev;
	}

	err = mlx4_cmd_init(dev);
	if (err) {
		mlx4_err(dev, "Failed to init command interface, aborting.\n");
		goto err_free_dev;
	}

	err = mlx4_init_hca(dev);
	if (err)
		goto err_cmd;

	err = mlx4_alloc_eq_table(dev);
	if (err)
		goto err_close;

	priv->msix_ctl.pool_bm = 0;
	spin_lock_init(&priv->msix_ctl.pool_lock);

	mlx4_enable_msi_x(dev);

	err = mlx4_init_steering(dev);
	if (err)
		goto err_free_eq;

	err = mlx4_setup_hca(dev);
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
		dev->flags &= ~MLX4_FLAG_MSI_X;
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

	if (err)
		goto err_steer;

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}

	err = mlx4_register_device(dev);
	if (err)
		goto err_port;

	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

	pci_set_drvdata(pdev, dev);

	return 0;

err_port:
	for (--port; port >= 1; --port)
		mlx4_cleanup_port_info(&priv->port[port]);

	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
	iounmap(priv->kar);
	mlx4_uar_free(dev, &priv->driver_uar);
	mlx4_cleanup_uar_table(dev);

err_steer:
	mlx4_clear_steering(dev);

err_free_eq:
	mlx4_free_eq_table(dev);

err_close:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_cmd:
	mlx4_cmd_cleanup(dev);

err_free_dev:
	kfree(priv);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

static int __devinit mlx4_init_one(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	printk_once(KERN_INFO "%s", mlx4_version);

	return __mlx4_init_one(pdev, id);
}

static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	int p;

	if (dev) {
		mlx4_stop_sense(dev);
		mlx4_unregister_device(dev);

		for (p = 1; p <= dev->caps.num_ports; p++) {
			mlx4_cleanup_port_info(&priv->port[p]);
			mlx4_CLOSE_PORT(dev, p);
		}

		mlx4_cleanup_counters_table(dev);
		mlx4_cleanup_mcg_table(dev);
		mlx4_cleanup_qp_table(dev);
		mlx4_cleanup_srq_table(dev);
		mlx4_cleanup_cq_table(dev);
		mlx4_cmd_use_polling(dev);
		mlx4_cleanup_eq_table(dev);
		mlx4_cleanup_mr_table(dev);
		mlx4_cleanup_xrcd_table(dev);
		mlx4_cleanup_pd_table(dev);

		iounmap(priv->kar);
		mlx4_uar_free(dev, &priv->driver_uar);
		mlx4_cleanup_uar_table(dev);
		mlx4_clear_steering(dev);
		mlx4_free_eq_table(dev);
		mlx4_close_hca(dev);
		mlx4_cmd_cleanup(dev);

		if (dev->flags & MLX4_FLAG_MSI_X)
			pci_disable_msix(pdev);

		kfree(priv);
		pci_release_regions(pdev);
		pci_disable_device(pdev);
		pci_set_drvdata(pdev, NULL);
	}
}

int mlx4_restart_one(struct pci_dev *pdev)
{
	mlx4_remove_one(pdev);
	return __mlx4_init_one(pdev, NULL);
}

static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
	{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
	{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
	{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
	{ PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/
	{ PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	{ PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX2 40GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x1002) }, /* MT25400 Family [ConnectX-2 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1003) }, /* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1004) }, /* MT27500 Family [ConnectX-3 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.remove		= __devexit_p(mlx4_remove_one)
};

static int __init mlx4_verify_params(void)
{
	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warning("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
		return -1;
	}

	if (log_num_vlan != 0)
		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
			   MLX4_LOG_NUM_VLANS);

	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
		return -1;
	}

	return 0;
}

static int __init mlx4_init(void)
{
	int ret;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_catas_init();

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	ret = pci_register_driver(&mlx4_driver);
	return ret < 0 ? ret : 0;
}

static void __exit mlx4_cleanup(void)
{
	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}

module_init(mlx4_init);
module_exit(mlx4_cleanup);