/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static char mlx4_version[] __devinitdata =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

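/*
 * Default resource profile: how many of each HCA object (QPs, SRQs,
 * CQs, MCGs, MPTs, MTTs) to size the context memory layout for.
 * These counts are fed to mlx4_make_profile(), which turns them into
 * an ICM memory map.
 */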
static struct mlx4_profile default_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 17,
	.num_mtt	= 1 << 20,
};

static int log_num_mac = 2;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
		  "(0/1, default 0)");

static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	for (i = 0; i < dev->caps.num_ports - 1; i++) {
		if (port_type[i] != port_type[i + 1]) {
			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
				mlx4_err(dev, "Only same port types supported "
					 "on this HCA, aborting.\n");
				return -EINVAL;
			}
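			/*
			 * DPDP allows mixed port types, but only with
			 * the IB port(s) before the ETH port(s): an
			 * ETH port followed by an IB port is rejected.
			 */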
			if (port_type[i] == MLX4_PORT_TYPE_ETH &&
			    port_type[i + 1] == MLX4_PORT_TYPE_IB)
				return -EINVAL;
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not "
				      "supported on this HCA\n", i + 1);
			return -EINVAL;
		}
	}
	return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}


static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		return err;
	}

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than "
			 "kernel PAGE_SIZE of %ld, aborting.\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
			 "aborting.\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
			 "PCI resource 2 size of 0x%llx, aborting.\n",
			 dev_cap->uar_size,
			 (unsigned long long) pci_resource_len(dev->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
	}

	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	dev->caps.num_qp_per_mgm     = MLX4_QP_PER_MGM;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
						    dev->caps.mtts_per_seg);
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	/* Test dev_cap->flags here: dev->caps.flags is not copied until below */
	dev->caps.reserved_xrcds     = (dev_cap->flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev_cap->flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;

	dev->caps.log_num_macs  = log_num_mac;
	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	dev->caps.log_num_prios = use_prio ? 3 : 0;

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
		else
			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
		dev->caps.possible_type[i] = dev->caps.port_type[i];
		mlx4_priv(dev)->sense.sense_allowed[i] =
			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;

		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
			mlx4_warn(dev, "Requested number of MACs is too large "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
			mlx4_warn(dev, "Requested number of VLANs is too large "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	mlx4_set_port_mask(dev);

	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		(1 << dev->caps.log_num_prios) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	return 0;
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type differs
		 * from the current one; Auto was already resolved to
		 * a concrete type by the caller. */
		if (port_types[port] != dev->caps.port_type[port + 1]) {
			change = 1;
			dev->caps.port_type[port + 1] = port_types[port];
		}
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			err = mlx4_SET_PORT(dev, port);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, "
					      "aborting.\n", port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		return -EINVAL;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* The requested type is always recorded as the possible type */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
			       "Set both ports to either 'eth' or 'ib' "
			       "(they must be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* The configuration was verified and is about to be applied,
	 * so there is no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
	return err ? err : count;
}

static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	mlx4_UNMAP_FA(dev);

err_free:
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}

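/*
 * The control MPT (cMPT) table is a single ICM area holding one
 * region per object type (QP, SRQ, CQ, EQ).  Each region is carved
 * out of cmpt_base at a fixed offset of
 * (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT.
 */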
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz,
				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}

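/*
 * Set up all ICM tables (device context memory that lives in host
 * RAM): first ask the firmware how much auxiliary ICM the requested
 * layout needs (SET_ICM_SIZE) and map it, then map each context
 * table (cMPT, EQ, MTT, dMPT, QP, AUXC, ALTC, RDMARC, CQ, SRQ, MCG)
 * in turn, unwinding in reverse order on failure.
 */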
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int err;

	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
		goto err_unmap_aux;
	}

	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  dev->caps.num_eqs, dev->caps.num_eqs,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtt_segs,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
		goto err_unmap_mtt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
		goto err_unmap_auxc;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting.\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
		goto err_unmap_cq;
	}

	/*
	 * It's not strictly required, but for simplicity just map the
	 * whole multicast group table now.  The table isn't very big
	 * and it's a lot easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
		goto err_unmap_srq;
	}

	return 0;

err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	mlx4_UNMAP_ICM_AUX(dev);
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

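/*
 * The BlueFlame send area lives in BAR 2, immediately above the UAR
 * pages: map everything from the end of the UARs to the end of the
 * BAR as write-combining.
 */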
static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_bf_area(dev);
	mlx4_CLOSE_HCA(dev, 0);
	mlx4_free_icms(dev);
	mlx4_UNMAP_FA(dev);
	mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
}

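/*
 * Bring the HCA up to the point where it can accept "real" commands:
 * query and load the firmware, override the firmware log_pg_sz,
 * query device capabilities, turn the resource profile into an ICM
 * layout, map the ICM and BlueFlame areas, then INIT_HCA and
 * QUERY_ADAPTER.
 */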
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_adapter	   adapter;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_mod_stat_cfg   mlx4_cfg;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	int err;

	err = mlx4_QUERY_FW(dev);
	if (err) {
		if (err == -EACCES)
			mlx4_info(dev, "non-primary physical function, skipping.\n");
		else
			mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
		return err;
	}

	err = mlx4_load_fw(dev);
	if (err) {
		mlx4_err(dev, "Failed to start FW, aborting.\n");
		return err;
	}

	mlx4_cfg.log_pg_sz_m = 1;
	mlx4_cfg.log_pg_sz = 0;
	err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
	if (err)
		mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		goto err_stop_fw;
	}

	profile = default_profile;

	icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
	if ((long long) icm_size < 0) {
		err = icm_size;
		goto err_stop_fw;
	}

	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);

	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
	if (err)
		goto err_stop_fw;

	err = mlx4_INIT_HCA(dev, &init_hca);
	if (err) {
		mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
		goto err_free_icm;
	}

	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
		goto err_close;
	}

	priv->eq_table.inta_pin = adapter.inta_pin;
	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);

	return 0;

err_close:
	mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	mlx4_free_icms(dev);

err_stop_fw:
	unmap_bf_area(dev);
	mlx4_UNMAP_FA(dev);
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);

	return err;
}

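/*
 * Counters are handed out from a simple bitmap allocator sized by
 * max_counters (rounded down to a power of two in mlx4_dev_cap()).
 * HCAs without the counters capability return -ENOENT, which callers
 * treat as "not supported" rather than a fatal error.
 */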
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int nent;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	nent = dev->caps.max_counters;
	return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
}

static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
	mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
}

int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
	if (*idx == -1)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);

void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
	mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);

static int mlx4_setup_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int port;
	__be32 ib_port_default_caps;

	err = mlx4_init_uar_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "user access region table, aborting.\n");
		return err;
	}

	err = mlx4_uar_alloc(dev, &priv->driver_uar);
	if (err) {
		mlx4_err(dev, "Failed to allocate driver access region, "
			 "aborting.\n");
		goto err_uar_table_free;
	}

	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!priv->kar) {
		mlx4_err(dev, "Couldn't map kernel access region, "
			 "aborting.\n");
		err = -ENOMEM;
		goto err_uar_free;
	}

	err = mlx4_init_pd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "protection domain table, aborting.\n");
		goto err_kar_unmap;
	}

	err = mlx4_init_xrcd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "extended reliable connection domain table, aborting.\n");
		goto err_pd_table_free;
	}

	err = mlx4_init_mr_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "memory region table, aborting.\n");
		goto err_xrcd_table_free;
	}

	err = mlx4_init_eq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "event queue table, aborting.\n");
		goto err_mr_table_free;
	}

	err = mlx4_cmd_use_events(dev);
	if (err) {
		mlx4_err(dev, "Failed to switch to event-driven "
			 "firmware commands, aborting.\n");
		goto err_eq_table_free;
	}

	err = mlx4_NOP(dev);
	if (err) {
		if (dev->flags & MLX4_FLAG_MSI_X) {
			mlx4_warn(dev, "NOP command failed to generate MSI-X "
				  "interrupt (IRQ %d).\n",
				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_warn(dev, "Trying again without MSI-X.\n");
		} else {
			mlx4_err(dev, "NOP command failed to generate interrupt "
				 "(IRQ %d), aborting.\n",
				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
		}

		goto err_cmd_poll;
	}

	mlx4_dbg(dev, "NOP command IRQ test passed\n");

	err = mlx4_init_cq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "completion queue table, aborting.\n");
		goto err_cmd_poll;
	}

	err = mlx4_init_srq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "shared receive queue table, aborting.\n");
		goto err_cq_table_free;
	}

	err = mlx4_init_qp_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "queue pair table, aborting.\n");
		goto err_srq_table_free;
	}

	err = mlx4_init_mcg_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "multicast group table, aborting.\n");
		goto err_qp_table_free;
	}

	err = mlx4_init_counters_table(dev);
	if (err && err != -ENOENT) {
		mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
		goto err_mcg_table_free;
	}

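	/*
	 * For each port: let the firmware sense the current link type,
	 * pick up the default IB capabilities and any extended port
	 * capabilities, then program the port with SET_PORT.
	 */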
	for (port = 1; port <= dev->caps.num_ports; port++) {
		enum mlx4_port_type port_type = 0;
		mlx4_SENSE_PORT(dev, port, &port_type);
		if (port_type)
			dev->caps.port_type[port] = port_type;
		ib_port_default_caps = 0;
		err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
		if (err)
			mlx4_warn(dev, "failed to get port %d default "
				  "ib capabilities (%d). Continuing with "
				  "caps = 0\n", port, err);
		dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

		err = mlx4_check_ext_port_caps(dev, port);
		if (err)
			mlx4_warn(dev, "failed to get port %d extended "
				  "port capabilities support info (%d)."
				  " Assuming not supported\n", port, err);

		err = mlx4_SET_PORT(dev, port);
		if (err) {
			mlx4_err(dev, "Failed to set port %d, aborting.\n",
				port);
			goto err_counters_table_free;
		}
	}
	mlx4_set_port_mask(dev);

	return 0;

err_counters_table_free:
	mlx4_cleanup_counters_table(dev);

err_mcg_table_free:
	mlx4_cleanup_mcg_table(dev);

err_qp_table_free:
	mlx4_cleanup_qp_table(dev);

err_srq_table_free:
	mlx4_cleanup_srq_table(dev);

err_cq_table_free:
	mlx4_cleanup_cq_table(dev);

err_cmd_poll:
	mlx4_cmd_use_polling(dev);

err_eq_table_free:
	mlx4_cleanup_eq_table(dev);

err_mr_table_free:
	mlx4_cleanup_mr_table(dev);

err_xrcd_table_free:
	mlx4_cleanup_xrcd_table(dev);

err_pd_table_free:
	mlx4_cleanup_pd_table(dev);

err_kar_unmap:
	iounmap(priv->kar);

err_uar_free:
	mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
	mlx4_cleanup_uar_table(dev);
	return err;
}

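/*
 * Try to grab num_ports * min(num_online_cpus() + 1, MAX_MSIX_P_PORT)
 * completion vectors plus the MSIX_LEGACY_SZ legacy block, capped by
 * MAX_MSIX and by the number of available EQs; fall back to a single
 * shared interrupt if MSI-X cannot be enabled.
 */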
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct msix_entry *entries;
	int nreq = min_t(int, dev->caps.num_ports *
			 min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT)
				+ MSIX_LEGACY_SZ, MAX_MSIX);
	int err;
	int i;

	if (msi_x) {
		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
			     nreq);
		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
		if (!entries)
			goto no_msi;

		for (i = 0; i < nreq; ++i)
			entries[i].entry = i;

	retry:
		err = pci_enable_msix(dev->pdev, entries, nreq);
		if (err) {
			/* Try again if at least 2 vectors are available */
			if (err > 1) {
				mlx4_info(dev, "Requested %d vectors, "
					  "but only %d MSI-X vectors available, "
					  "trying again\n", nreq, err);
				nreq = err;
				goto retry;
			}
			kfree(entries);
			goto no_msi;
		}

		if (nreq <
		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
			/* Working in legacy mode, all EQs are shared */
			dev->caps.comp_pool	   = 0;
			dev->caps.num_comp_vectors = nreq - 1;
		} else {
			dev->caps.comp_pool	   = nreq - MSIX_LEGACY_SZ;
			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
		}
		for (i = 0; i < nreq; ++i)
			priv->eq_table.eq[i].irq = entries[i].vector;

		dev->flags |= MLX4_FLAG_MSI_X;

		kfree(entries);
		return;
	}

no_msi:
	dev->caps.num_comp_vectors = 1;
	dev->caps.comp_pool	   = 0;

	for (i = 0; i < 2; ++i)
		priv->eq_table.eq[i].irq = dev->pdev->irq;
}

static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
	int err = 0;

	info->dev = dev;
	info->port = port;
	mlx4_init_mac_table(dev, &info->mac_table);
	mlx4_init_vlan_table(dev, &info->vlan_table);
	info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
			(port - 1) * (1 << log_num_mac);

	sprintf(info->dev_name, "mlx4_port%d", port);
	info->port_attr.attr.name = info->dev_name;
	info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
	info->port_attr.show      = show_port_type;
	info->port_attr.store     = set_port_type;
	sysfs_attr_init(&info->port_attr.attr);

	err = device_create_file(&dev->pdev->dev, &info->port_attr);
	if (err) {
		mlx4_err(dev, "Failed to create file for port %d\n", port);
		info->port = -1;
	}

	return err;
}

static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
	if (info->port < 0)
		return;

	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
}

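/*
 * Per-port steering state: lists of promiscuous QPs and steering
 * entries for each steer type, torn down again (including any
 * duplicate entries) in mlx4_clear_steering().
 */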
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
		INIT_LIST_HEAD(&priv->steer[i].high_prios);
	}
	return 0;
}

static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *entry, *tmp_entry;
	struct mlx4_promisc_qp *pqp, *tmp_pqp;
	int num_entries = dev->caps.num_ports;
	int i, j;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			list_for_each_entry_safe(pqp, tmp_pqp,
						 &priv->steer[i].promisc_qps[j],
						 list) {
				list_del(&pqp->list);
				kfree(pqp);
			}
			list_for_each_entry_safe(entry, tmp_entry,
						 &priv->steer[i].steer_entries[j],
						 list) {
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
							 &entry->duplicates,
							 list) {
					list_del(&pqp->list);
					kfree(pqp);
				}
				kfree(entry);
			}
		}
	}
	kfree(priv->steer);
}

static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
	int err;
	int port;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, "
			"aborting.\n");
		return err;
	}

	/*
	 * Check for BARs.  We expect BAR 0 (the DCS) to be exactly
	 * 1 MB and BAR 2 (the UAR space) to be present.
	 */
	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    pci_resource_len(pdev, 0) != 1 << 20) {
		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting.\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
			 "consistent PCI DMA mask.\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
				"aborting.\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);

	priv = kzalloc(sizeof *priv, GFP_KERNEL);
	if (!priv) {
		dev_err(&pdev->dev, "Device struct alloc failed, "
			"aborting.\n");
		err = -ENOMEM;
		goto err_release_regions;
	}

	dev       = &priv->dev;
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

	mutex_init(&priv->port_mutex);

	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

	dev->rev_id = pdev->revision;

	/*
	 * Now reset the HCA before we touch the PCI capabilities or
	 * attempt a firmware command, since a boot ROM may have left
	 * the HCA in an undefined state.
	 */
	err = mlx4_reset(dev);
	if (err) {
		mlx4_err(dev, "Failed to reset HCA, aborting.\n");
		goto err_free_dev;
	}

	err = mlx4_cmd_init(dev);
	if (err) {
		mlx4_err(dev, "Failed to init command interface, aborting.\n");
		goto err_free_dev;
	}

	err = mlx4_init_hca(dev);
	if (err)
		goto err_cmd;

	err = mlx4_alloc_eq_table(dev);
	if (err)
		goto err_close;

	priv->msix_ctl.pool_bm = 0;
	spin_lock_init(&priv->msix_ctl.pool_lock);

	mlx4_enable_msi_x(dev);

	err = mlx4_init_steering(dev);
	if (err)
		goto err_free_eq;

	err = mlx4_setup_hca(dev);
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
		dev->flags &= ~MLX4_FLAG_MSI_X;
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

	if (err)
		goto err_steer;

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}

	err = mlx4_register_device(dev);
	if (err)
		goto err_port;

	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

	pci_set_drvdata(pdev, dev);

	return 0;

err_port:
	for (--port; port >= 1; --port)
		mlx4_cleanup_port_info(&priv->port[port]);


	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
	iounmap(priv->kar);
	mlx4_uar_free(dev, &priv->driver_uar);
	mlx4_cleanup_uar_table(dev);

err_steer:
	mlx4_clear_steering(dev);

err_free_eq:
	mlx4_free_eq_table(dev);

err_close:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_cmd:
	mlx4_cmd_cleanup(dev);

err_free_dev:
	kfree(priv);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

static int __devinit mlx4_init_one(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	printk_once(KERN_INFO "%s", mlx4_version);

	return __mlx4_init_one(pdev, id);
}


static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	int p;

	if (dev) {
		mlx4_stop_sense(dev);
		mlx4_unregister_device(dev);

		for (p = 1; p <= dev->caps.num_ports; p++) {
			mlx4_cleanup_port_info(&priv->port[p]);
			mlx4_CLOSE_PORT(dev, p);
		}

		mlx4_cleanup_counters_table(dev);
		mlx4_cleanup_mcg_table(dev);
		mlx4_cleanup_qp_table(dev);
		mlx4_cleanup_srq_table(dev);
		mlx4_cleanup_cq_table(dev);
		mlx4_cmd_use_polling(dev);
		mlx4_cleanup_eq_table(dev);
		mlx4_cleanup_mr_table(dev);
		mlx4_cleanup_xrcd_table(dev);
		mlx4_cleanup_pd_table(dev);

		iounmap(priv->kar);
		mlx4_uar_free(dev, &priv->driver_uar);
		mlx4_cleanup_uar_table(dev);
		mlx4_clear_steering(dev);
		mlx4_free_eq_table(dev);
		mlx4_close_hca(dev);
		mlx4_cmd_cleanup(dev);

		if (dev->flags & MLX4_FLAG_MSI_X)
			pci_disable_msix(pdev);

		kfree(priv);
		pci_release_regions(pdev);
		pci_disable_device(pdev);
		pci_set_drvdata(pdev, NULL);
	}
}

int mlx4_restart_one(struct pci_dev *pdev)
{
	mlx4_remove_one(pdev);
	return __mlx4_init_one(pdev, NULL);
}


static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
	{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
	{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
	{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
	{ PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	{ PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX2 40GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x1002) }, /* MT25400 Family [ConnectX-2 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1003) }, /* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1004) }, /* MT27500 Family [ConnectX-3 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.remove		= __devexit_p(mlx4_remove_one)
};

static int __init mlx4_verify_params(void)
{
	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warning("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
		return -1;
	}

	if (log_num_vlan != 0)
		pr_warning("mlx4_core: log_num_vlan is an obsolete module parameter; using %d\n",
			   MLX4_LOG_NUM_VLANS);

	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
		return -1;
	}

	return 0;
}

static int __init mlx4_init(void)
{
	int ret;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_catas_init();

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	ret = pci_register_driver(&mlx4_driver);
	if (ret < 0)
		destroy_workqueue(mlx4_wq);
	return ret;
}

static void __exit mlx4_cleanup(void)
{
	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}

module_init(mlx4_init);
module_exit(mlx4_cleanup);