xref: /openbmc/linux/drivers/infiniband/hw/mlx4/main.c (revision 5104d265)
1 /*
2  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/errno.h>
38 #include <linux/netdevice.h>
39 #include <linux/inetdevice.h>
40 #include <linux/rtnetlink.h>
41 #include <linux/if_vlan.h>
42 
43 #include <rdma/ib_smi.h>
44 #include <rdma/ib_user_verbs.h>
45 #include <rdma/ib_addr.h>
46 
47 #include <linux/mlx4/driver.h>
48 #include <linux/mlx4/cmd.h>
49 
50 #include "mlx4_ib.h"
51 #include "user.h"
52 
/* Driver identification strings used by the module metadata and banner. */
#define DRV_NAME	MLX4_IB_DRV_NAME
#define DRV_VERSION	"1.0"
#define DRV_RELDATE	"April 4, 2008"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

/*
 * Exposed as the "sm_guid_assign" module parameter with mode 0444.
 * Per the parameter description, SM alias_GUID assignment is enabled
 * when the value is > 0; the default is 1.
 */
int mlx4_ib_sm_guid_assign = 1;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");

/* Version banner assembled from the identification macros above. */
static const char mlx4_ib_version[] =
	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";
69 
/*
 * Deferred GID-table update request, processed by update_gids_task().
 * The task recovers this structure from @work via container_of() and
 * frees it when done.
 */
struct update_gid_work {
	struct work_struct	work;		/* embedded work item */
	union ib_gid		gids[128];	/* table image copied to the command mailbox */
	struct mlx4_ib_dev     *dev;		/* device whose table is updated */
	int			port;		/* port number; used as port - 1 to index gid_table */
};
76 
/* Forward declaration; the definition appears later in the file. */
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);

/* Driver workqueue handle. NOTE(review): its users are outside this excerpt. */
static struct workqueue_struct *wq;
80 
81 static void init_query_mad(struct ib_smp *mad)
82 {
83 	mad->base_version  = 1;
84 	mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
85 	mad->class_version = 1;
86 	mad->method	   = IB_MGMT_METHOD_GET;
87 }
88 
/*
 * All-zero GID (static storage, never written in the visible code).
 * NOTE(review): presumably a reference value for "empty entry"
 * comparisons; its users are outside this excerpt.
 */
static union ib_gid zgid;
90 
91 static int mlx4_ib_query_device(struct ib_device *ibdev,
92 				struct ib_device_attr *props)
93 {
94 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
95 	struct ib_smp *in_mad  = NULL;
96 	struct ib_smp *out_mad = NULL;
97 	int err = -ENOMEM;
98 
99 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
100 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
101 	if (!in_mad || !out_mad)
102 		goto out;
103 
104 	init_query_mad(in_mad);
105 	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
106 
107 	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
108 			   1, NULL, NULL, in_mad, out_mad);
109 	if (err)
110 		goto out;
111 
112 	memset(props, 0, sizeof *props);
113 
114 	props->fw_ver = dev->dev->caps.fw_ver;
115 	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
116 		IB_DEVICE_PORT_ACTIVE_EVENT		|
117 		IB_DEVICE_SYS_IMAGE_GUID		|
118 		IB_DEVICE_RC_RNR_NAK_GEN		|
119 		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
120 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
121 		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
122 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
123 		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
124 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
125 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
126 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
127 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
128 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
129 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
130 	if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
131 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
132 	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
133 		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
134 	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
135 	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
136 	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
137 		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
138 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
139 		props->device_cap_flags |= IB_DEVICE_XRC;
140 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
141 		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
142 	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
143 		if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
144 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
145 		else
146 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
147 	}
148 
149 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
150 		0xffffff;
151 	props->vendor_part_id	   = dev->dev->pdev->device;
152 	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
153 	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
154 
155 	props->max_mr_size	   = ~0ull;
156 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
157 	props->max_qp		   = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
158 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
159 	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
160 					 dev->dev->caps.max_rq_sg);
161 	props->max_cq		   = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
162 	props->max_cqe		   = dev->dev->caps.max_cqes;
163 	props->max_mr		   = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
164 	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
165 	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
166 	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
167 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
168 	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
169 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
170 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
171 	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
172 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
173 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
174 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
175 	props->masked_atomic_cap   = props->atomic_cap;
176 	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
177 	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
178 	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
179 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
180 					   props->max_mcast_grp;
181 	props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
182 
183 out:
184 	kfree(in_mad);
185 	kfree(out_mad);
186 
187 	return err;
188 }
189 
190 static enum rdma_link_layer
191 mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
192 {
193 	struct mlx4_dev *dev = to_mdev(device)->dev;
194 
195 	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
196 		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
197 }
198 
199 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
200 			      struct ib_port_attr *props, int netw_view)
201 {
202 	struct ib_smp *in_mad  = NULL;
203 	struct ib_smp *out_mad = NULL;
204 	int ext_active_speed;
205 	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
206 	int err = -ENOMEM;
207 
208 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
209 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
210 	if (!in_mad || !out_mad)
211 		goto out;
212 
213 	init_query_mad(in_mad);
214 	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
215 	in_mad->attr_mod = cpu_to_be32(port);
216 
217 	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
218 		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
219 
220 	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
221 				in_mad, out_mad);
222 	if (err)
223 		goto out;
224 
225 
226 	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
227 	props->lmc		= out_mad->data[34] & 0x7;
228 	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
229 	props->sm_sl		= out_mad->data[36] & 0xf;
230 	props->state		= out_mad->data[32] & 0xf;
231 	props->phys_state	= out_mad->data[33] >> 4;
232 	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
233 	if (netw_view)
234 		props->gid_tbl_len = out_mad->data[50];
235 	else
236 		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
237 	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
238 	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
239 	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
240 	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
241 	props->active_width	= out_mad->data[31] & 0xf;
242 	props->active_speed	= out_mad->data[35] >> 4;
243 	props->max_mtu		= out_mad->data[41] & 0xf;
244 	props->active_mtu	= out_mad->data[36] >> 4;
245 	props->subnet_timeout	= out_mad->data[51] & 0x1f;
246 	props->max_vl_num	= out_mad->data[37] >> 4;
247 	props->init_type_reply	= out_mad->data[41] >> 4;
248 
249 	/* Check if extended speeds (EDR/FDR/...) are supported */
250 	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
251 		ext_active_speed = out_mad->data[62] >> 4;
252 
253 		switch (ext_active_speed) {
254 		case 1:
255 			props->active_speed = IB_SPEED_FDR;
256 			break;
257 		case 2:
258 			props->active_speed = IB_SPEED_EDR;
259 			break;
260 		}
261 	}
262 
263 	/* If reported active speed is QDR, check if is FDR-10 */
264 	if (props->active_speed == IB_SPEED_QDR) {
265 		init_query_mad(in_mad);
266 		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
267 		in_mad->attr_mod = cpu_to_be32(port);
268 
269 		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
270 				   NULL, NULL, in_mad, out_mad);
271 		if (err)
272 			goto out;
273 
274 		/* Checking LinkSpeedActive for FDR-10 */
275 		if (out_mad->data[15] & 0x1)
276 			props->active_speed = IB_SPEED_FDR10;
277 	}
278 
279 	/* Avoid wrong speed value returned by FW if the IB link is down. */
280 	if (props->state == IB_PORT_DOWN)
281 		 props->active_speed = IB_SPEED_SDR;
282 
283 out:
284 	kfree(in_mad);
285 	kfree(out_mad);
286 	return err;
287 }
288 
289 static u8 state_to_phys_state(enum ib_port_state state)
290 {
291 	return state == IB_PORT_ACTIVE ? 5 : 3;
292 }
293 
294 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
295 			       struct ib_port_attr *props, int netw_view)
296 {
297 
298 	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
299 	struct mlx4_ib_iboe *iboe = &mdev->iboe;
300 	struct net_device *ndev;
301 	enum ib_mtu tmp;
302 	struct mlx4_cmd_mailbox *mailbox;
303 	int err = 0;
304 
305 	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
306 	if (IS_ERR(mailbox))
307 		return PTR_ERR(mailbox);
308 
309 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
310 			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
311 			   MLX4_CMD_WRAPPED);
312 	if (err)
313 		goto out;
314 
315 	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ?
316 						IB_WIDTH_4X : IB_WIDTH_1X;
317 	props->active_speed	= IB_SPEED_QDR;
318 	props->port_cap_flags	= IB_PORT_CM_SUP;
319 	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
320 	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
321 	props->pkey_tbl_len	= 1;
322 	props->max_mtu		= IB_MTU_4096;
323 	props->max_vl_num	= 2;
324 	props->state		= IB_PORT_DOWN;
325 	props->phys_state	= state_to_phys_state(props->state);
326 	props->active_mtu	= IB_MTU_256;
327 	spin_lock(&iboe->lock);
328 	ndev = iboe->netdevs[port - 1];
329 	if (!ndev)
330 		goto out_unlock;
331 
332 	tmp = iboe_get_mtu(ndev->mtu);
333 	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
334 
335 	props->state		= (netif_running(ndev) && netif_carrier_ok(ndev)) ?
336 					IB_PORT_ACTIVE : IB_PORT_DOWN;
337 	props->phys_state	= state_to_phys_state(props->state);
338 out_unlock:
339 	spin_unlock(&iboe->lock);
340 out:
341 	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
342 	return err;
343 }
344 
345 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
346 			 struct ib_port_attr *props, int netw_view)
347 {
348 	int err;
349 
350 	memset(props, 0, sizeof *props);
351 
352 	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
353 		ib_link_query_port(ibdev, port, props, netw_view) :
354 				eth_link_query_port(ibdev, port, props, netw_view);
355 
356 	return err;
357 }
358 
359 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
360 			      struct ib_port_attr *props)
361 {
362 	/* returns host view */
363 	return __mlx4_ib_query_port(ibdev, port, props, 0);
364 }
365 
366 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
367 			union ib_gid *gid, int netw_view)
368 {
369 	struct ib_smp *in_mad  = NULL;
370 	struct ib_smp *out_mad = NULL;
371 	int err = -ENOMEM;
372 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
373 	int clear = 0;
374 	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
375 
376 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
377 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
378 	if (!in_mad || !out_mad)
379 		goto out;
380 
381 	init_query_mad(in_mad);
382 	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
383 	in_mad->attr_mod = cpu_to_be32(port);
384 
385 	if (mlx4_is_mfunc(dev->dev) && netw_view)
386 		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
387 
388 	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
389 	if (err)
390 		goto out;
391 
392 	memcpy(gid->raw, out_mad->data + 8, 8);
393 
394 	if (mlx4_is_mfunc(dev->dev) && !netw_view) {
395 		if (index) {
396 			/* For any index > 0, return the null guid */
397 			err = 0;
398 			clear = 1;
399 			goto out;
400 		}
401 	}
402 
403 	init_query_mad(in_mad);
404 	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
405 	in_mad->attr_mod = cpu_to_be32(index / 8);
406 
407 	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
408 			   NULL, NULL, in_mad, out_mad);
409 	if (err)
410 		goto out;
411 
412 	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
413 
414 out:
415 	if (clear)
416 		memset(gid->raw + 8, 0, 8);
417 	kfree(in_mad);
418 	kfree(out_mad);
419 	return err;
420 }
421 
422 static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
423 			  union ib_gid *gid)
424 {
425 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
426 
427 	*gid = dev->iboe.gid_table[port - 1][index];
428 
429 	return 0;
430 }
431 
432 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
433 			     union ib_gid *gid)
434 {
435 	if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
436 		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
437 	else
438 		return iboe_query_gid(ibdev, port, index, gid);
439 }
440 
441 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
442 			 u16 *pkey, int netw_view)
443 {
444 	struct ib_smp *in_mad  = NULL;
445 	struct ib_smp *out_mad = NULL;
446 	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
447 	int err = -ENOMEM;
448 
449 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
450 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
451 	if (!in_mad || !out_mad)
452 		goto out;
453 
454 	init_query_mad(in_mad);
455 	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
456 	in_mad->attr_mod = cpu_to_be32(index / 32);
457 
458 	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
459 		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
460 
461 	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
462 			   in_mad, out_mad);
463 	if (err)
464 		goto out;
465 
466 	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
467 
468 out:
469 	kfree(in_mad);
470 	kfree(out_mad);
471 	return err;
472 }
473 
474 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
475 {
476 	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
477 }
478 
479 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
480 				 struct ib_device_modify *props)
481 {
482 	struct mlx4_cmd_mailbox *mailbox;
483 	unsigned long flags;
484 
485 	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
486 		return -EOPNOTSUPP;
487 
488 	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
489 		return 0;
490 
491 	if (mlx4_is_slave(to_mdev(ibdev)->dev))
492 		return -EOPNOTSUPP;
493 
494 	spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
495 	memcpy(ibdev->node_desc, props->node_desc, 64);
496 	spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
497 
498 	/*
499 	 * If possible, pass node desc to FW, so it can generate
500 	 * a 144 trap.  If cmd fails, just ignore.
501 	 */
502 	mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
503 	if (IS_ERR(mailbox))
504 		return 0;
505 
506 	memset(mailbox->buf, 0, 256);
507 	memcpy(mailbox->buf, props->node_desc, 64);
508 	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
509 		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
510 
511 	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
512 
513 	return 0;
514 }
515 
516 static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
517 			 u32 cap_mask)
518 {
519 	struct mlx4_cmd_mailbox *mailbox;
520 	int err;
521 	u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
522 
523 	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
524 	if (IS_ERR(mailbox))
525 		return PTR_ERR(mailbox);
526 
527 	memset(mailbox->buf, 0, 256);
528 
529 	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
530 		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
531 		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
532 	} else {
533 		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
534 		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
535 	}
536 
537 	err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
538 		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
539 
540 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
541 	return err;
542 }
543 
544 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
545 			       struct ib_port_modify *props)
546 {
547 	struct ib_port_attr attr;
548 	u32 cap_mask;
549 	int err;
550 
551 	mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
552 
553 	err = mlx4_ib_query_port(ibdev, port, &attr);
554 	if (err)
555 		goto out;
556 
557 	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
558 		~props->clr_port_cap_mask;
559 
560 	err = mlx4_SET_PORT(to_mdev(ibdev), port,
561 			    !!(mask & IB_PORT_RESET_QKEY_CNTR),
562 			    cap_mask);
563 
564 out:
565 	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
566 	return err;
567 }
568 
569 static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
570 						  struct ib_udata *udata)
571 {
572 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
573 	struct mlx4_ib_ucontext *context;
574 	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
575 	struct mlx4_ib_alloc_ucontext_resp resp;
576 	int err;
577 
578 	if (!dev->ib_active)
579 		return ERR_PTR(-EAGAIN);
580 
581 	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
582 		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
583 		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
584 		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
585 	} else {
586 		resp.dev_caps	      = dev->dev->caps.userspace_caps;
587 		resp.qp_tab_size      = dev->dev->caps.num_qps;
588 		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
589 		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
590 		resp.cqe_size	      = dev->dev->caps.cqe_size;
591 	}
592 
593 	context = kmalloc(sizeof *context, GFP_KERNEL);
594 	if (!context)
595 		return ERR_PTR(-ENOMEM);
596 
597 	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
598 	if (err) {
599 		kfree(context);
600 		return ERR_PTR(err);
601 	}
602 
603 	INIT_LIST_HEAD(&context->db_page_list);
604 	mutex_init(&context->db_page_mutex);
605 
606 	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
607 		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
608 	else
609 		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
610 
611 	if (err) {
612 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
613 		kfree(context);
614 		return ERR_PTR(-EFAULT);
615 	}
616 
617 	return &context->ibucontext;
618 }
619 
620 static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
621 {
622 	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
623 
624 	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
625 	kfree(context);
626 
627 	return 0;
628 }
629 
630 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
631 {
632 	struct mlx4_ib_dev *dev = to_mdev(context->device);
633 
634 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
635 		return -EINVAL;
636 
637 	if (vma->vm_pgoff == 0) {
638 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
639 
640 		if (io_remap_pfn_range(vma, vma->vm_start,
641 				       to_mucontext(context)->uar.pfn,
642 				       PAGE_SIZE, vma->vm_page_prot))
643 			return -EAGAIN;
644 	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
645 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
646 
647 		if (io_remap_pfn_range(vma, vma->vm_start,
648 				       to_mucontext(context)->uar.pfn +
649 				       dev->dev->caps.num_uars,
650 				       PAGE_SIZE, vma->vm_page_prot))
651 			return -EAGAIN;
652 	} else
653 		return -EINVAL;
654 
655 	return 0;
656 }
657 
658 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
659 				      struct ib_ucontext *context,
660 				      struct ib_udata *udata)
661 {
662 	struct mlx4_ib_pd *pd;
663 	int err;
664 
665 	pd = kmalloc(sizeof *pd, GFP_KERNEL);
666 	if (!pd)
667 		return ERR_PTR(-ENOMEM);
668 
669 	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
670 	if (err) {
671 		kfree(pd);
672 		return ERR_PTR(err);
673 	}
674 
675 	if (context)
676 		if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
677 			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
678 			kfree(pd);
679 			return ERR_PTR(-EFAULT);
680 		}
681 
682 	return &pd->ibpd;
683 }
684 
685 static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
686 {
687 	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
688 	kfree(pd);
689 
690 	return 0;
691 }
692 
693 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
694 					  struct ib_ucontext *context,
695 					  struct ib_udata *udata)
696 {
697 	struct mlx4_ib_xrcd *xrcd;
698 	int err;
699 
700 	if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
701 		return ERR_PTR(-ENOSYS);
702 
703 	xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
704 	if (!xrcd)
705 		return ERR_PTR(-ENOMEM);
706 
707 	err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
708 	if (err)
709 		goto err1;
710 
711 	xrcd->pd = ib_alloc_pd(ibdev);
712 	if (IS_ERR(xrcd->pd)) {
713 		err = PTR_ERR(xrcd->pd);
714 		goto err2;
715 	}
716 
717 	xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
718 	if (IS_ERR(xrcd->cq)) {
719 		err = PTR_ERR(xrcd->cq);
720 		goto err3;
721 	}
722 
723 	return &xrcd->ibxrcd;
724 
725 err3:
726 	ib_dealloc_pd(xrcd->pd);
727 err2:
728 	mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
729 err1:
730 	kfree(xrcd);
731 	return ERR_PTR(err);
732 }
733 
734 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
735 {
736 	ib_destroy_cq(to_mxrcd(xrcd)->cq);
737 	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
738 	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
739 	kfree(xrcd);
740 
741 	return 0;
742 }
743 
744 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
745 {
746 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
747 	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
748 	struct mlx4_ib_gid_entry *ge;
749 
750 	ge = kzalloc(sizeof *ge, GFP_KERNEL);
751 	if (!ge)
752 		return -ENOMEM;
753 
754 	ge->gid = *gid;
755 	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
756 		ge->port = mqp->port;
757 		ge->added = 1;
758 	}
759 
760 	mutex_lock(&mqp->mutex);
761 	list_add_tail(&ge->list, &mqp->gid_list);
762 	mutex_unlock(&mqp->mutex);
763 
764 	return 0;
765 }
766 
767 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
768 		   union ib_gid *gid)
769 {
770 	u8 mac[6];
771 	struct net_device *ndev;
772 	int ret = 0;
773 
774 	if (!mqp->port)
775 		return 0;
776 
777 	spin_lock(&mdev->iboe.lock);
778 	ndev = mdev->iboe.netdevs[mqp->port - 1];
779 	if (ndev)
780 		dev_hold(ndev);
781 	spin_unlock(&mdev->iboe.lock);
782 
783 	if (ndev) {
784 		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
785 		rtnl_lock();
786 		dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
787 		ret = 1;
788 		rtnl_unlock();
789 		dev_put(ndev);
790 	}
791 
792 	return ret;
793 }
794 
/*
 * One multicast steering rule attached to a QP.  Entries are created in
 * mlx4_ib_mcg_attach() and removed in mlx4_ib_mcg_detach().
 */
struct mlx4_ib_steering {
	struct list_head list;	/* link in the QP's steering_rules list */
	u64 reg_id;		/* id returned by mlx4_multicast_attach() */
	union ib_gid gid;	/* multicast GID the rule was created for */
};
800 
801 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
802 {
803 	int err;
804 	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
805 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
806 	u64 reg_id;
807 	struct mlx4_ib_steering *ib_steering = NULL;
808 
809 	if (mdev->dev->caps.steering_mode ==
810 	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
811 		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
812 		if (!ib_steering)
813 			return -ENOMEM;
814 	}
815 
816 	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
817 				    !!(mqp->flags &
818 				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
819 				    MLX4_PROT_IB_IPV6, &reg_id);
820 	if (err)
821 		goto err_malloc;
822 
823 	err = add_gid_entry(ibqp, gid);
824 	if (err)
825 		goto err_add;
826 
827 	if (ib_steering) {
828 		memcpy(ib_steering->gid.raw, gid->raw, 16);
829 		ib_steering->reg_id = reg_id;
830 		mutex_lock(&mqp->mutex);
831 		list_add(&ib_steering->list, &mqp->steering_rules);
832 		mutex_unlock(&mqp->mutex);
833 	}
834 	return 0;
835 
836 err_add:
837 	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
838 			      MLX4_PROT_IB_IPV6, reg_id);
839 err_malloc:
840 	kfree(ib_steering);
841 
842 	return err;
843 }
844 
845 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
846 {
847 	struct mlx4_ib_gid_entry *ge;
848 	struct mlx4_ib_gid_entry *tmp;
849 	struct mlx4_ib_gid_entry *ret = NULL;
850 
851 	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
852 		if (!memcmp(raw, ge->gid.raw, 16)) {
853 			ret = ge;
854 			break;
855 		}
856 	}
857 
858 	return ret;
859 }
860 
861 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
862 {
863 	int err;
864 	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
865 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
866 	u8 mac[6];
867 	struct net_device *ndev;
868 	struct mlx4_ib_gid_entry *ge;
869 	u64 reg_id = 0;
870 
871 	if (mdev->dev->caps.steering_mode ==
872 	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
873 		struct mlx4_ib_steering *ib_steering;
874 
875 		mutex_lock(&mqp->mutex);
876 		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
877 			if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
878 				list_del(&ib_steering->list);
879 				break;
880 			}
881 		}
882 		mutex_unlock(&mqp->mutex);
883 		if (&ib_steering->list == &mqp->steering_rules) {
884 			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
885 			return -EINVAL;
886 		}
887 		reg_id = ib_steering->reg_id;
888 		kfree(ib_steering);
889 	}
890 
891 	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
892 				    MLX4_PROT_IB_IPV6, reg_id);
893 	if (err)
894 		return err;
895 
896 	mutex_lock(&mqp->mutex);
897 	ge = find_gid_entry(mqp, gid->raw);
898 	if (ge) {
899 		spin_lock(&mdev->iboe.lock);
900 		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
901 		if (ndev)
902 			dev_hold(ndev);
903 		spin_unlock(&mdev->iboe.lock);
904 		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
905 		if (ndev) {
906 			rtnl_lock();
907 			dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
908 			rtnl_unlock();
909 			dev_put(ndev);
910 		}
911 		list_del(&ge->list);
912 		kfree(ge);
913 	} else
914 		pr_warn("could not find mgid entry\n");
915 
916 	mutex_unlock(&mqp->mutex);
917 
918 	return 0;
919 }
920 
921 static int init_node_data(struct mlx4_ib_dev *dev)
922 {
923 	struct ib_smp *in_mad  = NULL;
924 	struct ib_smp *out_mad = NULL;
925 	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
926 	int err = -ENOMEM;
927 
928 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
929 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
930 	if (!in_mad || !out_mad)
931 		goto out;
932 
933 	init_query_mad(in_mad);
934 	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
935 	if (mlx4_is_master(dev->dev))
936 		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
937 
938 	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
939 	if (err)
940 		goto out;
941 
942 	memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
943 
944 	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
945 
946 	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
947 	if (err)
948 		goto out;
949 
950 	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
951 	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
952 
953 out:
954 	kfree(in_mad);
955 	kfree(out_mad);
956 	return err;
957 }
958 
959 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
960 			char *buf)
961 {
962 	struct mlx4_ib_dev *dev =
963 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
964 	return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
965 }
966 
967 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
968 			   char *buf)
969 {
970 	struct mlx4_ib_dev *dev =
971 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
972 	return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
973 		       (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
974 		       (int) dev->dev->caps.fw_ver & 0xffff);
975 }
976 
977 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
978 			char *buf)
979 {
980 	struct mlx4_ib_dev *dev =
981 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
982 	return sprintf(buf, "%x\n", dev->dev->rev_id);
983 }
984 
985 static ssize_t show_board(struct device *device, struct device_attribute *attr,
986 			  char *buf)
987 {
988 	struct mlx4_ib_dev *dev =
989 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
990 	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
991 		       dev->dev->board_id);
992 }
993 
/* Read-only (S_IRUGO) device attributes; none has a store handler. */
static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);

/*
 * Table of the attributes defined above.
 * NOTE(review): the code that registers them is outside this excerpt.
 */
static struct device_attribute *mlx4_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_fw_ver,
	&dev_attr_hca_type,
	&dev_attr_board_id
};
1005 
1006 static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
1007 {
1008 	memcpy(eui, dev->dev_addr, 3);
1009 	memcpy(eui + 5, dev->dev_addr + 3, 3);
1010 	if (vlan_id < 0x1000) {
1011 		eui[3] = vlan_id >> 8;
1012 		eui[4] = vlan_id & 0xff;
1013 	} else {
1014 		eui[3] = 0xff;
1015 		eui[4] = 0xfe;
1016 	}
1017 	eui[0] ^= 2;
1018 }
1019 
1020 static void update_gids_task(struct work_struct *work)
1021 {
1022 	struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
1023 	struct mlx4_cmd_mailbox *mailbox;
1024 	union ib_gid *gids;
1025 	int err;
1026 	struct mlx4_dev	*dev = gw->dev->dev;
1027 
1028 	mailbox = mlx4_alloc_cmd_mailbox(dev);
1029 	if (IS_ERR(mailbox)) {
1030 		pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
1031 		return;
1032 	}
1033 
1034 	gids = mailbox->buf;
1035 	memcpy(gids, gw->gids, sizeof gw->gids);
1036 
1037 	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1038 		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1039 		       MLX4_CMD_WRAPPED);
1040 	if (err)
1041 		pr_warn("set port command failed\n");
1042 	else {
1043 		memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
1044 		mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
1045 	}
1046 
1047 	mlx4_free_cmd_mailbox(dev, mailbox);
1048 	kfree(gw);
1049 }
1050 
1051 static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
1052 {
1053 	struct net_device *ndev = dev->iboe.netdevs[port - 1];
1054 	struct update_gid_work *work;
1055 	struct net_device *tmp;
1056 	int i;
1057 	u8 *hits;
1058 	int ret;
1059 	union ib_gid gid;
1060 	int free;
1061 	int found;
1062 	int need_update = 0;
1063 	u16 vid;
1064 
1065 	work = kzalloc(sizeof *work, GFP_ATOMIC);
1066 	if (!work)
1067 		return -ENOMEM;
1068 
1069 	hits = kzalloc(128, GFP_ATOMIC);
1070 	if (!hits) {
1071 		ret = -ENOMEM;
1072 		goto out;
1073 	}
1074 
1075 	rcu_read_lock();
1076 	for_each_netdev_rcu(&init_net, tmp) {
1077 		if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
1078 			gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
1079 			vid = rdma_vlan_dev_vlan_id(tmp);
1080 			mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
1081 			found = 0;
1082 			free = -1;
1083 			for (i = 0; i < 128; ++i) {
1084 				if (free < 0 &&
1085 				    !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
1086 					free = i;
1087 				if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
1088 					hits[i] = 1;
1089 					found = 1;
1090 					break;
1091 				}
1092 			}
1093 
1094 			if (!found) {
1095 				if (tmp == ndev &&
1096 				    (memcmp(&dev->iboe.gid_table[port - 1][0],
1097 					    &gid, sizeof gid) ||
1098 				     !memcmp(&dev->iboe.gid_table[port - 1][0],
1099 					     &zgid, sizeof gid))) {
1100 					dev->iboe.gid_table[port - 1][0] = gid;
1101 					++need_update;
1102 					hits[0] = 1;
1103 				} else if (free >= 0) {
1104 					dev->iboe.gid_table[port - 1][free] = gid;
1105 					hits[free] = 1;
1106 					++need_update;
1107 				}
1108 			}
1109 		}
1110 	}
1111 	rcu_read_unlock();
1112 
1113 	for (i = 0; i < 128; ++i)
1114 		if (!hits[i]) {
1115 			if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
1116 				++need_update;
1117 			dev->iboe.gid_table[port - 1][i] = zgid;
1118 		}
1119 
1120 	if (need_update) {
1121 		memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
1122 		INIT_WORK(&work->work, update_gids_task);
1123 		work->port = port;
1124 		work->dev = dev;
1125 		queue_work(wq, &work->work);
1126 	} else
1127 		kfree(work);
1128 
1129 	kfree(hits);
1130 	return 0;
1131 
1132 out:
1133 	kfree(work);
1134 	return ret;
1135 }
1136 
1137 static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
1138 {
1139 	switch (event) {
1140 	case NETDEV_UP:
1141 	case NETDEV_CHANGEADDR:
1142 		update_ipv6_gids(dev, port, 0);
1143 		break;
1144 
1145 	case NETDEV_DOWN:
1146 		update_ipv6_gids(dev, port, 1);
1147 		dev->iboe.netdevs[port - 1] = NULL;
1148 	}
1149 }
1150 
/*
 * A net device became associated with @port: rebuild the port's GID table.
 * The result of the rebuild is not reported to the caller.
 */
static void netdev_added(struct mlx4_ib_dev *dev, int port)
{
	update_ipv6_gids(dev, port, 0);
}
1155 
/*
 * The net device associated with @port went away: rebuild the port's GID
 * table with the clear flag set.  The result is not reported to the caller.
 */
static void netdev_removed(struct mlx4_ib_dev *dev, int port)
{
	update_ipv6_gids(dev, port, 1);
}
1160 
1161 static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
1162 				void *ptr)
1163 {
1164 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1165 	struct mlx4_ib_dev *ibdev;
1166 	struct net_device *oldnd;
1167 	struct mlx4_ib_iboe *iboe;
1168 	int port;
1169 
1170 	if (!net_eq(dev_net(dev), &init_net))
1171 		return NOTIFY_DONE;
1172 
1173 	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
1174 	iboe = &ibdev->iboe;
1175 
1176 	spin_lock(&iboe->lock);
1177 	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
1178 		oldnd = iboe->netdevs[port - 1];
1179 		iboe->netdevs[port - 1] =
1180 			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
1181 		if (oldnd != iboe->netdevs[port - 1]) {
1182 			if (iboe->netdevs[port - 1])
1183 				netdev_added(ibdev, port);
1184 			else
1185 				netdev_removed(ibdev, port);
1186 		}
1187 	}
1188 
1189 	if (dev == iboe->netdevs[0] ||
1190 	    (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
1191 		handle_en_event(ibdev, 1, event);
1192 	else if (dev == iboe->netdevs[1]
1193 		 || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
1194 		handle_en_event(ibdev, 2, event);
1195 
1196 	spin_unlock(&iboe->lock);
1197 
1198 	return NOTIFY_DONE;
1199 }
1200 
1201 static void init_pkeys(struct mlx4_ib_dev *ibdev)
1202 {
1203 	int port;
1204 	int slave;
1205 	int i;
1206 
1207 	if (mlx4_is_master(ibdev->dev)) {
1208 		for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1209 			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1210 				for (i = 0;
1211 				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1212 				     ++i) {
1213 					ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1214 					/* master has the identity virt2phys pkey mapping */
1215 						(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1216 							ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1217 					mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1218 							     ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1219 				}
1220 			}
1221 		}
1222 		/* initialize pkey cache */
1223 		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1224 			for (i = 0;
1225 			     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1226 			     ++i)
1227 				ibdev->pkeys.phys_pkey_cache[port-1][i] =
1228 					(i) ? 0 : 0xFFFF;
1229 		}
1230 	}
1231 }
1232 
1233 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1234 {
1235 	char name[32];
1236 	int eq_per_port = 0;
1237 	int added_eqs = 0;
1238 	int total_eqs = 0;
1239 	int i, j, eq;
1240 
1241 	/* Legacy mode or comp_pool is not large enough */
1242 	if (dev->caps.comp_pool == 0 ||
1243 	    dev->caps.num_ports > dev->caps.comp_pool)
1244 		return;
1245 
1246 	eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
1247 					dev->caps.num_ports);
1248 
1249 	/* Init eq table */
1250 	added_eqs = 0;
1251 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
1252 		added_eqs += eq_per_port;
1253 
1254 	total_eqs = dev->caps.num_comp_vectors + added_eqs;
1255 
1256 	ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
1257 	if (!ibdev->eq_table)
1258 		return;
1259 
1260 	ibdev->eq_added = added_eqs;
1261 
1262 	eq = 0;
1263 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
1264 		for (j = 0; j < eq_per_port; j++) {
1265 			sprintf(name, "mlx4-ib-%d-%d@%s",
1266 				i, j, dev->pdev->bus->name);
1267 			/* Set IRQ for specific name (per ring) */
1268 			if (mlx4_assign_eq(dev, name, NULL,
1269 					   &ibdev->eq_table[eq])) {
1270 				/* Use legacy (same as mlx4_en driver) */
1271 				pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
1272 				ibdev->eq_table[eq] =
1273 					(eq % dev->caps.num_comp_vectors);
1274 			}
1275 			eq++;
1276 		}
1277 	}
1278 
1279 	/* Fill the reset of the vector with legacy EQ */
1280 	for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
1281 		ibdev->eq_table[eq++] = i;
1282 
1283 	/* Advertise the new number of EQs to clients */
1284 	ibdev->ib_dev.num_comp_vectors = total_eqs;
1285 }
1286 
1287 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1288 {
1289 	int i;
1290 
1291 	/* no additional eqs were added */
1292 	if (!ibdev->eq_table)
1293 		return;
1294 
1295 	/* Reset the advertised EQ number */
1296 	ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
1297 
1298 	/* Free only the added eqs */
1299 	for (i = 0; i < ibdev->eq_added; i++) {
1300 		/* Don't free legacy eqs if used */
1301 		if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
1302 			continue;
1303 		mlx4_release_eq(dev, ibdev->eq_table[i]);
1304 	}
1305 
1306 	kfree(ibdev->eq_table);
1307 }
1308 
1309 static void *mlx4_ib_add(struct mlx4_dev *dev)
1310 {
1311 	struct mlx4_ib_dev *ibdev;
1312 	int num_ports = 0;
1313 	int i, j;
1314 	int err;
1315 	struct mlx4_ib_iboe *iboe;
1316 
1317 	pr_info_once("%s", mlx4_ib_version);
1318 
1319 	mlx4_foreach_non_ib_transport_port(i, dev)
1320 		num_ports++;
1321 
1322 	if (mlx4_is_mfunc(dev) && num_ports) {
1323 		dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
1324 		return NULL;
1325 	}
1326 
1327 	num_ports = 0;
1328 	mlx4_foreach_ib_transport_port(i, dev)
1329 		num_ports++;
1330 
1331 	/* No point in registering a device with no ports... */
1332 	if (num_ports == 0)
1333 		return NULL;
1334 
1335 	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
1336 	if (!ibdev) {
1337 		dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
1338 		return NULL;
1339 	}
1340 
1341 	iboe = &ibdev->iboe;
1342 
1343 	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
1344 		goto err_dealloc;
1345 
1346 	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
1347 		goto err_pd;
1348 
1349 	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
1350 				 PAGE_SIZE);
1351 	if (!ibdev->uar_map)
1352 		goto err_uar;
1353 	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
1354 
1355 	ibdev->dev = dev;
1356 
1357 	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
1358 	ibdev->ib_dev.owner		= THIS_MODULE;
1359 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
1360 	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
1361 	ibdev->num_ports		= num_ports;
1362 	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
1363 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
1364 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
1365 
1366 	if (dev->caps.userspace_caps)
1367 		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
1368 	else
1369 		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
1370 
1371 	ibdev->ib_dev.uverbs_cmd_mask	=
1372 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
1373 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
1374 		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
1375 		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
1376 		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
1377 		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
1378 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
1379 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
1380 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
1381 		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
1382 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
1383 		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
1384 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
1385 		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
1386 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
1387 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
1388 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
1389 		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
1390 		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
1391 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
1392 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
1393 		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
1394 		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
1395 
1396 	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
1397 	ibdev->ib_dev.query_port	= mlx4_ib_query_port;
1398 	ibdev->ib_dev.get_link_layer	= mlx4_ib_port_link_layer;
1399 	ibdev->ib_dev.query_gid		= mlx4_ib_query_gid;
1400 	ibdev->ib_dev.query_pkey	= mlx4_ib_query_pkey;
1401 	ibdev->ib_dev.modify_device	= mlx4_ib_modify_device;
1402 	ibdev->ib_dev.modify_port	= mlx4_ib_modify_port;
1403 	ibdev->ib_dev.alloc_ucontext	= mlx4_ib_alloc_ucontext;
1404 	ibdev->ib_dev.dealloc_ucontext	= mlx4_ib_dealloc_ucontext;
1405 	ibdev->ib_dev.mmap		= mlx4_ib_mmap;
1406 	ibdev->ib_dev.alloc_pd		= mlx4_ib_alloc_pd;
1407 	ibdev->ib_dev.dealloc_pd	= mlx4_ib_dealloc_pd;
1408 	ibdev->ib_dev.create_ah		= mlx4_ib_create_ah;
1409 	ibdev->ib_dev.query_ah		= mlx4_ib_query_ah;
1410 	ibdev->ib_dev.destroy_ah	= mlx4_ib_destroy_ah;
1411 	ibdev->ib_dev.create_srq	= mlx4_ib_create_srq;
1412 	ibdev->ib_dev.modify_srq	= mlx4_ib_modify_srq;
1413 	ibdev->ib_dev.query_srq		= mlx4_ib_query_srq;
1414 	ibdev->ib_dev.destroy_srq	= mlx4_ib_destroy_srq;
1415 	ibdev->ib_dev.post_srq_recv	= mlx4_ib_post_srq_recv;
1416 	ibdev->ib_dev.create_qp		= mlx4_ib_create_qp;
1417 	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
1418 	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
1419 	ibdev->ib_dev.destroy_qp	= mlx4_ib_destroy_qp;
1420 	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
1421 	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
1422 	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
1423 	ibdev->ib_dev.modify_cq		= mlx4_ib_modify_cq;
1424 	ibdev->ib_dev.resize_cq		= mlx4_ib_resize_cq;
1425 	ibdev->ib_dev.destroy_cq	= mlx4_ib_destroy_cq;
1426 	ibdev->ib_dev.poll_cq		= mlx4_ib_poll_cq;
1427 	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
1428 	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
1429 	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
1430 	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
1431 	ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
1432 	ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
1433 	ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
1434 	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
1435 	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
1436 	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
1437 
1438 	if (!mlx4_is_slave(ibdev->dev)) {
1439 		ibdev->ib_dev.alloc_fmr		= mlx4_ib_fmr_alloc;
1440 		ibdev->ib_dev.map_phys_fmr	= mlx4_ib_map_phys_fmr;
1441 		ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
1442 		ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
1443 	}
1444 
1445 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
1446 	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
1447 		ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
1448 		ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
1449 		ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
1450 
1451 		ibdev->ib_dev.uverbs_cmd_mask |=
1452 			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
1453 			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
1454 	}
1455 
1456 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
1457 		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
1458 		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
1459 		ibdev->ib_dev.uverbs_cmd_mask |=
1460 			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1461 			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1462 	}
1463 
1464 	mlx4_ib_alloc_eqs(dev, ibdev);
1465 
1466 	spin_lock_init(&iboe->lock);
1467 
1468 	if (init_node_data(ibdev))
1469 		goto err_map;
1470 
1471 	for (i = 0; i < ibdev->num_ports; ++i) {
1472 		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
1473 						IB_LINK_LAYER_ETHERNET) {
1474 			err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
1475 			if (err)
1476 				ibdev->counters[i] = -1;
1477 		} else
1478 				ibdev->counters[i] = -1;
1479 	}
1480 
1481 	spin_lock_init(&ibdev->sm_lock);
1482 	mutex_init(&ibdev->cap_mask_mutex);
1483 
1484 	if (ib_register_device(&ibdev->ib_dev, NULL))
1485 		goto err_counter;
1486 
1487 	if (mlx4_ib_mad_init(ibdev))
1488 		goto err_reg;
1489 
1490 	if (mlx4_ib_init_sriov(ibdev))
1491 		goto err_mad;
1492 
1493 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1494 		iboe->nb.notifier_call = mlx4_ib_netdev_event;
1495 		err = register_netdevice_notifier(&iboe->nb);
1496 		if (err)
1497 			goto err_sriov;
1498 	}
1499 
1500 	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
1501 		if (device_create_file(&ibdev->ib_dev.dev,
1502 				       mlx4_class_attributes[j]))
1503 			goto err_notif;
1504 	}
1505 
1506 	ibdev->ib_active = true;
1507 
1508 	if (mlx4_is_mfunc(ibdev->dev))
1509 		init_pkeys(ibdev);
1510 
1511 	/* create paravirt contexts for any VFs which are active */
1512 	if (mlx4_is_master(ibdev->dev)) {
1513 		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
1514 			if (j == mlx4_master_func_num(ibdev->dev))
1515 				continue;
1516 			if (mlx4_is_slave_active(ibdev->dev, j))
1517 				do_slave_init(ibdev, j, 1);
1518 		}
1519 	}
1520 	return ibdev;
1521 
1522 err_notif:
1523 	if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1524 		pr_warn("failure unregistering notifier\n");
1525 	flush_workqueue(wq);
1526 
1527 err_sriov:
1528 	mlx4_ib_close_sriov(ibdev);
1529 
1530 err_mad:
1531 	mlx4_ib_mad_cleanup(ibdev);
1532 
1533 err_reg:
1534 	ib_unregister_device(&ibdev->ib_dev);
1535 
1536 err_counter:
1537 	for (; i; --i)
1538 		if (ibdev->counters[i - 1] != -1)
1539 			mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
1540 
1541 err_map:
1542 	iounmap(ibdev->uar_map);
1543 
1544 err_uar:
1545 	mlx4_uar_free(dev, &ibdev->priv_uar);
1546 
1547 err_pd:
1548 	mlx4_pd_free(dev, ibdev->priv_pdn);
1549 
1550 err_dealloc:
1551 	ib_dealloc_device(&ibdev->ib_dev);
1552 
1553 	return NULL;
1554 }
1555 
1556 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1557 {
1558 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
1559 	int p;
1560 
1561 	mlx4_ib_close_sriov(ibdev);
1562 	mlx4_ib_mad_cleanup(ibdev);
1563 	ib_unregister_device(&ibdev->ib_dev);
1564 	if (ibdev->iboe.nb.notifier_call) {
1565 		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1566 			pr_warn("failure unregistering notifier\n");
1567 		ibdev->iboe.nb.notifier_call = NULL;
1568 	}
1569 	iounmap(ibdev->uar_map);
1570 	for (p = 0; p < ibdev->num_ports; ++p)
1571 		if (ibdev->counters[p] != -1)
1572 			mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
1573 	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
1574 		mlx4_CLOSE_PORT(dev, p);
1575 
1576 	mlx4_ib_free_eqs(dev, ibdev);
1577 
1578 	mlx4_uar_free(dev, &ibdev->priv_uar);
1579 	mlx4_pd_free(dev, ibdev->priv_pdn);
1580 	ib_dealloc_device(&ibdev->ib_dev);
1581 }
1582 
1583 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
1584 {
1585 	struct mlx4_ib_demux_work **dm = NULL;
1586 	struct mlx4_dev *dev = ibdev->dev;
1587 	int i;
1588 	unsigned long flags;
1589 
1590 	if (!mlx4_is_master(dev))
1591 		return;
1592 
1593 	dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
1594 	if (!dm) {
1595 		pr_err("failed to allocate memory for tunneling qp update\n");
1596 		goto out;
1597 	}
1598 
1599 	for (i = 0; i < dev->caps.num_ports; i++) {
1600 		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
1601 		if (!dm[i]) {
1602 			pr_err("failed to allocate memory for tunneling qp update work struct\n");
1603 			for (i = 0; i < dev->caps.num_ports; i++) {
1604 				if (dm[i])
1605 					kfree(dm[i]);
1606 			}
1607 			goto out;
1608 		}
1609 	}
1610 	/* initialize or tear down tunnel QPs for the slave */
1611 	for (i = 0; i < dev->caps.num_ports; i++) {
1612 		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
1613 		dm[i]->port = i + 1;
1614 		dm[i]->slave = slave;
1615 		dm[i]->do_init = do_init;
1616 		dm[i]->dev = ibdev;
1617 		spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
1618 		if (!ibdev->sriov.is_going_down)
1619 			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
1620 		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
1621 	}
1622 out:
1623 	kfree(dm);
1624 	return;
1625 }
1626 
1627 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1628 			  enum mlx4_dev_event event, unsigned long param)
1629 {
1630 	struct ib_event ibev;
1631 	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
1632 	struct mlx4_eqe *eqe = NULL;
1633 	struct ib_event_work *ew;
1634 	int p = 0;
1635 
1636 	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
1637 		eqe = (struct mlx4_eqe *)param;
1638 	else
1639 		p = (int) param;
1640 
1641 	switch (event) {
1642 	case MLX4_DEV_EVENT_PORT_UP:
1643 		if (p > ibdev->num_ports)
1644 			return;
1645 		if (mlx4_is_master(dev) &&
1646 		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
1647 			IB_LINK_LAYER_INFINIBAND) {
1648 			mlx4_ib_invalidate_all_guid_record(ibdev, p);
1649 		}
1650 		ibev.event = IB_EVENT_PORT_ACTIVE;
1651 		break;
1652 
1653 	case MLX4_DEV_EVENT_PORT_DOWN:
1654 		if (p > ibdev->num_ports)
1655 			return;
1656 		ibev.event = IB_EVENT_PORT_ERR;
1657 		break;
1658 
1659 	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
1660 		ibdev->ib_active = false;
1661 		ibev.event = IB_EVENT_DEVICE_FATAL;
1662 		break;
1663 
1664 	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
1665 		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
1666 		if (!ew) {
1667 			pr_err("failed to allocate memory for events work\n");
1668 			break;
1669 		}
1670 
1671 		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
1672 		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
1673 		ew->ib_dev = ibdev;
1674 		/* need to queue only for port owner, which uses GEN_EQE */
1675 		if (mlx4_is_master(dev))
1676 			queue_work(wq, &ew->work);
1677 		else
1678 			handle_port_mgmt_change_event(&ew->work);
1679 		return;
1680 
1681 	case MLX4_DEV_EVENT_SLAVE_INIT:
1682 		/* here, p is the slave id */
1683 		do_slave_init(ibdev, p, 1);
1684 		return;
1685 
1686 	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
1687 		/* here, p is the slave id */
1688 		do_slave_init(ibdev, p, 0);
1689 		return;
1690 
1691 	default:
1692 		return;
1693 	}
1694 
1695 	ibev.device	      = ibdev_ptr;
1696 	ibev.element.port_num = (u8) p;
1697 
1698 	ib_dispatch_event(&ibev);
1699 }
1700 
1701 static struct mlx4_interface mlx4_ib_interface = {
1702 	.add		= mlx4_ib_add,
1703 	.remove		= mlx4_ib_remove,
1704 	.event		= mlx4_ib_event,
1705 	.protocol	= MLX4_PROT_IB_IPV6
1706 };
1707 
1708 static int __init mlx4_ib_init(void)
1709 {
1710 	int err;
1711 
1712 	wq = create_singlethread_workqueue("mlx4_ib");
1713 	if (!wq)
1714 		return -ENOMEM;
1715 
1716 	err = mlx4_ib_mcg_init();
1717 	if (err)
1718 		goto clean_wq;
1719 
1720 	err = mlx4_register_interface(&mlx4_ib_interface);
1721 	if (err)
1722 		goto clean_mcg;
1723 
1724 	return 0;
1725 
1726 clean_mcg:
1727 	mlx4_ib_mcg_destroy();
1728 
1729 clean_wq:
1730 	destroy_workqueue(wq);
1731 	return err;
1732 }
1733 
1734 static void __exit mlx4_ib_cleanup(void)
1735 {
1736 	mlx4_unregister_interface(&mlx4_ib_interface);
1737 	mlx4_ib_mcg_destroy();
1738 	destroy_workqueue(wq);
1739 }
1740 
1741 module_init(mlx4_ib_init);
1742 module_exit(mlx4_ib_cleanup);
1743