xref: /openbmc/linux/drivers/infiniband/core/cm.c (revision dc6a81c3)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
5  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
6  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
7  * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
8  */
9 
10 #include <linux/completion.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/device.h>
13 #include <linux/module.h>
14 #include <linux/err.h>
15 #include <linux/idr.h>
16 #include <linux/interrupt.h>
17 #include <linux/random.h>
18 #include <linux/rbtree.h>
19 #include <linux/spinlock.h>
20 #include <linux/slab.h>
21 #include <linux/sysfs.h>
22 #include <linux/workqueue.h>
23 #include <linux/kdev_t.h>
24 #include <linux/etherdevice.h>
25 
26 #include <rdma/ib_cache.h>
27 #include <rdma/ib_cm.h>
28 #include "cm_msgs.h"
29 #include "core_priv.h"
30 
31 MODULE_AUTHOR("Sean Hefty");
32 MODULE_DESCRIPTION("InfiniBand CM");
33 MODULE_LICENSE("Dual BSD/GPL");
34 
35 static const char * const ibcm_rej_reason_strs[] = {
36 	[IB_CM_REJ_NO_QP]			= "no QP",
37 	[IB_CM_REJ_NO_EEC]			= "no EEC",
38 	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
39 	[IB_CM_REJ_TIMEOUT]			= "timeout",
40 	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
41 	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm ID",
42 	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
43 	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service ID",
44 	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
45 	[IB_CM_REJ_STALE_CONN]			= "stale conn",
46 	[IB_CM_REJ_RDC_NOT_EXIST]		= "RDC not exist",
47 	[IB_CM_REJ_INVALID_GID]			= "invalid GID",
48 	[IB_CM_REJ_INVALID_LID]			= "invalid LID",
49 	[IB_CM_REJ_INVALID_SL]			= "invalid SL",
50 	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
51 	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
52 	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
53 	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt GID",
54 	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt LID",
55 	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt SL",
56 	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
57 	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
58 	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
59 	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port CM redirect",
60 	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
61 	[IB_CM_REJ_INVALID_MTU]			= "invalid MTU",
62 	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
63 	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
64 	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid RNR retry",
65 	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm ID",
66 	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
67 	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
68 	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
69 };
70 
71 const char *__attribute_const__ ibcm_reject_msg(int reason)
72 {
73 	size_t index = reason;
74 
75 	if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
76 	    ibcm_rej_reason_strs[index])
77 		return ibcm_rej_reason_strs[index];
78 	else
79 		return "unrecognized reason";
80 }
81 EXPORT_SYMBOL(ibcm_reject_msg);
82 
83 static void cm_add_one(struct ib_device *device);
84 static void cm_remove_one(struct ib_device *device, void *client_data);
85 
86 static struct ib_client cm_client = {
87 	.name   = "cm",
88 	.add    = cm_add_one,
89 	.remove = cm_remove_one
90 };
91 
92 static struct ib_cm {
93 	spinlock_t lock;
94 	struct list_head device_list;
95 	rwlock_t device_lock;
96 	struct rb_root listen_service_table;
97 	u64 listen_service_id;
98 	/* struct rb_root peer_service_table; todo: fix peer to peer */
99 	struct rb_root remote_qp_table;
100 	struct rb_root remote_id_table;
101 	struct rb_root remote_sidr_table;
102 	struct xarray local_id_table;
103 	u32 local_id_next;
104 	__be32 random_id_operand;
105 	struct list_head timewait_list;
106 	struct workqueue_struct *wq;
107 	/* Synchronizes changes to CM port state */
108 	spinlock_t state_lock;
109 } cm;
110 
111 /* Counter indexes ordered by attribute ID */
112 enum {
113 	CM_REQ_COUNTER,
114 	CM_MRA_COUNTER,
115 	CM_REJ_COUNTER,
116 	CM_REP_COUNTER,
117 	CM_RTU_COUNTER,
118 	CM_DREQ_COUNTER,
119 	CM_DREP_COUNTER,
120 	CM_SIDR_REQ_COUNTER,
121 	CM_SIDR_REP_COUNTER,
122 	CM_LAP_COUNTER,
123 	CM_APR_COUNTER,
124 	CM_ATTR_COUNT,
125 	CM_ATTR_ID_OFFSET = 0x0010,
126 };
127 
128 enum {
129 	CM_XMIT,
130 	CM_XMIT_RETRIES,
131 	CM_RECV,
132 	CM_RECV_DUPLICATES,
133 	CM_COUNTER_GROUPS
134 };
135 
136 static char const counter_group_names[CM_COUNTER_GROUPS]
137 				     [sizeof("cm_rx_duplicates")] = {
138 	"cm_tx_msgs", "cm_tx_retries",
139 	"cm_rx_msgs", "cm_rx_duplicates"
140 };
141 
142 struct cm_counter_group {
143 	struct kobject obj;
144 	atomic_long_t counter[CM_ATTR_COUNT];
145 };
146 
147 struct cm_counter_attribute {
148 	struct attribute attr;
149 	int index;
150 };
151 
152 #define CM_COUNTER_ATTR(_name, _index) \
153 struct cm_counter_attribute cm_##_name##_counter_attr = { \
154 	.attr = { .name = __stringify(_name), .mode = 0444 }, \
155 	.index = _index \
156 }
157 
158 static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
159 static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
160 static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
161 static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
162 static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
163 static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
164 static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
165 static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
166 static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
167 static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
168 static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
169 
170 static struct attribute *cm_counter_default_attrs[] = {
171 	&cm_req_counter_attr.attr,
172 	&cm_mra_counter_attr.attr,
173 	&cm_rej_counter_attr.attr,
174 	&cm_rep_counter_attr.attr,
175 	&cm_rtu_counter_attr.attr,
176 	&cm_dreq_counter_attr.attr,
177 	&cm_drep_counter_attr.attr,
178 	&cm_sidr_req_counter_attr.attr,
179 	&cm_sidr_rep_counter_attr.attr,
180 	&cm_lap_counter_attr.attr,
181 	&cm_apr_counter_attr.attr,
182 	NULL
183 };
184 
185 struct cm_port {
186 	struct cm_device *cm_dev;
187 	struct ib_mad_agent *mad_agent;
188 	struct kobject port_obj;
189 	u8 port_num;
190 	struct list_head cm_priv_prim_list;
191 	struct list_head cm_priv_altr_list;
192 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
193 };
194 
195 struct cm_device {
196 	struct list_head list;
197 	struct ib_device *ib_device;
198 	u8 ack_delay;
199 	int going_down;
200 	struct cm_port *port[];
201 };
202 
203 struct cm_av {
204 	struct cm_port *port;
205 	union ib_gid dgid;
206 	struct rdma_ah_attr ah_attr;
207 	u16 pkey_index;
208 	u8 timeout;
209 };
210 
211 struct cm_work {
212 	struct delayed_work work;
213 	struct list_head list;
214 	struct cm_port *port;
215 	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
216 	__be32 local_id;			/* Established / timewait */
217 	__be32 remote_id;
218 	struct ib_cm_event cm_event;
219 	struct sa_path_rec path[0];
220 };
221 
222 struct cm_timewait_info {
223 	struct cm_work work;
224 	struct list_head list;
225 	struct rb_node remote_qp_node;
226 	struct rb_node remote_id_node;
227 	__be64 remote_ca_guid;
228 	__be32 remote_qpn;
229 	u8 inserted_remote_qp;
230 	u8 inserted_remote_id;
231 };
232 
233 struct cm_id_private {
234 	struct ib_cm_id	id;
235 
236 	struct rb_node service_node;
237 	struct rb_node sidr_id_node;
238 	spinlock_t lock;	/* Do not acquire inside cm.lock */
239 	struct completion comp;
240 	refcount_t refcount;
241 	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
242 	 * Protected by the cm.lock spinlock. */
243 	int listen_sharecount;
244 	struct rcu_head rcu;
245 
246 	struct ib_mad_send_buf *msg;
247 	struct cm_timewait_info *timewait_info;
248 	/* todo: use alternate port on send failure */
249 	struct cm_av av;
250 	struct cm_av alt_av;
251 
252 	void *private_data;
253 	__be64 tid;
254 	__be32 local_qpn;
255 	__be32 remote_qpn;
256 	enum ib_qp_type qp_type;
257 	__be32 sq_psn;
258 	__be32 rq_psn;
259 	int timeout_ms;
260 	enum ib_mtu path_mtu;
261 	__be16 pkey;
262 	u8 private_data_len;
263 	u8 max_cm_retries;
264 	u8 peer_to_peer;
265 	u8 responder_resources;
266 	u8 initiator_depth;
267 	u8 retry_count;
268 	u8 rnr_retry_count;
269 	u8 service_timeout;
270 	u8 target_ack_delay;
271 
272 	struct list_head prim_list;
273 	struct list_head altr_list;
274 	/* Indicates that the send port's mad agent is registered and the av is set */
275 	int prim_send_port_not_ready;
276 	int altr_send_port_not_ready;
277 
278 	struct list_head work_list;
279 	atomic_t work_count;
280 };
281 
282 static void cm_work_handler(struct work_struct *work);
283 
284 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
285 {
286 	if (refcount_dec_and_test(&cm_id_priv->refcount))
287 		complete(&cm_id_priv->comp);
288 }
289 
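/*
 * Allocate a MAD send buffer for an outgoing CM message.  Uses the primary
 * AV, or the alternate AV when the primary's send port is not ready, creates
 * an address handle for it and takes a reference on cm_id_priv; both are
 * released again in cm_free_msg().
 */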
290 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
291 			struct ib_mad_send_buf **msg)
292 {
293 	struct ib_mad_agent *mad_agent;
294 	struct ib_mad_send_buf *m;
295 	struct ib_ah *ah;
296 	struct cm_av *av;
297 	unsigned long flags, flags2;
298 	int ret = 0;
299 
300 	/* don't let the port be released till the agent is down */
301 	spin_lock_irqsave(&cm.state_lock, flags2);
302 	spin_lock_irqsave(&cm.lock, flags);
303 	if (!cm_id_priv->prim_send_port_not_ready)
304 		av = &cm_id_priv->av;
305 	else if (!cm_id_priv->altr_send_port_not_ready &&
306 		 (cm_id_priv->alt_av.port))
307 		av = &cm_id_priv->alt_av;
308 	else {
309 		pr_info("%s: not a valid CM id\n", __func__);
310 		ret = -ENODEV;
311 		spin_unlock_irqrestore(&cm.lock, flags);
312 		goto out;
313 	}
314 	spin_unlock_irqrestore(&cm.lock, flags);
315 	/* Make sure the port hasn't released the mad agent yet */
316 	mad_agent = cm_id_priv->av.port->mad_agent;
317 	if (!mad_agent) {
318 		pr_info("%s: not a valid MAD agent\n", __func__);
319 		ret = -ENODEV;
320 		goto out;
321 	}
322 	ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
323 	if (IS_ERR(ah)) {
324 		ret = PTR_ERR(ah);
325 		goto out;
326 	}
327 
328 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
329 			       av->pkey_index,
330 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
331 			       GFP_ATOMIC,
332 			       IB_MGMT_BASE_VERSION);
333 	if (IS_ERR(m)) {
334 		rdma_destroy_ah(ah, 0);
335 		ret = PTR_ERR(m);
336 		goto out;
337 	}
338 
339 	/* Timeout set by caller if response is expected. */
340 	m->ah = ah;
341 	m->retries = cm_id_priv->max_cm_retries;
342 
343 	refcount_inc(&cm_id_priv->refcount);
344 	m->context[0] = cm_id_priv;
345 	*msg = m;
346 
347 out:
348 	spin_unlock_irqrestore(&cm.state_lock, flags2);
349 	return ret;
350 }
351 
352 static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
353 							   struct ib_mad_recv_wc *mad_recv_wc)
354 {
355 	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
356 				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
357 				  GFP_ATOMIC,
358 				  IB_MGMT_BASE_VERSION);
359 }
360 
361 static int cm_create_response_msg_ah(struct cm_port *port,
362 				     struct ib_mad_recv_wc *mad_recv_wc,
363 				     struct ib_mad_send_buf *msg)
364 {
365 	struct ib_ah *ah;
366 
367 	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
368 				  mad_recv_wc->recv_buf.grh, port->port_num);
369 	if (IS_ERR(ah))
370 		return PTR_ERR(ah);
371 
372 	msg->ah = ah;
373 	return 0;
374 }
375 
376 static void cm_free_msg(struct ib_mad_send_buf *msg)
377 {
378 	if (msg->ah)
379 		rdma_destroy_ah(msg->ah, 0);
380 	if (msg->context[0])
381 		cm_deref_id(msg->context[0]);
382 	ib_free_send_mad(msg);
383 }
384 
385 static int cm_alloc_response_msg(struct cm_port *port,
386 				 struct ib_mad_recv_wc *mad_recv_wc,
387 				 struct ib_mad_send_buf **msg)
388 {
389 	struct ib_mad_send_buf *m;
390 	int ret;
391 
392 	m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
393 	if (IS_ERR(m))
394 		return PTR_ERR(m);
395 
396 	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
397 	if (ret) {
398 		cm_free_msg(m);
399 		return ret;
400 	}
401 
402 	*msg = m;
403 	return 0;
404 }
405 
406 static void * cm_copy_private_data(const void *private_data,
407 				   u8 private_data_len)
408 {
409 	void *data;
410 
411 	if (!private_data || !private_data_len)
412 		return NULL;
413 
414 	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
415 	if (!data)
416 		return ERR_PTR(-ENOMEM);
417 
418 	return data;
419 }
420 
421 static void cm_set_private_data(struct cm_id_private *cm_id_priv,
422 				 void *private_data, u8 private_data_len)
423 {
424 	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
425 		kfree(cm_id_priv->private_data);
426 
427 	cm_id_priv->private_data = private_data;
428 	cm_id_priv->private_data_len = private_data_len;
429 }
430 
431 static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
432 			      struct ib_grh *grh, struct cm_av *av)
433 {
434 	struct rdma_ah_attr new_ah_attr;
435 	int ret;
436 
437 	av->port = port;
438 	av->pkey_index = wc->pkey_index;
439 
440 	/*
441 	 * av->ah_attr might already be initialized from a past wc for an
442 	 * incoming connect request, or from sending out a connect request, so
443 	 * initialize a new ah_attr on the stack.  If initialization fails, the
444 	 * old ah_attr is still used for sending any responses; if it succeeds,
445 	 * the new ah_attr overwrites the old one.
446 	 */
447 	ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
448 				      port->port_num, wc,
449 				      grh, &new_ah_attr);
450 	if (ret)
451 		return ret;
452 
453 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
454 	return 0;
455 }
456 
457 static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
458 				   struct ib_grh *grh, struct cm_av *av)
459 {
460 	av->port = port;
461 	av->pkey_index = wc->pkey_index;
462 	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
463 				       port->port_num, wc,
464 				       grh, &av->ah_attr);
465 }
466 
467 static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
468 				  struct cm_av *av,
469 				  struct cm_port *port)
470 {
471 	unsigned long flags;
472 	int ret = 0;
473 
474 	spin_lock_irqsave(&cm.lock, flags);
475 
476 	if (&cm_id_priv->av == av)
477 		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
478 	else if (&cm_id_priv->alt_av == av)
479 		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
480 	else
481 		ret = -EINVAL;
482 
483 	spin_unlock_irqrestore(&cm.lock, flags);
484 	return ret;
485 }
486 
487 static struct cm_port *
488 get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
489 {
490 	struct cm_device *cm_dev;
491 	struct cm_port *port = NULL;
492 	unsigned long flags;
493 
494 	if (attr) {
495 		read_lock_irqsave(&cm.device_lock, flags);
496 		list_for_each_entry(cm_dev, &cm.device_list, list) {
497 			if (cm_dev->ib_device == attr->device) {
498 				port = cm_dev->port[attr->port_num - 1];
499 				break;
500 			}
501 		}
502 		read_unlock_irqrestore(&cm.device_lock, flags);
503 	} else {
504 		/* The SGID attribute can be NULL in the following
505 		 * conditions:
506 		 * (a) Alternative path
507 		 * (b) IB link layer without GRH
508 		 * (c) LAP send messages
509 		 */
510 		read_lock_irqsave(&cm.device_lock, flags);
511 		list_for_each_entry(cm_dev, &cm.device_list, list) {
512 			attr = rdma_find_gid(cm_dev->ib_device,
513 					     &path->sgid,
514 					     sa_conv_pathrec_to_gid_type(path),
515 					     NULL);
516 			if (!IS_ERR(attr)) {
517 				port = cm_dev->port[attr->port_num - 1];
518 				break;
519 			}
520 		}
521 		read_unlock_irqrestore(&cm.device_lock, flags);
522 		if (port)
523 			rdma_put_gid_attr(attr);
524 	}
525 	return port;
526 }
527 
528 static int cm_init_av_by_path(struct sa_path_rec *path,
529 			      const struct ib_gid_attr *sgid_attr,
530 			      struct cm_av *av,
531 			      struct cm_id_private *cm_id_priv)
532 {
533 	struct rdma_ah_attr new_ah_attr;
534 	struct cm_device *cm_dev;
535 	struct cm_port *port;
536 	int ret;
537 
538 	port = get_cm_port_from_path(path, sgid_attr);
539 	if (!port)
540 		return -EINVAL;
541 	cm_dev = port->cm_dev;
542 
543 	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
544 				  be16_to_cpu(path->pkey), &av->pkey_index);
545 	if (ret)
546 		return ret;
547 
548 	av->port = port;
549 
550 	/*
551 	 * av->ah_attr might already be initialized from a wc or from earlier
552 	 * request processing, and it may hold a reference to an sgid_attr, so
553 	 * initialize a new ah_attr on the stack.
554 	 * If initialization fails, the old ah_attr is still used for sending
555 	 * any responses; if it succeeds, the new ah_attr overwrites the old
556 	 * one, so the right ah_attr is available to return an error
557 	 * response.
558 	 */
559 	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
560 					&new_ah_attr, sgid_attr);
561 	if (ret)
562 		return ret;
563 
564 	av->timeout = path->packet_life_time + 1;
565 
566 	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
567 	if (ret) {
568 		rdma_destroy_ah_attr(&new_ah_attr);
569 		return ret;
570 	}
571 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
572 	return 0;
573 }
574 
575 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
576 {
577 	int err;
578 	u32 id;
579 
580 	err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
581 			xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
582 
583 	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
584 	return err;
585 }
586 
587 static u32 cm_local_id(__be32 local_id)
588 {
589 	return (__force u32) (local_id ^ cm.random_id_operand);
590 }
591 
592 static void cm_free_id(__be32 local_id)
593 {
594 	xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
595 }
596 
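/*
 * Look up a cm_id_private by local ID under RCU, check that the remote ID
 * matches and take a reference.  The caller drops it with cm_deref_id().
 */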
597 static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
598 {
599 	struct cm_id_private *cm_id_priv;
600 
601 	rcu_read_lock();
602 	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
603 	if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
604 	    !refcount_inc_not_zero(&cm_id_priv->refcount))
605 		cm_id_priv = NULL;
606 	rcu_read_unlock();
607 
608 	return cm_id_priv;
609 }
610 
611 /*
612  * Trivial helpers to strip endian annotation and compare; the
613  * endianness doesn't actually matter since we just need a stable
614  * order for the RB tree.
615  */
616 static int be32_lt(__be32 a, __be32 b)
617 {
618 	return (__force u32) a < (__force u32) b;
619 }
620 
621 static int be32_gt(__be32 a, __be32 b)
622 {
623 	return (__force u32) a > (__force u32) b;
624 }
625 
626 static int be64_lt(__be64 a, __be64 b)
627 {
628 	return (__force u64) a < (__force u64) b;
629 }
630 
631 static int be64_gt(__be64 a, __be64 b)
632 {
633 	return (__force u64) a > (__force u64) b;
634 }
635 
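/*
 * Insert a listener into the listen service RB tree, keyed by device and
 * service ID.  Returns NULL on success, or the existing entry when another
 * listen on the same device overlaps this service ID/mask.  Callers hold
 * cm.lock.
 */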
636 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
637 {
638 	struct rb_node **link = &cm.listen_service_table.rb_node;
639 	struct rb_node *parent = NULL;
640 	struct cm_id_private *cur_cm_id_priv;
641 	__be64 service_id = cm_id_priv->id.service_id;
642 	__be64 service_mask = cm_id_priv->id.service_mask;
643 
644 	while (*link) {
645 		parent = *link;
646 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
647 					  service_node);
648 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
649 		    (service_mask & cur_cm_id_priv->id.service_id) &&
650 		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
651 			return cur_cm_id_priv;
652 
653 		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
654 			link = &(*link)->rb_left;
655 		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
656 			link = &(*link)->rb_right;
657 		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
658 			link = &(*link)->rb_left;
659 		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
660 			link = &(*link)->rb_right;
661 		else
662 			link = &(*link)->rb_right;
663 	}
664 	rb_link_node(&cm_id_priv->service_node, parent, link);
665 	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
666 	return NULL;
667 }
668 
669 static struct cm_id_private * cm_find_listen(struct ib_device *device,
670 					     __be64 service_id)
671 {
672 	struct rb_node *node = cm.listen_service_table.rb_node;
673 	struct cm_id_private *cm_id_priv;
674 
675 	while (node) {
676 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
677 		if ((cm_id_priv->id.service_mask & service_id) ==
678 		     cm_id_priv->id.service_id &&
679 		    (cm_id_priv->id.device == device))
680 			return cm_id_priv;
681 
682 		if (device < cm_id_priv->id.device)
683 			node = node->rb_left;
684 		else if (device > cm_id_priv->id.device)
685 			node = node->rb_right;
686 		else if (be64_lt(service_id, cm_id_priv->id.service_id))
687 			node = node->rb_left;
688 		else if (be64_gt(service_id, cm_id_priv->id.service_id))
689 			node = node->rb_right;
690 		else
691 			node = node->rb_right;
692 	}
693 	return NULL;
694 }
695 
696 static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
697 						     *timewait_info)
698 {
699 	struct rb_node **link = &cm.remote_id_table.rb_node;
700 	struct rb_node *parent = NULL;
701 	struct cm_timewait_info *cur_timewait_info;
702 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
703 	__be32 remote_id = timewait_info->work.remote_id;
704 
705 	while (*link) {
706 		parent = *link;
707 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
708 					     remote_id_node);
709 		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
710 			link = &(*link)->rb_left;
711 		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
712 			link = &(*link)->rb_right;
713 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
714 			link = &(*link)->rb_left;
715 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
716 			link = &(*link)->rb_right;
717 		else
718 			return cur_timewait_info;
719 	}
720 	timewait_info->inserted_remote_id = 1;
721 	rb_link_node(&timewait_info->remote_id_node, parent, link);
722 	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
723 	return NULL;
724 }
725 
726 static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
727 						   __be32 remote_id)
728 {
729 	struct rb_node *node = cm.remote_id_table.rb_node;
730 	struct cm_timewait_info *timewait_info;
731 
732 	while (node) {
733 		timewait_info = rb_entry(node, struct cm_timewait_info,
734 					 remote_id_node);
735 		if (be32_lt(remote_id, timewait_info->work.remote_id))
736 			node = node->rb_left;
737 		else if (be32_gt(remote_id, timewait_info->work.remote_id))
738 			node = node->rb_right;
739 		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
740 			node = node->rb_left;
741 		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
742 			node = node->rb_right;
743 		else
744 			return timewait_info;
745 	}
746 	return NULL;
747 }
748 
749 static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
750 						      *timewait_info)
751 {
752 	struct rb_node **link = &cm.remote_qp_table.rb_node;
753 	struct rb_node *parent = NULL;
754 	struct cm_timewait_info *cur_timewait_info;
755 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
756 	__be32 remote_qpn = timewait_info->remote_qpn;
757 
758 	while (*link) {
759 		parent = *link;
760 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
761 					     remote_qp_node);
762 		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
763 			link = &(*link)->rb_left;
764 		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
765 			link = &(*link)->rb_right;
766 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
767 			link = &(*link)->rb_left;
768 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
769 			link = &(*link)->rb_right;
770 		else
771 			return cur_timewait_info;
772 	}
773 	timewait_info->inserted_remote_qp = 1;
774 	rb_link_node(&timewait_info->remote_qp_node, parent, link);
775 	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
776 	return NULL;
777 }
778 
779 static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
780 						    *cm_id_priv)
781 {
782 	struct rb_node **link = &cm.remote_sidr_table.rb_node;
783 	struct rb_node *parent = NULL;
784 	struct cm_id_private *cur_cm_id_priv;
785 	union ib_gid *port_gid = &cm_id_priv->av.dgid;
786 	__be32 remote_id = cm_id_priv->id.remote_id;
787 
788 	while (*link) {
789 		parent = *link;
790 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
791 					  sidr_id_node);
792 		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
793 			link = &(*link)->rb_left;
794 		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
795 			link = &(*link)->rb_right;
796 		else {
797 			int cmp;
798 			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
799 				     sizeof *port_gid);
800 			if (cmp < 0)
801 				link = &(*link)->rb_left;
802 			else if (cmp > 0)
803 				link = &(*link)->rb_right;
804 			else
805 				return cur_cm_id_priv;
806 		}
807 	}
808 	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
809 	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
810 	return NULL;
811 }
812 
813 static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
814 			       enum ib_cm_sidr_status status)
815 {
816 	struct ib_cm_sidr_rep_param param;
817 
818 	memset(&param, 0, sizeof param);
819 	param.status = status;
820 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
821 }
822 
823 struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
824 				 ib_cm_handler cm_handler,
825 				 void *context)
826 {
827 	struct cm_id_private *cm_id_priv;
828 	int ret;
829 
830 	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
831 	if (!cm_id_priv)
832 		return ERR_PTR(-ENOMEM);
833 
834 	cm_id_priv->id.state = IB_CM_IDLE;
835 	cm_id_priv->id.device = device;
836 	cm_id_priv->id.cm_handler = cm_handler;
837 	cm_id_priv->id.context = context;
838 	cm_id_priv->id.remote_cm_qpn = 1;
839 	ret = cm_alloc_id(cm_id_priv);
840 	if (ret)
841 		goto error;
842 
843 	spin_lock_init(&cm_id_priv->lock);
844 	init_completion(&cm_id_priv->comp);
845 	INIT_LIST_HEAD(&cm_id_priv->work_list);
846 	INIT_LIST_HEAD(&cm_id_priv->prim_list);
847 	INIT_LIST_HEAD(&cm_id_priv->altr_list);
848 	atomic_set(&cm_id_priv->work_count, -1);
849 	refcount_set(&cm_id_priv->refcount, 1);
850 	return &cm_id_priv->id;
851 
852 error:
853 	kfree(cm_id_priv);
854 	return ERR_PTR(-ENOMEM);
855 }
856 EXPORT_SYMBOL(ib_create_cm_id);
857 
858 static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
859 {
860 	struct cm_work *work;
861 
862 	if (list_empty(&cm_id_priv->work_list))
863 		return NULL;
864 
865 	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
866 	list_del(&work->list);
867 	return work;
868 }
869 
870 static void cm_free_work(struct cm_work *work)
871 {
872 	if (work->mad_recv_wc)
873 		ib_free_recv_mad(work->mad_recv_wc);
874 	kfree(work);
875 }
876 
877 static inline int cm_convert_to_ms(int iba_time)
878 {
879 	/* approximate conversion to ms from 4.096us x 2^iba_time */
880 	return 1 << max(iba_time - 8, 0);
881 }
882 
883 /*
884  * calculate: 4.096us * 2^ack_timeout = 4.096us * 2^ack_delay + 2 * 4.096us * 2^life_time
885  * Because of how ack_timeout is stored, adding one doubles the timeout.
886  * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
887  * increment it (round up) only if the other is within 50%.
888  */
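/*
 * For example, with ca_ack_delay = 15 and packet_life_time = 14:
 * max(15, 14 + 1) = 15, and because the other term is within 50% the
 * result is rounded up to an ack timeout of 16.
 */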
889 static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
890 {
891 	int ack_timeout = packet_life_time + 1;
892 
893 	if (ack_timeout >= ca_ack_delay)
894 		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
895 	else
896 		ack_timeout = ca_ack_delay +
897 			      (ack_timeout >= (ca_ack_delay - 1));
898 
899 	return min(31, ack_timeout);
900 }
901 
902 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
903 {
904 	if (timewait_info->inserted_remote_id) {
905 		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
906 		timewait_info->inserted_remote_id = 0;
907 	}
908 
909 	if (timewait_info->inserted_remote_qp) {
910 		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
911 		timewait_info->inserted_remote_qp = 0;
912 	}
913 }
914 
915 static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
916 {
917 	struct cm_timewait_info *timewait_info;
918 
919 	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
920 	if (!timewait_info)
921 		return ERR_PTR(-ENOMEM);
922 
923 	timewait_info->work.local_id = local_id;
924 	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
925 	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
926 	return timewait_info;
927 }
928 
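/*
 * Move the connection into timewait: add the timewait_info to
 * cm.timewait_list and, unless the device is being removed, schedule the
 * delayed work that will deliver the IB_CM_TIMEWAIT_EXIT event once the
 * timewait period (derived from the AV timeout) expires.
 */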
929 static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
930 {
931 	int wait_time;
932 	unsigned long flags;
933 	struct cm_device *cm_dev;
934 
935 	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
936 	if (!cm_dev)
937 		return;
938 
939 	spin_lock_irqsave(&cm.lock, flags);
940 	cm_cleanup_timewait(cm_id_priv->timewait_info);
941 	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
942 	spin_unlock_irqrestore(&cm.lock, flags);
943 
944 	/*
945 	 * The cm_id could be destroyed by the user before we exit timewait.
946 	 * To protect against this, we search for the cm_id after exiting
947 	 * timewait before notifying the user that we've exited timewait.
948 	 */
949 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
950 	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
951 
952 	/* Check if the device started its remove_one */
953 	spin_lock_irqsave(&cm.lock, flags);
954 	if (!cm_dev->going_down)
955 		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
956 				   msecs_to_jiffies(wait_time));
957 	spin_unlock_irqrestore(&cm.lock, flags);
958 
959 	cm_id_priv->timewait_info = NULL;
960 }
961 
962 static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
963 {
964 	unsigned long flags;
965 
966 	cm_id_priv->id.state = IB_CM_IDLE;
967 	if (cm_id_priv->timewait_info) {
968 		spin_lock_irqsave(&cm.lock, flags);
969 		cm_cleanup_timewait(cm_id_priv->timewait_info);
970 		spin_unlock_irqrestore(&cm.lock, flags);
971 		kfree(cm_id_priv->timewait_info);
972 		cm_id_priv->timewait_info = NULL;
973 	}
974 }
975 
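/*
 * Destroy a cm_id: depending on its state, cancel any outstanding MAD and
 * send the appropriate REJ, DREQ, DREP or SIDR reject, then release the
 * local ID, wait for all references to drop, drain queued work and free
 * the structure.
 */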
976 static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
977 {
978 	struct cm_id_private *cm_id_priv;
979 	struct cm_work *work;
980 
981 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
982 retest:
983 	spin_lock_irq(&cm_id_priv->lock);
984 	switch (cm_id->state) {
985 	case IB_CM_LISTEN:
986 		spin_unlock_irq(&cm_id_priv->lock);
987 
988 		spin_lock_irq(&cm.lock);
989 		if (--cm_id_priv->listen_sharecount > 0) {
990 			/* The id is still shared. */
991 			cm_deref_id(cm_id_priv);
992 			spin_unlock_irq(&cm.lock);
993 			return;
994 		}
995 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
996 		spin_unlock_irq(&cm.lock);
997 		break;
998 	case IB_CM_SIDR_REQ_SENT:
999 		cm_id->state = IB_CM_IDLE;
1000 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1001 		spin_unlock_irq(&cm_id_priv->lock);
1002 		break;
1003 	case IB_CM_SIDR_REQ_RCVD:
1004 		spin_unlock_irq(&cm_id_priv->lock);
1005 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
1006 		spin_lock_irq(&cm.lock);
1007 		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
1008 			rb_erase(&cm_id_priv->sidr_id_node,
1009 				 &cm.remote_sidr_table);
1010 		spin_unlock_irq(&cm.lock);
1011 		break;
1012 	case IB_CM_REQ_SENT:
1013 	case IB_CM_MRA_REQ_RCVD:
1014 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1015 		spin_unlock_irq(&cm_id_priv->lock);
1016 		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
1017 			       &cm_id_priv->id.device->node_guid,
1018 			       sizeof cm_id_priv->id.device->node_guid,
1019 			       NULL, 0);
1020 		break;
1021 	case IB_CM_REQ_RCVD:
1022 		if (err == -ENOMEM) {
1023 			/* Do not reject; leaving the REQ unanswered allows future retries. */
1024 			cm_reset_to_idle(cm_id_priv);
1025 			spin_unlock_irq(&cm_id_priv->lock);
1026 		} else {
1027 			spin_unlock_irq(&cm_id_priv->lock);
1028 			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1029 				       NULL, 0, NULL, 0);
1030 		}
1031 		break;
1032 	case IB_CM_REP_SENT:
1033 	case IB_CM_MRA_REP_RCVD:
1034 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1035 		/* Fall through */
1036 	case IB_CM_MRA_REQ_SENT:
1037 	case IB_CM_REP_RCVD:
1038 	case IB_CM_MRA_REP_SENT:
1039 		spin_unlock_irq(&cm_id_priv->lock);
1040 		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1041 			       NULL, 0, NULL, 0);
1042 		break;
1043 	case IB_CM_ESTABLISHED:
1044 		spin_unlock_irq(&cm_id_priv->lock);
1045 		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
1046 			break;
1047 		ib_send_cm_dreq(cm_id, NULL, 0);
1048 		goto retest;
1049 	case IB_CM_DREQ_SENT:
1050 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1051 		cm_enter_timewait(cm_id_priv);
1052 		spin_unlock_irq(&cm_id_priv->lock);
1053 		break;
1054 	case IB_CM_DREQ_RCVD:
1055 		spin_unlock_irq(&cm_id_priv->lock);
1056 		ib_send_cm_drep(cm_id, NULL, 0);
1057 		break;
1058 	default:
1059 		spin_unlock_irq(&cm_id_priv->lock);
1060 		break;
1061 	}
1062 
1063 	spin_lock_irq(&cm.lock);
1064 	if (!list_empty(&cm_id_priv->altr_list) &&
1065 	    (!cm_id_priv->altr_send_port_not_ready))
1066 		list_del(&cm_id_priv->altr_list);
1067 	if (!list_empty(&cm_id_priv->prim_list) &&
1068 	    (!cm_id_priv->prim_send_port_not_ready))
1069 		list_del(&cm_id_priv->prim_list);
1070 	spin_unlock_irq(&cm.lock);
1071 
1072 	cm_free_id(cm_id->local_id);
1073 	cm_deref_id(cm_id_priv);
1074 	wait_for_completion(&cm_id_priv->comp);
1075 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
1076 		cm_free_work(work);
1077 
1078 	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
1079 	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
1080 	kfree(cm_id_priv->private_data);
1081 	kfree_rcu(cm_id_priv, rcu);
1082 }
1083 
1084 void ib_destroy_cm_id(struct ib_cm_id *cm_id)
1085 {
1086 	cm_destroy_id(cm_id, 0);
1087 }
1088 EXPORT_SYMBOL(ib_destroy_cm_id);
1089 
1090 /**
1091  * __ib_cm_listen - Initiates listening on the specified service ID for
1092  *   connection and service ID resolution requests.
1093  * @cm_id: Connection identifier associated with the listen request.
1094  * @service_id: Service identifier matched against incoming connection
1095  *   and service ID resolution requests.  The service ID should be specified
1096  *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1097  *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1098  * @service_mask: Mask applied to service ID used to listen across a
1099  *   range of service IDs.  If set to 0, the service ID is matched
1100  *   exactly.  This parameter is ignored if %service_id is set to
1101  *   IB_CM_ASSIGN_SERVICE_ID.
1102  */
1103 static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
1104 			  __be64 service_mask)
1105 {
1106 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
1107 	int ret = 0;
1108 
1109 	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
1110 	service_id &= service_mask;
1111 	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
1112 	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
1113 		return -EINVAL;
1114 
1115 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1116 	if (cm_id->state != IB_CM_IDLE)
1117 		return -EINVAL;
1118 
1119 	cm_id->state = IB_CM_LISTEN;
1120 	++cm_id_priv->listen_sharecount;
1121 
1122 	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
1123 		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
1124 		cm_id->service_mask = ~cpu_to_be64(0);
1125 	} else {
1126 		cm_id->service_id = service_id;
1127 		cm_id->service_mask = service_mask;
1128 	}
1129 	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
1130 
1131 	if (cur_cm_id_priv) {
1132 		cm_id->state = IB_CM_IDLE;
1133 		--cm_id_priv->listen_sharecount;
1134 		ret = -EBUSY;
1135 	}
1136 	return ret;
1137 }
1138 
1139 int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
1140 {
1141 	unsigned long flags;
1142 	int ret;
1143 
1144 	spin_lock_irqsave(&cm.lock, flags);
1145 	ret = __ib_cm_listen(cm_id, service_id, service_mask);
1146 	spin_unlock_irqrestore(&cm.lock, flags);
1147 
1148 	return ret;
1149 }
1150 EXPORT_SYMBOL(ib_cm_listen);
1151 
1152 /**
1153  * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on the given service ID.
1154  *
1155  * If there's an existing ID listening on that same device and service ID,
1156  * return it.
1157  *
1158  * @device: Device associated with the cm_id.  All related communication will
1159  * be associated with the specified device.
1160  * @cm_handler: Callback invoked to notify the user of CM events.
1161  * @service_id: Service identifier matched against incoming connection
1162  *   and service ID resolution requests.  The service ID should be specified
1163  *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1164  *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1165  *
1166  * Callers should call ib_destroy_cm_id when done with the listener ID.
1167  */
1168 struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
1169 				     ib_cm_handler cm_handler,
1170 				     __be64 service_id)
1171 {
1172 	struct cm_id_private *cm_id_priv;
1173 	struct ib_cm_id *cm_id;
1174 	unsigned long flags;
1175 	int err = 0;
1176 
1177 	/* Create an ID in advance, since the creation may sleep */
1178 	cm_id = ib_create_cm_id(device, cm_handler, NULL);
1179 	if (IS_ERR(cm_id))
1180 		return cm_id;
1181 
1182 	spin_lock_irqsave(&cm.lock, flags);
1183 
1184 	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
1185 		goto new_id;
1186 
1187 	/* Find an existing ID */
1188 	cm_id_priv = cm_find_listen(device, service_id);
1189 	if (cm_id_priv) {
1190 		if (cm_id->cm_handler != cm_handler || cm_id->context) {
1191 			/* Sharing an ib_cm_id with different handlers is not
1192 			 * supported */
1193 			spin_unlock_irqrestore(&cm.lock, flags);
1194 			return ERR_PTR(-EINVAL);
1195 		}
1196 		refcount_inc(&cm_id_priv->refcount);
1197 		++cm_id_priv->listen_sharecount;
1198 		spin_unlock_irqrestore(&cm.lock, flags);
1199 
1200 		ib_destroy_cm_id(cm_id);
1201 		cm_id = &cm_id_priv->id;
1202 		return cm_id;
1203 	}
1204 
1205 new_id:
1206 	/* Use newly created ID */
1207 	err = __ib_cm_listen(cm_id, service_id, 0);
1208 
1209 	spin_unlock_irqrestore(&cm.lock, flags);
1210 
1211 	if (err) {
1212 		ib_destroy_cm_id(cm_id);
1213 		return ERR_PTR(err);
1214 	}
1215 	return cm_id;
1216 }
1217 EXPORT_SYMBOL(ib_cm_insert_listen);
1218 
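/*
 * Build the 64-bit MAD transaction ID from the MAD agent's hi_tid (upper
 * 32 bits) and the local communication ID (lower 32 bits).
 */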
1219 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1220 {
1221 	u64 hi_tid, low_tid;
1222 
1223 	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1224 	low_tid  = (u64)cm_id_priv->id.local_id;
1225 	return cpu_to_be64(hi_tid | low_tid);
1226 }
1227 
1228 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1229 			      __be16 attr_id, __be64 tid)
1230 {
1231 	hdr->base_version  = IB_MGMT_BASE_VERSION;
1232 	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
1233 	hdr->class_version = IB_CM_CLASS_VERSION;
1234 	hdr->method	   = IB_MGMT_METHOD_SEND;
1235 	hdr->attr_id	   = attr_id;
1236 	hdr->tid	   = tid;
1237 }
1238 
1239 static void cm_format_req(struct cm_req_msg *req_msg,
1240 			  struct cm_id_private *cm_id_priv,
1241 			  struct ib_cm_req_param *param)
1242 {
1243 	struct sa_path_rec *pri_path = param->primary_path;
1244 	struct sa_path_rec *alt_path = param->alternate_path;
1245 	bool pri_ext = false;
1246 
1247 	if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
1248 		pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
1249 					      pri_path->opa.slid);
1250 
1251 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1252 			  cm_form_tid(cm_id_priv));
1253 
1254 	IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
1255 		be32_to_cpu(cm_id_priv->id.local_id));
1256 	IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
1257 	IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
1258 		be64_to_cpu(cm_id_priv->id.device->node_guid));
1259 	IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
1260 	IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
1261 	IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
1262 		param->remote_cm_response_timeout);
1263 	cm_req_set_qp_type(req_msg, param->qp_type);
1264 	IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
1265 	IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
1266 	IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
1267 		param->local_cm_response_timeout);
1268 	IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
1269 		be16_to_cpu(param->primary_path->pkey));
1270 	IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
1271 		param->primary_path->mtu);
1272 	IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
1273 
1274 	if (param->qp_type != IB_QPT_XRC_INI) {
1275 		IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
1276 			param->responder_resources);
1277 		IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
1278 		IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
1279 			param->rnr_retry_count);
1280 		IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
1281 	}
1282 
1283 	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
1284 		pri_path->sgid;
1285 	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
1286 		pri_path->dgid;
1287 	if (pri_ext) {
1288 		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
1289 			->global.interface_id =
1290 			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
1291 		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
1292 			->global.interface_id =
1293 			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
1294 	}
1295 	if (pri_path->hop_limit <= 1) {
1296 		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1297 			be16_to_cpu(pri_ext ? 0 :
1298 					      htons(ntohl(sa_path_get_slid(
1299 						      pri_path)))));
1300 		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1301 			be16_to_cpu(pri_ext ? 0 :
1302 					      htons(ntohl(sa_path_get_dlid(
1303 						      pri_path)))));
1304 	} else {
1305 		/* Work-around until there's a way to obtain remote LID info */
1306 		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1307 			be16_to_cpu(IB_LID_PERMISSIVE));
1308 		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1309 			be16_to_cpu(IB_LID_PERMISSIVE));
1310 	}
1311 	IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
1312 		be32_to_cpu(pri_path->flow_label));
1313 	IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
1314 	IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
1315 	IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
1316 	IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
1317 	IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
1318 		(pri_path->hop_limit <= 1));
1319 	IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
1320 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1321 			       pri_path->packet_life_time));
1322 
1323 	if (alt_path) {
1324 		bool alt_ext = false;
1325 
1326 		if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
1327 			alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
1328 						      alt_path->opa.slid);
1329 
1330 		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
1331 			alt_path->sgid;
1332 		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
1333 			alt_path->dgid;
1334 		if (alt_ext) {
1335 			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
1336 					req_msg)
1337 				->global.interface_id =
1338 				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
1339 			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
1340 					req_msg)
1341 				->global.interface_id =
1342 				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
1343 		}
1344 		if (alt_path->hop_limit <= 1) {
1345 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1346 				be16_to_cpu(
1347 					alt_ext ? 0 :
1348 						  htons(ntohl(sa_path_get_slid(
1349 							  alt_path)))));
1350 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1351 				be16_to_cpu(
1352 					alt_ext ? 0 :
1353 						  htons(ntohl(sa_path_get_dlid(
1354 							  alt_path)))));
1355 		} else {
1356 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1357 				be16_to_cpu(IB_LID_PERMISSIVE));
1358 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1359 				be16_to_cpu(IB_LID_PERMISSIVE));
1360 		}
1361 		IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
1362 			be32_to_cpu(alt_path->flow_label));
1363 		IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
1364 		IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
1365 			alt_path->traffic_class);
1366 		IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
1367 			alt_path->hop_limit);
1368 		IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
1369 		IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
1370 			(alt_path->hop_limit <= 1));
1371 		IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
1372 			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1373 				       alt_path->packet_life_time));
1374 	}
1375 
1376 	if (param->private_data && param->private_data_len)
1377 		IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
1378 			    param->private_data_len);
1379 }
1380 
1381 static int cm_validate_req_param(struct ib_cm_req_param *param)
1382 {
1383 	/* peer-to-peer not supported */
1384 	if (param->peer_to_peer)
1385 		return -EINVAL;
1386 
1387 	if (!param->primary_path)
1388 		return -EINVAL;
1389 
1390 	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1391 	    param->qp_type != IB_QPT_XRC_INI)
1392 		return -EINVAL;
1393 
1394 	if (param->private_data &&
1395 	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1396 		return -EINVAL;
1397 
1398 	if (param->alternate_path &&
1399 	    (param->alternate_path->pkey != param->primary_path->pkey ||
1400 	     param->alternate_path->mtu != param->primary_path->mtu))
1401 		return -EINVAL;
1402 
1403 	return 0;
1404 }
1405 
1406 int ib_send_cm_req(struct ib_cm_id *cm_id,
1407 		   struct ib_cm_req_param *param)
1408 {
1409 	struct cm_id_private *cm_id_priv;
1410 	struct cm_req_msg *req_msg;
1411 	unsigned long flags;
1412 	int ret;
1413 
1414 	ret = cm_validate_req_param(param);
1415 	if (ret)
1416 		return ret;
1417 
1418 	/* Verify that we're not in timewait. */
1419 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1420 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1421 	if (cm_id->state != IB_CM_IDLE) {
1422 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1423 		ret = -EINVAL;
1424 		goto out;
1425 	}
1426 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1427 
1428 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1429 							    id.local_id);
1430 	if (IS_ERR(cm_id_priv->timewait_info)) {
1431 		ret = PTR_ERR(cm_id_priv->timewait_info);
1432 		goto out;
1433 	}
1434 
1435 	ret = cm_init_av_by_path(param->primary_path,
1436 				 param->ppath_sgid_attr, &cm_id_priv->av,
1437 				 cm_id_priv);
1438 	if (ret)
1439 		goto error1;
1440 	if (param->alternate_path) {
1441 		ret = cm_init_av_by_path(param->alternate_path, NULL,
1442 					 &cm_id_priv->alt_av, cm_id_priv);
1443 		if (ret)
1444 			goto error1;
1445 	}
1446 	cm_id->service_id = param->service_id;
1447 	cm_id->service_mask = ~cpu_to_be64(0);
1448 	cm_id_priv->timeout_ms = cm_convert_to_ms(
1449 				    param->primary_path->packet_life_time) * 2 +
1450 				 cm_convert_to_ms(
1451 				    param->remote_cm_response_timeout);
1452 	cm_id_priv->max_cm_retries = param->max_cm_retries;
1453 	cm_id_priv->initiator_depth = param->initiator_depth;
1454 	cm_id_priv->responder_resources = param->responder_resources;
1455 	cm_id_priv->retry_count = param->retry_count;
1456 	cm_id_priv->path_mtu = param->primary_path->mtu;
1457 	cm_id_priv->pkey = param->primary_path->pkey;
1458 	cm_id_priv->qp_type = param->qp_type;
1459 
1460 	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1461 	if (ret)
1462 		goto error1;
1463 
1464 	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1465 	cm_format_req(req_msg, cm_id_priv, param);
1466 	cm_id_priv->tid = req_msg->hdr.tid;
1467 	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1468 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1469 
1470 	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
1471 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
1472 
1473 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1474 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1475 	if (ret) {
1476 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1477 		goto error2;
1478 	}
1479 	BUG_ON(cm_id->state != IB_CM_IDLE);
1480 	cm_id->state = IB_CM_REQ_SENT;
1481 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1482 	return 0;
1483 
1484 error2:	cm_free_msg(cm_id_priv->msg);
1485 error1:	kfree(cm_id_priv->timewait_info);
1486 out:	return ret;
1487 }
1488 EXPORT_SYMBOL(ib_send_cm_req);
1489 
1490 static int cm_issue_rej(struct cm_port *port,
1491 			struct ib_mad_recv_wc *mad_recv_wc,
1492 			enum ib_cm_rej_reason reason,
1493 			enum cm_msg_response msg_rejected,
1494 			void *ari, u8 ari_length)
1495 {
1496 	struct ib_mad_send_buf *msg = NULL;
1497 	struct cm_rej_msg *rej_msg, *rcv_msg;
1498 	int ret;
1499 
1500 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1501 	if (ret)
1502 		return ret;
1503 
1504 	/* We just need common CM header information.  Cast to any message. */
1505 	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1506 	rej_msg = (struct cm_rej_msg *) msg->mad;
1507 
1508 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1509 	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
1510 		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
1511 	IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1512 		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
1513 	IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
1514 	IBA_SET(CM_REJ_REASON, rej_msg, reason);
1515 
1516 	if (ari && ari_length) {
1517 		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
1518 		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
1519 	}
1520 
1521 	ret = ib_post_send_mad(msg, NULL);
1522 	if (ret)
1523 		cm_free_msg(msg);
1524 
1525 	return ret;
1526 }
1527 
1528 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
1529 {
1530 	return ((cpu_to_be16(
1531 			IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
1532 		(ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
1533 					       req_msg))));
1534 }
1535 
1536 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
1537 				 struct sa_path_rec *path, union ib_gid *gid)
1538 {
1539 	if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
1540 		path->rec_type = SA_PATH_REC_TYPE_OPA;
1541 	else
1542 		path->rec_type = SA_PATH_REC_TYPE_IB;
1543 }
1544 
1545 static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
1546 					struct sa_path_rec *primary_path,
1547 					struct sa_path_rec *alt_path)
1548 {
1549 	u32 lid;
1550 
1551 	if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1552 		sa_path_set_dlid(primary_path,
1553 				 IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
1554 					 req_msg));
1555 		sa_path_set_slid(primary_path,
1556 				 IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
1557 					 req_msg));
1558 	} else {
1559 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1560 			CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
1561 		sa_path_set_dlid(primary_path, lid);
1562 
1563 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1564 			CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
1565 		sa_path_set_slid(primary_path, lid);
1566 	}
1567 
1568 	if (!cm_req_has_alt_path(req_msg))
1569 		return;
1570 
1571 	if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1572 		sa_path_set_dlid(alt_path,
1573 				 IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
1574 					 req_msg));
1575 		sa_path_set_slid(alt_path,
1576 				 IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
1577 					 req_msg));
1578 	} else {
1579 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1580 			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
1581 		sa_path_set_dlid(alt_path, lid);
1582 
1583 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1584 			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
1585 		sa_path_set_slid(alt_path, lid);
1586 	}
1587 }
1588 
1589 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1590 				     struct sa_path_rec *primary_path,
1591 				     struct sa_path_rec *alt_path)
1592 {
1593 	primary_path->dgid =
1594 		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
1595 	primary_path->sgid =
1596 		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
1597 	primary_path->flow_label =
1598 		cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
1599 	primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
1600 	primary_path->traffic_class =
1601 		IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
1602 	primary_path->reversible = 1;
1603 	primary_path->pkey =
1604 		cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
1605 	primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
1606 	primary_path->mtu_selector = IB_SA_EQ;
1607 	primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
1608 	primary_path->rate_selector = IB_SA_EQ;
1609 	primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
1610 	primary_path->packet_life_time_selector = IB_SA_EQ;
1611 	primary_path->packet_life_time =
1612 		IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
1613 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1614 	primary_path->service_id =
1615 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
1616 	if (sa_path_is_roce(primary_path))
1617 		primary_path->roce.route_resolved = false;
1618 
1619 	if (cm_req_has_alt_path(req_msg)) {
1620 		alt_path->dgid = *IBA_GET_MEM_PTR(
1621 			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
1622 		alt_path->sgid = *IBA_GET_MEM_PTR(
1623 			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
1624 		alt_path->flow_label = cpu_to_be32(
1625 			IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
1626 		alt_path->hop_limit =
1627 			IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
1628 		alt_path->traffic_class =
1629 			IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
1630 		alt_path->reversible = 1;
1631 		alt_path->pkey =
1632 			cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
1633 		alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
1634 		alt_path->mtu_selector = IB_SA_EQ;
1635 		alt_path->mtu =
1636 			IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
1637 		alt_path->rate_selector = IB_SA_EQ;
1638 		alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
1639 		alt_path->packet_life_time_selector = IB_SA_EQ;
1640 		alt_path->packet_life_time =
1641 			IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
1642 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1643 		alt_path->service_id =
1644 			cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
1645 
1646 		if (sa_path_is_roce(alt_path))
1647 			alt_path->roce.route_resolved = false;
1648 	}
1649 	cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
1650 }
1651 
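/*
 * Return the P_Key that the incoming request arrived on, looked up from the
 * work completion's pkey index; returns 0 if the lookup fails.
 */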
1652 static u16 cm_get_bth_pkey(struct cm_work *work)
1653 {
1654 	struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1655 	u8 port_num = work->port->port_num;
1656 	u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1657 	u16 pkey;
1658 	int ret;
1659 
1660 	ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1661 	if (ret) {
1662 		dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1663 				     port_num, pkey_index, ret);
1664 		return 0;
1665 	}
1666 
1667 	return pkey;
1668 }
1669 
1670 /**
1671  * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
1672  * ULPs (such as IPoIB) do not understand OPA GIDs and will
1673  * reject them as the local_gid will not match the sgid. Therefore,
1674  * change the pathrec's SGID to an IB SGID.
1675  *
1676  * @work: Work completion
1677  * @path: Path record
1678  */
1679 static void cm_opa_to_ib_sgid(struct cm_work *work,
1680 			      struct sa_path_rec *path)
1681 {
1682 	struct ib_device *dev = work->port->cm_dev->ib_device;
1683 	u8 port_num = work->port->port_num;
1684 
1685 	if (rdma_cap_opa_ah(dev, port_num) &&
1686 	    (ib_is_opa_gid(&path->sgid))) {
1687 		union ib_gid sgid;
1688 
1689 		if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1690 			dev_warn(&dev->dev,
1691 				 "Error updating sgid in CM request\n");
1692 			return;
1693 		}
1694 
1695 		path->sgid = sgid;
1696 	}
1697 }
1698 
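/*
 * Fill in the IB_CM_REQ_RECEIVED event parameters from the REQ MAD.  The
 * "local" fields in the wire format describe the sender, so they populate
 * the "remote" members of the event; initiator depth and responder
 * resources are likewise swapped to reflect the passive side's view.
 */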
1699 static void cm_format_req_event(struct cm_work *work,
1700 				struct cm_id_private *cm_id_priv,
1701 				struct ib_cm_id *listen_id)
1702 {
1703 	struct cm_req_msg *req_msg;
1704 	struct ib_cm_req_event_param *param;
1705 
1706 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1707 	param = &work->cm_event.param.req_rcvd;
1708 	param->listen_id = listen_id;
1709 	param->bth_pkey = cm_get_bth_pkey(work);
1710 	param->port = cm_id_priv->av.port->port_num;
1711 	param->primary_path = &work->path[0];
1712 	cm_opa_to_ib_sgid(work, param->primary_path);
1713 	if (cm_req_has_alt_path(req_msg)) {
1714 		param->alternate_path = &work->path[1];
1715 		cm_opa_to_ib_sgid(work, param->alternate_path);
1716 	} else {
1717 		param->alternate_path = NULL;
1718 	}
1719 	param->remote_ca_guid =
1720 		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
1721 	param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
1722 	param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
1723 	param->qp_type = cm_req_get_qp_type(req_msg);
1724 	param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
1725 	param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
1726 	param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
1727 	param->local_cm_response_timeout =
1728 		IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
1729 	param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
1730 	param->remote_cm_response_timeout =
1731 		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
1732 	param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
1733 	param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
1734 	param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
1735 	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1736 	work->cm_event.private_data =
1737 		IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
1738 }
1739 
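/*
 * Deliver the current event to the consumer's cm_handler and then drain
 * any work that was queued on the cm_id in the meantime.  A non-zero
 * return from the handler destroys the cm_id.
 */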
1740 static void cm_process_work(struct cm_id_private *cm_id_priv,
1741 			    struct cm_work *work)
1742 {
1743 	int ret;
1744 
1745 	/* We will typically only have the current event to report. */
1746 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1747 	cm_free_work(work);
1748 
1749 	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1750 		spin_lock_irq(&cm_id_priv->lock);
1751 		work = cm_dequeue_work(cm_id_priv);
1752 		spin_unlock_irq(&cm_id_priv->lock);
1753 		if (!work)
1754 			return;
1755 
1756 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1757 						&work->cm_event);
1758 		cm_free_work(work);
1759 	}
1760 	cm_deref_id(cm_id_priv);
1761 	if (ret)
1762 		cm_destroy_id(&cm_id_priv->id, ret);
1763 }
1764 
1765 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1766 			  struct cm_id_private *cm_id_priv,
1767 			  enum cm_msg_response msg_mraed, u8 service_timeout,
1768 			  const void *private_data, u8 private_data_len)
1769 {
1770 	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1771 	IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
1772 	IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
1773 		be32_to_cpu(cm_id_priv->id.local_id));
1774 	IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
1775 		be32_to_cpu(cm_id_priv->id.remote_id));
1776 	IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
1777 
1778 	if (private_data && private_data_len)
1779 		IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
1780 			    private_data_len);
1781 }
1782 
1783 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1784 			  struct cm_id_private *cm_id_priv,
1785 			  enum ib_cm_rej_reason reason,
1786 			  void *ari,
1787 			  u8 ari_length,
1788 			  const void *private_data,
1789 			  u8 private_data_len)
1790 {
1791 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1792 	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
1793 		be32_to_cpu(cm_id_priv->id.remote_id));
1794 
1795 	switch (cm_id_priv->id.state) {
1796 	case IB_CM_REQ_RCVD:
1797 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, 0);
1798 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
1799 		break;
1800 	case IB_CM_MRA_REQ_SENT:
1801 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1802 			be32_to_cpu(cm_id_priv->id.local_id));
1803 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
1804 		break;
1805 	case IB_CM_REP_RCVD:
1806 	case IB_CM_MRA_REP_SENT:
1807 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1808 			be32_to_cpu(cm_id_priv->id.local_id));
1809 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
1810 		break;
1811 	default:
1812 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1813 			be32_to_cpu(cm_id_priv->id.local_id));
1814 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
1815 			CM_MSG_RESPONSE_OTHER);
1816 		break;
1817 	}
1818 
1819 	IBA_SET(CM_REJ_REASON, rej_msg, reason);
1820 	if (ari && ari_length) {
1821 		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
1822 		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
1823 	}
1824 
1825 	if (private_data && private_data_len)
1826 		IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
1827 			    private_data_len);
1828 }
1829 
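/*
 * A duplicate REQ arrived for a connection we already know about: resend
 * the MRA if we are still in IB_CM_MRA_REQ_SENT, or answer with a stale
 * connection REJ if the connection is already in timewait.
 */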
1830 static void cm_dup_req_handler(struct cm_work *work,
1831 			       struct cm_id_private *cm_id_priv)
1832 {
1833 	struct ib_mad_send_buf *msg = NULL;
1834 	int ret;
1835 
1836 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1837 			counter[CM_REQ_COUNTER]);
1838 
1839 	/* Quick state check to discard duplicate REQs. */
1840 	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1841 		return;
1842 
1843 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1844 	if (ret)
1845 		return;
1846 
1847 	spin_lock_irq(&cm_id_priv->lock);
1848 	switch (cm_id_priv->id.state) {
1849 	case IB_CM_MRA_REQ_SENT:
1850 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1851 			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1852 			      cm_id_priv->private_data,
1853 			      cm_id_priv->private_data_len);
1854 		break;
1855 	case IB_CM_TIMEWAIT:
1856 		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1857 			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1858 		break;
1859 	default:
1860 		goto unlock;
1861 	}
1862 	spin_unlock_irq(&cm_id_priv->lock);
1863 
1864 	ret = ib_post_send_mad(msg, NULL);
1865 	if (ret)
1866 		goto free;
1867 	return;
1868 
1869 unlock:	spin_unlock_irq(&cm_id_priv->lock);
1870 free:	cm_free_msg(msg);
1871 }
1872 
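/*
 * Match an incoming REQ against the timewait trees to catch duplicate
 * REQs and stale connections, then find the listener for the service ID.
 * On success the listening cm_id is returned with an extra reference and
 * the new cm_id moves to IB_CM_REQ_RCVD; otherwise NULL is returned.
 */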
1873 static struct cm_id_private * cm_match_req(struct cm_work *work,
1874 					   struct cm_id_private *cm_id_priv)
1875 {
1876 	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1877 	struct cm_timewait_info *timewait_info;
1878 	struct cm_req_msg *req_msg;
1879 	struct ib_cm_id *cm_id;
1880 
1881 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1882 
1883 	/* Check for possible duplicate REQ. */
1884 	spin_lock_irq(&cm.lock);
1885 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1886 	if (timewait_info) {
1887 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
1888 					   timewait_info->work.remote_id);
1889 		spin_unlock_irq(&cm.lock);
1890 		if (cur_cm_id_priv) {
1891 			cm_dup_req_handler(work, cur_cm_id_priv);
1892 			cm_deref_id(cur_cm_id_priv);
1893 		}
1894 		return NULL;
1895 	}
1896 
1897 	/* Check for stale connections. */
1898 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1899 	if (timewait_info) {
1900 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1901 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
1902 					   timewait_info->work.remote_id);
1903 
1904 		spin_unlock_irq(&cm.lock);
1905 		cm_issue_rej(work->port, work->mad_recv_wc,
1906 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1907 			     NULL, 0);
1908 		if (cur_cm_id_priv) {
1909 			cm_id = &cur_cm_id_priv->id;
1910 			ib_send_cm_dreq(cm_id, NULL, 0);
1911 			cm_deref_id(cur_cm_id_priv);
1912 		}
1913 		return NULL;
1914 	}
1915 
1916 	/* Find matching listen request. */
1917 	listen_cm_id_priv = cm_find_listen(
1918 		cm_id_priv->id.device,
1919 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
1920 	if (!listen_cm_id_priv) {
1921 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1922 		spin_unlock_irq(&cm.lock);
1923 		cm_issue_rej(work->port, work->mad_recv_wc,
1924 			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1925 			     NULL, 0);
1926 		goto out;
1927 	}
1928 	refcount_inc(&listen_cm_id_priv->refcount);
1929 	refcount_inc(&cm_id_priv->refcount);
1930 	cm_id_priv->id.state = IB_CM_REQ_RCVD;
1931 	atomic_inc(&cm_id_priv->work_count);
1932 	spin_unlock_irq(&cm.lock);
1933 out:
1934 	return listen_cm_id_priv;
1935 }
1936 
1937 /*
1938  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1939  * we need to override the LID/SL data in the REQ with the LID information
1940  * in the work completion.
1941  */
1942 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1943 {
1944 	if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
1945 		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
1946 					req_msg)) == IB_LID_PERMISSIVE) {
1947 			IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1948 				be16_to_cpu(ib_lid_be16(wc->slid)));
1949 			IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
1950 		}
1951 
1952 		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
1953 					req_msg)) == IB_LID_PERMISSIVE)
1954 			IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1955 				wc->dlid_path_bits);
1956 	}
1957 
1958 	if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
1959 		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
1960 					req_msg)) == IB_LID_PERMISSIVE) {
1961 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1962 				be16_to_cpu(ib_lid_be16(wc->slid)));
1963 			IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
1964 		}
1965 
1966 		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
1967 					req_msg)) == IB_LID_PERMISSIVE)
1968 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1969 				wc->dlid_path_bits);
1970 	}
1971 }
1972 
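/*
 * Handle a new connection request on the passive side: create a cm_id,
 * build its address vectors from the REQ paths, match it against a
 * listener and report IB_CM_REQ_RECEIVED to the consumer.
 */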
1973 static int cm_req_handler(struct cm_work *work)
1974 {
1975 	struct ib_cm_id *cm_id;
1976 	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1977 	struct cm_req_msg *req_msg;
1978 	const struct ib_global_route *grh;
1979 	const struct ib_gid_attr *gid_attr;
1980 	int ret;
1981 
1982 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1983 
1984 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1985 	if (IS_ERR(cm_id))
1986 		return PTR_ERR(cm_id);
1987 
1988 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1989 	cm_id_priv->id.remote_id =
1990 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
1991 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1992 				      work->mad_recv_wc->recv_buf.grh,
1993 				      &cm_id_priv->av);
1994 	if (ret)
1995 		goto destroy;
1996 	cm_id_priv->timewait_info =
1997 		cm_create_timewait_info(cm_id_priv->id.local_id);
1998 	if (IS_ERR(cm_id_priv->timewait_info)) {
1999 		ret = PTR_ERR(cm_id_priv->timewait_info);
2000 		goto destroy;
2001 	}
2002 	cm_id_priv->timewait_info->work.remote_id =
2003 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
2004 	cm_id_priv->timewait_info->remote_ca_guid =
2005 		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
2006 	cm_id_priv->timewait_info->remote_qpn =
2007 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
2008 
2009 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
2010 	if (!listen_cm_id_priv) {
2011 		pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
2012 			 be32_to_cpu(cm_id->local_id));
2013 		ret = -EINVAL;
2014 		goto free_timeinfo;
2015 	}
2016 
2017 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
2018 	cm_id_priv->id.context = listen_cm_id_priv->id.context;
2019 	cm_id_priv->id.service_id =
2020 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
2021 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
2022 
2023 	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
2024 
2025 	memset(&work->path[0], 0, sizeof(work->path[0]));
2026 	if (cm_req_has_alt_path(req_msg))
2027 		memset(&work->path[1], 0, sizeof(work->path[1]));
2028 	grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
2029 	gid_attr = grh->sgid_attr;
2030 
2031 	if (gid_attr &&
2032 	    rdma_protocol_roce(work->port->cm_dev->ib_device,
2033 			       work->port->port_num)) {
2034 		work->path[0].rec_type =
2035 			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
2036 	} else {
2037 		cm_path_set_rec_type(
2038 			work->port->cm_dev->ib_device, work->port->port_num,
2039 			&work->path[0],
2040 			IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
2041 					req_msg));
2042 	}
2043 	if (cm_req_has_alt_path(req_msg))
2044 		work->path[1].rec_type = work->path[0].rec_type;
2045 	cm_format_paths_from_req(req_msg, &work->path[0],
2046 				 &work->path[1]);
2047 	if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
2048 		sa_path_set_dmac(&work->path[0],
2049 				 cm_id_priv->av.ah_attr.roce.dmac);
2050 	work->path[0].hop_limit = grh->hop_limit;
2051 	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
2052 				 cm_id_priv);
2053 	if (ret) {
2054 		int err;
2055 
2056 		err = rdma_query_gid(work->port->cm_dev->ib_device,
2057 				     work->port->port_num, 0,
2058 				     &work->path[0].sgid);
2059 		if (err)
2060 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2061 				       NULL, 0, NULL, 0);
2062 		else
2063 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2064 				       &work->path[0].sgid,
2065 				       sizeof(work->path[0].sgid),
2066 				       NULL, 0);
2067 		goto rejected;
2068 	}
2069 	if (cm_req_has_alt_path(req_msg)) {
2070 		ret = cm_init_av_by_path(&work->path[1], NULL,
2071 					 &cm_id_priv->alt_av, cm_id_priv);
2072 		if (ret) {
2073 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2074 				       &work->path[0].sgid,
2075 				       sizeof(work->path[0].sgid), NULL, 0);
2076 			goto rejected;
2077 		}
2078 	}
2079 	cm_id_priv->tid = req_msg->hdr.tid;
2080 	cm_id_priv->timeout_ms = cm_convert_to_ms(
2081 		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
2082 	cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
2083 	cm_id_priv->remote_qpn =
2084 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
2085 	cm_id_priv->initiator_depth =
2086 		IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
2087 	cm_id_priv->responder_resources =
2088 		IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
2089 	cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
2090 	cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
2091 	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
2092 	cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
2093 	cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
2094 	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
2095 
2096 	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
2097 	cm_process_work(cm_id_priv, work);
2098 	cm_deref_id(listen_cm_id_priv);
2099 	return 0;
2100 
2101 rejected:
2102 	refcount_dec(&cm_id_priv->refcount);
2103 	cm_deref_id(listen_cm_id_priv);
2104 free_timeinfo:
2105 	kfree(cm_id_priv->timewait_info);
2106 destroy:
2107 	ib_destroy_cm_id(cm_id);
2108 	return ret;
2109 }
2110 
2111 static void cm_format_rep(struct cm_rep_msg *rep_msg,
2112 			  struct cm_id_private *cm_id_priv,
2113 			  struct ib_cm_rep_param *param)
2114 {
2115 	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
2116 	IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
2117 		be32_to_cpu(cm_id_priv->id.local_id));
2118 	IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
2119 		be32_to_cpu(cm_id_priv->id.remote_id));
2120 	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
2121 	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
2122 		param->responder_resources);
2123 	IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
2124 		cm_id_priv->av.port->cm_dev->ack_delay);
2125 	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
2126 	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
2127 	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
2128 		be64_to_cpu(cm_id_priv->id.device->node_guid));
2129 
2130 	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
2131 		IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
2132 			param->initiator_depth);
2133 		IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
2134 			param->flow_control);
2135 		IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
2136 		IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
2137 	} else {
2138 		IBA_SET(CM_REP_SRQ, rep_msg, 1);
2139 		IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
2140 	}
2141 
2142 	if (param->private_data && param->private_data_len)
2143 		IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
2144 			    param->private_data_len);
2145 }
2146 
2147 int ib_send_cm_rep(struct ib_cm_id *cm_id,
2148 		   struct ib_cm_rep_param *param)
2149 {
2150 	struct cm_id_private *cm_id_priv;
2151 	struct ib_mad_send_buf *msg;
2152 	struct cm_rep_msg *rep_msg;
2153 	unsigned long flags;
2154 	int ret;
2155 
2156 	if (param->private_data &&
2157 	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
2158 		return -EINVAL;
2159 
2160 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2161 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2162 	if (cm_id->state != IB_CM_REQ_RCVD &&
2163 	    cm_id->state != IB_CM_MRA_REQ_SENT) {
2164 		pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2165 			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2166 		ret = -EINVAL;
2167 		goto out;
2168 	}
2169 
2170 	ret = cm_alloc_msg(cm_id_priv, &msg);
2171 	if (ret)
2172 		goto out;
2173 
2174 	rep_msg = (struct cm_rep_msg *) msg->mad;
2175 	cm_format_rep(rep_msg, cm_id_priv, param);
2176 	msg->timeout_ms = cm_id_priv->timeout_ms;
2177 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
2178 
2179 	ret = ib_post_send_mad(msg, NULL);
2180 	if (ret) {
2181 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2182 		cm_free_msg(msg);
2183 		return ret;
2184 	}
2185 
2186 	cm_id->state = IB_CM_REP_SENT;
2187 	cm_id_priv->msg = msg;
2188 	cm_id_priv->initiator_depth = param->initiator_depth;
2189 	cm_id_priv->responder_resources = param->responder_resources;
2190 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
2191 	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
2192 
2193 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2194 	return ret;
2195 }
2196 EXPORT_SYMBOL(ib_send_cm_rep);
2197 
2198 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
2199 			  struct cm_id_private *cm_id_priv,
2200 			  const void *private_data,
2201 			  u8 private_data_len)
2202 {
2203 	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
2204 	IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
2205 		be32_to_cpu(cm_id_priv->id.local_id));
2206 	IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
2207 		be32_to_cpu(cm_id_priv->id.remote_id));
2208 
2209 	if (private_data && private_data_len)
2210 		IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
2211 			    private_data_len);
2212 }
2213 
2214 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2215 		   const void *private_data,
2216 		   u8 private_data_len)
2217 {
2218 	struct cm_id_private *cm_id_priv;
2219 	struct ib_mad_send_buf *msg;
2220 	unsigned long flags;
2221 	void *data;
2222 	int ret;
2223 
2224 	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
2225 		return -EINVAL;
2226 
2227 	data = cm_copy_private_data(private_data, private_data_len);
2228 	if (IS_ERR(data))
2229 		return PTR_ERR(data);
2230 
2231 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2232 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2233 	if (cm_id->state != IB_CM_REP_RCVD &&
2234 	    cm_id->state != IB_CM_MRA_REP_SENT) {
2235 		pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2236 			 be32_to_cpu(cm_id->local_id), cm_id->state);
2237 		ret = -EINVAL;
2238 		goto error;
2239 	}
2240 
2241 	ret = cm_alloc_msg(cm_id_priv, &msg);
2242 	if (ret)
2243 		goto error;
2244 
2245 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2246 		      private_data, private_data_len);
2247 
2248 	ret = ib_post_send_mad(msg, NULL);
2249 	if (ret) {
2250 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2251 		cm_free_msg(msg);
2252 		kfree(data);
2253 		return ret;
2254 	}
2255 
2256 	cm_id->state = IB_CM_ESTABLISHED;
2257 	cm_set_private_data(cm_id_priv, data, private_data_len);
2258 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2259 	return 0;
2260 
2261 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2262 	kfree(data);
2263 	return ret;
2264 }
2265 EXPORT_SYMBOL(ib_send_cm_rtu);
2266 
2267 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2268 {
2269 	struct cm_rep_msg *rep_msg;
2270 	struct ib_cm_rep_event_param *param;
2271 
2272 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2273 	param = &work->cm_event.param.rep_rcvd;
2274 	param->remote_ca_guid =
2275 		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
2276 	param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
2277 	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2278 	param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
2279 	param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
2280 	param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
2281 	param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
2282 	param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
2283 	param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
2284 	param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
2285 	param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
2286 	work->cm_event.private_data =
2287 		IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
2288 }
2289 
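/*
 * A duplicate REP arrived: resend the RTU if the connection is already
 * established, or the MRA if we are still in IB_CM_MRA_REP_SENT.
 */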
2290 static void cm_dup_rep_handler(struct cm_work *work)
2291 {
2292 	struct cm_id_private *cm_id_priv;
2293 	struct cm_rep_msg *rep_msg;
2294 	struct ib_mad_send_buf *msg = NULL;
2295 	int ret;
2296 
2297 	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2298 	cm_id_priv = cm_acquire_id(
2299 		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
2300 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
2301 	if (!cm_id_priv)
2302 		return;
2303 
2304 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2305 			counter[CM_REP_COUNTER]);
2306 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2307 	if (ret)
2308 		goto deref;
2309 
2310 	spin_lock_irq(&cm_id_priv->lock);
2311 	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2312 		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2313 			      cm_id_priv->private_data,
2314 			      cm_id_priv->private_data_len);
2315 	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2316 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2317 			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2318 			      cm_id_priv->private_data,
2319 			      cm_id_priv->private_data_len);
2320 	else
2321 		goto unlock;
2322 	spin_unlock_irq(&cm_id_priv->lock);
2323 
2324 	ret = ib_post_send_mad(msg, NULL);
2325 	if (ret)
2326 		goto free;
2327 	goto deref;
2328 
2329 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2330 free:	cm_free_msg(msg);
2331 deref:	cm_deref_id(cm_id_priv);
2332 }
2333 
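/*
 * Handle a REP on the active side: validate the connection state, insert
 * the remote comm ID and QPN into the timewait trees to detect duplicate
 * REPs and stale connections, record the negotiated parameters and pass
 * the event to the consumer.
 */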
2334 static int cm_rep_handler(struct cm_work *work)
2335 {
2336 	struct cm_id_private *cm_id_priv;
2337 	struct cm_rep_msg *rep_msg;
2338 	int ret;
2339 	struct cm_id_private *cur_cm_id_priv;
2340 	struct ib_cm_id *cm_id;
2341 	struct cm_timewait_info *timewait_info;
2342 
2343 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2344 	cm_id_priv = cm_acquire_id(
2345 		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
2346 	if (!cm_id_priv) {
2347 		cm_dup_rep_handler(work);
2348 		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2349 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2350 		return -EINVAL;
2351 	}
2352 
2353 	cm_format_rep_event(work, cm_id_priv->qp_type);
2354 
2355 	spin_lock_irq(&cm_id_priv->lock);
2356 	switch (cm_id_priv->id.state) {
2357 	case IB_CM_REQ_SENT:
2358 	case IB_CM_MRA_REQ_RCVD:
2359 		break;
2360 	default:
2361 		spin_unlock_irq(&cm_id_priv->lock);
2362 		ret = -EINVAL;
2363 		pr_debug(
2364 			"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2365 			__func__, cm_id_priv->id.state,
2366 			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
2367 			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2368 		goto error;
2369 	}
2370 
2371 	cm_id_priv->timewait_info->work.remote_id =
2372 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
2373 	cm_id_priv->timewait_info->remote_ca_guid =
2374 		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
2375 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2376 
2377 	spin_lock(&cm.lock);
2378 	/* Check for duplicate REP. */
2379 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2380 		spin_unlock(&cm.lock);
2381 		spin_unlock_irq(&cm_id_priv->lock);
2382 		ret = -EINVAL;
2383 		pr_debug("%s: Failed to insert remote id %d\n", __func__,
2384 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2385 		goto error;
2386 	}
2387 	/* Check for a stale connection. */
2388 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2389 	if (timewait_info) {
2390 		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2391 			 &cm.remote_id_table);
2392 		cm_id_priv->timewait_info->inserted_remote_id = 0;
2393 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2394 					   timewait_info->work.remote_id);
2395 
2396 		spin_unlock(&cm.lock);
2397 		spin_unlock_irq(&cm_id_priv->lock);
2398 		cm_issue_rej(work->port, work->mad_recv_wc,
2399 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2400 			     NULL, 0);
2401 		ret = -EINVAL;
2402 		pr_debug(
2403 			"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2404 			__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
2405 			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2406 
2407 		if (cur_cm_id_priv) {
2408 			cm_id = &cur_cm_id_priv->id;
2409 			ib_send_cm_dreq(cm_id, NULL, 0);
2410 			cm_deref_id(cur_cm_id_priv);
2411 		}
2412 
2413 		goto error;
2414 	}
2415 	spin_unlock(&cm.lock);
2416 
2417 	cm_id_priv->id.state = IB_CM_REP_RCVD;
2418 	cm_id_priv->id.remote_id =
2419 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
2420 	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2421 	cm_id_priv->initiator_depth =
2422 		IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
2423 	cm_id_priv->responder_resources =
2424 		IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
2425 	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
2426 	cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
2427 	cm_id_priv->target_ack_delay =
2428 		IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
2429 	cm_id_priv->av.timeout =
2430 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2431 				       cm_id_priv->av.timeout - 1);
2432 	cm_id_priv->alt_av.timeout =
2433 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2434 				       cm_id_priv->alt_av.timeout - 1);
2435 
2436 	/* todo: handle peer_to_peer */
2437 
2438 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2439 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2440 	if (!ret)
2441 		list_add_tail(&work->list, &cm_id_priv->work_list);
2442 	spin_unlock_irq(&cm_id_priv->lock);
2443 
2444 	if (ret)
2445 		cm_process_work(cm_id_priv, work);
2446 	else
2447 		cm_deref_id(cm_id_priv);
2448 	return 0;
2449 
2450 error:
2451 	cm_deref_id(cm_id_priv);
2452 	return ret;
2453 }
2454 
2455 static int cm_establish_handler(struct cm_work *work)
2456 {
2457 	struct cm_id_private *cm_id_priv;
2458 	int ret;
2459 
2460 	/* See comment in cm_establish about lookup. */
2461 	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2462 	if (!cm_id_priv)
2463 		return -EINVAL;
2464 
2465 	spin_lock_irq(&cm_id_priv->lock);
2466 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2467 		spin_unlock_irq(&cm_id_priv->lock);
2468 		goto out;
2469 	}
2470 
2471 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2472 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2473 	if (!ret)
2474 		list_add_tail(&work->list, &cm_id_priv->work_list);
2475 	spin_unlock_irq(&cm_id_priv->lock);
2476 
2477 	if (ret)
2478 		cm_process_work(cm_id_priv, work);
2479 	else
2480 		cm_deref_id(cm_id_priv);
2481 	return 0;
2482 out:
2483 	cm_deref_id(cm_id_priv);
2484 	return -EINVAL;
2485 }
2486 
2487 static int cm_rtu_handler(struct cm_work *work)
2488 {
2489 	struct cm_id_private *cm_id_priv;
2490 	struct cm_rtu_msg *rtu_msg;
2491 	int ret;
2492 
2493 	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2494 	cm_id_priv = cm_acquire_id(
2495 		cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
2496 		cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
2497 	if (!cm_id_priv)
2498 		return -EINVAL;
2499 
2500 	work->cm_event.private_data =
2501 		IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
2502 
2503 	spin_lock_irq(&cm_id_priv->lock);
2504 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2505 	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2506 		spin_unlock_irq(&cm_id_priv->lock);
2507 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2508 				counter[CM_RTU_COUNTER]);
2509 		goto out;
2510 	}
2511 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
2512 
2513 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2514 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2515 	if (!ret)
2516 		list_add_tail(&work->list, &cm_id_priv->work_list);
2517 	spin_unlock_irq(&cm_id_priv->lock);
2518 
2519 	if (ret)
2520 		cm_process_work(cm_id_priv, work);
2521 	else
2522 		cm_deref_id(cm_id_priv);
2523 	return 0;
2524 out:
2525 	cm_deref_id(cm_id_priv);
2526 	return -EINVAL;
2527 }
2528 
2529 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2530 			  struct cm_id_private *cm_id_priv,
2531 			  const void *private_data,
2532 			  u8 private_data_len)
2533 {
2534 	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2535 			  cm_form_tid(cm_id_priv));
2536 	IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
2537 		be32_to_cpu(cm_id_priv->id.local_id));
2538 	IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
2539 		be32_to_cpu(cm_id_priv->id.remote_id));
2540 	IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
2541 		be32_to_cpu(cm_id_priv->remote_qpn));
2542 
2543 	if (private_data && private_data_len)
2544 		IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
2545 			    private_data_len);
2546 }
2547 
2548 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2549 		    const void *private_data,
2550 		    u8 private_data_len)
2551 {
2552 	struct cm_id_private *cm_id_priv;
2553 	struct ib_mad_send_buf *msg;
2554 	unsigned long flags;
2555 	int ret;
2556 
2557 	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2558 		return -EINVAL;
2559 
2560 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2561 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2562 	if (cm_id->state != IB_CM_ESTABLISHED) {
2563 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2564 			 be32_to_cpu(cm_id->local_id), cm_id->state);
2565 		ret = -EINVAL;
2566 		goto out;
2567 	}
2568 
2569 	if (cm_id->lap_state == IB_CM_LAP_SENT ||
2570 	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2571 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2572 
2573 	ret = cm_alloc_msg(cm_id_priv, &msg);
2574 	if (ret) {
2575 		cm_enter_timewait(cm_id_priv);
2576 		goto out;
2577 	}
2578 
2579 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2580 		       private_data, private_data_len);
2581 	msg->timeout_ms = cm_id_priv->timeout_ms;
2582 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2583 
2584 	ret = ib_post_send_mad(msg, NULL);
2585 	if (ret) {
2586 		cm_enter_timewait(cm_id_priv);
2587 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2588 		cm_free_msg(msg);
2589 		return ret;
2590 	}
2591 
2592 	cm_id->state = IB_CM_DREQ_SENT;
2593 	cm_id_priv->msg = msg;
2594 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2595 	return ret;
2596 }
2597 EXPORT_SYMBOL(ib_send_cm_dreq);
2598 
2599 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2600 			  struct cm_id_private *cm_id_priv,
2601 			  const void *private_data,
2602 			  u8 private_data_len)
2603 {
2604 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2605 	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
2606 		be32_to_cpu(cm_id_priv->id.local_id));
2607 	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
2608 		be32_to_cpu(cm_id_priv->id.remote_id));
2609 
2610 	if (private_data && private_data_len)
2611 		IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
2612 			    private_data_len);
2613 }
2614 
2615 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2616 		    const void *private_data,
2617 		    u8 private_data_len)
2618 {
2619 	struct cm_id_private *cm_id_priv;
2620 	struct ib_mad_send_buf *msg;
2621 	unsigned long flags;
2622 	void *data;
2623 	int ret;
2624 
2625 	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2626 		return -EINVAL;
2627 
2628 	data = cm_copy_private_data(private_data, private_data_len);
2629 	if (IS_ERR(data))
2630 		return PTR_ERR(data);
2631 
2632 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2633 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2634 	if (cm_id->state != IB_CM_DREQ_RCVD) {
2635 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2636 		kfree(data);
2637 		pr_debug("%s: local_id %d, cm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2638 			 __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2639 		return -EINVAL;
2640 	}
2641 
2642 	cm_set_private_data(cm_id_priv, data, private_data_len);
2643 	cm_enter_timewait(cm_id_priv);
2644 
2645 	ret = cm_alloc_msg(cm_id_priv, &msg);
2646 	if (ret)
2647 		goto out;
2648 
2649 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2650 		       private_data, private_data_len);
2651 
2652 	ret = ib_post_send_mad(msg, NULL);
2653 	if (ret) {
2654 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2655 		cm_free_msg(msg);
2656 		return ret;
2657 	}
2658 
2659 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2660 	return ret;
2661 }
2662 EXPORT_SYMBOL(ib_send_cm_drep);
2663 
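/*
 * Send a DREP built directly from the received DREQ.  Used when no
 * matching cm_id exists, so the remote side can still complete its
 * disconnect.
 */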
2664 static int cm_issue_drep(struct cm_port *port,
2665 			 struct ib_mad_recv_wc *mad_recv_wc)
2666 {
2667 	struct ib_mad_send_buf *msg = NULL;
2668 	struct cm_dreq_msg *dreq_msg;
2669 	struct cm_drep_msg *drep_msg;
2670 	int ret;
2671 
2672 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2673 	if (ret)
2674 		return ret;
2675 
2676 	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2677 	drep_msg = (struct cm_drep_msg *) msg->mad;
2678 
2679 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2680 	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
2681 		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
2682 	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
2683 		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
2684 
2685 	ret = ib_post_send_mad(msg, NULL);
2686 	if (ret)
2687 		cm_free_msg(msg);
2688 
2689 	return ret;
2690 }
2691 
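/*
 * Handle a DREQ: verify that it targets our QP, cancel any outstanding
 * transmit and move to IB_CM_DREQ_RCVD.  A DREQ received while in
 * timewait is answered with a DREP without changing state.
 */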
2692 static int cm_dreq_handler(struct cm_work *work)
2693 {
2694 	struct cm_id_private *cm_id_priv;
2695 	struct cm_dreq_msg *dreq_msg;
2696 	struct ib_mad_send_buf *msg = NULL;
2697 	int ret;
2698 
2699 	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2700 	cm_id_priv = cm_acquire_id(
2701 		cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
2702 		cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
2703 	if (!cm_id_priv) {
2704 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2705 				counter[CM_DREQ_COUNTER]);
2706 		cm_issue_drep(work->port, work->mad_recv_wc);
2707 		pr_debug(
2708 			"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2709 			__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
2710 			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
2711 		return -EINVAL;
2712 	}
2713 
2714 	work->cm_event.private_data =
2715 		IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
2716 
2717 	spin_lock_irq(&cm_id_priv->lock);
2718 	if (cm_id_priv->local_qpn !=
2719 	    cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
2720 		goto unlock;
2721 
2722 	switch (cm_id_priv->id.state) {
2723 	case IB_CM_REP_SENT:
2724 	case IB_CM_DREQ_SENT:
2725 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2726 		break;
2727 	case IB_CM_ESTABLISHED:
2728 		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2729 		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2730 			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2731 		break;
2732 	case IB_CM_MRA_REP_RCVD:
2733 		break;
2734 	case IB_CM_TIMEWAIT:
2735 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2736 				counter[CM_DREQ_COUNTER]);
2737 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2738 		if (IS_ERR(msg))
2739 			goto unlock;
2740 
2741 		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2742 			       cm_id_priv->private_data,
2743 			       cm_id_priv->private_data_len);
2744 		spin_unlock_irq(&cm_id_priv->lock);
2745 
2746 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2747 		    ib_post_send_mad(msg, NULL))
2748 			cm_free_msg(msg);
2749 		goto deref;
2750 	case IB_CM_DREQ_RCVD:
2751 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2752 				counter[CM_DREQ_COUNTER]);
2753 		goto unlock;
2754 	default:
2755 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2756 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2757 			 cm_id_priv->id.state);
2758 		goto unlock;
2759 	}
2760 	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2761 	cm_id_priv->tid = dreq_msg->hdr.tid;
2762 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2763 	if (!ret)
2764 		list_add_tail(&work->list, &cm_id_priv->work_list);
2765 	spin_unlock_irq(&cm_id_priv->lock);
2766 
2767 	if (ret)
2768 		cm_process_work(cm_id_priv, work);
2769 	else
2770 		cm_deref_id(cm_id_priv);
2771 	return 0;
2772 
2773 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2774 deref:	cm_deref_id(cm_id_priv);
2775 	return -EINVAL;
2776 }
2777 
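/*
 * Handle a DREP: the disconnect handshake is complete, so move the
 * connection into timewait and deliver the event to the consumer.
 */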
2778 static int cm_drep_handler(struct cm_work *work)
2779 {
2780 	struct cm_id_private *cm_id_priv;
2781 	struct cm_drep_msg *drep_msg;
2782 	int ret;
2783 
2784 	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2785 	cm_id_priv = cm_acquire_id(
2786 		cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
2787 		cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
2788 	if (!cm_id_priv)
2789 		return -EINVAL;
2790 
2791 	work->cm_event.private_data =
2792 		IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
2793 
2794 	spin_lock_irq(&cm_id_priv->lock);
2795 	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2796 	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2797 		spin_unlock_irq(&cm_id_priv->lock);
2798 		goto out;
2799 	}
2800 	cm_enter_timewait(cm_id_priv);
2801 
2802 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2803 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2804 	if (!ret)
2805 		list_add_tail(&work->list, &cm_id_priv->work_list);
2806 	spin_unlock_irq(&cm_id_priv->lock);
2807 
2808 	if (ret)
2809 		cm_process_work(cm_id_priv, work);
2810 	else
2811 		cm_deref_id(cm_id_priv);
2812 	return 0;
2813 out:
2814 	cm_deref_id(cm_id_priv);
2815 	return -EINVAL;
2816 }
2817 
2818 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2819 		   enum ib_cm_rej_reason reason,
2820 		   void *ari,
2821 		   u8 ari_length,
2822 		   const void *private_data,
2823 		   u8 private_data_len)
2824 {
2825 	struct cm_id_private *cm_id_priv;
2826 	struct ib_mad_send_buf *msg;
2827 	unsigned long flags;
2828 	int ret;
2829 
2830 	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2831 	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2832 		return -EINVAL;
2833 
2834 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2835 
2836 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2837 	switch (cm_id->state) {
2838 	case IB_CM_REQ_SENT:
2839 	case IB_CM_MRA_REQ_RCVD:
2840 	case IB_CM_REQ_RCVD:
2841 	case IB_CM_MRA_REQ_SENT:
2842 	case IB_CM_REP_RCVD:
2843 	case IB_CM_MRA_REP_SENT:
2844 		ret = cm_alloc_msg(cm_id_priv, &msg);
2845 		if (!ret)
2846 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2847 				      cm_id_priv, reason, ari, ari_length,
2848 				      private_data, private_data_len);
2849 
2850 		cm_reset_to_idle(cm_id_priv);
2851 		break;
2852 	case IB_CM_REP_SENT:
2853 	case IB_CM_MRA_REP_RCVD:
2854 		ret = cm_alloc_msg(cm_id_priv, &msg);
2855 		if (!ret)
2856 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2857 				      cm_id_priv, reason, ari, ari_length,
2858 				      private_data, private_data_len);
2859 
2860 		cm_enter_timewait(cm_id_priv);
2861 		break;
2862 	default:
2863 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2864 			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2865 		ret = -EINVAL;
2866 		goto out;
2867 	}
2868 
2869 	if (ret)
2870 		goto out;
2871 
2872 	ret = ib_post_send_mad(msg, NULL);
2873 	if (ret)
2874 		cm_free_msg(msg);
2875 
2876 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2877 	return ret;
2878 }
2879 EXPORT_SYMBOL(ib_send_cm_rej);
2880 
2881 static void cm_format_rej_event(struct cm_work *work)
2882 {
2883 	struct cm_rej_msg *rej_msg;
2884 	struct ib_cm_rej_event_param *param;
2885 
2886 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2887 	param = &work->cm_event.param.rej_rcvd;
2888 	param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
2889 	param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
2890 	param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
2891 	work->cm_event.private_data =
2892 		IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
2893 }
2894 
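/*
 * Find the cm_id that a received REJ refers to.  A timeout REJ is matched
 * through the timewait table using the CA GUID carried in the ARI; a
 * rejected REQ is matched by local comm ID alone (the remote ID is not
 * yet known); everything else is matched by both comm IDs.
 */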
2895 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2896 {
2897 	struct cm_timewait_info *timewait_info;
2898 	struct cm_id_private *cm_id_priv;
2899 	__be32 remote_id;
2900 
2901 	remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
2902 
2903 	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
2904 		spin_lock_irq(&cm.lock);
2905 		timewait_info = cm_find_remote_id(
2906 			*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
2907 			remote_id);
2908 		if (!timewait_info) {
2909 			spin_unlock_irq(&cm.lock);
2910 			return NULL;
2911 		}
2912 		cm_id_priv =
2913 			cm_acquire_id(timewait_info->work.local_id, remote_id);
2914 		spin_unlock_irq(&cm.lock);
2915 	} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
2916 		   CM_MSG_RESPONSE_REQ)
2917 		cm_id_priv = cm_acquire_id(
2918 			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
2919 			0);
2920 	else
2921 		cm_id_priv = cm_acquire_id(
2922 			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
2923 			remote_id);
2924 
2925 	return cm_id_priv;
2926 }
2927 
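/*
 * Handle a REJ: cancel any outstanding transmit and, depending on the
 * current state and reject reason, either reset the cm_id to idle or move
 * it into timewait before reporting the event.
 */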
2928 static int cm_rej_handler(struct cm_work *work)
2929 {
2930 	struct cm_id_private *cm_id_priv;
2931 	struct cm_rej_msg *rej_msg;
2932 	int ret;
2933 
2934 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2935 	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2936 	if (!cm_id_priv)
2937 		return -EINVAL;
2938 
2939 	cm_format_rej_event(work);
2940 
2941 	spin_lock_irq(&cm_id_priv->lock);
2942 	switch (cm_id_priv->id.state) {
2943 	case IB_CM_REQ_SENT:
2944 	case IB_CM_MRA_REQ_RCVD:
2945 	case IB_CM_REP_SENT:
2946 	case IB_CM_MRA_REP_RCVD:
2947 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2948 		/* fall through */
2949 	case IB_CM_REQ_RCVD:
2950 	case IB_CM_MRA_REQ_SENT:
2951 		if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
2952 			cm_enter_timewait(cm_id_priv);
2953 		else
2954 			cm_reset_to_idle(cm_id_priv);
2955 		break;
2956 	case IB_CM_DREQ_SENT:
2957 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2958 		/* fall through */
2959 	case IB_CM_REP_RCVD:
2960 	case IB_CM_MRA_REP_SENT:
2961 		cm_enter_timewait(cm_id_priv);
2962 		break;
2963 	case IB_CM_ESTABLISHED:
2964 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2965 		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2966 			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2967 				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2968 					      cm_id_priv->msg);
2969 			cm_enter_timewait(cm_id_priv);
2970 			break;
2971 		}
2972 		/* fall through */
2973 	default:
2974 		spin_unlock_irq(&cm_id_priv->lock);
2975 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2976 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2977 			 cm_id_priv->id.state);
2978 		ret = -EINVAL;
2979 		goto out;
2980 	}
2981 
2982 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2983 	if (!ret)
2984 		list_add_tail(&work->list, &cm_id_priv->work_list);
2985 	spin_unlock_irq(&cm_id_priv->lock);
2986 
2987 	if (ret)
2988 		cm_process_work(cm_id_priv, work);
2989 	else
2990 		cm_deref_id(cm_id_priv);
2991 	return 0;
2992 out:
2993 	cm_deref_id(cm_id_priv);
2994 	return -EINVAL;
2995 }
2996 
2997 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2998 		   u8 service_timeout,
2999 		   const void *private_data,
3000 		   u8 private_data_len)
3001 {
3002 	struct cm_id_private *cm_id_priv;
3003 	struct ib_mad_send_buf *msg;
3004 	enum ib_cm_state cm_state;
3005 	enum ib_cm_lap_state lap_state;
3006 	enum cm_msg_response msg_response;
3007 	void *data;
3008 	unsigned long flags;
3009 	int ret;
3010 
3011 	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
3012 		return -EINVAL;
3013 
3014 	data = cm_copy_private_data(private_data, private_data_len);
3015 	if (IS_ERR(data))
3016 		return PTR_ERR(data);
3017 
3018 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3019 
3020 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3021 	switch (cm_id_priv->id.state) {
3022 	case IB_CM_REQ_RCVD:
3023 		cm_state = IB_CM_MRA_REQ_SENT;
3024 		lap_state = cm_id->lap_state;
3025 		msg_response = CM_MSG_RESPONSE_REQ;
3026 		break;
3027 	case IB_CM_REP_RCVD:
3028 		cm_state = IB_CM_MRA_REP_SENT;
3029 		lap_state = cm_id->lap_state;
3030 		msg_response = CM_MSG_RESPONSE_REP;
3031 		break;
3032 	case IB_CM_ESTABLISHED:
3033 		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
3034 			cm_state = cm_id->state;
3035 			lap_state = IB_CM_MRA_LAP_SENT;
3036 			msg_response = CM_MSG_RESPONSE_OTHER;
3037 			break;
3038 		}
3039 		/* fall through */
3040 	default:
3041 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
3042 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
3043 			 cm_id_priv->id.state);
3044 		ret = -EINVAL;
3045 		goto error1;
3046 	}
3047 
3048 	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
3049 		ret = cm_alloc_msg(cm_id_priv, &msg);
3050 		if (ret)
3051 			goto error1;
3052 
3053 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3054 			      msg_response, service_timeout,
3055 			      private_data, private_data_len);
3056 		ret = ib_post_send_mad(msg, NULL);
3057 		if (ret)
3058 			goto error2;
3059 	}
3060 
3061 	cm_id->state = cm_state;
3062 	cm_id->lap_state = lap_state;
3063 	cm_id_priv->service_timeout = service_timeout;
3064 	cm_set_private_data(cm_id_priv, data, private_data_len);
3065 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3066 	return 0;
3067 
3068 error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3069 	kfree(data);
3070 	return ret;
3071 
3072 error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3073 	kfree(data);
3074 	cm_free_msg(msg);
3075 	return ret;
3076 }
3077 EXPORT_SYMBOL(ib_send_cm_mra);
3078 
3079 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
3080 {
3081 	switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
3082 	case CM_MSG_RESPONSE_REQ:
3083 		return cm_acquire_id(
3084 			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
3085 			0);
3086 	case CM_MSG_RESPONSE_REP:
3087 	case CM_MSG_RESPONSE_OTHER:
3088 		return cm_acquire_id(
3089 			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
3090 			cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
3091 	default:
3092 		return NULL;
3093 	}
3094 }
3095 
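/*
 * Handle an MRA: extend the timeout of the outstanding REQ, REP or LAP by
 * the service timeout requested by the peer and move to the matching
 * MRA-received state.
 */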
3096 static int cm_mra_handler(struct cm_work *work)
3097 {
3098 	struct cm_id_private *cm_id_priv;
3099 	struct cm_mra_msg *mra_msg;
3100 	int timeout, ret;
3101 
3102 	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
3103 	cm_id_priv = cm_acquire_mraed_id(mra_msg);
3104 	if (!cm_id_priv)
3105 		return -EINVAL;
3106 
3107 	work->cm_event.private_data =
3108 		IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
3109 	work->cm_event.param.mra_rcvd.service_timeout =
3110 		IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
3111 	timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
3112 		  cm_convert_to_ms(cm_id_priv->av.timeout);
3113 
3114 	spin_lock_irq(&cm_id_priv->lock);
3115 	switch (cm_id_priv->id.state) {
3116 	case IB_CM_REQ_SENT:
3117 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3118 			    CM_MSG_RESPONSE_REQ ||
3119 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3120 				  cm_id_priv->msg, timeout))
3121 			goto out;
3122 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
3123 		break;
3124 	case IB_CM_REP_SENT:
3125 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3126 			    CM_MSG_RESPONSE_REP ||
3127 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3128 				  cm_id_priv->msg, timeout))
3129 			goto out;
3130 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
3131 		break;
3132 	case IB_CM_ESTABLISHED:
3133 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3134 			    CM_MSG_RESPONSE_OTHER ||
3135 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
3136 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3137 				  cm_id_priv->msg, timeout)) {
3138 			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
3139 				atomic_long_inc(&work->port->
3140 						counter_group[CM_RECV_DUPLICATES].
3141 						counter[CM_MRA_COUNTER]);
3142 			goto out;
3143 		}
3144 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
3145 		break;
3146 	case IB_CM_MRA_REQ_RCVD:
3147 	case IB_CM_MRA_REP_RCVD:
3148 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3149 				counter[CM_MRA_COUNTER]);
3150 		/* fall through */
3151 	default:
3152 		pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
3153 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
3154 			 cm_id_priv->id.state);
3155 		goto out;
3156 	}
3157 
3158 	cm_id_priv->msg->context[1] = (void *) (unsigned long)
3159 				      cm_id_priv->id.state;
3160 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3161 	if (!ret)
3162 		list_add_tail(&work->list, &cm_id_priv->work_list);
3163 	spin_unlock_irq(&cm_id_priv->lock);
3164 
3165 	if (ret)
3166 		cm_process_work(cm_id_priv, work);
3167 	else
3168 		cm_deref_id(cm_id_priv);
3169 	return 0;
3170 out:
3171 	spin_unlock_irq(&cm_id_priv->lock);
3172 	cm_deref_id(cm_id_priv);
3173 	return -EINVAL;
3174 }
3175 
3176 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
3177 					struct sa_path_rec *path)
3178 {
3179 	u32 lid;
3180 
3181 	if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
3182 		sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
3183 					       lap_msg));
3184 		sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
3185 					       lap_msg));
3186 	} else {
3187 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
3188 			CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
3189 		sa_path_set_dlid(path, lid);
3190 
3191 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
3192 			CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
3193 		sa_path_set_slid(path, lid);
3194 	}
3195 }
3196 
3197 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
3198 				    struct sa_path_rec *path,
3199 				    struct cm_lap_msg *lap_msg)
3200 {
3201 	path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
3202 	path->sgid =
3203 		*IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
3204 	path->flow_label =
3205 		cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
3206 	path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
3207 	path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
3208 	path->reversible = 1;
3209 	path->pkey = cm_id_priv->pkey;
3210 	path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
3211 	path->mtu_selector = IB_SA_EQ;
3212 	path->mtu = cm_id_priv->path_mtu;
3213 	path->rate_selector = IB_SA_EQ;
3214 	path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
3215 	path->packet_life_time_selector = IB_SA_EQ;
3216 	path->packet_life_time =
3217 		IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
3218 	path->packet_life_time -= (path->packet_life_time > 0);
3219 	cm_format_path_lid_from_lap(lap_msg, path);
3220 }
3221 
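/*
 * Handle a LAP: build the proposed alternate path from the message,
 * initialize the alternate address vector and report the request to the
 * consumer.  A duplicate LAP is answered by resending the MRA.
 */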
3222 static int cm_lap_handler(struct cm_work *work)
3223 {
3224 	struct cm_id_private *cm_id_priv;
3225 	struct cm_lap_msg *lap_msg;
3226 	struct ib_cm_lap_event_param *param;
3227 	struct ib_mad_send_buf *msg = NULL;
3228 	int ret;
3229 
3230 	/* Alternate path messages are currently not supported for the
3231 	 * RoCE link layer.
3232 	 */
3233 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3234 			       work->port->port_num))
3235 		return -EINVAL;
3236 
3237 	/* todo: verify LAP request and send reject APR if invalid. */
3238 	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
3239 	cm_id_priv = cm_acquire_id(
3240 		cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
3241 		cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
3242 	if (!cm_id_priv)
3243 		return -EINVAL;
3244 
3245 	param = &work->cm_event.param.lap_rcvd;
3246 	memset(&work->path[0], 0, sizeof(work->path[0]));
3247 	cm_path_set_rec_type(work->port->cm_dev->ib_device,
3248 			     work->port->port_num, &work->path[0],
3249 			     IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
3250 					     lap_msg));
3251 	param->alternate_path = &work->path[0];
3252 	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3253 	work->cm_event.private_data =
3254 		IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
3255 
3256 	spin_lock_irq(&cm_id_priv->lock);
3257 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3258 		goto unlock;
3259 
3260 	switch (cm_id_priv->id.lap_state) {
3261 	case IB_CM_LAP_UNINIT:
3262 	case IB_CM_LAP_IDLE:
3263 		break;
3264 	case IB_CM_MRA_LAP_SENT:
3265 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3266 				counter[CM_LAP_COUNTER]);
3267 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3268 		if (IS_ERR(msg))
3269 			goto unlock;
3270 
3271 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3272 			      CM_MSG_RESPONSE_OTHER,
3273 			      cm_id_priv->service_timeout,
3274 			      cm_id_priv->private_data,
3275 			      cm_id_priv->private_data_len);
3276 		spin_unlock_irq(&cm_id_priv->lock);
3277 
3278 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3279 		    ib_post_send_mad(msg, NULL))
3280 			cm_free_msg(msg);
3281 		goto deref;
3282 	case IB_CM_LAP_RCVD:
3283 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3284 				counter[CM_LAP_COUNTER]);
3285 		goto unlock;
3286 	default:
3287 		goto unlock;
3288 	}
3289 
3290 	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3291 				 work->mad_recv_wc->recv_buf.grh,
3292 				 &cm_id_priv->av);
3293 	if (ret)
3294 		goto unlock;
3295 
3296 	ret = cm_init_av_by_path(param->alternate_path, NULL,
3297 				 &cm_id_priv->alt_av, cm_id_priv);
3298 	if (ret)
3299 		goto unlock;
3300 
3301 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3302 	cm_id_priv->tid = lap_msg->hdr.tid;
3303 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3304 	if (!ret)
3305 		list_add_tail(&work->list, &cm_id_priv->work_list);
3306 	spin_unlock_irq(&cm_id_priv->lock);
3307 
3308 	if (ret)
3309 		cm_process_work(cm_id_priv, work);
3310 	else
3311 		cm_deref_id(cm_id_priv);
3312 	return 0;
3313 
3314 unlock:	spin_unlock_irq(&cm_id_priv->lock);
3315 deref:	cm_deref_id(cm_id_priv);
3316 	return -EINVAL;
3317 }
3318 
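/*
 * Handle an APR: the peer has answered our LAP, so cancel the outstanding
 * LAP transmit, return the lap_state to idle and report the result.
 */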
3319 static int cm_apr_handler(struct cm_work *work)
3320 {
3321 	struct cm_id_private *cm_id_priv;
3322 	struct cm_apr_msg *apr_msg;
3323 	int ret;
3324 
3325 	/* Alternate path messages are currently not supported for the
3326 	 * RoCE link layer.
3327 	 */
3328 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3329 			       work->port->port_num))
3330 		return -EINVAL;
3331 
3332 	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3333 	cm_id_priv = cm_acquire_id(
3334 		cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
3335 		cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
3336 	if (!cm_id_priv)
3337 		return -EINVAL; /* Unmatched reply. */
3338 
3339 	work->cm_event.param.apr_rcvd.ap_status =
3340 		IBA_GET(CM_APR_AR_STATUS, apr_msg);
3341 	work->cm_event.param.apr_rcvd.apr_info =
3342 		IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
3343 	work->cm_event.param.apr_rcvd.info_len =
3344 		IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
3345 	work->cm_event.private_data =
3346 		IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
3347 
3348 	spin_lock_irq(&cm_id_priv->lock);
3349 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3350 	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3351 	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3352 		spin_unlock_irq(&cm_id_priv->lock);
3353 		goto out;
3354 	}
3355 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3356 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3357 	cm_id_priv->msg = NULL;
3358 
3359 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3360 	if (!ret)
3361 		list_add_tail(&work->list, &cm_id_priv->work_list);
3362 	spin_unlock_irq(&cm_id_priv->lock);
3363 
3364 	if (ret)
3365 		cm_process_work(cm_id_priv, work);
3366 	else
3367 		cm_deref_id(cm_id_priv);
3368 	return 0;
3369 out:
3370 	cm_deref_id(cm_id_priv);
3371 	return -EINVAL;
3372 }
3373 
3374 static int cm_timewait_handler(struct cm_work *work)
3375 {
3376 	struct cm_timewait_info *timewait_info;
3377 	struct cm_id_private *cm_id_priv;
3378 	int ret;
3379 
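	/*
	 * Runs when the timewait period for a connection expires: drop the
	 * timewait_info from the global list and, if the cm_id is still in
	 * TIMEWAIT for the same remote QPN, move it to IDLE and deliver an
	 * IB_CM_TIMEWAIT_EXIT event to the consumer.
	 */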
3380 	timewait_info = container_of(work, struct cm_timewait_info, work);
3381 	spin_lock_irq(&cm.lock);
3382 	list_del(&timewait_info->list);
3383 	spin_unlock_irq(&cm.lock);
3384 
3385 	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3386 				   timewait_info->work.remote_id);
3387 	if (!cm_id_priv)
3388 		return -EINVAL;
3389 
3390 	spin_lock_irq(&cm_id_priv->lock);
3391 	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3392 	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3393 		spin_unlock_irq(&cm_id_priv->lock);
3394 		goto out;
3395 	}
3396 	cm_id_priv->id.state = IB_CM_IDLE;
3397 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3398 	if (!ret)
3399 		list_add_tail(&work->list, &cm_id_priv->work_list);
3400 	spin_unlock_irq(&cm_id_priv->lock);
3401 
3402 	if (ret)
3403 		cm_process_work(cm_id_priv, work);
3404 	else
3405 		cm_deref_id(cm_id_priv);
3406 	return 0;
3407 out:
3408 	cm_deref_id(cm_id_priv);
3409 	return -EINVAL;
3410 }
3411 
3412 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3413 			       struct cm_id_private *cm_id_priv,
3414 			       struct ib_cm_sidr_req_param *param)
3415 {
3416 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3417 			  cm_form_tid(cm_id_priv));
3418 	IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
3419 		be32_to_cpu(cm_id_priv->id.local_id));
3420 	IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
3421 		be16_to_cpu(param->path->pkey));
3422 	IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
3423 		be64_to_cpu(param->service_id));
3424 
3425 	if (param->private_data && param->private_data_len)
3426 		IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
3427 			    param->private_data, param->private_data_len);
3428 }
3429 
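/*
 * Send a Service ID Resolution (SIDR) request.  SIDR is a lightweight CM
 * exchange that resolves a service ID to a remote UD QPN and Q_Key without
 * setting up a full connection; the answer is reported back to the caller
 * as an IB_CM_SIDR_REP_RECEIVED event.
 */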
3430 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3431 			struct ib_cm_sidr_req_param *param)
3432 {
3433 	struct cm_id_private *cm_id_priv;
3434 	struct ib_mad_send_buf *msg;
3435 	unsigned long flags;
3436 	int ret;
3437 
3438 	if (!param->path || (param->private_data &&
3439 	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3440 		return -EINVAL;
3441 
3442 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3443 	ret = cm_init_av_by_path(param->path, param->sgid_attr,
3444 				 &cm_id_priv->av,
3445 				 cm_id_priv);
3446 	if (ret)
3447 		goto out;
3448 
3449 	cm_id->service_id = param->service_id;
3450 	cm_id->service_mask = ~cpu_to_be64(0);
3451 	cm_id_priv->timeout_ms = param->timeout_ms;
3452 	cm_id_priv->max_cm_retries = param->max_cm_retries;
3453 	ret = cm_alloc_msg(cm_id_priv, &msg);
3454 	if (ret)
3455 		goto out;
3456 
3457 	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3458 			   param);
3459 	msg->timeout_ms = cm_id_priv->timeout_ms;
3460 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3461 
3462 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3463 	if (cm_id->state == IB_CM_IDLE)
3464 		ret = ib_post_send_mad(msg, NULL);
3465 	else
3466 		ret = -EINVAL;
3467 
3468 	if (ret) {
3469 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3470 		cm_free_msg(msg);
3471 		goto out;
3472 	}
3473 	cm_id->state = IB_CM_SIDR_REQ_SENT;
3474 	cm_id_priv->msg = msg;
3475 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3476 out:
3477 	return ret;
3478 }
3479 EXPORT_SYMBOL(ib_send_cm_sidr_req);
3480 
3481 static void cm_format_sidr_req_event(struct cm_work *work,
3482 				     const struct cm_id_private *rx_cm_id,
3483 				     struct ib_cm_id *listen_id)
3484 {
3485 	struct cm_sidr_req_msg *sidr_req_msg;
3486 	struct ib_cm_sidr_req_event_param *param;
3487 
3488 	sidr_req_msg = (struct cm_sidr_req_msg *)
3489 				work->mad_recv_wc->recv_buf.mad;
3490 	param = &work->cm_event.param.sidr_req_rcvd;
3491 	param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
3492 	param->listen_id = listen_id;
3493 	param->service_id =
3494 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
3495 	param->bth_pkey = cm_get_bth_pkey(work);
3496 	param->port = work->port->port_num;
3497 	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3498 	work->cm_event.private_data =
3499 		IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
3500 }
3501 
3502 static int cm_sidr_req_handler(struct cm_work *work)
3503 {
3504 	struct ib_cm_id *cm_id;
3505 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3506 	struct cm_sidr_req_msg *sidr_req_msg;
3507 	struct ib_wc *wc;
3508 	int ret;
3509 
3510 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3511 	if (IS_ERR(cm_id))
3512 		return PTR_ERR(cm_id);
3513 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3514 
3515 	/* Record SGID/SLID and request ID for lookup. */
3516 	sidr_req_msg = (struct cm_sidr_req_msg *)
3517 				work->mad_recv_wc->recv_buf.mad;
3518 	wc = work->mad_recv_wc->wc;
3519 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3520 	cm_id_priv->av.dgid.global.interface_id = 0;
3521 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3522 				      work->mad_recv_wc->recv_buf.grh,
3523 				      &cm_id_priv->av);
3524 	if (ret)
3525 		goto out;
3526 
3527 	cm_id_priv->id.remote_id =
3528 		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
3529 	cm_id_priv->tid = sidr_req_msg->hdr.tid;
3530 	atomic_inc(&cm_id_priv->work_count);
3531 
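	/*
	 * Under cm.lock: insert the new id into the remote SIDR table so a
	 * retransmitted REQ is recognised as a duplicate, then look for a
	 * listener on the requested service ID.  The new cm_id inherits the
	 * listener's handler and context before the event is delivered.
	 */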
3532 	spin_lock_irq(&cm.lock);
3533 	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3534 	if (cur_cm_id_priv) {
3535 		spin_unlock_irq(&cm.lock);
3536 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3537 				counter[CM_SIDR_REQ_COUNTER]);
3538 		goto out; /* Duplicate message. */
3539 	}
3540 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3541 	cur_cm_id_priv = cm_find_listen(
3542 		cm_id->device,
3543 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
3544 	if (!cur_cm_id_priv) {
3545 		spin_unlock_irq(&cm.lock);
3546 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3547 		goto out; /* No match. */
3548 	}
3549 	refcount_inc(&cur_cm_id_priv->refcount);
3550 	refcount_inc(&cm_id_priv->refcount);
3551 	spin_unlock_irq(&cm.lock);
3552 
3553 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3554 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
3555 	cm_id_priv->id.service_id =
3556 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
3557 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3558 
3559 	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3560 	cm_process_work(cm_id_priv, work);
3561 	cm_deref_id(cur_cm_id_priv);
3562 	return 0;
3563 out:
3564 	ib_destroy_cm_id(&cm_id_priv->id);
3565 	return -EINVAL;
3566 }
3567 
3568 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3569 			       struct cm_id_private *cm_id_priv,
3570 			       struct ib_cm_sidr_rep_param *param)
3571 {
3572 	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3573 			  cm_id_priv->tid);
3574 	IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
3575 		be32_to_cpu(cm_id_priv->id.remote_id));
3576 	IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
3577 	IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
3578 	IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
3579 		be64_to_cpu(cm_id_priv->id.service_id));
3580 	IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
3581 
3582 	if (param->info && param->info_length)
3583 		IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
3584 			    param->info, param->info_length);
3585 
3586 	if (param->private_data && param->private_data_len)
3587 		IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
3588 			    param->private_data, param->private_data_len);
3589 }
3590 
3591 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3592 			struct ib_cm_sidr_rep_param *param)
3593 {
3594 	struct cm_id_private *cm_id_priv;
3595 	struct ib_mad_send_buf *msg;
3596 	unsigned long flags;
3597 	int ret;
3598 
3599 	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3600 	    (param->private_data &&
3601 	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3602 		return -EINVAL;
3603 
3604 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3605 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3606 	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3607 		ret = -EINVAL;
3608 		goto error;
3609 	}
3610 
3611 	ret = cm_alloc_msg(cm_id_priv, &msg);
3612 	if (ret)
3613 		goto error;
3614 
3615 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3616 			   param);
3617 	ret = ib_post_send_mad(msg, NULL);
3618 	if (ret) {
3619 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3620 		cm_free_msg(msg);
3621 		return ret;
3622 	}
3623 	cm_id->state = IB_CM_IDLE;
3624 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3625 
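	/*
	 * The reply has been posted; the entry added to the remote SIDR
	 * table by cm_sidr_req_handler() is no longer needed, so remove it.
	 */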
3626 	spin_lock_irqsave(&cm.lock, flags);
3627 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3628 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3629 		RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3630 	}
3631 	spin_unlock_irqrestore(&cm.lock, flags);
3632 	return 0;
3633 
3634 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3635 	return ret;
3636 }
3637 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3638 
3639 static void cm_format_sidr_rep_event(struct cm_work *work,
3640 				     const struct cm_id_private *cm_id_priv)
3641 {
3642 	struct cm_sidr_rep_msg *sidr_rep_msg;
3643 	struct ib_cm_sidr_rep_event_param *param;
3644 
3645 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3646 				work->mad_recv_wc->recv_buf.mad;
3647 	param = &work->cm_event.param.sidr_rep_rcvd;
3648 	param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
3649 	param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
3650 	param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
3651 	param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
3652 				      sidr_rep_msg);
3653 	param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
3654 				  sidr_rep_msg);
3655 	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3656 	work->cm_event.private_data =
3657 		IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
3658 }
3659 
3660 static int cm_sidr_rep_handler(struct cm_work *work)
3661 {
3662 	struct cm_sidr_rep_msg *sidr_rep_msg;
3663 	struct cm_id_private *cm_id_priv;
3664 
3665 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3666 				work->mad_recv_wc->recv_buf.mad;
3667 	cm_id_priv = cm_acquire_id(
3668 		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
3669 	if (!cm_id_priv)
3670 		return -EINVAL; /* Unmatched reply. */
3671 
3672 	spin_lock_irq(&cm_id_priv->lock);
3673 	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3674 		spin_unlock_irq(&cm_id_priv->lock);
3675 		goto out;
3676 	}
3677 	cm_id_priv->id.state = IB_CM_IDLE;
3678 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3679 	spin_unlock_irq(&cm_id_priv->lock);
3680 
3681 	cm_format_sidr_rep_event(work, cm_id_priv);
3682 	cm_process_work(cm_id_priv, work);
3683 	return 0;
3684 out:
3685 	cm_deref_id(cm_id_priv);
3686 	return -EINVAL;
3687 }
3688 
3689 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3690 				  enum ib_wc_status wc_status)
3691 {
3692 	struct cm_id_private *cm_id_priv;
3693 	struct ib_cm_event cm_event;
3694 	enum ib_cm_state state;
3695 	int ret;
3696 
3697 	memset(&cm_event, 0, sizeof cm_event);
3698 	cm_id_priv = msg->context[0];
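	/*
	 * context[0] was set to the owning cm_id_priv when the MAD was
	 * allocated; context[1] records the CM state at the time the MAD
	 * was posted.
	 */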
3699 
3700 	/* Discard old sends or ones without a response. */
3701 	spin_lock_irq(&cm_id_priv->lock);
3702 	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3703 	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3704 		goto discard;
3705 
3706 	pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
3707 			     state, ib_wc_status_msg(wc_status));
3708 	switch (state) {
3709 	case IB_CM_REQ_SENT:
3710 	case IB_CM_MRA_REQ_RCVD:
3711 		cm_reset_to_idle(cm_id_priv);
3712 		cm_event.event = IB_CM_REQ_ERROR;
3713 		break;
3714 	case IB_CM_REP_SENT:
3715 	case IB_CM_MRA_REP_RCVD:
3716 		cm_reset_to_idle(cm_id_priv);
3717 		cm_event.event = IB_CM_REP_ERROR;
3718 		break;
3719 	case IB_CM_DREQ_SENT:
3720 		cm_enter_timewait(cm_id_priv);
3721 		cm_event.event = IB_CM_DREQ_ERROR;
3722 		break;
3723 	case IB_CM_SIDR_REQ_SENT:
3724 		cm_id_priv->id.state = IB_CM_IDLE;
3725 		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3726 		break;
3727 	default:
3728 		goto discard;
3729 	}
3730 	spin_unlock_irq(&cm_id_priv->lock);
3731 	cm_event.param.send_status = wc_status;
3732 
3733 	/* No other events can occur on the cm_id at this point. */
3734 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3735 	cm_free_msg(msg);
3736 	if (ret)
3737 		ib_destroy_cm_id(&cm_id_priv->id);
3738 	return;
3739 discard:
3740 	spin_unlock_irq(&cm_id_priv->lock);
3741 	cm_free_msg(msg);
3742 }
3743 
3744 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3745 			    struct ib_mad_send_wc *mad_send_wc)
3746 {
3747 	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3748 	struct cm_port *port;
3749 	u16 attr_index;
3750 
3751 	port = mad_agent->context;
3752 	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3753 				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3754 
3755 	/*
3756 	 * If the send was in response to a received message (context[0] is not
3757 	 * set to a cm_id), and is not a REJ, then it is a send that was
3758 	 * manually retried.
3759 	 */
3760 	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3761 		msg->retries = 1;
3762 
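	/* Account for the original transmission plus any MAD-layer retries. */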
3763 	atomic_long_add(1 + msg->retries,
3764 			&port->counter_group[CM_XMIT].counter[attr_index]);
3765 	if (msg->retries)
3766 		atomic_long_add(msg->retries,
3767 				&port->counter_group[CM_XMIT_RETRIES].
3768 				counter[attr_index]);
3769 
3770 	switch (mad_send_wc->status) {
3771 	case IB_WC_SUCCESS:
3772 	case IB_WC_WR_FLUSH_ERR:
3773 		cm_free_msg(msg);
3774 		break;
3775 	default:
3776 		if (msg->context[0] && msg->context[1])
3777 			cm_process_send_error(msg, mad_send_wc->status);
3778 		else
3779 			cm_free_msg(msg);
3780 		break;
3781 	}
3782 }
3783 
3784 static void cm_work_handler(struct work_struct *_work)
3785 {
3786 	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3787 	int ret;
3788 
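	/*
	 * Dispatch the received (or user-generated) event to its handler.
	 * Handlers that consume the work free it themselves; on error the
	 * work item is freed here.
	 */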
3789 	switch (work->cm_event.event) {
3790 	case IB_CM_REQ_RECEIVED:
3791 		ret = cm_req_handler(work);
3792 		break;
3793 	case IB_CM_MRA_RECEIVED:
3794 		ret = cm_mra_handler(work);
3795 		break;
3796 	case IB_CM_REJ_RECEIVED:
3797 		ret = cm_rej_handler(work);
3798 		break;
3799 	case IB_CM_REP_RECEIVED:
3800 		ret = cm_rep_handler(work);
3801 		break;
3802 	case IB_CM_RTU_RECEIVED:
3803 		ret = cm_rtu_handler(work);
3804 		break;
3805 	case IB_CM_USER_ESTABLISHED:
3806 		ret = cm_establish_handler(work);
3807 		break;
3808 	case IB_CM_DREQ_RECEIVED:
3809 		ret = cm_dreq_handler(work);
3810 		break;
3811 	case IB_CM_DREP_RECEIVED:
3812 		ret = cm_drep_handler(work);
3813 		break;
3814 	case IB_CM_SIDR_REQ_RECEIVED:
3815 		ret = cm_sidr_req_handler(work);
3816 		break;
3817 	case IB_CM_SIDR_REP_RECEIVED:
3818 		ret = cm_sidr_rep_handler(work);
3819 		break;
3820 	case IB_CM_LAP_RECEIVED:
3821 		ret = cm_lap_handler(work);
3822 		break;
3823 	case IB_CM_APR_RECEIVED:
3824 		ret = cm_apr_handler(work);
3825 		break;
3826 	case IB_CM_TIMEWAIT_EXIT:
3827 		ret = cm_timewait_handler(work);
3828 		break;
3829 	default:
3830 		pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3831 		ret = -EINVAL;
3832 		break;
3833 	}
3834 	if (ret)
3835 		cm_free_work(work);
3836 }
3837 
3838 static int cm_establish(struct ib_cm_id *cm_id)
3839 {
3840 	struct cm_id_private *cm_id_priv;
3841 	struct cm_work *work;
3842 	unsigned long flags;
3843 	int ret = 0;
3844 	struct cm_device *cm_dev;
3845 
3846 	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3847 	if (!cm_dev)
3848 		return -ENODEV;
3849 
3850 	work = kmalloc(sizeof *work, GFP_ATOMIC);
3851 	if (!work)
3852 		return -ENOMEM;
3853 
3854 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3855 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3856 	switch (cm_id->state) {
3858 	case IB_CM_REP_SENT:
3859 	case IB_CM_MRA_REP_RCVD:
3860 		cm_id->state = IB_CM_ESTABLISHED;
3861 		break;
3862 	case IB_CM_ESTABLISHED:
3863 		ret = -EISCONN;
3864 		break;
3865 	default:
3866 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3867 			 be32_to_cpu(cm_id->local_id), cm_id->state);
3868 		ret = -EINVAL;
3869 		break;
3870 	}
3871 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3872 
3873 	if (ret) {
3874 		kfree(work);
3875 		goto out;
3876 	}
3877 
3878 	/*
3879 	 * The CM worker thread may try to destroy the cm_id before it
3880 	 * can execute this work item.  To prevent potential deadlock,
3881 	 * we need to find the cm_id once we're in the context of the
3882 	 * worker thread, rather than holding a reference on it.
3883 	 */
3884 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3885 	work->local_id = cm_id->local_id;
3886 	work->remote_id = cm_id->remote_id;
3887 	work->mad_recv_wc = NULL;
3888 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3889 
3890 	/* Check if the device started its remove_one */
3891 	spin_lock_irqsave(&cm.lock, flags);
3892 	if (!cm_dev->going_down) {
3893 		queue_delayed_work(cm.wq, &work->work, 0);
3894 	} else {
3895 		kfree(work);
3896 		ret = -ENODEV;
3897 	}
3898 	spin_unlock_irqrestore(&cm.lock, flags);
3899 
3900 out:
3901 	return ret;
3902 }
3903 
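/*
 * Called via ib_cm_notify(IB_EVENT_PATH_MIG) after the QP has migrated to
 * its alternate path: swap the primary and alternate address vectors so
 * that subsequent CM MADs follow the path the QP is now using.
 */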
3904 static int cm_migrate(struct ib_cm_id *cm_id)
3905 {
3906 	struct cm_id_private *cm_id_priv;
3907 	struct cm_av tmp_av;
3908 	unsigned long flags;
3909 	int tmp_send_port_not_ready;
3910 	int ret = 0;
3911 
3912 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3913 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3914 	if (cm_id->state == IB_CM_ESTABLISHED &&
3915 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3916 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3917 		cm_id->lap_state = IB_CM_LAP_IDLE;
3918 		/* Swap address vector */
3919 		tmp_av = cm_id_priv->av;
3920 		cm_id_priv->av = cm_id_priv->alt_av;
3921 		cm_id_priv->alt_av = tmp_av;
3922 		/* Swap port send ready state */
3923 		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3924 		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3925 		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3926 	} else
3927 		ret = -EINVAL;
3928 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3929 
3930 	return ret;
3931 }
3932 
3933 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3934 {
3935 	int ret;
3936 
3937 	switch (event) {
3938 	case IB_EVENT_COMM_EST:
3939 		ret = cm_establish(cm_id);
3940 		break;
3941 	case IB_EVENT_PATH_MIG:
3942 		ret = cm_migrate(cm_id);
3943 		break;
3944 	default:
3945 		ret = -EINVAL;
3946 	}
3947 	return ret;
3948 }
3949 EXPORT_SYMBOL(ib_cm_notify);
3950 
3951 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3952 			    struct ib_mad_send_buf *send_buf,
3953 			    struct ib_mad_recv_wc *mad_recv_wc)
3954 {
3955 	struct cm_port *port = mad_agent->context;
3956 	struct cm_work *work;
3957 	enum ib_cm_event_type event;
3958 	bool alt_path = false;
3959 	u16 attr_id;
3960 	int paths = 0;
3961 	int going_down = 0;
3962 
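	/*
	 * Map the MAD attribute ID to a CM event and work out how many path
	 * records the work item needs to carry: a REQ brings a primary and
	 * possibly an alternate path, a LAP brings a single alternate path.
	 */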
3963 	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3964 	case CM_REQ_ATTR_ID:
3965 		alt_path = cm_req_has_alt_path((struct cm_req_msg *)
3966 						mad_recv_wc->recv_buf.mad);
3967 		paths = 1 + (alt_path != 0);
3968 		event = IB_CM_REQ_RECEIVED;
3969 		break;
3970 	case CM_MRA_ATTR_ID:
3971 		event = IB_CM_MRA_RECEIVED;
3972 		break;
3973 	case CM_REJ_ATTR_ID:
3974 		event = IB_CM_REJ_RECEIVED;
3975 		break;
3976 	case CM_REP_ATTR_ID:
3977 		event = IB_CM_REP_RECEIVED;
3978 		break;
3979 	case CM_RTU_ATTR_ID:
3980 		event = IB_CM_RTU_RECEIVED;
3981 		break;
3982 	case CM_DREQ_ATTR_ID:
3983 		event = IB_CM_DREQ_RECEIVED;
3984 		break;
3985 	case CM_DREP_ATTR_ID:
3986 		event = IB_CM_DREP_RECEIVED;
3987 		break;
3988 	case CM_SIDR_REQ_ATTR_ID:
3989 		event = IB_CM_SIDR_REQ_RECEIVED;
3990 		break;
3991 	case CM_SIDR_REP_ATTR_ID:
3992 		event = IB_CM_SIDR_REP_RECEIVED;
3993 		break;
3994 	case CM_LAP_ATTR_ID:
3995 		paths = 1;
3996 		event = IB_CM_LAP_RECEIVED;
3997 		break;
3998 	case CM_APR_ATTR_ID:
3999 		event = IB_CM_APR_RECEIVED;
4000 		break;
4001 	default:
4002 		ib_free_recv_mad(mad_recv_wc);
4003 		return;
4004 	}
4005 
4006 	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
4007 	atomic_long_inc(&port->counter_group[CM_RECV].
4008 			counter[attr_id - CM_ATTR_ID_OFFSET]);
4009 
4010 	work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
4011 	if (!work) {
4012 		ib_free_recv_mad(mad_recv_wc);
4013 		return;
4014 	}
4015 
4016 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
4017 	work->cm_event.event = event;
4018 	work->mad_recv_wc = mad_recv_wc;
4019 	work->port = port;
4020 
4021 	/* Check if the device started its remove_one */
4022 	spin_lock_irq(&cm.lock);
4023 	if (!port->cm_dev->going_down)
4024 		queue_delayed_work(cm.wq, &work->work, 0);
4025 	else
4026 		going_down = 1;
4027 	spin_unlock_irq(&cm.lock);
4028 
4029 	if (going_down) {
4030 		kfree(work);
4031 		ib_free_recv_mad(mad_recv_wc);
4032 	}
4033 }
4034 
4035 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
4036 				struct ib_qp_attr *qp_attr,
4037 				int *qp_attr_mask)
4038 {
4039 	unsigned long flags;
4040 	int ret;
4041 
4042 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4043 	switch (cm_id_priv->id.state) {
4044 	case IB_CM_REQ_SENT:
4045 	case IB_CM_MRA_REQ_RCVD:
4046 	case IB_CM_REQ_RCVD:
4047 	case IB_CM_MRA_REQ_SENT:
4048 	case IB_CM_REP_RCVD:
4049 	case IB_CM_MRA_REP_SENT:
4050 	case IB_CM_REP_SENT:
4051 	case IB_CM_MRA_REP_RCVD:
4052 	case IB_CM_ESTABLISHED:
4053 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
4054 				IB_QP_PKEY_INDEX | IB_QP_PORT;
4055 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
4056 		if (cm_id_priv->responder_resources)
4057 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
4058 						    IB_ACCESS_REMOTE_ATOMIC;
4059 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
4060 		qp_attr->port_num = cm_id_priv->av.port->port_num;
4061 		ret = 0;
4062 		break;
4063 	default:
4064 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4065 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4066 			 cm_id_priv->id.state);
4067 		ret = -EINVAL;
4068 		break;
4069 	}
4070 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4071 	return ret;
4072 }
4073 
4074 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
4075 			       struct ib_qp_attr *qp_attr,
4076 			       int *qp_attr_mask)
4077 {
4078 	unsigned long flags;
4079 	int ret;
4080 
4081 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4082 	switch (cm_id_priv->id.state) {
4083 	case IB_CM_REQ_RCVD:
4084 	case IB_CM_MRA_REQ_SENT:
4085 	case IB_CM_REP_RCVD:
4086 	case IB_CM_MRA_REP_SENT:
4087 	case IB_CM_REP_SENT:
4088 	case IB_CM_MRA_REP_RCVD:
4089 	case IB_CM_ESTABLISHED:
4090 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
4091 				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
4092 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
4093 		qp_attr->path_mtu = cm_id_priv->path_mtu;
4094 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
4095 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
4096 		if (cm_id_priv->qp_type == IB_QPT_RC ||
4097 		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
4098 			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
4099 					 IB_QP_MIN_RNR_TIMER;
4100 			qp_attr->max_dest_rd_atomic =
4101 					cm_id_priv->responder_resources;
4102 			qp_attr->min_rnr_timer = 0;
4103 		}
4104 		if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4105 			*qp_attr_mask |= IB_QP_ALT_PATH;
4106 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4107 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4108 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4109 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4110 		}
4111 		ret = 0;
4112 		break;
4113 	default:
4114 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4115 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4116 			 cm_id_priv->id.state);
4117 		ret = -EINVAL;
4118 		break;
4119 	}
4120 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4121 	return ret;
4122 }
4123 
4124 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
4125 			       struct ib_qp_attr *qp_attr,
4126 			       int *qp_attr_mask)
4127 {
4128 	unsigned long flags;
4129 	int ret;
4130 
4131 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4132 	switch (cm_id_priv->id.state) {
4133 	/* Allow transition to RTS before sending REP */
4134 	case IB_CM_REQ_RCVD:
4135 	case IB_CM_MRA_REQ_SENT:
4136 
4137 	case IB_CM_REP_RCVD:
4138 	case IB_CM_MRA_REP_SENT:
4139 	case IB_CM_REP_SENT:
4140 	case IB_CM_MRA_REP_RCVD:
4141 	case IB_CM_ESTABLISHED:
4142 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
4143 			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
4144 			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
4145 			switch (cm_id_priv->qp_type) {
4146 			case IB_QPT_RC:
4147 			case IB_QPT_XRC_INI:
4148 				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4149 						 IB_QP_MAX_QP_RD_ATOMIC;
4150 				qp_attr->retry_cnt = cm_id_priv->retry_count;
4151 				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
4152 				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
4153 				/* fall through */
4154 			case IB_QPT_XRC_TGT:
4155 				*qp_attr_mask |= IB_QP_TIMEOUT;
4156 				qp_attr->timeout = cm_id_priv->av.timeout;
4157 				break;
4158 			default:
4159 				break;
4160 			}
4161 			if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4162 				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
4163 				qp_attr->path_mig_state = IB_MIG_REARM;
4164 			}
4165 		} else {
4166 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
4167 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4168 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4169 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4170 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4171 			qp_attr->path_mig_state = IB_MIG_REARM;
4172 		}
4173 		ret = 0;
4174 		break;
4175 	default:
4176 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4177 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4178 			 cm_id_priv->id.state);
4179 		ret = -EINVAL;
4180 		break;
4181 	}
4182 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4183 	return ret;
4184 }
4185 
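/*
 * Fill in the attributes needed to move a QP through INIT, RTR and RTS
 * using the values negotiated during connection establishment.  The caller
 * sets qp_attr->qp_state to the desired state and passes the returned
 * attributes and mask to ib_modify_qp().
 */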
4186 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
4187 		       struct ib_qp_attr *qp_attr,
4188 		       int *qp_attr_mask)
4189 {
4190 	struct cm_id_private *cm_id_priv;
4191 	int ret;
4192 
4193 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
4194 	switch (qp_attr->qp_state) {
4195 	case IB_QPS_INIT:
4196 		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
4197 		break;
4198 	case IB_QPS_RTR:
4199 		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
4200 		break;
4201 	case IB_QPS_RTS:
4202 		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
4203 		break;
4204 	default:
4205 		ret = -EINVAL;
4206 		break;
4207 	}
4208 	return ret;
4209 }
4210 EXPORT_SYMBOL(ib_cm_init_qp_attr);
4211 
4212 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
4213 			       char *buf)
4214 {
4215 	struct cm_counter_group *group;
4216 	struct cm_counter_attribute *cm_attr;
4217 
4218 	group = container_of(obj, struct cm_counter_group, obj);
4219 	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
4220 
4221 	return sprintf(buf, "%ld\n",
4222 		       atomic_long_read(&group->counter[cm_attr->index]));
4223 }
4224 
4225 static const struct sysfs_ops cm_counter_ops = {
4226 	.show = cm_show_counter
4227 };
4228 
4229 static struct kobj_type cm_counter_obj_type = {
4230 	.sysfs_ops = &cm_counter_ops,
4231 	.default_attrs = cm_counter_default_attrs
4232 };
4233 
4234 static char *cm_devnode(struct device *dev, umode_t *mode)
4235 {
4236 	if (mode)
4237 		*mode = 0666;
4238 	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
4239 }
4240 
4241 struct class cm_class = {
4242 	.owner   = THIS_MODULE,
4243 	.name    = "infiniband_cm",
4244 	.devnode = cm_devnode,
4245 };
4246 EXPORT_SYMBOL(cm_class);
4247 
4248 static int cm_create_port_fs(struct cm_port *port)
4249 {
4250 	int i, ret;
4251 
4252 	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
4253 		ret = ib_port_register_module_stat(port->cm_dev->ib_device,
4254 						   port->port_num,
4255 						   &port->counter_group[i].obj,
4256 						   &cm_counter_obj_type,
4257 						   counter_group_names[i]);
4258 		if (ret)
4259 			goto error;
4260 	}
4261 
4262 	return 0;
4263 
4264 error:
4265 	while (i--)
4266 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4267 	return ret;
4269 }
4270 
4271 static void cm_remove_port_fs(struct cm_port *port)
4272 {
4273 	int i;
4274 
4275 	for (i = 0; i < CM_COUNTER_GROUPS; i++)
4276 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4278 }
4279 
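/*
 * Client callback invoked when an IB device is registered: for each port
 * that supports the IB CM protocol, register a GSI MAD agent for the CM
 * management class, set IB_PORT_CM_SUP in the port capabilities and create
 * the per-port counter objects.
 */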
4280 static void cm_add_one(struct ib_device *ib_device)
4281 {
4282 	struct cm_device *cm_dev;
4283 	struct cm_port *port;
4284 	struct ib_mad_reg_req reg_req = {
4285 		.mgmt_class = IB_MGMT_CLASS_CM,
4286 		.mgmt_class_version = IB_CM_CLASS_VERSION,
4287 	};
4288 	struct ib_port_modify port_modify = {
4289 		.set_port_cap_mask = IB_PORT_CM_SUP
4290 	};
4291 	unsigned long flags;
4292 	int ret;
4293 	int count = 0;
4294 	u8 i;
4295 
4296 	cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
4297 			 GFP_KERNEL);
4298 	if (!cm_dev)
4299 		return;
4300 
4301 	cm_dev->ib_device = ib_device;
4302 	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
4303 	cm_dev->going_down = 0;
4304 
4305 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
4306 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4307 		if (!rdma_cap_ib_cm(ib_device, i))
4308 			continue;
4309 
4310 		port = kzalloc(sizeof *port, GFP_KERNEL);
4311 		if (!port)
4312 			goto error1;
4313 
4314 		cm_dev->port[i-1] = port;
4315 		port->cm_dev = cm_dev;
4316 		port->port_num = i;
4317 
4318 		INIT_LIST_HEAD(&port->cm_priv_prim_list);
4319 		INIT_LIST_HEAD(&port->cm_priv_altr_list);
4320 
4321 		ret = cm_create_port_fs(port);
4322 		if (ret)
4323 			goto error1;
4324 
4325 		port->mad_agent = ib_register_mad_agent(ib_device, i,
4326 							IB_QPT_GSI,
4327 							&reg_req,
4328 							0,
4329 							cm_send_handler,
4330 							cm_recv_handler,
4331 							port,
4332 							0);
4333 		if (IS_ERR(port->mad_agent))
4334 			goto error2;
4335 
4336 		ret = ib_modify_port(ib_device, i, 0, &port_modify);
4337 		if (ret)
4338 			goto error3;
4339 
4340 		count++;
4341 	}
4342 
4343 	if (!count)
4344 		goto free;
4345 
4346 	ib_set_client_data(ib_device, &cm_client, cm_dev);
4347 
4348 	write_lock_irqsave(&cm.device_lock, flags);
4349 	list_add_tail(&cm_dev->list, &cm.device_list);
4350 	write_unlock_irqrestore(&cm.device_lock, flags);
4351 	return;
4352 
4353 error3:
4354 	ib_unregister_mad_agent(port->mad_agent);
4355 error2:
4356 	cm_remove_port_fs(port);
4357 error1:
4358 	port_modify.set_port_cap_mask = 0;
4359 	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4360 	kfree(port);
4361 	while (--i) {
4362 		if (!rdma_cap_ib_cm(ib_device, i))
4363 			continue;
4364 
4365 		port = cm_dev->port[i-1];
4366 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4367 		ib_unregister_mad_agent(port->mad_agent);
4368 		cm_remove_port_fs(port);
4369 		kfree(port);
4370 	}
4371 free:
4372 	kfree(cm_dev);
4373 }
4374 
4375 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4376 {
4377 	struct cm_device *cm_dev = client_data;
4378 	struct cm_port *port;
4379 	struct cm_id_private *cm_id_priv;
4380 	struct ib_mad_agent *cur_mad_agent;
4381 	struct ib_port_modify port_modify = {
4382 		.clr_port_cap_mask = IB_PORT_CM_SUP
4383 	};
4384 	unsigned long flags;
4385 	int i;
4386 
4387 	if (!cm_dev)
4388 		return;
4389 
4390 	write_lock_irqsave(&cm.device_lock, flags);
4391 	list_del(&cm_dev->list);
4392 	write_unlock_irqrestore(&cm.device_lock, flags);
4393 
4394 	spin_lock_irq(&cm.lock);
4395 	cm_dev->going_down = 1;
4396 	spin_unlock_irq(&cm.lock);
4397 
4398 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4399 		if (!rdma_cap_ib_cm(ib_device, i))
4400 			continue;
4401 
4402 		port = cm_dev->port[i-1];
4403 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4404 		/* Mark all the cm_ids on this port as not ready to send. */
4405 		spin_lock_irq(&cm.lock);
4406 		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4407 			cm_id_priv->altr_send_port_not_ready = 1;
4408 		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4409 			cm_id_priv->prim_send_port_not_ready = 1;
4410 		spin_unlock_irq(&cm.lock);
4411 		/*
4412 		 * Flush the workqueue after going_down has been set; this
4413 		 * guarantees that the receive handler queues no new work,
4414 		 * after which it is safe to unregister the MAD agent.
4415 		 */
4416 		flush_workqueue(cm.wq);
4417 		spin_lock_irq(&cm.state_lock);
4418 		cur_mad_agent = port->mad_agent;
4419 		port->mad_agent = NULL;
4420 		spin_unlock_irq(&cm.state_lock);
4421 		ib_unregister_mad_agent(cur_mad_agent);
4422 		cm_remove_port_fs(port);
4423 		kfree(port);
4424 	}
4425 
4426 	kfree(cm_dev);
4427 }
4428 
4429 static int __init ib_cm_init(void)
4430 {
4431 	int ret;
4432 
4433 	INIT_LIST_HEAD(&cm.device_list);
4434 	rwlock_init(&cm.device_lock);
4435 	spin_lock_init(&cm.lock);
4436 	spin_lock_init(&cm.state_lock);
4437 	cm.listen_service_table = RB_ROOT;
4438 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4439 	cm.remote_id_table = RB_ROOT;
4440 	cm.remote_qp_table = RB_ROOT;
4441 	cm.remote_sidr_table = RB_ROOT;
4442 	xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
4443 	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
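	/*
	 * random_id_operand is mixed into locally assigned communication IDs
	 * so the IDs handed out are not simply sequential.
	 */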
4444 	INIT_LIST_HEAD(&cm.timewait_list);
4445 
4446 	ret = class_register(&cm_class);
4447 	if (ret)
4448 		goto error1;
4451 
4452 	cm.wq = alloc_workqueue("ib_cm", 0, 1);
4453 	if (!cm.wq) {
4454 		ret = -ENOMEM;
4455 		goto error2;
4456 	}
4457 
4458 	ret = ib_register_client(&cm_client);
4459 	if (ret)
4460 		goto error3;
4461 
4462 	return 0;
4463 error3:
4464 	destroy_workqueue(cm.wq);
4465 error2:
4466 	class_unregister(&cm_class);
4467 error1:
4468 	return ret;
4469 }
4470 
4471 static void __exit ib_cm_cleanup(void)
4472 {
4473 	struct cm_timewait_info *timewait_info, *tmp;
4474 
4475 	spin_lock_irq(&cm.lock);
4476 	list_for_each_entry(timewait_info, &cm.timewait_list, list)
4477 		cancel_delayed_work(&timewait_info->work.work);
4478 	spin_unlock_irq(&cm.lock);
4479 
4480 	ib_unregister_client(&cm_client);
4481 	destroy_workqueue(cm.wq);
4482 
4483 	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4484 		list_del(&timewait_info->list);
4485 		kfree(timewait_info);
4486 	}
4487 
4488 	class_unregister(&cm_class);
4489 	WARN_ON(!xa_empty(&cm.local_id_table));
4490 }
4491 
4492 module_init(ib_cm_init);
4493 module_exit(ib_cm_cleanup);
4494