xref: /openbmc/linux/drivers/infiniband/core/cm.c (revision f17f06a0)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
5  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
6  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
7  * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
8  */
9 
10 #include <linux/completion.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/device.h>
13 #include <linux/module.h>
14 #include <linux/err.h>
15 #include <linux/idr.h>
16 #include <linux/interrupt.h>
17 #include <linux/random.h>
18 #include <linux/rbtree.h>
19 #include <linux/spinlock.h>
20 #include <linux/slab.h>
21 #include <linux/sysfs.h>
22 #include <linux/workqueue.h>
23 #include <linux/kdev_t.h>
24 #include <linux/etherdevice.h>
25 
26 #include <rdma/ib_cache.h>
27 #include <rdma/ib_cm.h>
28 #include "cm_msgs.h"
29 #include "core_priv.h"
30 
31 MODULE_AUTHOR("Sean Hefty");
32 MODULE_DESCRIPTION("InfiniBand CM");
33 MODULE_LICENSE("Dual BSD/GPL");
34 
35 static const char * const ibcm_rej_reason_strs[] = {
36 	[IB_CM_REJ_NO_QP]			= "no QP",
37 	[IB_CM_REJ_NO_EEC]			= "no EEC",
38 	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
39 	[IB_CM_REJ_TIMEOUT]			= "timeout",
40 	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
41 	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm ID",
42 	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
43 	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service ID",
44 	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
45 	[IB_CM_REJ_STALE_CONN]			= "stale conn",
46 	[IB_CM_REJ_RDC_NOT_EXIST]		= "RDC not exist",
47 	[IB_CM_REJ_INVALID_GID]			= "invalid GID",
48 	[IB_CM_REJ_INVALID_LID]			= "invalid LID",
49 	[IB_CM_REJ_INVALID_SL]			= "invalid SL",
50 	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
51 	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
52 	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
53 	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt GID",
54 	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt LID",
55 	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt SL",
56 	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
57 	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
58 	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
59 	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port CM redirect",
60 	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
61 	[IB_CM_REJ_INVALID_MTU]			= "invalid MTU",
62 	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
63 	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
64 	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid RNR retry",
65 	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm ID",
66 	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
67 	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
68 	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
69 };
70 
71 const char *__attribute_const__ ibcm_reject_msg(int reason)
72 {
73 	size_t index = reason;
74 
75 	if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
76 	    ibcm_rej_reason_strs[index])
77 		return ibcm_rej_reason_strs[index];
78 	else
79 		return "unrecognized reason";
80 }
81 EXPORT_SYMBOL(ibcm_reject_msg);
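
/*
 * Illustrative (hypothetical) caller sketch, not part of this file: a ULP
 * handling an IB_CM_REJ_RECEIVED event can translate the numeric reason
 * into one of the strings above for logging, e.g.:
 *
 *	pr_debug("connection rejected: %s\n",
 *		 ibcm_reject_msg(event->param.rej_rcvd.reason));
 */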
82 
83 static void cm_add_one(struct ib_device *device);
84 static void cm_remove_one(struct ib_device *device, void *client_data);
85 
86 static struct ib_client cm_client = {
87 	.name   = "cm",
88 	.add    = cm_add_one,
89 	.remove = cm_remove_one
90 };
91 
92 static struct ib_cm {
93 	spinlock_t lock;
94 	struct list_head device_list;
95 	rwlock_t device_lock;
96 	struct rb_root listen_service_table;
97 	u64 listen_service_id;
98 	/* struct rb_root peer_service_table; todo: fix peer to peer */
99 	struct rb_root remote_qp_table;
100 	struct rb_root remote_id_table;
101 	struct rb_root remote_sidr_table;
102 	struct xarray local_id_table;
103 	u32 local_id_next;
104 	__be32 random_id_operand;
105 	struct list_head timewait_list;
106 	struct workqueue_struct *wq;
107 	/* Synchronizes CM port state changes */
108 	spinlock_t state_lock;
109 } cm;
110 
111 /* Counter indexes ordered by attribute ID */
112 enum {
113 	CM_REQ_COUNTER,
114 	CM_MRA_COUNTER,
115 	CM_REJ_COUNTER,
116 	CM_REP_COUNTER,
117 	CM_RTU_COUNTER,
118 	CM_DREQ_COUNTER,
119 	CM_DREP_COUNTER,
120 	CM_SIDR_REQ_COUNTER,
121 	CM_SIDR_REP_COUNTER,
122 	CM_LAP_COUNTER,
123 	CM_APR_COUNTER,
124 	CM_ATTR_COUNT,
125 	CM_ATTR_ID_OFFSET = 0x0010,
126 };
127 
128 enum {
129 	CM_XMIT,
130 	CM_XMIT_RETRIES,
131 	CM_RECV,
132 	CM_RECV_DUPLICATES,
133 	CM_COUNTER_GROUPS
134 };
135 
136 static char const counter_group_names[CM_COUNTER_GROUPS]
137 				     [sizeof("cm_rx_duplicates")] = {
138 	"cm_tx_msgs", "cm_tx_retries",
139 	"cm_rx_msgs", "cm_rx_duplicates"
140 };
141 
142 struct cm_counter_group {
143 	struct kobject obj;
144 	atomic_long_t counter[CM_ATTR_COUNT];
145 };
146 
147 struct cm_counter_attribute {
148 	struct attribute attr;
149 	int index;
150 };
151 
152 #define CM_COUNTER_ATTR(_name, _index) \
153 struct cm_counter_attribute cm_##_name##_counter_attr = { \
154 	.attr = { .name = __stringify(_name), .mode = 0444 }, \
155 	.index = _index \
156 }
157 
158 static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
159 static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
160 static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
161 static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
162 static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
163 static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
164 static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
165 static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
166 static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
167 static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
168 static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
169 
170 static struct attribute *cm_counter_default_attrs[] = {
171 	&cm_req_counter_attr.attr,
172 	&cm_mra_counter_attr.attr,
173 	&cm_rej_counter_attr.attr,
174 	&cm_rep_counter_attr.attr,
175 	&cm_rtu_counter_attr.attr,
176 	&cm_dreq_counter_attr.attr,
177 	&cm_drep_counter_attr.attr,
178 	&cm_sidr_req_counter_attr.attr,
179 	&cm_sidr_rep_counter_attr.attr,
180 	&cm_lap_counter_attr.attr,
181 	&cm_apr_counter_attr.attr,
182 	NULL
183 };
184 
185 struct cm_port {
186 	struct cm_device *cm_dev;
187 	struct ib_mad_agent *mad_agent;
188 	struct kobject port_obj;
189 	u8 port_num;
190 	struct list_head cm_priv_prim_list;
191 	struct list_head cm_priv_altr_list;
192 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
193 };
194 
195 struct cm_device {
196 	struct list_head list;
197 	struct ib_device *ib_device;
198 	u8 ack_delay;
199 	int going_down;
200 	struct cm_port *port[0];
201 };
202 
203 struct cm_av {
204 	struct cm_port *port;
205 	union ib_gid dgid;
206 	struct rdma_ah_attr ah_attr;
207 	u16 pkey_index;
208 	u8 timeout;
209 };
210 
211 struct cm_work {
212 	struct delayed_work work;
213 	struct list_head list;
214 	struct cm_port *port;
215 	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
216 	__be32 local_id;			/* Established / timewait */
217 	__be32 remote_id;
218 	struct ib_cm_event cm_event;
219 	struct sa_path_rec path[0];
220 };
221 
222 struct cm_timewait_info {
223 	struct cm_work work;
224 	struct list_head list;
225 	struct rb_node remote_qp_node;
226 	struct rb_node remote_id_node;
227 	__be64 remote_ca_guid;
228 	__be32 remote_qpn;
229 	u8 inserted_remote_qp;
230 	u8 inserted_remote_id;
231 };
232 
233 struct cm_id_private {
234 	struct ib_cm_id	id;
235 
236 	struct rb_node service_node;
237 	struct rb_node sidr_id_node;
238 	spinlock_t lock;	/* Do not acquire inside cm.lock */
239 	struct completion comp;
240 	refcount_t refcount;
241 	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
242 	 * Protected by the cm.lock spinlock. */
243 	int listen_sharecount;
244 	struct rcu_head rcu;
245 
246 	struct ib_mad_send_buf *msg;
247 	struct cm_timewait_info *timewait_info;
248 	/* todo: use alternate port on send failure */
249 	struct cm_av av;
250 	struct cm_av alt_av;
251 
252 	void *private_data;
253 	__be64 tid;
254 	__be32 local_qpn;
255 	__be32 remote_qpn;
256 	enum ib_qp_type qp_type;
257 	__be32 sq_psn;
258 	__be32 rq_psn;
259 	int timeout_ms;
260 	enum ib_mtu path_mtu;
261 	__be16 pkey;
262 	u8 private_data_len;
263 	u8 max_cm_retries;
264 	u8 peer_to_peer;
265 	u8 responder_resources;
266 	u8 initiator_depth;
267 	u8 retry_count;
268 	u8 rnr_retry_count;
269 	u8 service_timeout;
270 	u8 target_ack_delay;
271 
272 	struct list_head prim_list;
273 	struct list_head altr_list;
274 	/* Indicates that the send port's MAD agent is registered and the av is set */
275 	int prim_send_port_not_ready;
276 	int altr_send_port_not_ready;
277 
278 	struct list_head work_list;
279 	atomic_t work_count;
280 };
281 
282 static void cm_work_handler(struct work_struct *work);
283 
284 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
285 {
286 	if (refcount_dec_and_test(&cm_id_priv->refcount))
287 		complete(&cm_id_priv->comp);
288 }
289 
290 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
291 			struct ib_mad_send_buf **msg)
292 {
293 	struct ib_mad_agent *mad_agent;
294 	struct ib_mad_send_buf *m;
295 	struct ib_ah *ah;
296 	struct cm_av *av;
297 	unsigned long flags, flags2;
298 	int ret = 0;
299 
300 	/* don't let the port be released while its MAD agent is still in use */
301 	spin_lock_irqsave(&cm.state_lock, flags2);
302 	spin_lock_irqsave(&cm.lock, flags);
303 	if (!cm_id_priv->prim_send_port_not_ready)
304 		av = &cm_id_priv->av;
305 	else if (!cm_id_priv->altr_send_port_not_ready &&
306 		 (cm_id_priv->alt_av.port))
307 		av = &cm_id_priv->alt_av;
308 	else {
309 		pr_info("%s: not valid CM id\n", __func__);
310 		ret = -ENODEV;
311 		spin_unlock_irqrestore(&cm.lock, flags);
312 		goto out;
313 	}
314 	spin_unlock_irqrestore(&cm.lock, flags);
315 	/* Make sure the port hasn't released the MAD agent yet */
316 	mad_agent = cm_id_priv->av.port->mad_agent;
317 	if (!mad_agent) {
318 		pr_info("%s: not a valid MAD agent\n", __func__);
319 		ret = -ENODEV;
320 		goto out;
321 	}
322 	ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
323 	if (IS_ERR(ah)) {
324 		ret = PTR_ERR(ah);
325 		goto out;
326 	}
327 
328 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
329 			       av->pkey_index,
330 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
331 			       GFP_ATOMIC,
332 			       IB_MGMT_BASE_VERSION);
333 	if (IS_ERR(m)) {
334 		rdma_destroy_ah(ah, 0);
335 		ret = PTR_ERR(m);
336 		goto out;
337 	}
338 
339 	/* Timeout set by caller if response is expected. */
340 	m->ah = ah;
341 	m->retries = cm_id_priv->max_cm_retries;
342 
343 	refcount_inc(&cm_id_priv->refcount);
344 	m->context[0] = cm_id_priv;
345 	*msg = m;
346 
347 out:
348 	spin_unlock_irqrestore(&cm.state_lock, flags2);
349 	return ret;
350 }
351 
352 static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
353 							   struct ib_mad_recv_wc *mad_recv_wc)
354 {
355 	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
356 				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
357 				  GFP_ATOMIC,
358 				  IB_MGMT_BASE_VERSION);
359 }
360 
361 static int cm_create_response_msg_ah(struct cm_port *port,
362 				     struct ib_mad_recv_wc *mad_recv_wc,
363 				     struct ib_mad_send_buf *msg)
364 {
365 	struct ib_ah *ah;
366 
367 	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
368 				  mad_recv_wc->recv_buf.grh, port->port_num);
369 	if (IS_ERR(ah))
370 		return PTR_ERR(ah);
371 
372 	msg->ah = ah;
373 	return 0;
374 }
375 
376 static void cm_free_msg(struct ib_mad_send_buf *msg)
377 {
378 	if (msg->ah)
379 		rdma_destroy_ah(msg->ah, 0);
380 	if (msg->context[0])
381 		cm_deref_id(msg->context[0]);
382 	ib_free_send_mad(msg);
383 }
384 
385 static int cm_alloc_response_msg(struct cm_port *port,
386 				 struct ib_mad_recv_wc *mad_recv_wc,
387 				 struct ib_mad_send_buf **msg)
388 {
389 	struct ib_mad_send_buf *m;
390 	int ret;
391 
392 	m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
393 	if (IS_ERR(m))
394 		return PTR_ERR(m);
395 
396 	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
397 	if (ret) {
398 		cm_free_msg(m);
399 		return ret;
400 	}
401 
402 	*msg = m;
403 	return 0;
404 }
405 
406 static void * cm_copy_private_data(const void *private_data,
407 				   u8 private_data_len)
408 {
409 	void *data;
410 
411 	if (!private_data || !private_data_len)
412 		return NULL;
413 
414 	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
415 	if (!data)
416 		return ERR_PTR(-ENOMEM);
417 
418 	return data;
419 }
420 
421 static void cm_set_private_data(struct cm_id_private *cm_id_priv,
422 				 void *private_data, u8 private_data_len)
423 {
424 	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
425 		kfree(cm_id_priv->private_data);
426 
427 	cm_id_priv->private_data = private_data;
428 	cm_id_priv->private_data_len = private_data_len;
429 }
430 
431 static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
432 			      struct ib_grh *grh, struct cm_av *av)
433 {
434 	struct rdma_ah_attr new_ah_attr;
435 	int ret;
436 
437 	av->port = port;
438 	av->pkey_index = wc->pkey_index;
439 
440 	/*
441 	 * av->ah_attr might already be initialized, either from a past wc for
442 	 * an incoming connect request or while sending out a connect request.
443 	 * So initialize a new ah_attr on the stack. If initialization fails,
444 	 * the old ah_attr is still used for sending any responses. If
445 	 * initialization succeeds, the new ah_attr overwrites the old one.
446 	 */
447 	ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
448 				      port->port_num, wc,
449 				      grh, &new_ah_attr);
450 	if (ret)
451 		return ret;
452 
453 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
454 	return 0;
455 }
456 
457 static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
458 				   struct ib_grh *grh, struct cm_av *av)
459 {
460 	av->port = port;
461 	av->pkey_index = wc->pkey_index;
462 	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
463 				       port->port_num, wc,
464 				       grh, &av->ah_attr);
465 }
466 
467 static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
468 				  struct cm_av *av,
469 				  struct cm_port *port)
470 {
471 	unsigned long flags;
472 	int ret = 0;
473 
474 	spin_lock_irqsave(&cm.lock, flags);
475 
476 	if (&cm_id_priv->av == av)
477 		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
478 	else if (&cm_id_priv->alt_av == av)
479 		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
480 	else
481 		ret = -EINVAL;
482 
483 	spin_unlock_irqrestore(&cm.lock, flags);
484 	return ret;
485 }
486 
487 static struct cm_port *
488 get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
489 {
490 	struct cm_device *cm_dev;
491 	struct cm_port *port = NULL;
492 	unsigned long flags;
493 
494 	if (attr) {
495 		read_lock_irqsave(&cm.device_lock, flags);
496 		list_for_each_entry(cm_dev, &cm.device_list, list) {
497 			if (cm_dev->ib_device == attr->device) {
498 				port = cm_dev->port[attr->port_num - 1];
499 				break;
500 			}
501 		}
502 		read_unlock_irqrestore(&cm.device_lock, flags);
503 	} else {
504 		/* The SGID attribute can be NULL in the following
505 		 * cases:
506 		 * (a) Alternate path
507 		 * (b) IB link layer without GRH
508 		 * (c) LAP send messages
509 		 */
510 		read_lock_irqsave(&cm.device_lock, flags);
511 		list_for_each_entry(cm_dev, &cm.device_list, list) {
512 			attr = rdma_find_gid(cm_dev->ib_device,
513 					     &path->sgid,
514 					     sa_conv_pathrec_to_gid_type(path),
515 					     NULL);
516 			if (!IS_ERR(attr)) {
517 				port = cm_dev->port[attr->port_num - 1];
518 				break;
519 			}
520 		}
521 		read_unlock_irqrestore(&cm.device_lock, flags);
522 		if (port)
523 			rdma_put_gid_attr(attr);
524 	}
525 	return port;
526 }
527 
528 static int cm_init_av_by_path(struct sa_path_rec *path,
529 			      const struct ib_gid_attr *sgid_attr,
530 			      struct cm_av *av,
531 			      struct cm_id_private *cm_id_priv)
532 {
533 	struct rdma_ah_attr new_ah_attr;
534 	struct cm_device *cm_dev;
535 	struct cm_port *port;
536 	int ret;
537 
538 	port = get_cm_port_from_path(path, sgid_attr);
539 	if (!port)
540 		return -EINVAL;
541 	cm_dev = port->cm_dev;
542 
543 	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
544 				  be16_to_cpu(path->pkey), &av->pkey_index);
545 	if (ret)
546 		return ret;
547 
548 	av->port = port;
549 
550 	/*
551 	 * av->ah_attr might already be initialized from a wc or during
552 	 * request processing, in which case it may hold a reference to
553 	 * sgid_attr. So initialize a new ah_attr on the stack.
554 	 * If initialization fails, the old ah_attr is still used for sending
555 	 * any responses. If initialization succeeds, the new ah_attr
556 	 * overwrites the old one, so that the right ah_attr can be used to
557 	 * return an error response.
558 	 */
559 	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
560 					&new_ah_attr, sgid_attr);
561 	if (ret)
562 		return ret;
563 
564 	av->timeout = path->packet_life_time + 1;
565 
566 	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
567 	if (ret) {
568 		rdma_destroy_ah_attr(&new_ah_attr);
569 		return ret;
570 	}
571 	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
572 	return 0;
573 }
574 
575 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
576 {
577 	int err;
578 	u32 id;
579 
580 	err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
581 			xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
582 
583 	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
584 	return err;
585 }
586 
587 static u32 cm_local_id(__be32 local_id)
588 {
589 	return (__force u32) (local_id ^ cm.random_id_operand);
590 }
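
/*
 * Example of the mapping above: cm_alloc_id() stores the xarray index XORed
 * with cm.random_id_operand as the externally visible local_id, and
 * cm_local_id() undoes that XOR. If the xarray hands out index 5 and
 * random_id_operand is 0x12345678, the advertised local_id is
 * 5 ^ 0x12345678 and cm_local_id() recovers 5 for table lookups.
 */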
591 
592 static void cm_free_id(__be32 local_id)
593 {
594 	xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
595 }
596 
597 static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
598 {
599 	struct cm_id_private *cm_id_priv;
600 
601 	rcu_read_lock();
602 	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
603 	if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
604 	    !refcount_inc_not_zero(&cm_id_priv->refcount))
605 		cm_id_priv = NULL;
606 	rcu_read_unlock();
607 
608 	return cm_id_priv;
609 }
610 
611 /*
612  * Trivial helpers to strip endian annotation and compare; the
613  * endianness doesn't actually matter since we just need a stable
614  * order for the RB tree.
615  */
616 static int be32_lt(__be32 a, __be32 b)
617 {
618 	return (__force u32) a < (__force u32) b;
619 }
620 
621 static int be32_gt(__be32 a, __be32 b)
622 {
623 	return (__force u32) a > (__force u32) b;
624 }
625 
626 static int be64_lt(__be64 a, __be64 b)
627 {
628 	return (__force u64) a < (__force u64) b;
629 }
630 
631 static int be64_gt(__be64 a, __be64 b)
632 {
633 	return (__force u64) a > (__force u64) b;
634 }
635 
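/*
 * Insert a listening cm_id into the rb-tree, keyed by device and service_id.
 * If an existing entry on the same device already matches under the
 * (service_id, service_mask) comparison, that entry is returned instead of
 * inserting a new node. Called with cm.lock held.
 */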
636 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
637 {
638 	struct rb_node **link = &cm.listen_service_table.rb_node;
639 	struct rb_node *parent = NULL;
640 	struct cm_id_private *cur_cm_id_priv;
641 	__be64 service_id = cm_id_priv->id.service_id;
642 	__be64 service_mask = cm_id_priv->id.service_mask;
643 
644 	while (*link) {
645 		parent = *link;
646 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
647 					  service_node);
648 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
649 		    (service_mask & cur_cm_id_priv->id.service_id) &&
650 		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
651 			return cur_cm_id_priv;
652 
653 		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
654 			link = &(*link)->rb_left;
655 		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
656 			link = &(*link)->rb_right;
657 		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
658 			link = &(*link)->rb_left;
659 		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
660 			link = &(*link)->rb_right;
661 		else
662 			link = &(*link)->rb_right;
663 	}
664 	rb_link_node(&cm_id_priv->service_node, parent, link);
665 	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
666 	return NULL;
667 }
668 
669 static struct cm_id_private * cm_find_listen(struct ib_device *device,
670 					     __be64 service_id)
671 {
672 	struct rb_node *node = cm.listen_service_table.rb_node;
673 	struct cm_id_private *cm_id_priv;
674 
675 	while (node) {
676 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
677 		if ((cm_id_priv->id.service_mask & service_id) ==
678 		     cm_id_priv->id.service_id &&
679 		    (cm_id_priv->id.device == device))
680 			return cm_id_priv;
681 
682 		if (device < cm_id_priv->id.device)
683 			node = node->rb_left;
684 		else if (device > cm_id_priv->id.device)
685 			node = node->rb_right;
686 		else if (be64_lt(service_id, cm_id_priv->id.service_id))
687 			node = node->rb_left;
688 		else if (be64_gt(service_id, cm_id_priv->id.service_id))
689 			node = node->rb_right;
690 		else
691 			node = node->rb_right;
692 	}
693 	return NULL;
694 }
695 
696 static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
697 						     *timewait_info)
698 {
699 	struct rb_node **link = &cm.remote_id_table.rb_node;
700 	struct rb_node *parent = NULL;
701 	struct cm_timewait_info *cur_timewait_info;
702 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
703 	__be32 remote_id = timewait_info->work.remote_id;
704 
705 	while (*link) {
706 		parent = *link;
707 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
708 					     remote_id_node);
709 		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
710 			link = &(*link)->rb_left;
711 		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
712 			link = &(*link)->rb_right;
713 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
714 			link = &(*link)->rb_left;
715 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
716 			link = &(*link)->rb_right;
717 		else
718 			return cur_timewait_info;
719 	}
720 	timewait_info->inserted_remote_id = 1;
721 	rb_link_node(&timewait_info->remote_id_node, parent, link);
722 	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
723 	return NULL;
724 }
725 
726 static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
727 						   __be32 remote_id)
728 {
729 	struct rb_node *node = cm.remote_id_table.rb_node;
730 	struct cm_timewait_info *timewait_info;
731 
732 	while (node) {
733 		timewait_info = rb_entry(node, struct cm_timewait_info,
734 					 remote_id_node);
735 		if (be32_lt(remote_id, timewait_info->work.remote_id))
736 			node = node->rb_left;
737 		else if (be32_gt(remote_id, timewait_info->work.remote_id))
738 			node = node->rb_right;
739 		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
740 			node = node->rb_left;
741 		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
742 			node = node->rb_right;
743 		else
744 			return timewait_info;
745 	}
746 	return NULL;
747 }
748 
749 static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
750 						      *timewait_info)
751 {
752 	struct rb_node **link = &cm.remote_qp_table.rb_node;
753 	struct rb_node *parent = NULL;
754 	struct cm_timewait_info *cur_timewait_info;
755 	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
756 	__be32 remote_qpn = timewait_info->remote_qpn;
757 
758 	while (*link) {
759 		parent = *link;
760 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
761 					     remote_qp_node);
762 		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
763 			link = &(*link)->rb_left;
764 		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
765 			link = &(*link)->rb_right;
766 		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
767 			link = &(*link)->rb_left;
768 		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
769 			link = &(*link)->rb_right;
770 		else
771 			return cur_timewait_info;
772 	}
773 	timewait_info->inserted_remote_qp = 1;
774 	rb_link_node(&timewait_info->remote_qp_node, parent, link);
775 	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
776 	return NULL;
777 }
778 
779 static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
780 						    *cm_id_priv)
781 {
782 	struct rb_node **link = &cm.remote_sidr_table.rb_node;
783 	struct rb_node *parent = NULL;
784 	struct cm_id_private *cur_cm_id_priv;
785 	union ib_gid *port_gid = &cm_id_priv->av.dgid;
786 	__be32 remote_id = cm_id_priv->id.remote_id;
787 
788 	while (*link) {
789 		parent = *link;
790 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
791 					  sidr_id_node);
792 		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
793 			link = &(*link)->rb_left;
794 		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
795 			link = &(*link)->rb_right;
796 		else {
797 			int cmp;
798 			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
799 				     sizeof *port_gid);
800 			if (cmp < 0)
801 				link = &(*link)->rb_left;
802 			else if (cmp > 0)
803 				link = &(*link)->rb_right;
804 			else
805 				return cur_cm_id_priv;
806 		}
807 	}
808 	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
809 	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
810 	return NULL;
811 }
812 
813 static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
814 			       enum ib_cm_sidr_status status)
815 {
816 	struct ib_cm_sidr_rep_param param;
817 
818 	memset(&param, 0, sizeof param);
819 	param.status = status;
820 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
821 }
822 
823 struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
824 				 ib_cm_handler cm_handler,
825 				 void *context)
826 {
827 	struct cm_id_private *cm_id_priv;
828 	int ret;
829 
830 	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
831 	if (!cm_id_priv)
832 		return ERR_PTR(-ENOMEM);
833 
834 	cm_id_priv->id.state = IB_CM_IDLE;
835 	cm_id_priv->id.device = device;
836 	cm_id_priv->id.cm_handler = cm_handler;
837 	cm_id_priv->id.context = context;
838 	cm_id_priv->id.remote_cm_qpn = 1;
839 	ret = cm_alloc_id(cm_id_priv);
840 	if (ret)
841 		goto error;
842 
843 	spin_lock_init(&cm_id_priv->lock);
844 	init_completion(&cm_id_priv->comp);
845 	INIT_LIST_HEAD(&cm_id_priv->work_list);
846 	INIT_LIST_HEAD(&cm_id_priv->prim_list);
847 	INIT_LIST_HEAD(&cm_id_priv->altr_list);
848 	atomic_set(&cm_id_priv->work_count, -1);
849 	refcount_set(&cm_id_priv->refcount, 1);
850 	return &cm_id_priv->id;
851 
852 error:
853 	kfree(cm_id_priv);
854 	return ERR_PTR(-ENOMEM);
855 }
856 EXPORT_SYMBOL(ib_create_cm_id);
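
/*
 * Minimal usage sketch (illustrative only; my_cm_handler and my_context are
 * placeholders supplied by the consumer):
 *
 *	struct ib_cm_id *id;
 *
 *	id = ib_create_cm_id(device, my_cm_handler, my_context);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	...
 *	ib_destroy_cm_id(id);
 */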
857 
858 static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
859 {
860 	struct cm_work *work;
861 
862 	if (list_empty(&cm_id_priv->work_list))
863 		return NULL;
864 
865 	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
866 	list_del(&work->list);
867 	return work;
868 }
869 
870 static void cm_free_work(struct cm_work *work)
871 {
872 	if (work->mad_recv_wc)
873 		ib_free_recv_mad(work->mad_recv_wc);
874 	kfree(work);
875 }
876 
877 static inline int cm_convert_to_ms(int iba_time)
878 {
879 	/* approximate conversion to ms from 4.096us x 2^iba_time */
880 	return 1 << max(iba_time - 8, 0);
881 }
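
/*
 * Worked example: an IBA timeout exponent of 20 encodes 4.096us * 2^20,
 * i.e. roughly 4295 ms; the shift above approximates this as
 * 1 << (20 - 8) = 4096 ms, which is close enough for CM retry timers.
 */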
882 
883 /*
884  * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
885  * Because of how ack_timeout is stored, adding one doubles the timeout.
886  * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
887  * increment it (round up) only if the other is within 50%.
888  */
889 static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
890 {
891 	int ack_timeout = packet_life_time + 1;
892 
893 	if (ack_timeout >= ca_ack_delay)
894 		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
895 	else
896 		ack_timeout = ca_ack_delay +
897 			      (ack_timeout >= (ca_ack_delay - 1));
898 
899 	return min(31, ack_timeout);
900 }
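
/*
 * Worked example: with ca_ack_delay = 16 and packet_life_time = 14 the
 * initial ack_timeout is 15; since 15 < 16 the ack_delay branch is taken
 * and rounded up because 15 >= 16 - 1, giving min(31, 17) = 17.
 */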
901 
902 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
903 {
904 	if (timewait_info->inserted_remote_id) {
905 		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
906 		timewait_info->inserted_remote_id = 0;
907 	}
908 
909 	if (timewait_info->inserted_remote_qp) {
910 		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
911 		timewait_info->inserted_remote_qp = 0;
912 	}
913 }
914 
915 static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
916 {
917 	struct cm_timewait_info *timewait_info;
918 
919 	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
920 	if (!timewait_info)
921 		return ERR_PTR(-ENOMEM);
922 
923 	timewait_info->work.local_id = local_id;
924 	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
925 	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
926 	return timewait_info;
927 }
928 
929 static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
930 {
931 	int wait_time;
932 	unsigned long flags;
933 	struct cm_device *cm_dev;
934 
935 	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
936 	if (!cm_dev)
937 		return;
938 
939 	spin_lock_irqsave(&cm.lock, flags);
940 	cm_cleanup_timewait(cm_id_priv->timewait_info);
941 	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
942 	spin_unlock_irqrestore(&cm.lock, flags);
943 
944 	/*
945 	 * The cm_id could be destroyed by the user before we exit timewait.
946 	 * To protect against this, we search for the cm_id after exiting
947 	 * timewait before notifying the user that we've exited timewait.
948 	 */
949 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
950 	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
951 
952 	/* Check if the device started its remove_one */
953 	spin_lock_irqsave(&cm.lock, flags);
954 	if (!cm_dev->going_down)
955 		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
956 				   msecs_to_jiffies(wait_time));
957 	spin_unlock_irqrestore(&cm.lock, flags);
958 
959 	cm_id_priv->timewait_info = NULL;
960 }
961 
962 static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
963 {
964 	unsigned long flags;
965 
966 	cm_id_priv->id.state = IB_CM_IDLE;
967 	if (cm_id_priv->timewait_info) {
968 		spin_lock_irqsave(&cm.lock, flags);
969 		cm_cleanup_timewait(cm_id_priv->timewait_info);
970 		spin_unlock_irqrestore(&cm.lock, flags);
971 		kfree(cm_id_priv->timewait_info);
972 		cm_id_priv->timewait_info = NULL;
973 	}
974 }
975 
976 static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
977 {
978 	struct cm_id_private *cm_id_priv;
979 	struct cm_work *work;
980 
981 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
982 retest:
983 	spin_lock_irq(&cm_id_priv->lock);
984 	switch (cm_id->state) {
985 	case IB_CM_LISTEN:
986 		spin_unlock_irq(&cm_id_priv->lock);
987 
988 		spin_lock_irq(&cm.lock);
989 		if (--cm_id_priv->listen_sharecount > 0) {
990 			/* The id is still shared. */
991 			cm_deref_id(cm_id_priv);
992 			spin_unlock_irq(&cm.lock);
993 			return;
994 		}
995 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
996 		spin_unlock_irq(&cm.lock);
997 		break;
998 	case IB_CM_SIDR_REQ_SENT:
999 		cm_id->state = IB_CM_IDLE;
1000 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1001 		spin_unlock_irq(&cm_id_priv->lock);
1002 		break;
1003 	case IB_CM_SIDR_REQ_RCVD:
1004 		spin_unlock_irq(&cm_id_priv->lock);
1005 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
1006 		spin_lock_irq(&cm.lock);
1007 		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
1008 			rb_erase(&cm_id_priv->sidr_id_node,
1009 				 &cm.remote_sidr_table);
1010 		spin_unlock_irq(&cm.lock);
1011 		break;
1012 	case IB_CM_REQ_SENT:
1013 	case IB_CM_MRA_REQ_RCVD:
1014 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1015 		spin_unlock_irq(&cm_id_priv->lock);
1016 		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
1017 			       &cm_id_priv->id.device->node_guid,
1018 			       sizeof cm_id_priv->id.device->node_guid,
1019 			       NULL, 0);
1020 		break;
1021 	case IB_CM_REQ_RCVD:
1022 		if (err == -ENOMEM) {
1023 			/* Do not send a REJ, to allow future retries. */
1024 			cm_reset_to_idle(cm_id_priv);
1025 			spin_unlock_irq(&cm_id_priv->lock);
1026 		} else {
1027 			spin_unlock_irq(&cm_id_priv->lock);
1028 			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1029 				       NULL, 0, NULL, 0);
1030 		}
1031 		break;
1032 	case IB_CM_REP_SENT:
1033 	case IB_CM_MRA_REP_RCVD:
1034 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1035 		/* Fall through */
1036 	case IB_CM_MRA_REQ_SENT:
1037 	case IB_CM_REP_RCVD:
1038 	case IB_CM_MRA_REP_SENT:
1039 		spin_unlock_irq(&cm_id_priv->lock);
1040 		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
1041 			       NULL, 0, NULL, 0);
1042 		break;
1043 	case IB_CM_ESTABLISHED:
1044 		spin_unlock_irq(&cm_id_priv->lock);
1045 		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
1046 			break;
1047 		ib_send_cm_dreq(cm_id, NULL, 0);
1048 		goto retest;
1049 	case IB_CM_DREQ_SENT:
1050 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1051 		cm_enter_timewait(cm_id_priv);
1052 		spin_unlock_irq(&cm_id_priv->lock);
1053 		break;
1054 	case IB_CM_DREQ_RCVD:
1055 		spin_unlock_irq(&cm_id_priv->lock);
1056 		ib_send_cm_drep(cm_id, NULL, 0);
1057 		break;
1058 	default:
1059 		spin_unlock_irq(&cm_id_priv->lock);
1060 		break;
1061 	}
1062 
1063 	spin_lock_irq(&cm.lock);
1064 	if (!list_empty(&cm_id_priv->altr_list) &&
1065 	    (!cm_id_priv->altr_send_port_not_ready))
1066 		list_del(&cm_id_priv->altr_list);
1067 	if (!list_empty(&cm_id_priv->prim_list) &&
1068 	    (!cm_id_priv->prim_send_port_not_ready))
1069 		list_del(&cm_id_priv->prim_list);
1070 	spin_unlock_irq(&cm.lock);
1071 
1072 	cm_free_id(cm_id->local_id);
1073 	cm_deref_id(cm_id_priv);
1074 	wait_for_completion(&cm_id_priv->comp);
1075 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
1076 		cm_free_work(work);
1077 
1078 	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
1079 	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
1080 	kfree(cm_id_priv->private_data);
1081 	kfree_rcu(cm_id_priv, rcu);
1082 }
1083 
1084 void ib_destroy_cm_id(struct ib_cm_id *cm_id)
1085 {
1086 	cm_destroy_id(cm_id, 0);
1087 }
1088 EXPORT_SYMBOL(ib_destroy_cm_id);
1089 
1090 /**
1091  * __ib_cm_listen - Initiates listening on the specified service ID for
1092  *   connection and service ID resolution requests.
1093  * @cm_id: Connection identifier associated with the listen request.
1094  * @service_id: Service identifier matched against incoming connection
1095  *   and service ID resolution requests.  The service ID should be specified
1096  *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1097  *   assign a service ID to the caller.
1098  * @service_mask: Mask applied to service ID used to listen across a
1099  *   range of service IDs.  If set to 0, the service ID is matched
1100  *   exactly.  This parameter is ignored if %service_id is set to
1101  *   IB_CM_ASSIGN_SERVICE_ID.
1102  */
1103 static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
1104 			  __be64 service_mask)
1105 {
1106 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
1107 	int ret = 0;
1108 
1109 	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
1110 	service_id &= service_mask;
1111 	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
1112 	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
1113 		return -EINVAL;
1114 
1115 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1116 	if (cm_id->state != IB_CM_IDLE)
1117 		return -EINVAL;
1118 
1119 	cm_id->state = IB_CM_LISTEN;
1120 	++cm_id_priv->listen_sharecount;
1121 
1122 	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
1123 		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
1124 		cm_id->service_mask = ~cpu_to_be64(0);
1125 	} else {
1126 		cm_id->service_id = service_id;
1127 		cm_id->service_mask = service_mask;
1128 	}
1129 	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
1130 
1131 	if (cur_cm_id_priv) {
1132 		cm_id->state = IB_CM_IDLE;
1133 		--cm_id_priv->listen_sharecount;
1134 		ret = -EBUSY;
1135 	}
1136 	return ret;
1137 }
1138 
1139 int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
1140 {
1141 	unsigned long flags;
1142 	int ret;
1143 
1144 	spin_lock_irqsave(&cm.lock, flags);
1145 	ret = __ib_cm_listen(cm_id, service_id, service_mask);
1146 	spin_unlock_irqrestore(&cm.lock, flags);
1147 
1148 	return ret;
1149 }
1150 EXPORT_SYMBOL(ib_cm_listen);
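
/*
 * Illustrative call (hypothetical service ID): listening for exact matches
 * on service ID 0x1000 can be done by passing a zero mask, which
 * __ib_cm_listen() expands to an all-ones mask:
 *
 *	ret = ib_cm_listen(cm_id, cpu_to_be64(0x1000), 0);
 */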
1151 
1152 /**
1153  * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on the given service ID.
1154  *
1155  * If there's an existing ID listening on that same device and service ID,
1156  * return it.
1157  *
1158  * @device: Device associated with the cm_id.  All related communication will
1159  * be associated with the specified device.
1160  * @cm_handler: Callback invoked to notify the user of CM events.
1161  * @service_id: Service identifier matched against incoming connection
1162  *   and service ID resolution requests.  The service ID should be specified
1163  *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
1164  *   assign a service ID to the caller.
1165  *
1166  * Callers should call ib_destroy_cm_id when done with the listener ID.
1167  */
1168 struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
1169 				     ib_cm_handler cm_handler,
1170 				     __be64 service_id)
1171 {
1172 	struct cm_id_private *cm_id_priv;
1173 	struct ib_cm_id *cm_id;
1174 	unsigned long flags;
1175 	int err = 0;
1176 
1177 	/* Create an ID in advance, since the creation may sleep */
1178 	cm_id = ib_create_cm_id(device, cm_handler, NULL);
1179 	if (IS_ERR(cm_id))
1180 		return cm_id;
1181 
1182 	spin_lock_irqsave(&cm.lock, flags);
1183 
1184 	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
1185 		goto new_id;
1186 
1187 	/* Find an existing ID */
1188 	cm_id_priv = cm_find_listen(device, service_id);
1189 	if (cm_id_priv) {
1190 		if (cm_id->cm_handler != cm_handler || cm_id->context) {
1191 			/* Sharing an ib_cm_id with different handlers is not
1192 			 * supported */
1193 			spin_unlock_irqrestore(&cm.lock, flags);
1194 			ib_destroy_cm_id(cm_id);
1195 			return ERR_PTR(-EINVAL);
1196 		}
1197 		refcount_inc(&cm_id_priv->refcount);
1198 		++cm_id_priv->listen_sharecount;
1199 		spin_unlock_irqrestore(&cm.lock, flags);
1200 
1201 		ib_destroy_cm_id(cm_id);
1202 		cm_id = &cm_id_priv->id;
1203 		return cm_id;
1204 	}
1205 
1206 new_id:
1207 	/* Use newly created ID */
1208 	err = __ib_cm_listen(cm_id, service_id, 0);
1209 
1210 	spin_unlock_irqrestore(&cm.lock, flags);
1211 
1212 	if (err) {
1213 		ib_destroy_cm_id(cm_id);
1214 		return ERR_PTR(err);
1215 	}
1216 	return cm_id;
1217 }
1218 EXPORT_SYMBOL(ib_cm_insert_listen);
1219 
1220 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1221 {
1222 	u64 hi_tid, low_tid;
1223 
1224 	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1225 	low_tid  = (u64)cm_id_priv->id.local_id;
1226 	return cpu_to_be64(hi_tid | low_tid);
1227 }
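
/*
 * The transaction ID built above places the MAD agent's hi_tid in the upper
 * 32 bits and the raw local_id bits in the lower 32 bits, so replies carrying
 * this TID can be associated with the originating cm_id.
 */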
1228 
1229 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1230 			      __be16 attr_id, __be64 tid)
1231 {
1232 	hdr->base_version  = IB_MGMT_BASE_VERSION;
1233 	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
1234 	hdr->class_version = IB_CM_CLASS_VERSION;
1235 	hdr->method	   = IB_MGMT_METHOD_SEND;
1236 	hdr->attr_id	   = attr_id;
1237 	hdr->tid	   = tid;
1238 }
1239 
1240 static void cm_format_req(struct cm_req_msg *req_msg,
1241 			  struct cm_id_private *cm_id_priv,
1242 			  struct ib_cm_req_param *param)
1243 {
1244 	struct sa_path_rec *pri_path = param->primary_path;
1245 	struct sa_path_rec *alt_path = param->alternate_path;
1246 	bool pri_ext = false;
1247 
1248 	if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
1249 		pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
1250 					      pri_path->opa.slid);
1251 
1252 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1253 			  cm_form_tid(cm_id_priv));
1254 
1255 	IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
1256 		be32_to_cpu(cm_id_priv->id.local_id));
1257 	IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
1258 	IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
1259 		be64_to_cpu(cm_id_priv->id.device->node_guid));
1260 	IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
1261 	IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
1262 	IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
1263 		param->remote_cm_response_timeout);
1264 	cm_req_set_qp_type(req_msg, param->qp_type);
1265 	IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
1266 	IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
1267 	IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
1268 		param->local_cm_response_timeout);
1269 	IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
1270 		be16_to_cpu(param->primary_path->pkey));
1271 	IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
1272 		param->primary_path->mtu);
1273 	IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
1274 
1275 	if (param->qp_type != IB_QPT_XRC_INI) {
1276 		IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
1277 			param->responder_resources);
1278 		IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
1279 		IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
1280 			param->rnr_retry_count);
1281 		IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
1282 	}
1283 
1284 	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
1285 		pri_path->sgid;
1286 	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
1287 		pri_path->dgid;
1288 	if (pri_ext) {
1289 		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
1290 			->global.interface_id =
1291 			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
1292 		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
1293 			->global.interface_id =
1294 			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
1295 	}
1296 	if (pri_path->hop_limit <= 1) {
1297 		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1298 			be16_to_cpu(pri_ext ? 0 :
1299 					      htons(ntohl(sa_path_get_slid(
1300 						      pri_path)))));
1301 		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1302 			be16_to_cpu(pri_ext ? 0 :
1303 					      htons(ntohl(sa_path_get_dlid(
1304 						      pri_path)))));
1305 	} else {
1306 		/* Work-around until there's a way to obtain remote LID info */
1307 		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1308 			be16_to_cpu(IB_LID_PERMISSIVE));
1309 		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1310 			be16_to_cpu(IB_LID_PERMISSIVE));
1311 	}
1312 	IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
1313 		be32_to_cpu(pri_path->flow_label));
1314 	IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
1315 	IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
1316 	IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
1317 	IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
1318 	IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
1319 		(pri_path->hop_limit <= 1));
1320 	IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
1321 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1322 			       pri_path->packet_life_time));
1323 
1324 	if (alt_path) {
1325 		bool alt_ext = false;
1326 
1327 		if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
1328 			alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
1329 						      alt_path->opa.slid);
1330 
1331 		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
1332 			alt_path->sgid;
1333 		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
1334 			alt_path->dgid;
1335 		if (alt_ext) {
1336 			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
1337 					req_msg)
1338 				->global.interface_id =
1339 				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
1340 			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
1341 					req_msg)
1342 				->global.interface_id =
1343 				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
1344 		}
1345 		if (alt_path->hop_limit <= 1) {
1346 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1347 				be16_to_cpu(
1348 					alt_ext ? 0 :
1349 						  htons(ntohl(sa_path_get_slid(
1350 							  alt_path)))));
1351 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1352 				be16_to_cpu(
1353 					alt_ext ? 0 :
1354 						  htons(ntohl(sa_path_get_dlid(
1355 							  alt_path)))));
1356 		} else {
1357 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1358 				be16_to_cpu(IB_LID_PERMISSIVE));
1359 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1360 				be16_to_cpu(IB_LID_PERMISSIVE));
1361 		}
1362 		IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
1363 			be32_to_cpu(alt_path->flow_label));
1364 		IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
1365 		IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
1366 			alt_path->traffic_class);
1367 		IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
1368 			alt_path->hop_limit);
1369 		IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
1370 		IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
1371 			(alt_path->hop_limit <= 1));
1372 		IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
1373 			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1374 				       alt_path->packet_life_time));
1375 	}
1376 
1377 	if (param->private_data && param->private_data_len)
1378 		IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
1379 			    param->private_data_len);
1380 }
1381 
1382 static int cm_validate_req_param(struct ib_cm_req_param *param)
1383 {
1384 	/* peer-to-peer not supported */
1385 	if (param->peer_to_peer)
1386 		return -EINVAL;
1387 
1388 	if (!param->primary_path)
1389 		return -EINVAL;
1390 
1391 	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1392 	    param->qp_type != IB_QPT_XRC_INI)
1393 		return -EINVAL;
1394 
1395 	if (param->private_data &&
1396 	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1397 		return -EINVAL;
1398 
1399 	if (param->alternate_path &&
1400 	    (param->alternate_path->pkey != param->primary_path->pkey ||
1401 	     param->alternate_path->mtu != param->primary_path->mtu))
1402 		return -EINVAL;
1403 
1404 	return 0;
1405 }
1406 
1407 int ib_send_cm_req(struct ib_cm_id *cm_id,
1408 		   struct ib_cm_req_param *param)
1409 {
1410 	struct cm_id_private *cm_id_priv;
1411 	struct cm_req_msg *req_msg;
1412 	unsigned long flags;
1413 	int ret;
1414 
1415 	ret = cm_validate_req_param(param);
1416 	if (ret)
1417 		return ret;
1418 
1419 	/* Verify that we're not in timewait. */
1420 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1421 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1422 	if (cm_id->state != IB_CM_IDLE) {
1423 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1424 		ret = -EINVAL;
1425 		goto out;
1426 	}
1427 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1428 
1429 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1430 							    id.local_id);
1431 	if (IS_ERR(cm_id_priv->timewait_info)) {
1432 		ret = PTR_ERR(cm_id_priv->timewait_info);
1433 		goto out;
1434 	}
1435 
1436 	ret = cm_init_av_by_path(param->primary_path,
1437 				 param->ppath_sgid_attr, &cm_id_priv->av,
1438 				 cm_id_priv);
1439 	if (ret)
1440 		goto error1;
1441 	if (param->alternate_path) {
1442 		ret = cm_init_av_by_path(param->alternate_path, NULL,
1443 					 &cm_id_priv->alt_av, cm_id_priv);
1444 		if (ret)
1445 			goto error1;
1446 	}
1447 	cm_id->service_id = param->service_id;
1448 	cm_id->service_mask = ~cpu_to_be64(0);
1449 	cm_id_priv->timeout_ms = cm_convert_to_ms(
1450 				    param->primary_path->packet_life_time) * 2 +
1451 				 cm_convert_to_ms(
1452 				    param->remote_cm_response_timeout);
1453 	cm_id_priv->max_cm_retries = param->max_cm_retries;
1454 	cm_id_priv->initiator_depth = param->initiator_depth;
1455 	cm_id_priv->responder_resources = param->responder_resources;
1456 	cm_id_priv->retry_count = param->retry_count;
1457 	cm_id_priv->path_mtu = param->primary_path->mtu;
1458 	cm_id_priv->pkey = param->primary_path->pkey;
1459 	cm_id_priv->qp_type = param->qp_type;
1460 
1461 	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1462 	if (ret)
1463 		goto error1;
1464 
1465 	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1466 	cm_format_req(req_msg, cm_id_priv, param);
1467 	cm_id_priv->tid = req_msg->hdr.tid;
1468 	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1469 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1470 
1471 	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
1472 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
1473 
1474 	spin_lock_irqsave(&cm_id_priv->lock, flags);
1475 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1476 	if (ret) {
1477 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1478 		goto error2;
1479 	}
1480 	BUG_ON(cm_id->state != IB_CM_IDLE);
1481 	cm_id->state = IB_CM_REQ_SENT;
1482 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1483 	return 0;
1484 
1485 error2:	cm_free_msg(cm_id_priv->msg);
1486 error1:	kfree(cm_id_priv->timewait_info);
1487 out:	return ret;
1488 }
1489 EXPORT_SYMBOL(ib_send_cm_req);
1490 
1491 static int cm_issue_rej(struct cm_port *port,
1492 			struct ib_mad_recv_wc *mad_recv_wc,
1493 			enum ib_cm_rej_reason reason,
1494 			enum cm_msg_response msg_rejected,
1495 			void *ari, u8 ari_length)
1496 {
1497 	struct ib_mad_send_buf *msg = NULL;
1498 	struct cm_rej_msg *rej_msg, *rcv_msg;
1499 	int ret;
1500 
1501 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1502 	if (ret)
1503 		return ret;
1504 
1505 	/* We just need common CM header information.  Cast to any message. */
1506 	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1507 	rej_msg = (struct cm_rej_msg *) msg->mad;
1508 
1509 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1510 	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
1511 		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
1512 	IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1513 		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
1514 	IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
1515 	IBA_SET(CM_REJ_REASON, rej_msg, reason);
1516 
1517 	if (ari && ari_length) {
1518 		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
1519 		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
1520 	}
1521 
1522 	ret = ib_post_send_mad(msg, NULL);
1523 	if (ret)
1524 		cm_free_msg(msg);
1525 
1526 	return ret;
1527 }
1528 
1529 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
1530 {
1531 	return ((cpu_to_be16(
1532 			IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
1533 		(ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
1534 					       req_msg))));
1535 }
1536 
1537 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
1538 				 struct sa_path_rec *path, union ib_gid *gid)
1539 {
1540 	if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
1541 		path->rec_type = SA_PATH_REC_TYPE_OPA;
1542 	else
1543 		path->rec_type = SA_PATH_REC_TYPE_IB;
1544 }
1545 
1546 static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
1547 					struct sa_path_rec *primary_path,
1548 					struct sa_path_rec *alt_path)
1549 {
1550 	u32 lid;
1551 
1552 	if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1553 		sa_path_set_dlid(primary_path,
1554 				 IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
1555 					 req_msg));
1556 		sa_path_set_slid(primary_path,
1557 				 IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
1558 					 req_msg));
1559 	} else {
1560 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1561 			CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
1562 		sa_path_set_dlid(primary_path, lid);
1563 
1564 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1565 			CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
1566 		sa_path_set_slid(primary_path, lid);
1567 	}
1568 
1569 	if (!cm_req_has_alt_path(req_msg))
1570 		return;
1571 
1572 	if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1573 		sa_path_set_dlid(alt_path,
1574 				 IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
1575 					 req_msg));
1576 		sa_path_set_slid(alt_path,
1577 				 IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
1578 					 req_msg));
1579 	} else {
1580 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1581 			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
1582 		sa_path_set_dlid(alt_path, lid);
1583 
1584 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
1585 			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
1586 		sa_path_set_slid(alt_path, lid);
1587 	}
1588 }
1589 
1590 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1591 				     struct sa_path_rec *primary_path,
1592 				     struct sa_path_rec *alt_path)
1593 {
1594 	primary_path->dgid =
1595 		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
1596 	primary_path->sgid =
1597 		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
1598 	primary_path->flow_label =
1599 		cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
1600 	primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
1601 	primary_path->traffic_class =
1602 		IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
1603 	primary_path->reversible = 1;
1604 	primary_path->pkey =
1605 		cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
1606 	primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
1607 	primary_path->mtu_selector = IB_SA_EQ;
1608 	primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
1609 	primary_path->rate_selector = IB_SA_EQ;
1610 	primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
1611 	primary_path->packet_life_time_selector = IB_SA_EQ;
1612 	primary_path->packet_life_time =
1613 		IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
1614 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1615 	primary_path->service_id =
1616 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
1617 	if (sa_path_is_roce(primary_path))
1618 		primary_path->roce.route_resolved = false;
1619 
1620 	if (cm_req_has_alt_path(req_msg)) {
1621 		alt_path->dgid = *IBA_GET_MEM_PTR(
1622 			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
1623 		alt_path->sgid = *IBA_GET_MEM_PTR(
1624 			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
1625 		alt_path->flow_label = cpu_to_be32(
1626 			IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
1627 		alt_path->hop_limit =
1628 			IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
1629 		alt_path->traffic_class =
1630 			IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
1631 		alt_path->reversible = 1;
1632 		alt_path->pkey =
1633 			cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
1634 		alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
1635 		alt_path->mtu_selector = IB_SA_EQ;
1636 		alt_path->mtu =
1637 			IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
1638 		alt_path->rate_selector = IB_SA_EQ;
1639 		alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
1640 		alt_path->packet_life_time_selector = IB_SA_EQ;
1641 		alt_path->packet_life_time =
1642 			IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
1643 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1644 		alt_path->service_id =
1645 			cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
1646 
1647 		if (sa_path_is_roce(alt_path))
1648 			alt_path->roce.route_resolved = false;
1649 	}
1650 	cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
1651 }
1652 
1653 static u16 cm_get_bth_pkey(struct cm_work *work)
1654 {
1655 	struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1656 	u8 port_num = work->port->port_num;
1657 	u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1658 	u16 pkey;
1659 	int ret;
1660 
1661 	ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1662 	if (ret) {
1663 		dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1664 				     port_num, pkey_index, ret);
1665 		return 0;
1666 	}
1667 
1668 	return pkey;
1669 }
1670 
1671 /**
1672  * cm_opa_to_ib_sgid - Convert an OPA SGID to an IB SGID
1673  * ULPs (such as IPoIB) do not understand OPA GIDs and will
1674  * reject them as the local_gid will not match the sgid. Therefore,
1675  * change the pathrec's SGID to an IB SGID.
1676  *
1677  * @work: Work completion
1678  * @path: Path record
1679  */
1680 static void cm_opa_to_ib_sgid(struct cm_work *work,
1681 			      struct sa_path_rec *path)
1682 {
1683 	struct ib_device *dev = work->port->cm_dev->ib_device;
1684 	u8 port_num = work->port->port_num;
1685 
1686 	if (rdma_cap_opa_ah(dev, port_num) &&
1687 	    (ib_is_opa_gid(&path->sgid))) {
1688 		union ib_gid sgid;
1689 
1690 		if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1691 			dev_warn(&dev->dev,
1692 				 "Error updating sgid in CM request\n");
1693 			return;
1694 		}
1695 
1696 		path->sgid = sgid;
1697 	}
1698 }
1699 
1700 static void cm_format_req_event(struct cm_work *work,
1701 				struct cm_id_private *cm_id_priv,
1702 				struct ib_cm_id *listen_id)
1703 {
1704 	struct cm_req_msg *req_msg;
1705 	struct ib_cm_req_event_param *param;
1706 
1707 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1708 	param = &work->cm_event.param.req_rcvd;
1709 	param->listen_id = listen_id;
1710 	param->bth_pkey = cm_get_bth_pkey(work);
1711 	param->port = cm_id_priv->av.port->port_num;
1712 	param->primary_path = &work->path[0];
1713 	cm_opa_to_ib_sgid(work, param->primary_path);
1714 	if (cm_req_has_alt_path(req_msg)) {
1715 		param->alternate_path = &work->path[1];
1716 		cm_opa_to_ib_sgid(work, param->alternate_path);
1717 	} else {
1718 		param->alternate_path = NULL;
1719 	}
1720 	param->remote_ca_guid =
1721 		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
1722 	param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
1723 	param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
1724 	param->qp_type = cm_req_get_qp_type(req_msg);
1725 	param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
1726 	param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
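	/*
	 * The requester's initiator depth is what we must support as
	 * responder resources, and vice versa, so the two REQ fields are
	 * deliberately swapped when reported to the listener.
	 */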
1727 	param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
1728 	param->local_cm_response_timeout =
1729 		IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
1730 	param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
1731 	param->remote_cm_response_timeout =
1732 		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
1733 	param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
1734 	param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
1735 	param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
1736 	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1737 	work->cm_event.private_data =
1738 		IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
1739 }
1740 
1741 static void cm_process_work(struct cm_id_private *cm_id_priv,
1742 			    struct cm_work *work)
1743 {
1744 	int ret;
1745 
1746 	/* We will typically only have the current event to report. */
1747 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1748 	cm_free_work(work);
1749 
1750 	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
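	/*
	 * Drain any events that were queued while this one was outstanding;
	 * work_count going negative means the queue is empty.  A non-zero
	 * handler return stops processing and destroys the cm_id below.
	 */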
1751 		spin_lock_irq(&cm_id_priv->lock);
1752 		work = cm_dequeue_work(cm_id_priv);
1753 		spin_unlock_irq(&cm_id_priv->lock);
1754 		if (!work)
1755 			return;
1756 
1757 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1758 						&work->cm_event);
1759 		cm_free_work(work);
1760 	}
1761 	cm_deref_id(cm_id_priv);
1762 	if (ret)
1763 		cm_destroy_id(&cm_id_priv->id, ret);
1764 }
1765 
1766 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1767 			  struct cm_id_private *cm_id_priv,
1768 			  enum cm_msg_response msg_mraed, u8 service_timeout,
1769 			  const void *private_data, u8 private_data_len)
1770 {
1771 	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1772 	IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
1773 	IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
1774 		be32_to_cpu(cm_id_priv->id.local_id));
1775 	IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
1776 		be32_to_cpu(cm_id_priv->id.remote_id));
1777 	IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
1778 
1779 	if (private_data && private_data_len)
1780 		IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
1781 			    private_data_len);
1782 }
1783 
1784 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1785 			  struct cm_id_private *cm_id_priv,
1786 			  enum ib_cm_rej_reason reason,
1787 			  void *ari,
1788 			  u8 ari_length,
1789 			  const void *private_data,
1790 			  u8 private_data_len)
1791 {
1792 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1793 	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
1794 		be32_to_cpu(cm_id_priv->id.remote_id));
1795 
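	/*
	 * In the REQ_RCVD state our local communication ID has not yet been
	 * sent to the peer, so the REJ carries zero instead.
	 */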
1796 	switch (cm_id_priv->id.state) {
1797 	case IB_CM_REQ_RCVD:
1798 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, 0);
1799 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
1800 		break;
1801 	case IB_CM_MRA_REQ_SENT:
1802 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1803 			be32_to_cpu(cm_id_priv->id.local_id));
1804 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
1805 		break;
1806 	case IB_CM_REP_RCVD:
1807 	case IB_CM_MRA_REP_SENT:
1808 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1809 			be32_to_cpu(cm_id_priv->id.local_id));
1810 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
1811 		break;
1812 	default:
1813 		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
1814 			be32_to_cpu(cm_id_priv->id.local_id));
1815 		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
1816 			CM_MSG_RESPONSE_OTHER);
1817 		break;
1818 	}
1819 
1820 	IBA_SET(CM_REJ_REASON, rej_msg, reason);
1821 	if (ari && ari_length) {
1822 		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
1823 		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
1824 	}
1825 
1826 	if (private_data && private_data_len)
1827 		IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
1828 			    private_data_len);
1829 }
1830 
1831 static void cm_dup_req_handler(struct cm_work *work,
1832 			       struct cm_id_private *cm_id_priv)
1833 {
1834 	struct ib_mad_send_buf *msg = NULL;
1835 	int ret;
1836 
1837 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1838 			counter[CM_REQ_COUNTER]);
1839 
1840 	/* Quick state check to discard duplicate REQs. */
1841 	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1842 		return;
1843 
1844 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1845 	if (ret)
1846 		return;
1847 
1848 	spin_lock_irq(&cm_id_priv->lock);
1849 	switch (cm_id_priv->id.state) {
1850 	case IB_CM_MRA_REQ_SENT:
1851 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1852 			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1853 			      cm_id_priv->private_data,
1854 			      cm_id_priv->private_data_len);
1855 		break;
1856 	case IB_CM_TIMEWAIT:
1857 		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1858 			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1859 		break;
1860 	default:
1861 		goto unlock;
1862 	}
1863 	spin_unlock_irq(&cm_id_priv->lock);
1864 
1865 	ret = ib_post_send_mad(msg, NULL);
1866 	if (ret)
1867 		goto free;
1868 	return;
1869 
1870 unlock:	spin_unlock_irq(&cm_id_priv->lock);
1871 free:	cm_free_msg(msg);
1872 }
1873 
1874 static struct cm_id_private *cm_match_req(struct cm_work *work,
1875 					  struct cm_id_private *cm_id_priv)
1876 {
1877 	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1878 	struct cm_timewait_info *timewait_info;
1879 	struct cm_req_msg *req_msg;
1880 	struct ib_cm_id *cm_id;
1881 
1882 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1883 
1884 	/* Check for possible duplicate REQ. */
1885 	spin_lock_irq(&cm.lock);
1886 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1887 	if (timewait_info) {
1888 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
1889 					   timewait_info->work.remote_id);
1890 		spin_unlock_irq(&cm.lock);
1891 		if (cur_cm_id_priv) {
1892 			cm_dup_req_handler(work, cur_cm_id_priv);
1893 			cm_deref_id(cur_cm_id_priv);
1894 		}
1895 		return NULL;
1896 	}
1897 
1898 	/* Check for stale connections. */
1899 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1900 	if (timewait_info) {
1901 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1902 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
1903 					   timewait_info->work.remote_id);
1904 
1905 		spin_unlock_irq(&cm.lock);
1906 		cm_issue_rej(work->port, work->mad_recv_wc,
1907 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1908 			     NULL, 0);
1909 		if (cur_cm_id_priv) {
1910 			cm_id = &cur_cm_id_priv->id;
1911 			ib_send_cm_dreq(cm_id, NULL, 0);
1912 			cm_deref_id(cur_cm_id_priv);
1913 		}
1914 		return NULL;
1915 	}
1916 
1917 	/* Find matching listen request. */
1918 	listen_cm_id_priv = cm_find_listen(
1919 		cm_id_priv->id.device,
1920 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
1921 	if (!listen_cm_id_priv) {
1922 		cm_cleanup_timewait(cm_id_priv->timewait_info);
1923 		spin_unlock_irq(&cm.lock);
1924 		cm_issue_rej(work->port, work->mad_recv_wc,
1925 			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1926 			     NULL, 0);
1927 		goto out;
1928 	}
1929 	refcount_inc(&listen_cm_id_priv->refcount);
1930 	refcount_inc(&cm_id_priv->refcount);
1931 	cm_id_priv->id.state = IB_CM_REQ_RCVD;
1932 	atomic_inc(&cm_id_priv->work_count);
1933 	spin_unlock_irq(&cm.lock);
1934 out:
1935 	return listen_cm_id_priv;
1936 }
1937 
1938 /*
1939  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1940  * we need to override the LID/SL data in the REQ with the LID information
1941  * in the work completion.
1942  */
1943 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1944 {
1945 	if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
1946 		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
1947 					req_msg)) == IB_LID_PERMISSIVE) {
1948 			IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
1949 				be16_to_cpu(ib_lid_be16(wc->slid)));
1950 			IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
1951 		}
1952 
1953 		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
1954 					req_msg)) == IB_LID_PERMISSIVE)
1955 			IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
1956 				wc->dlid_path_bits);
1957 	}
1958 
1959 	if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
1960 		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
1961 					req_msg)) == IB_LID_PERMISSIVE) {
1962 			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
1963 				be16_to_cpu(ib_lid_be16(wc->slid)));
1964 			IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
1965 		}
1966 
1967 		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
1968 					req_msg)) == IB_LID_PERMISSIVE)
1969 			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
1970 				wc->dlid_path_bits);
1971 	}
1972 }
1973 
1974 static int cm_req_handler(struct cm_work *work)
1975 {
1976 	struct ib_cm_id *cm_id;
1977 	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1978 	struct cm_req_msg *req_msg;
1979 	const struct ib_global_route *grh;
1980 	const struct ib_gid_attr *gid_attr;
1981 	int ret;
1982 
1983 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1984 
1985 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1986 	if (IS_ERR(cm_id))
1987 		return PTR_ERR(cm_id);
1988 
1989 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1990 	cm_id_priv->id.remote_id =
1991 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
1992 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1993 				      work->mad_recv_wc->recv_buf.grh,
1994 				      &cm_id_priv->av);
1995 	if (ret)
1996 		goto destroy;
1997 	cm_id_priv->timewait_info =
1998 		cm_create_timewait_info(cm_id_priv->id.local_id);
1999 	if (IS_ERR(cm_id_priv->timewait_info)) {
2000 		ret = PTR_ERR(cm_id_priv->timewait_info);
2001 		goto destroy;
2002 	}
2003 	cm_id_priv->timewait_info->work.remote_id =
2004 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
2005 	cm_id_priv->timewait_info->remote_ca_guid =
2006 		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
2007 	cm_id_priv->timewait_info->remote_qpn =
2008 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
2009 
2010 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
2011 	if (!listen_cm_id_priv) {
2012 		pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
2013 			 be32_to_cpu(cm_id->local_id));
2014 		ret = -EINVAL;
2015 		goto free_timeinfo;
2016 	}
2017 
2018 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
2019 	cm_id_priv->id.context = listen_cm_id_priv->id.context;
2020 	cm_id_priv->id.service_id =
2021 		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
2022 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
2023 
2024 	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
2025 
2026 	memset(&work->path[0], 0, sizeof(work->path[0]));
2027 	if (cm_req_has_alt_path(req_msg))
2028 		memset(&work->path[1], 0, sizeof(work->path[1]));
2029 	grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
2030 	gid_attr = grh->sgid_attr;
2031 
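	/*
	 * On RoCE the path record type follows the receiving GID's type;
	 * otherwise it is derived from the primary local port GID carried
	 * in the REQ.
	 */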
2032 	if (gid_attr &&
2033 	    rdma_protocol_roce(work->port->cm_dev->ib_device,
2034 			       work->port->port_num)) {
2035 		work->path[0].rec_type =
2036 			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
2037 	} else {
2038 		cm_path_set_rec_type(
2039 			work->port->cm_dev->ib_device, work->port->port_num,
2040 			&work->path[0],
2041 			IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
2042 					req_msg));
2043 	}
2044 	if (cm_req_has_alt_path(req_msg))
2045 		work->path[1].rec_type = work->path[0].rec_type;
2046 	cm_format_paths_from_req(req_msg, &work->path[0],
2047 				 &work->path[1]);
2048 	if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
2049 		sa_path_set_dmac(&work->path[0],
2050 				 cm_id_priv->av.ah_attr.roce.dmac);
2051 	work->path[0].hop_limit = grh->hop_limit;
2052 	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
2053 				 cm_id_priv);
2054 	if (ret) {
2055 		int err;
2056 
2057 		err = rdma_query_gid(work->port->cm_dev->ib_device,
2058 				     work->port->port_num, 0,
2059 				     &work->path[0].sgid);
2060 		if (err)
2061 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2062 				       NULL, 0, NULL, 0);
2063 		else
2064 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2065 				       &work->path[0].sgid,
2066 				       sizeof(work->path[0].sgid),
2067 				       NULL, 0);
2068 		goto rejected;
2069 	}
2070 	if (cm_req_has_alt_path(req_msg)) {
2071 		ret = cm_init_av_by_path(&work->path[1], NULL,
2072 					 &cm_id_priv->alt_av, cm_id_priv);
2073 		if (ret) {
2074 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2075 				       &work->path[0].sgid,
2076 				       sizeof(work->path[0].sgid), NULL, 0);
2077 			goto rejected;
2078 		}
2079 	}
2080 	cm_id_priv->tid = req_msg->hdr.tid;
2081 	cm_id_priv->timeout_ms = cm_convert_to_ms(
2082 		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
2083 	cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
2084 	cm_id_priv->remote_qpn =
2085 		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
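	/*
	 * Mirror the REQ's responder resources / initiator depth into the
	 * local cm_id, swapped as in cm_format_req_event().
	 */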
2086 	cm_id_priv->initiator_depth =
2087 		IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
2088 	cm_id_priv->responder_resources =
2089 		IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
2090 	cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
2091 	cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
2092 	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
2093 	cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
2094 	cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
2095 	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
2096 
2097 	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
2098 	cm_process_work(cm_id_priv, work);
2099 	cm_deref_id(listen_cm_id_priv);
2100 	return 0;
2101 
2102 rejected:
2103 	refcount_dec(&cm_id_priv->refcount);
2104 	cm_deref_id(listen_cm_id_priv);
2105 free_timeinfo:
2106 	kfree(cm_id_priv->timewait_info);
2107 destroy:
2108 	ib_destroy_cm_id(cm_id);
2109 	return ret;
2110 }
2111 
2112 static void cm_format_rep(struct cm_rep_msg *rep_msg,
2113 			  struct cm_id_private *cm_id_priv,
2114 			  struct ib_cm_rep_param *param)
2115 {
2116 	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
2117 	IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
2118 		be32_to_cpu(cm_id_priv->id.local_id));
2119 	IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
2120 		be32_to_cpu(cm_id_priv->id.remote_id));
2121 	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
2122 	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
2123 		param->responder_resources);
2124 	IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
2125 		cm_id_priv->av.port->cm_dev->ack_delay);
2126 	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
2127 	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
2128 	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
2129 		be64_to_cpu(cm_id_priv->id.device->node_guid));
2130 
2131 	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
2132 		IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
2133 			param->initiator_depth);
2134 		IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
2135 			param->flow_control);
2136 		IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
2137 		IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
2138 	} else {
2139 		IBA_SET(CM_REP_SRQ, rep_msg, 1);
2140 		IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
2141 	}
2142 
2143 	if (param->private_data && param->private_data_len)
2144 		IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
2145 			    param->private_data_len);
2146 }
2147 
2148 int ib_send_cm_rep(struct ib_cm_id *cm_id,
2149 		   struct ib_cm_rep_param *param)
2150 {
2151 	struct cm_id_private *cm_id_priv;
2152 	struct ib_mad_send_buf *msg;
2153 	struct cm_rep_msg *rep_msg;
2154 	unsigned long flags;
2155 	int ret;
2156 
2157 	if (param->private_data &&
2158 	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
2159 		return -EINVAL;
2160 
2161 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2162 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2163 	if (cm_id->state != IB_CM_REQ_RCVD &&
2164 	    cm_id->state != IB_CM_MRA_REQ_SENT) {
2165 		pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2166 			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2167 		ret = -EINVAL;
2168 		goto out;
2169 	}
2170 
2171 	ret = cm_alloc_msg(cm_id_priv, &msg);
2172 	if (ret)
2173 		goto out;
2174 
2175 	rep_msg = (struct cm_rep_msg *) msg->mad;
2176 	cm_format_rep(rep_msg, cm_id_priv, param);
2177 	msg->timeout_ms = cm_id_priv->timeout_ms;
2178 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
2179 
2180 	ret = ib_post_send_mad(msg, NULL);
2181 	if (ret) {
2182 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2183 		cm_free_msg(msg);
2184 		return ret;
2185 	}
2186 
2187 	cm_id->state = IB_CM_REP_SENT;
2188 	cm_id_priv->msg = msg;
2189 	cm_id_priv->initiator_depth = param->initiator_depth;
2190 	cm_id_priv->responder_resources = param->responder_resources;
2191 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
2192 	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
2193 
2194 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2195 	return ret;
2196 }
2197 EXPORT_SYMBOL(ib_send_cm_rep);
2198 
2199 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
2200 			  struct cm_id_private *cm_id_priv,
2201 			  const void *private_data,
2202 			  u8 private_data_len)
2203 {
2204 	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
2205 	IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
2206 		be32_to_cpu(cm_id_priv->id.local_id));
2207 	IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
2208 		be32_to_cpu(cm_id_priv->id.remote_id));
2209 
2210 	if (private_data && private_data_len)
2211 		IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
2212 			    private_data_len);
2213 }
2214 
2215 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2216 		   const void *private_data,
2217 		   u8 private_data_len)
2218 {
2219 	struct cm_id_private *cm_id_priv;
2220 	struct ib_mad_send_buf *msg;
2221 	unsigned long flags;
2222 	void *data;
2223 	int ret;
2224 
2225 	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
2226 		return -EINVAL;
2227 
2228 	data = cm_copy_private_data(private_data, private_data_len);
2229 	if (IS_ERR(data))
2230 		return PTR_ERR(data);
2231 
2232 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2233 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2234 	if (cm_id->state != IB_CM_REP_RCVD &&
2235 	    cm_id->state != IB_CM_MRA_REP_SENT) {
2236 		pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2237 			 be32_to_cpu(cm_id->local_id), cm_id->state);
2238 		ret = -EINVAL;
2239 		goto error;
2240 	}
2241 
2242 	ret = cm_alloc_msg(cm_id_priv, &msg);
2243 	if (ret)
2244 		goto error;
2245 
2246 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2247 		      private_data, private_data_len);
2248 
2249 	ret = ib_post_send_mad(msg, NULL);
2250 	if (ret) {
2251 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2252 		cm_free_msg(msg);
2253 		kfree(data);
2254 		return ret;
2255 	}
2256 
2257 	cm_id->state = IB_CM_ESTABLISHED;
2258 	cm_set_private_data(cm_id_priv, data, private_data_len);
2259 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2260 	return 0;
2261 
2262 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2263 	kfree(data);
2264 	return ret;
2265 }
2266 EXPORT_SYMBOL(ib_send_cm_rtu);
2267 
2268 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2269 {
2270 	struct cm_rep_msg *rep_msg;
2271 	struct ib_cm_rep_event_param *param;
2272 
2273 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2274 	param = &work->cm_event.param.rep_rcvd;
2275 	param->remote_ca_guid =
2276 		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
2277 	param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
2278 	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2279 	param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
2280 	param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
2281 	param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
2282 	param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
2283 	param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
2284 	param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
2285 	param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
2286 	param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
2287 	work->cm_event.private_data =
2288 		IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
2289 }
2290 
2291 static void cm_dup_rep_handler(struct cm_work *work)
2292 {
2293 	struct cm_id_private *cm_id_priv;
2294 	struct cm_rep_msg *rep_msg;
2295 	struct ib_mad_send_buf *msg = NULL;
2296 	int ret;
2297 
2298 	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2299 	cm_id_priv = cm_acquire_id(
2300 		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
2301 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
2302 	if (!cm_id_priv)
2303 		return;
2304 
2305 	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2306 			counter[CM_REP_COUNTER]);
2307 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2308 	if (ret)
2309 		goto deref;
2310 
2311 	spin_lock_irq(&cm_id_priv->lock);
2312 	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2313 		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2314 			      cm_id_priv->private_data,
2315 			      cm_id_priv->private_data_len);
2316 	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2317 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2318 			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2319 			      cm_id_priv->private_data,
2320 			      cm_id_priv->private_data_len);
2321 	else
2322 		goto unlock;
2323 	spin_unlock_irq(&cm_id_priv->lock);
2324 
2325 	ret = ib_post_send_mad(msg, NULL);
2326 	if (ret)
2327 		goto free;
2328 	goto deref;
2329 
2330 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2331 free:	cm_free_msg(msg);
2332 deref:	cm_deref_id(cm_id_priv);
2333 }
2334 
2335 static int cm_rep_handler(struct cm_work *work)
2336 {
2337 	struct cm_id_private *cm_id_priv;
2338 	struct cm_rep_msg *rep_msg;
2339 	int ret;
2340 	struct cm_id_private *cur_cm_id_priv;
2341 	struct ib_cm_id *cm_id;
2342 	struct cm_timewait_info *timewait_info;
2343 
2344 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2345 	cm_id_priv = cm_acquire_id(
2346 		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
2347 	if (!cm_id_priv) {
2348 		cm_dup_rep_handler(work);
2349 		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2350 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2351 		return -EINVAL;
2352 	}
2353 
2354 	cm_format_rep_event(work, cm_id_priv->qp_type);
2355 
2356 	spin_lock_irq(&cm_id_priv->lock);
2357 	switch (cm_id_priv->id.state) {
2358 	case IB_CM_REQ_SENT:
2359 	case IB_CM_MRA_REQ_RCVD:
2360 		break;
2361 	default:
2362 		spin_unlock_irq(&cm_id_priv->lock);
2363 		ret = -EINVAL;
2364 		pr_debug(
2365 			"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2366 			__func__, cm_id_priv->id.state,
2367 			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
2368 			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2369 		goto error;
2370 	}
2371 
2372 	cm_id_priv->timewait_info->work.remote_id =
2373 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
2374 	cm_id_priv->timewait_info->remote_ca_guid =
2375 		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
2376 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2377 
2378 	spin_lock(&cm.lock);
2379 	/* Check for duplicate REP. */
2380 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2381 		spin_unlock(&cm.lock);
2382 		spin_unlock_irq(&cm_id_priv->lock);
2383 		ret = -EINVAL;
2384 		pr_debug("%s: Failed to insert remote id %d\n", __func__,
2385 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2386 		goto error;
2387 	}
2388 	/* Check for a stale connection. */
2389 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2390 	if (timewait_info) {
2391 		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2392 			 &cm.remote_id_table);
2393 		cm_id_priv->timewait_info->inserted_remote_id = 0;
2394 		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2395 					   timewait_info->work.remote_id);
2396 
2397 		spin_unlock(&cm.lock);
2398 		spin_unlock_irq(&cm_id_priv->lock);
2399 		cm_issue_rej(work->port, work->mad_recv_wc,
2400 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2401 			     NULL, 0);
2402 		ret = -EINVAL;
2403 		pr_debug(
2404 			"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2405 			__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
2406 			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
2407 
2408 		if (cur_cm_id_priv) {
2409 			cm_id = &cur_cm_id_priv->id;
2410 			ib_send_cm_dreq(cm_id, NULL, 0);
2411 			cm_deref_id(cur_cm_id_priv);
2412 		}
2413 
2414 		goto error;
2415 	}
2416 	spin_unlock(&cm.lock);
2417 
2418 	cm_id_priv->id.state = IB_CM_REP_RCVD;
2419 	cm_id_priv->id.remote_id =
2420 		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
2421 	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2422 	cm_id_priv->initiator_depth =
2423 		IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
2424 	cm_id_priv->responder_resources =
2425 		IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
2426 	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
2427 	cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
2428 	cm_id_priv->target_ack_delay =
2429 		IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
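	/*
	 * Fold the target's ACK delay from the REP back into the ACK
	 * timeouts; av.timeout - 1 recovers the packet life time the
	 * timeout was originally derived from.
	 */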
2430 	cm_id_priv->av.timeout =
2431 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2432 				       cm_id_priv->av.timeout - 1);
2433 	cm_id_priv->alt_av.timeout =
2434 			cm_ack_timeout(cm_id_priv->target_ack_delay,
2435 				       cm_id_priv->alt_av.timeout - 1);
2436 
2437 	/* todo: handle peer_to_peer */
2438 
2439 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2440 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2441 	if (!ret)
2442 		list_add_tail(&work->list, &cm_id_priv->work_list);
2443 	spin_unlock_irq(&cm_id_priv->lock);
2444 
2445 	if (ret)
2446 		cm_process_work(cm_id_priv, work);
2447 	else
2448 		cm_deref_id(cm_id_priv);
2449 	return 0;
2450 
2451 error:
2452 	cm_deref_id(cm_id_priv);
2453 	return ret;
2454 }
2455 
2456 static int cm_establish_handler(struct cm_work *work)
2457 {
2458 	struct cm_id_private *cm_id_priv;
2459 	int ret;
2460 
2461 	/* See comment in cm_establish about lookup. */
2462 	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2463 	if (!cm_id_priv)
2464 		return -EINVAL;
2465 
2466 	spin_lock_irq(&cm_id_priv->lock);
2467 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2468 		spin_unlock_irq(&cm_id_priv->lock);
2469 		goto out;
2470 	}
2471 
2472 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2473 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2474 	if (!ret)
2475 		list_add_tail(&work->list, &cm_id_priv->work_list);
2476 	spin_unlock_irq(&cm_id_priv->lock);
2477 
2478 	if (ret)
2479 		cm_process_work(cm_id_priv, work);
2480 	else
2481 		cm_deref_id(cm_id_priv);
2482 	return 0;
2483 out:
2484 	cm_deref_id(cm_id_priv);
2485 	return -EINVAL;
2486 }
2487 
2488 static int cm_rtu_handler(struct cm_work *work)
2489 {
2490 	struct cm_id_private *cm_id_priv;
2491 	struct cm_rtu_msg *rtu_msg;
2492 	int ret;
2493 
2494 	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2495 	cm_id_priv = cm_acquire_id(
2496 		cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
2497 		cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
2498 	if (!cm_id_priv)
2499 		return -EINVAL;
2500 
2501 	work->cm_event.private_data =
2502 		IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
2503 
2504 	spin_lock_irq(&cm_id_priv->lock);
2505 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2506 	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2507 		spin_unlock_irq(&cm_id_priv->lock);
2508 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2509 				counter[CM_RTU_COUNTER]);
2510 		goto out;
2511 	}
2512 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
2513 
2514 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2515 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2516 	if (!ret)
2517 		list_add_tail(&work->list, &cm_id_priv->work_list);
2518 	spin_unlock_irq(&cm_id_priv->lock);
2519 
2520 	if (ret)
2521 		cm_process_work(cm_id_priv, work);
2522 	else
2523 		cm_deref_id(cm_id_priv);
2524 	return 0;
2525 out:
2526 	cm_deref_id(cm_id_priv);
2527 	return -EINVAL;
2528 }
2529 
2530 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2531 			  struct cm_id_private *cm_id_priv,
2532 			  const void *private_data,
2533 			  u8 private_data_len)
2534 {
2535 	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2536 			  cm_form_tid(cm_id_priv));
2537 	IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
2538 		be32_to_cpu(cm_id_priv->id.local_id));
2539 	IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
2540 		be32_to_cpu(cm_id_priv->id.remote_id));
2541 	IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
2542 		be32_to_cpu(cm_id_priv->remote_qpn));
2543 
2544 	if (private_data && private_data_len)
2545 		IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
2546 			    private_data_len);
2547 }
2548 
2549 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2550 		    const void *private_data,
2551 		    u8 private_data_len)
2552 {
2553 	struct cm_id_private *cm_id_priv;
2554 	struct ib_mad_send_buf *msg;
2555 	unsigned long flags;
2556 	int ret;
2557 
2558 	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2559 		return -EINVAL;
2560 
2561 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2562 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2563 	if (cm_id->state != IB_CM_ESTABLISHED) {
2564 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2565 			 be32_to_cpu(cm_id->local_id), cm_id->state);
2566 		ret = -EINVAL;
2567 		goto out;
2568 	}
2569 
2570 	if (cm_id->lap_state == IB_CM_LAP_SENT ||
2571 	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2572 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2573 
2574 	ret = cm_alloc_msg(cm_id_priv, &msg);
2575 	if (ret) {
2576 		cm_enter_timewait(cm_id_priv);
2577 		goto out;
2578 	}
2579 
2580 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2581 		       private_data, private_data_len);
2582 	msg->timeout_ms = cm_id_priv->timeout_ms;
2583 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2584 
2585 	ret = ib_post_send_mad(msg, NULL);
2586 	if (ret) {
2587 		cm_enter_timewait(cm_id_priv);
2588 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2589 		cm_free_msg(msg);
2590 		return ret;
2591 	}
2592 
2593 	cm_id->state = IB_CM_DREQ_SENT;
2594 	cm_id_priv->msg = msg;
2595 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2596 	return ret;
2597 }
2598 EXPORT_SYMBOL(ib_send_cm_dreq);
2599 
2600 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2601 			  struct cm_id_private *cm_id_priv,
2602 			  const void *private_data,
2603 			  u8 private_data_len)
2604 {
2605 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2606 	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
2607 		be32_to_cpu(cm_id_priv->id.local_id));
2608 	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
2609 		be32_to_cpu(cm_id_priv->id.remote_id));
2610 
2611 	if (private_data && private_data_len)
2612 		IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
2613 			    private_data_len);
2614 }
2615 
2616 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2617 		    const void *private_data,
2618 		    u8 private_data_len)
2619 {
2620 	struct cm_id_private *cm_id_priv;
2621 	struct ib_mad_send_buf *msg;
2622 	unsigned long flags;
2623 	void *data;
2624 	int ret;
2625 
2626 	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2627 		return -EINVAL;
2628 
2629 	data = cm_copy_private_data(private_data, private_data_len);
2630 	if (IS_ERR(data))
2631 		return PTR_ERR(data);
2632 
2633 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2634 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2635 	if (cm_id->state != IB_CM_DREQ_RCVD) {
2636 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2637 		kfree(data);
2638 		pr_debug("%s: local_id %d, cm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2639 			 __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2640 		return -EINVAL;
2641 	}
2642 
2643 	cm_set_private_data(cm_id_priv, data, private_data_len);
2644 	cm_enter_timewait(cm_id_priv);
2645 
2646 	ret = cm_alloc_msg(cm_id_priv, &msg);
2647 	if (ret)
2648 		goto out;
2649 
2650 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2651 		       private_data, private_data_len);
2652 
2653 	ret = ib_post_send_mad(msg, NULL);
2654 	if (ret) {
2655 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2656 		cm_free_msg(msg);
2657 		return ret;
2658 	}
2659 
2660 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2661 	return ret;
2662 }
2663 EXPORT_SYMBOL(ib_send_cm_drep);
2664 
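/*
 * Generate a DREP for a DREQ that no longer matches any connection so that
 * the remote side can complete its disconnect.
 */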
2665 static int cm_issue_drep(struct cm_port *port,
2666 			 struct ib_mad_recv_wc *mad_recv_wc)
2667 {
2668 	struct ib_mad_send_buf *msg = NULL;
2669 	struct cm_dreq_msg *dreq_msg;
2670 	struct cm_drep_msg *drep_msg;
2671 	int ret;
2672 
2673 	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2674 	if (ret)
2675 		return ret;
2676 
2677 	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2678 	drep_msg = (struct cm_drep_msg *) msg->mad;
2679 
2680 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2681 	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
2682 		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
2683 	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
2684 		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
2685 
2686 	ret = ib_post_send_mad(msg, NULL);
2687 	if (ret)
2688 		cm_free_msg(msg);
2689 
2690 	return ret;
2691 }
2692 
2693 static int cm_dreq_handler(struct cm_work *work)
2694 {
2695 	struct cm_id_private *cm_id_priv;
2696 	struct cm_dreq_msg *dreq_msg;
2697 	struct ib_mad_send_buf *msg = NULL;
2698 	int ret;
2699 
2700 	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2701 	cm_id_priv = cm_acquire_id(
2702 		cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
2703 		cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
2704 	if (!cm_id_priv) {
2705 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2706 				counter[CM_DREQ_COUNTER]);
2707 		cm_issue_drep(work->port, work->mad_recv_wc);
2708 		pr_debug(
2709 			"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2710 			__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
2711 			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
2712 		return -EINVAL;
2713 	}
2714 
2715 	work->cm_event.private_data =
2716 		IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
2717 
2718 	spin_lock_irq(&cm_id_priv->lock);
2719 	if (cm_id_priv->local_qpn !=
2720 	    cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
2721 		goto unlock;
2722 
2723 	switch (cm_id_priv->id.state) {
2724 	case IB_CM_REP_SENT:
2725 	case IB_CM_DREQ_SENT:
2726 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2727 		break;
2728 	case IB_CM_ESTABLISHED:
2729 		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2730 		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2731 			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2732 		break;
2733 	case IB_CM_MRA_REP_RCVD:
2734 		break;
2735 	case IB_CM_TIMEWAIT:
2736 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2737 				counter[CM_DREQ_COUNTER]);
2738 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2739 		if (IS_ERR(msg))
2740 			goto unlock;
2741 
2742 		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2743 			       cm_id_priv->private_data,
2744 			       cm_id_priv->private_data_len);
2745 		spin_unlock_irq(&cm_id_priv->lock);
2746 
2747 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2748 		    ib_post_send_mad(msg, NULL))
2749 			cm_free_msg(msg);
2750 		goto deref;
2751 	case IB_CM_DREQ_RCVD:
2752 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2753 				counter[CM_DREQ_COUNTER]);
2754 		goto unlock;
2755 	default:
2756 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2757 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2758 			 cm_id_priv->id.state);
2759 		goto unlock;
2760 	}
2761 	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2762 	cm_id_priv->tid = dreq_msg->hdr.tid;
2763 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2764 	if (!ret)
2765 		list_add_tail(&work->list, &cm_id_priv->work_list);
2766 	spin_unlock_irq(&cm_id_priv->lock);
2767 
2768 	if (ret)
2769 		cm_process_work(cm_id_priv, work);
2770 	else
2771 		cm_deref_id(cm_id_priv);
2772 	return 0;
2773 
2774 unlock:	spin_unlock_irq(&cm_id_priv->lock);
2775 deref:	cm_deref_id(cm_id_priv);
2776 	return -EINVAL;
2777 }
2778 
2779 static int cm_drep_handler(struct cm_work *work)
2780 {
2781 	struct cm_id_private *cm_id_priv;
2782 	struct cm_drep_msg *drep_msg;
2783 	int ret;
2784 
2785 	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2786 	cm_id_priv = cm_acquire_id(
2787 		cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
2788 		cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
2789 	if (!cm_id_priv)
2790 		return -EINVAL;
2791 
2792 	work->cm_event.private_data =
2793 		IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
2794 
2795 	spin_lock_irq(&cm_id_priv->lock);
2796 	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2797 	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2798 		spin_unlock_irq(&cm_id_priv->lock);
2799 		goto out;
2800 	}
2801 	cm_enter_timewait(cm_id_priv);
2802 
2803 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2804 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2805 	if (!ret)
2806 		list_add_tail(&work->list, &cm_id_priv->work_list);
2807 	spin_unlock_irq(&cm_id_priv->lock);
2808 
2809 	if (ret)
2810 		cm_process_work(cm_id_priv, work);
2811 	else
2812 		cm_deref_id(cm_id_priv);
2813 	return 0;
2814 out:
2815 	cm_deref_id(cm_id_priv);
2816 	return -EINVAL;
2817 }
2818 
2819 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2820 		   enum ib_cm_rej_reason reason,
2821 		   void *ari,
2822 		   u8 ari_length,
2823 		   const void *private_data,
2824 		   u8 private_data_len)
2825 {
2826 	struct cm_id_private *cm_id_priv;
2827 	struct ib_mad_send_buf *msg;
2828 	unsigned long flags;
2829 	int ret;
2830 
2831 	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2832 	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2833 		return -EINVAL;
2834 
2835 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2836 
2837 	spin_lock_irqsave(&cm_id_priv->lock, flags);
2838 	switch (cm_id->state) {
2839 	case IB_CM_REQ_SENT:
2840 	case IB_CM_MRA_REQ_RCVD:
2841 	case IB_CM_REQ_RCVD:
2842 	case IB_CM_MRA_REQ_SENT:
2843 	case IB_CM_REP_RCVD:
2844 	case IB_CM_MRA_REP_SENT:
2845 		ret = cm_alloc_msg(cm_id_priv, &msg);
2846 		if (!ret)
2847 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2848 				      cm_id_priv, reason, ari, ari_length,
2849 				      private_data, private_data_len);
2850 
2851 		cm_reset_to_idle(cm_id_priv);
2852 		break;
2853 	case IB_CM_REP_SENT:
2854 	case IB_CM_MRA_REP_RCVD:
2855 		ret = cm_alloc_msg(cm_id_priv, &msg);
2856 		if (!ret)
2857 			cm_format_rej((struct cm_rej_msg *) msg->mad,
2858 				      cm_id_priv, reason, ari, ari_length,
2859 				      private_data, private_data_len);
2860 
2861 		cm_enter_timewait(cm_id_priv);
2862 		break;
2863 	default:
2864 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2865 			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2866 		ret = -EINVAL;
2867 		goto out;
2868 	}
2869 
2870 	if (ret)
2871 		goto out;
2872 
2873 	ret = ib_post_send_mad(msg, NULL);
2874 	if (ret)
2875 		cm_free_msg(msg);
2876 
2877 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2878 	return ret;
2879 }
2880 EXPORT_SYMBOL(ib_send_cm_rej);
2881 
2882 static void cm_format_rej_event(struct cm_work *work)
2883 {
2884 	struct cm_rej_msg *rej_msg;
2885 	struct ib_cm_rej_event_param *param;
2886 
2887 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2888 	param = &work->cm_event.param.rej_rcvd;
2889 	param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
2890 	param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
2891 	param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
2892 	work->cm_event.private_data =
2893 		IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
2894 }
2895 
2896 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2897 {
2898 	struct cm_timewait_info *timewait_info;
2899 	struct cm_id_private *cm_id_priv;
2900 	__be32 remote_id;
2901 
2902 	remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
2903 
2904 	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
2905 		spin_lock_irq(&cm.lock);
2906 		timewait_info = cm_find_remote_id(
2907 			*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
2908 			remote_id);
2909 		if (!timewait_info) {
2910 			spin_unlock_irq(&cm.lock);
2911 			return NULL;
2912 		}
2913 		cm_id_priv =
2914 			cm_acquire_id(timewait_info->work.local_id, remote_id);
2915 		spin_unlock_irq(&cm.lock);
2916 	} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
2917 		   CM_MSG_RESPONSE_REQ)
2918 		cm_id_priv = cm_acquire_id(
2919 			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
2920 			0);
2921 	else
2922 		cm_id_priv = cm_acquire_id(
2923 			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
2924 			remote_id);
2925 
2926 	return cm_id_priv;
2927 }
2928 
2929 static int cm_rej_handler(struct cm_work *work)
2930 {
2931 	struct cm_id_private *cm_id_priv;
2932 	struct cm_rej_msg *rej_msg;
2933 	int ret;
2934 
2935 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2936 	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2937 	if (!cm_id_priv)
2938 		return -EINVAL;
2939 
2940 	cm_format_rej_event(work);
2941 
2942 	spin_lock_irq(&cm_id_priv->lock);
2943 	switch (cm_id_priv->id.state) {
2944 	case IB_CM_REQ_SENT:
2945 	case IB_CM_MRA_REQ_RCVD:
2946 	case IB_CM_REP_SENT:
2947 	case IB_CM_MRA_REP_RCVD:
2948 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2949 		/* fall through */
2950 	case IB_CM_REQ_RCVD:
2951 	case IB_CM_MRA_REQ_SENT:
2952 		if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
2953 			cm_enter_timewait(cm_id_priv);
2954 		else
2955 			cm_reset_to_idle(cm_id_priv);
2956 		break;
2957 	case IB_CM_DREQ_SENT:
2958 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2959 		/* fall through */
2960 	case IB_CM_REP_RCVD:
2961 	case IB_CM_MRA_REP_SENT:
2962 		cm_enter_timewait(cm_id_priv);
2963 		break;
2964 	case IB_CM_ESTABLISHED:
2965 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2966 		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2967 			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2968 				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2969 					      cm_id_priv->msg);
2970 			cm_enter_timewait(cm_id_priv);
2971 			break;
2972 		}
2973 		/* fall through */
2974 	default:
2975 		spin_unlock_irq(&cm_id_priv->lock);
2976 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2977 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2978 			 cm_id_priv->id.state);
2979 		ret = -EINVAL;
2980 		goto out;
2981 	}
2982 
2983 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2984 	if (!ret)
2985 		list_add_tail(&work->list, &cm_id_priv->work_list);
2986 	spin_unlock_irq(&cm_id_priv->lock);
2987 
2988 	if (ret)
2989 		cm_process_work(cm_id_priv, work);
2990 	else
2991 		cm_deref_id(cm_id_priv);
2992 	return 0;
2993 out:
2994 	cm_deref_id(cm_id_priv);
2995 	return -EINVAL;
2996 }
2997 
2998 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2999 		   u8 service_timeout,
3000 		   const void *private_data,
3001 		   u8 private_data_len)
3002 {
3003 	struct cm_id_private *cm_id_priv;
3004 	struct ib_mad_send_buf *msg;
3005 	enum ib_cm_state cm_state;
3006 	enum ib_cm_lap_state lap_state;
3007 	enum cm_msg_response msg_response;
3008 	void *data;
3009 	unsigned long flags;
3010 	int ret;
3011 
3012 	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
3013 		return -EINVAL;
3014 
3015 	data = cm_copy_private_data(private_data, private_data_len);
3016 	if (IS_ERR(data))
3017 		return PTR_ERR(data);
3018 
3019 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3020 
3021 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3022 	switch (cm_id_priv->id.state) {
3023 	case IB_CM_REQ_RCVD:
3024 		cm_state = IB_CM_MRA_REQ_SENT;
3025 		lap_state = cm_id->lap_state;
3026 		msg_response = CM_MSG_RESPONSE_REQ;
3027 		break;
3028 	case IB_CM_REP_RCVD:
3029 		cm_state = IB_CM_MRA_REP_SENT;
3030 		lap_state = cm_id->lap_state;
3031 		msg_response = CM_MSG_RESPONSE_REP;
3032 		break;
3033 	case IB_CM_ESTABLISHED:
3034 		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
3035 			cm_state = cm_id->state;
3036 			lap_state = IB_CM_MRA_LAP_SENT;
3037 			msg_response = CM_MSG_RESPONSE_OTHER;
3038 			break;
3039 		}
3040 		/* fall through */
3041 	default:
3042 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
3043 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
3044 			 cm_id_priv->id.state);
3045 		ret = -EINVAL;
3046 		goto error1;
3047 	}
3048 
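	/*
	 * With IB_CM_MRA_FLAG_DELAY no MRA is sent now; the extended
	 * service timeout is only recorded so that a later duplicate
	 * message can be answered with an MRA.
	 */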
3049 	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
3050 		ret = cm_alloc_msg(cm_id_priv, &msg);
3051 		if (ret)
3052 			goto error1;
3053 
3054 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3055 			      msg_response, service_timeout,
3056 			      private_data, private_data_len);
3057 		ret = ib_post_send_mad(msg, NULL);
3058 		if (ret)
3059 			goto error2;
3060 	}
3061 
3062 	cm_id->state = cm_state;
3063 	cm_id->lap_state = lap_state;
3064 	cm_id_priv->service_timeout = service_timeout;
3065 	cm_set_private_data(cm_id_priv, data, private_data_len);
3066 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3067 	return 0;
3068 
3069 error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3070 	kfree(data);
3071 	return ret;
3072 
3073 error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3074 	kfree(data);
3075 	cm_free_msg(msg);
3076 	return ret;
3077 }
3078 EXPORT_SYMBOL(ib_send_cm_mra);
3079 
3080 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
3081 {
3082 	switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
3083 	case CM_MSG_RESPONSE_REQ:
3084 		return cm_acquire_id(
3085 			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
3086 			0);
3087 	case CM_MSG_RESPONSE_REP:
3088 	case CM_MSG_RESPONSE_OTHER:
3089 		return cm_acquire_id(
3090 			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
3091 			cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
3092 	default:
3093 		return NULL;
3094 	}
3095 }
3096 
3097 static int cm_mra_handler(struct cm_work *work)
3098 {
3099 	struct cm_id_private *cm_id_priv;
3100 	struct cm_mra_msg *mra_msg;
3101 	int timeout, ret;
3102 
3103 	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
3104 	cm_id_priv = cm_acquire_mraed_id(mra_msg);
3105 	if (!cm_id_priv)
3106 		return -EINVAL;
3107 
3108 	work->cm_event.private_data =
3109 		IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
3110 	work->cm_event.param.mra_rcvd.service_timeout =
3111 		IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
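	/*
	 * Extend the outstanding MAD's retry timeout by the service time
	 * advertised in the MRA plus our own path timeout before making
	 * the state transition below.
	 */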
3112 	timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
3113 		  cm_convert_to_ms(cm_id_priv->av.timeout);
3114 
3115 	spin_lock_irq(&cm_id_priv->lock);
3116 	switch (cm_id_priv->id.state) {
3117 	case IB_CM_REQ_SENT:
3118 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3119 			    CM_MSG_RESPONSE_REQ ||
3120 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3121 				  cm_id_priv->msg, timeout))
3122 			goto out;
3123 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
3124 		break;
3125 	case IB_CM_REP_SENT:
3126 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3127 			    CM_MSG_RESPONSE_REP ||
3128 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3129 				  cm_id_priv->msg, timeout))
3130 			goto out;
3131 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
3132 		break;
3133 	case IB_CM_ESTABLISHED:
3134 		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
3135 			    CM_MSG_RESPONSE_OTHER ||
3136 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
3137 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
3138 				  cm_id_priv->msg, timeout)) {
3139 			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
3140 				atomic_long_inc(&work->port->
3141 						counter_group[CM_RECV_DUPLICATES].
3142 						counter[CM_MRA_COUNTER]);
3143 			goto out;
3144 		}
3145 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
3146 		break;
3147 	case IB_CM_MRA_REQ_RCVD:
3148 	case IB_CM_MRA_REP_RCVD:
3149 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3150 				counter[CM_MRA_COUNTER]);
3151 		/* fall through */
3152 	default:
3153 		pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
3154 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
3155 			 cm_id_priv->id.state);
3156 		goto out;
3157 	}
3158 
3159 	cm_id_priv->msg->context[1] = (void *) (unsigned long)
3160 				      cm_id_priv->id.state;
3161 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3162 	if (!ret)
3163 		list_add_tail(&work->list, &cm_id_priv->work_list);
3164 	spin_unlock_irq(&cm_id_priv->lock);
3165 
3166 	if (ret)
3167 		cm_process_work(cm_id_priv, work);
3168 	else
3169 		cm_deref_id(cm_id_priv);
3170 	return 0;
3171 out:
3172 	spin_unlock_irq(&cm_id_priv->lock);
3173 	cm_deref_id(cm_id_priv);
3174 	return -EINVAL;
3175 }
3176 
3177 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
3178 					struct sa_path_rec *path)
3179 {
3180 	u32 lid;
3181 
3182 	if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
3183 		sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
3184 					       lap_msg));
3185 		sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
3186 					       lap_msg));
3187 	} else {
3188 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
3189 			CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
3190 		sa_path_set_dlid(path, lid);
3191 
3192 		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
3193 			CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
3194 		sa_path_set_slid(path, lid);
3195 	}
3196 }
3197 
3198 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
3199 				    struct sa_path_rec *path,
3200 				    struct cm_lap_msg *lap_msg)
3201 {
3202 	path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
3203 	path->sgid =
3204 		*IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
3205 	path->flow_label =
3206 		cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
3207 	path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
3208 	path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
3209 	path->reversible = 1;
3210 	path->pkey = cm_id_priv->pkey;
3211 	path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
3212 	path->mtu_selector = IB_SA_EQ;
3213 	path->mtu = cm_id_priv->path_mtu;
3214 	path->rate_selector = IB_SA_EQ;
3215 	path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
3216 	path->packet_life_time_selector = IB_SA_EQ;
3217 	path->packet_life_time =
3218 		IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
3219 	path->packet_life_time -= (path->packet_life_time > 0);
3220 	cm_format_path_lid_from_lap(lap_msg, path);
3221 }
3222 
3223 static int cm_lap_handler(struct cm_work *work)
3224 {
3225 	struct cm_id_private *cm_id_priv;
3226 	struct cm_lap_msg *lap_msg;
3227 	struct ib_cm_lap_event_param *param;
3228 	struct ib_mad_send_buf *msg = NULL;
3229 	int ret;
3230 
3231 	/* Currently Alternate path messages are not supported for
3232 	 * RoCE link layer.
3233 	 */
3234 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3235 			       work->port->port_num))
3236 		return -EINVAL;
3237 
3238 	/* todo: verify LAP request and send reject APR if invalid. */
3239 	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
3240 	cm_id_priv = cm_acquire_id(
3241 		cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
3242 		cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
3243 	if (!cm_id_priv)
3244 		return -EINVAL;
3245 
3246 	param = &work->cm_event.param.lap_rcvd;
3247 	memset(&work->path[0], 0, sizeof(work->path[0]));
3248 	cm_path_set_rec_type(work->port->cm_dev->ib_device,
3249 			     work->port->port_num, &work->path[0],
3250 			     IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
3251 					     lap_msg));
3252 	param->alternate_path = &work->path[0];
3253 	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3254 	work->cm_event.private_data =
3255 		IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
3256 
3257 	spin_lock_irq(&cm_id_priv->lock);
3258 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3259 		goto unlock;
3260 
3261 	switch (cm_id_priv->id.lap_state) {
3262 	case IB_CM_LAP_UNINIT:
3263 	case IB_CM_LAP_IDLE:
3264 		break;
3265 	case IB_CM_MRA_LAP_SENT:
3266 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3267 				counter[CM_LAP_COUNTER]);
3268 		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3269 		if (IS_ERR(msg))
3270 			goto unlock;
3271 
3272 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3273 			      CM_MSG_RESPONSE_OTHER,
3274 			      cm_id_priv->service_timeout,
3275 			      cm_id_priv->private_data,
3276 			      cm_id_priv->private_data_len);
3277 		spin_unlock_irq(&cm_id_priv->lock);
3278 
3279 		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3280 		    ib_post_send_mad(msg, NULL))
3281 			cm_free_msg(msg);
3282 		goto deref;
3283 	case IB_CM_LAP_RCVD:
3284 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3285 				counter[CM_LAP_COUNTER]);
3286 		goto unlock;
3287 	default:
3288 		goto unlock;
3289 	}
3290 
3291 	ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3292 				 work->mad_recv_wc->recv_buf.grh,
3293 				 &cm_id_priv->av);
3294 	if (ret)
3295 		goto unlock;
3296 
3297 	ret = cm_init_av_by_path(param->alternate_path, NULL,
3298 				 &cm_id_priv->alt_av, cm_id_priv);
3299 	if (ret)
3300 		goto unlock;
3301 
3302 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3303 	cm_id_priv->tid = lap_msg->hdr.tid;
3304 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3305 	if (!ret)
3306 		list_add_tail(&work->list, &cm_id_priv->work_list);
3307 	spin_unlock_irq(&cm_id_priv->lock);
3308 
3309 	if (ret)
3310 		cm_process_work(cm_id_priv, work);
3311 	else
3312 		cm_deref_id(cm_id_priv);
3313 	return 0;
3314 
3315 unlock:	spin_unlock_irq(&cm_id_priv->lock);
3316 deref:	cm_deref_id(cm_id_priv);
3317 	return -EINVAL;
3318 }
3319 
3320 static int cm_apr_handler(struct cm_work *work)
3321 {
3322 	struct cm_id_private *cm_id_priv;
3323 	struct cm_apr_msg *apr_msg;
3324 	int ret;
3325 
3326 	/* Alternate path messages are not currently supported for the
3327 	 * RoCE link layer.
3328 	 */
3329 	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3330 			       work->port->port_num))
3331 		return -EINVAL;
3332 
3333 	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3334 	cm_id_priv = cm_acquire_id(
3335 		cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
3336 		cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
3337 	if (!cm_id_priv)
3338 		return -EINVAL; /* Unmatched reply. */
3339 
3340 	work->cm_event.param.apr_rcvd.ap_status =
3341 		IBA_GET(CM_APR_AR_STATUS, apr_msg);
3342 	work->cm_event.param.apr_rcvd.apr_info =
3343 		IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
3344 	work->cm_event.param.apr_rcvd.info_len =
3345 		IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
3346 	work->cm_event.private_data =
3347 		IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
3348 
3349 	spin_lock_irq(&cm_id_priv->lock);
3350 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3351 	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3352 	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3353 		spin_unlock_irq(&cm_id_priv->lock);
3354 		goto out;
3355 	}
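	/*
	 * The APR answers our outstanding LAP: return to LAP_IDLE and cancel
	 * the LAP MAD so it is not retried.
	 */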
3356 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3357 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3358 	cm_id_priv->msg = NULL;
3359 
3360 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3361 	if (!ret)
3362 		list_add_tail(&work->list, &cm_id_priv->work_list);
3363 	spin_unlock_irq(&cm_id_priv->lock);
3364 
3365 	if (ret)
3366 		cm_process_work(cm_id_priv, work);
3367 	else
3368 		cm_deref_id(cm_id_priv);
3369 	return 0;
3370 out:
3371 	cm_deref_id(cm_id_priv);
3372 	return -EINVAL;
3373 }
3374 
3375 static int cm_timewait_handler(struct cm_work *work)
3376 {
3377 	struct cm_timewait_info *timewait_info;
3378 	struct cm_id_private *cm_id_priv;
3379 	int ret;
3380 
3381 	timewait_info = container_of(work, struct cm_timewait_info, work);
3382 	spin_lock_irq(&cm.lock);
3383 	list_del(&timewait_info->list);
3384 	spin_unlock_irq(&cm.lock);
3385 
3386 	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3387 				   timewait_info->work.remote_id);
3388 	if (!cm_id_priv)
3389 		return -EINVAL;
3390 
3391 	spin_lock_irq(&cm_id_priv->lock);
3392 	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3393 	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3394 		spin_unlock_irq(&cm_id_priv->lock);
3395 		goto out;
3396 	}
3397 	cm_id_priv->id.state = IB_CM_IDLE;
3398 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
3399 	if (!ret)
3400 		list_add_tail(&work->list, &cm_id_priv->work_list);
3401 	spin_unlock_irq(&cm_id_priv->lock);
3402 
3403 	if (ret)
3404 		cm_process_work(cm_id_priv, work);
3405 	else
3406 		cm_deref_id(cm_id_priv);
3407 	return 0;
3408 out:
3409 	cm_deref_id(cm_id_priv);
3410 	return -EINVAL;
3411 }
3412 
3413 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3414 			       struct cm_id_private *cm_id_priv,
3415 			       struct ib_cm_sidr_req_param *param)
3416 {
3417 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3418 			  cm_form_tid(cm_id_priv));
3419 	IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
3420 		be32_to_cpu(cm_id_priv->id.local_id));
3421 	IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
3422 		be16_to_cpu(param->path->pkey));
3423 	IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
3424 		be64_to_cpu(param->service_id));
3425 
3426 	if (param->private_data && param->private_data_len)
3427 		IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
3428 			    param->private_data, param->private_data_len);
3429 }
3430 
3431 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3432 			struct ib_cm_sidr_req_param *param)
3433 {
3434 	struct cm_id_private *cm_id_priv;
3435 	struct ib_mad_send_buf *msg;
3436 	unsigned long flags;
3437 	int ret;
3438 
3439 	if (!param->path || (param->private_data &&
3440 	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3441 		return -EINVAL;
3442 
3443 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3444 	ret = cm_init_av_by_path(param->path, param->sgid_attr,
3445 				 &cm_id_priv->av,
3446 				 cm_id_priv);
3447 	if (ret)
3448 		goto out;
3449 
3450 	cm_id->service_id = param->service_id;
3451 	cm_id->service_mask = ~cpu_to_be64(0);
3452 	cm_id_priv->timeout_ms = param->timeout_ms;
3453 	cm_id_priv->max_cm_retries = param->max_cm_retries;
3454 	ret = cm_alloc_msg(cm_id_priv, &msg);
3455 	if (ret)
3456 		goto out;
3457 
3458 	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3459 			   param);
3460 	msg->timeout_ms = cm_id_priv->timeout_ms;
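	/*
	 * context[1] records the state this send belongs to;
	 * cm_process_send_error() uses it to discard completions for sends
	 * that are no longer current.
	 */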
3461 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3462 
3463 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3464 	if (cm_id->state == IB_CM_IDLE)
3465 		ret = ib_post_send_mad(msg, NULL);
3466 	else
3467 		ret = -EINVAL;
3468 
3469 	if (ret) {
3470 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3471 		cm_free_msg(msg);
3472 		goto out;
3473 	}
3474 	cm_id->state = IB_CM_SIDR_REQ_SENT;
3475 	cm_id_priv->msg = msg;
3476 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3477 out:
3478 	return ret;
3479 }
3480 EXPORT_SYMBOL(ib_send_cm_sidr_req);
3481 
3482 static void cm_format_sidr_req_event(struct cm_work *work,
3483 				     const struct cm_id_private *rx_cm_id,
3484 				     struct ib_cm_id *listen_id)
3485 {
3486 	struct cm_sidr_req_msg *sidr_req_msg;
3487 	struct ib_cm_sidr_req_event_param *param;
3488 
3489 	sidr_req_msg = (struct cm_sidr_req_msg *)
3490 				work->mad_recv_wc->recv_buf.mad;
3491 	param = &work->cm_event.param.sidr_req_rcvd;
3492 	param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
3493 	param->listen_id = listen_id;
3494 	param->service_id =
3495 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
3496 	param->bth_pkey = cm_get_bth_pkey(work);
3497 	param->port = work->port->port_num;
3498 	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3499 	work->cm_event.private_data =
3500 		IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
3501 }
3502 
3503 static int cm_sidr_req_handler(struct cm_work *work)
3504 {
3505 	struct ib_cm_id *cm_id;
3506 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3507 	struct cm_sidr_req_msg *sidr_req_msg;
3508 	struct ib_wc *wc;
3509 	int ret;
3510 
3511 	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3512 	if (IS_ERR(cm_id))
3513 		return PTR_ERR(cm_id);
3514 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3515 
3516 	/* Record SGID/SLID and request ID for lookup. */
3517 	sidr_req_msg = (struct cm_sidr_req_msg *)
3518 				work->mad_recv_wc->recv_buf.mad;
3519 	wc = work->mad_recv_wc->wc;
3520 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3521 	cm_id_priv->av.dgid.global.interface_id = 0;
3522 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3523 				      work->mad_recv_wc->recv_buf.grh,
3524 				      &cm_id_priv->av);
3525 	if (ret)
3526 		goto out;
3527 
3528 	cm_id_priv->id.remote_id =
3529 		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
3530 	cm_id_priv->tid = sidr_req_msg->hdr.tid;
3531 	atomic_inc(&cm_id_priv->work_count);
3532 
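	/*
	 * Insert into the remote SIDR table under cm.lock; an existing entry
	 * means this is a retransmitted request that is already being
	 * handled, so count it as a duplicate and drop it.
	 */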
3533 	spin_lock_irq(&cm.lock);
3534 	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3535 	if (cur_cm_id_priv) {
3536 		spin_unlock_irq(&cm.lock);
3537 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3538 				counter[CM_SIDR_REQ_COUNTER]);
3539 		goto out; /* Duplicate message. */
3540 	}
3541 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3542 	cur_cm_id_priv = cm_find_listen(
3543 		cm_id->device,
3544 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
3545 	if (!cur_cm_id_priv) {
3546 		spin_unlock_irq(&cm.lock);
3547 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3548 		goto out; /* No match. */
3549 	}
3550 	refcount_inc(&cur_cm_id_priv->refcount);
3551 	refcount_inc(&cm_id_priv->refcount);
3552 	spin_unlock_irq(&cm.lock);
3553 
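	/*
	 * A listener matched the service ID: deliver the SIDR_REQ event
	 * through the listener's handler and context on the new id.
	 */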
3554 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3555 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
3556 	cm_id_priv->id.service_id =
3557 		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
3558 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3559 
3560 	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3561 	cm_process_work(cm_id_priv, work);
3562 	cm_deref_id(cur_cm_id_priv);
3563 	return 0;
3564 out:
3565 	ib_destroy_cm_id(&cm_id_priv->id);
3566 	return -EINVAL;
3567 }
3568 
3569 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3570 			       struct cm_id_private *cm_id_priv,
3571 			       struct ib_cm_sidr_rep_param *param)
3572 {
3573 	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3574 			  cm_id_priv->tid);
3575 	IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
3576 		be32_to_cpu(cm_id_priv->id.remote_id));
3577 	IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
3578 	IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
3579 	IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
3580 		be64_to_cpu(cm_id_priv->id.service_id));
3581 	IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
3582 
3583 	if (param->info && param->info_length)
3584 		IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
3585 			    param->info, param->info_length);
3586 
3587 	if (param->private_data && param->private_data_len)
3588 		IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
3589 			    param->private_data, param->private_data_len);
3590 }
3591 
3592 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3593 			struct ib_cm_sidr_rep_param *param)
3594 {
3595 	struct cm_id_private *cm_id_priv;
3596 	struct ib_mad_send_buf *msg;
3597 	unsigned long flags;
3598 	int ret;
3599 
3600 	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3601 	    (param->private_data &&
3602 	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3603 		return -EINVAL;
3604 
3605 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3606 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3607 	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3608 		ret = -EINVAL;
3609 		goto error;
3610 	}
3611 
3612 	ret = cm_alloc_msg(cm_id_priv, &msg);
3613 	if (ret)
3614 		goto error;
3615 
3616 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3617 			   param);
3618 	ret = ib_post_send_mad(msg, NULL);
3619 	if (ret) {
3620 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3621 		cm_free_msg(msg);
3622 		return ret;
3623 	}
3624 	cm_id->state = IB_CM_IDLE;
3625 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3626 
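	/*
	 * The id was added to the remote SIDR table when the request was
	 * received; remove it now that the exchange is complete.
	 */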
3627 	spin_lock_irqsave(&cm.lock, flags);
3628 	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3629 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3630 		RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3631 	}
3632 	spin_unlock_irqrestore(&cm.lock, flags);
3633 	return 0;
3634 
3635 error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3636 	return ret;
3637 }
3638 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3639 
3640 static void cm_format_sidr_rep_event(struct cm_work *work,
3641 				     const struct cm_id_private *cm_id_priv)
3642 {
3643 	struct cm_sidr_rep_msg *sidr_rep_msg;
3644 	struct ib_cm_sidr_rep_event_param *param;
3645 
3646 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3647 				work->mad_recv_wc->recv_buf.mad;
3648 	param = &work->cm_event.param.sidr_rep_rcvd;
3649 	param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
3650 	param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
3651 	param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
3652 	param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
3653 				      sidr_rep_msg);
3654 	param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
3655 				  sidr_rep_msg);
3656 	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3657 	work->cm_event.private_data =
3658 		IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
3659 }
3660 
3661 static int cm_sidr_rep_handler(struct cm_work *work)
3662 {
3663 	struct cm_sidr_rep_msg *sidr_rep_msg;
3664 	struct cm_id_private *cm_id_priv;
3665 
3666 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3667 				work->mad_recv_wc->recv_buf.mad;
3668 	cm_id_priv = cm_acquire_id(
3669 		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
3670 	if (!cm_id_priv)
3671 		return -EINVAL; /* Unmatched reply. */
3672 
3673 	spin_lock_irq(&cm_id_priv->lock);
3674 	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3675 		spin_unlock_irq(&cm_id_priv->lock);
3676 		goto out;
3677 	}
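	/*
	 * The reply completes the SIDR exchange: return to IDLE and cancel
	 * the outstanding SIDR REQ so it is not retransmitted.
	 */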
3678 	cm_id_priv->id.state = IB_CM_IDLE;
3679 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3680 	spin_unlock_irq(&cm_id_priv->lock);
3681 
3682 	cm_format_sidr_rep_event(work, cm_id_priv);
3683 	cm_process_work(cm_id_priv, work);
3684 	return 0;
3685 out:
3686 	cm_deref_id(cm_id_priv);
3687 	return -EINVAL;
3688 }
3689 
3690 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3691 				  enum ib_wc_status wc_status)
3692 {
3693 	struct cm_id_private *cm_id_priv;
3694 	struct ib_cm_event cm_event;
3695 	enum ib_cm_state state;
3696 	int ret;
3697 
3698 	memset(&cm_event, 0, sizeof cm_event);
3699 	cm_id_priv = msg->context[0];
3700 
3701 	/* Discard old sends or ones without a response. */
3702 	spin_lock_irq(&cm_id_priv->lock);
3703 	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3704 	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3705 		goto discard;
3706 
3707 	pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
3708 			     state, ib_wc_status_msg(wc_status));
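	/*
	 * Roll the id back to a sane state and report the matching *_ERROR
	 * event for the message that could not be delivered.
	 */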
3709 	switch (state) {
3710 	case IB_CM_REQ_SENT:
3711 	case IB_CM_MRA_REQ_RCVD:
3712 		cm_reset_to_idle(cm_id_priv);
3713 		cm_event.event = IB_CM_REQ_ERROR;
3714 		break;
3715 	case IB_CM_REP_SENT:
3716 	case IB_CM_MRA_REP_RCVD:
3717 		cm_reset_to_idle(cm_id_priv);
3718 		cm_event.event = IB_CM_REP_ERROR;
3719 		break;
3720 	case IB_CM_DREQ_SENT:
3721 		cm_enter_timewait(cm_id_priv);
3722 		cm_event.event = IB_CM_DREQ_ERROR;
3723 		break;
3724 	case IB_CM_SIDR_REQ_SENT:
3725 		cm_id_priv->id.state = IB_CM_IDLE;
3726 		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3727 		break;
3728 	default:
3729 		goto discard;
3730 	}
3731 	spin_unlock_irq(&cm_id_priv->lock);
3732 	cm_event.param.send_status = wc_status;
3733 
3734 	/* No other events can occur on the cm_id at this point. */
3735 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3736 	cm_free_msg(msg);
3737 	if (ret)
3738 		ib_destroy_cm_id(&cm_id_priv->id);
3739 	return;
3740 discard:
3741 	spin_unlock_irq(&cm_id_priv->lock);
3742 	cm_free_msg(msg);
3743 }
3744 
3745 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3746 			    struct ib_mad_send_wc *mad_send_wc)
3747 {
3748 	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3749 	struct cm_port *port;
3750 	u16 attr_index;
3751 
3752 	port = mad_agent->context;
3753 	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3754 				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3755 
3756 	/*
3757 	 * If the send was in response to a received message (context[0] is not
3758 	 * set to a cm_id), and is not a REJ, then it is a send that was
3759 	 * manually retried.
3760 	 */
3761 	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3762 		msg->retries = 1;
3763 
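	/* Charge the send plus any MAD-layer retries to the per-port counters. */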
3764 	atomic_long_add(1 + msg->retries,
3765 			&port->counter_group[CM_XMIT].counter[attr_index]);
3766 	if (msg->retries)
3767 		atomic_long_add(msg->retries,
3768 				&port->counter_group[CM_XMIT_RETRIES].
3769 				counter[attr_index]);
3770 
3771 	switch (mad_send_wc->status) {
3772 	case IB_WC_SUCCESS:
3773 	case IB_WC_WR_FLUSH_ERR:
3774 		cm_free_msg(msg);
3775 		break;
3776 	default:
3777 		if (msg->context[0] && msg->context[1])
3778 			cm_process_send_error(msg, mad_send_wc->status);
3779 		else
3780 			cm_free_msg(msg);
3781 		break;
3782 	}
3783 }
3784 
3785 static void cm_work_handler(struct work_struct *_work)
3786 {
3787 	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3788 	int ret;
3789 
3790 	switch (work->cm_event.event) {
3791 	case IB_CM_REQ_RECEIVED:
3792 		ret = cm_req_handler(work);
3793 		break;
3794 	case IB_CM_MRA_RECEIVED:
3795 		ret = cm_mra_handler(work);
3796 		break;
3797 	case IB_CM_REJ_RECEIVED:
3798 		ret = cm_rej_handler(work);
3799 		break;
3800 	case IB_CM_REP_RECEIVED:
3801 		ret = cm_rep_handler(work);
3802 		break;
3803 	case IB_CM_RTU_RECEIVED:
3804 		ret = cm_rtu_handler(work);
3805 		break;
3806 	case IB_CM_USER_ESTABLISHED:
3807 		ret = cm_establish_handler(work);
3808 		break;
3809 	case IB_CM_DREQ_RECEIVED:
3810 		ret = cm_dreq_handler(work);
3811 		break;
3812 	case IB_CM_DREP_RECEIVED:
3813 		ret = cm_drep_handler(work);
3814 		break;
3815 	case IB_CM_SIDR_REQ_RECEIVED:
3816 		ret = cm_sidr_req_handler(work);
3817 		break;
3818 	case IB_CM_SIDR_REP_RECEIVED:
3819 		ret = cm_sidr_rep_handler(work);
3820 		break;
3821 	case IB_CM_LAP_RECEIVED:
3822 		ret = cm_lap_handler(work);
3823 		break;
3824 	case IB_CM_APR_RECEIVED:
3825 		ret = cm_apr_handler(work);
3826 		break;
3827 	case IB_CM_TIMEWAIT_EXIT:
3828 		ret = cm_timewait_handler(work);
3829 		break;
3830 	default:
3831 		pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3832 		ret = -EINVAL;
3833 		break;
3834 	}
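	/*
	 * A handler that returns an error did not consume the work item, so
	 * free it (and any received MAD) here; successful handlers release it
	 * through cm_process_work().
	 */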
3835 	if (ret)
3836 		cm_free_work(work);
3837 }
3838 
3839 static int cm_establish(struct ib_cm_id *cm_id)
3840 {
3841 	struct cm_id_private *cm_id_priv;
3842 	struct cm_work *work;
3843 	unsigned long flags;
3844 	int ret = 0;
3845 	struct cm_device *cm_dev;
3846 
3847 	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3848 	if (!cm_dev)
3849 		return -ENODEV;
3850 
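	/*
	 * Atomic allocation: ib_cm_notify(IB_EVENT_COMM_EST) may be invoked
	 * from contexts that cannot sleep, such as a QP event handler.
	 */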
3851 	work = kmalloc(sizeof *work, GFP_ATOMIC);
3852 	if (!work)
3853 		return -ENOMEM;
3854 
3855 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3856 	spin_lock_irqsave(&cm_id_priv->lock, flags);
3857 	switch (cm_id->state) {
3859 	case IB_CM_REP_SENT:
3860 	case IB_CM_MRA_REP_RCVD:
3861 		cm_id->state = IB_CM_ESTABLISHED;
3862 		break;
3863 	case IB_CM_ESTABLISHED:
3864 		ret = -EISCONN;
3865 		break;
3866 	default:
3867 		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3868 			 be32_to_cpu(cm_id->local_id), cm_id->state);
3869 		ret = -EINVAL;
3870 		break;
3871 	}
3872 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3873 
3874 	if (ret) {
3875 		kfree(work);
3876 		goto out;
3877 	}
3878 
3879 	/*
3880 	 * The CM worker thread may try to destroy the cm_id before it
3881 	 * can execute this work item.  To prevent potential deadlock,
3882 	 * we need to find the cm_id once we're in the context of the
3883 	 * worker thread, rather than holding a reference on it.
3884 	 */
3885 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3886 	work->local_id = cm_id->local_id;
3887 	work->remote_id = cm_id->remote_id;
3888 	work->mad_recv_wc = NULL;
3889 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3890 
3891 	/* Check if the device started its remove_one */
3892 	spin_lock_irqsave(&cm.lock, flags);
3893 	if (!cm_dev->going_down) {
3894 		queue_delayed_work(cm.wq, &work->work, 0);
3895 	} else {
3896 		kfree(work);
3897 		ret = -ENODEV;
3898 	}
3899 	spin_unlock_irqrestore(&cm.lock, flags);
3900 
3901 out:
3902 	return ret;
3903 }
3904 
3905 static int cm_migrate(struct ib_cm_id *cm_id)
3906 {
3907 	struct cm_id_private *cm_id_priv;
3908 	struct cm_av tmp_av;
3909 	unsigned long flags;
3910 	int tmp_send_port_not_ready;
3911 	int ret = 0;
3912 
3913 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3914 	spin_lock_irqsave(&cm_id_priv->lock, flags);
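	/*
	 * Migration is only valid on an established connection with no LAP
	 * exchange in progress (alternate path UNINIT or IDLE).
	 */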
3915 	if (cm_id->state == IB_CM_ESTABLISHED &&
3916 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3917 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3918 		cm_id->lap_state = IB_CM_LAP_IDLE;
3919 		/* Swap address vector */
3920 		tmp_av = cm_id_priv->av;
3921 		cm_id_priv->av = cm_id_priv->alt_av;
3922 		cm_id_priv->alt_av = tmp_av;
3923 		/* Swap port send ready state */
3924 		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3925 		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3926 		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3927 	} else
3928 		ret = -EINVAL;
3929 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3930 
3931 	return ret;
3932 }
3933 
3934 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3935 {
3936 	int ret;
3937 
3938 	switch (event) {
3939 	case IB_EVENT_COMM_EST:
3940 		ret = cm_establish(cm_id);
3941 		break;
3942 	case IB_EVENT_PATH_MIG:
3943 		ret = cm_migrate(cm_id);
3944 		break;
3945 	default:
3946 		ret = -EINVAL;
3947 	}
3948 	return ret;
3949 }
3950 EXPORT_SYMBOL(ib_cm_notify);
3951 
3952 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3953 			    struct ib_mad_send_buf *send_buf,
3954 			    struct ib_mad_recv_wc *mad_recv_wc)
3955 {
3956 	struct cm_port *port = mad_agent->context;
3957 	struct cm_work *work;
3958 	enum ib_cm_event_type event;
3959 	bool alt_path = false;
3960 	u16 attr_id;
3961 	int paths = 0;
3962 	int going_down = 0;
3963 
3964 	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3965 	case CM_REQ_ATTR_ID:
3966 		alt_path = cm_req_has_alt_path((struct cm_req_msg *)
3967 						mad_recv_wc->recv_buf.mad);
3968 		paths = 1 + (alt_path != 0);
3969 		event = IB_CM_REQ_RECEIVED;
3970 		break;
3971 	case CM_MRA_ATTR_ID:
3972 		event = IB_CM_MRA_RECEIVED;
3973 		break;
3974 	case CM_REJ_ATTR_ID:
3975 		event = IB_CM_REJ_RECEIVED;
3976 		break;
3977 	case CM_REP_ATTR_ID:
3978 		event = IB_CM_REP_RECEIVED;
3979 		break;
3980 	case CM_RTU_ATTR_ID:
3981 		event = IB_CM_RTU_RECEIVED;
3982 		break;
3983 	case CM_DREQ_ATTR_ID:
3984 		event = IB_CM_DREQ_RECEIVED;
3985 		break;
3986 	case CM_DREP_ATTR_ID:
3987 		event = IB_CM_DREP_RECEIVED;
3988 		break;
3989 	case CM_SIDR_REQ_ATTR_ID:
3990 		event = IB_CM_SIDR_REQ_RECEIVED;
3991 		break;
3992 	case CM_SIDR_REP_ATTR_ID:
3993 		event = IB_CM_SIDR_REP_RECEIVED;
3994 		break;
3995 	case CM_LAP_ATTR_ID:
3996 		paths = 1;
3997 		event = IB_CM_LAP_RECEIVED;
3998 		break;
3999 	case CM_APR_ATTR_ID:
4000 		event = IB_CM_APR_RECEIVED;
4001 		break;
4002 	default:
4003 		ib_free_recv_mad(mad_recv_wc);
4004 		return;
4005 	}
4006 
4007 	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
4008 	atomic_long_inc(&port->counter_group[CM_RECV].
4009 			counter[attr_id - CM_ATTR_ID_OFFSET]);
4010 
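	/*
	 * Size the work item for its trailing path records: a REQ may carry a
	 * primary and an optional alternate path, a LAP carries one path.
	 */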
4011 	work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
4012 	if (!work) {
4013 		ib_free_recv_mad(mad_recv_wc);
4014 		return;
4015 	}
4016 
4017 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
4018 	work->cm_event.event = event;
4019 	work->mad_recv_wc = mad_recv_wc;
4020 	work->port = port;
4021 
4022 	/* Check if the device started its remove_one */
4023 	spin_lock_irq(&cm.lock);
4024 	if (!port->cm_dev->going_down)
4025 		queue_delayed_work(cm.wq, &work->work, 0);
4026 	else
4027 		going_down = 1;
4028 	spin_unlock_irq(&cm.lock);
4029 
4030 	if (going_down) {
4031 		kfree(work);
4032 		ib_free_recv_mad(mad_recv_wc);
4033 	}
4034 }
4035 
4036 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
4037 				struct ib_qp_attr *qp_attr,
4038 				int *qp_attr_mask)
4039 {
4040 	unsigned long flags;
4041 	int ret;
4042 
4043 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4044 	switch (cm_id_priv->id.state) {
4045 	case IB_CM_REQ_SENT:
4046 	case IB_CM_MRA_REQ_RCVD:
4047 	case IB_CM_REQ_RCVD:
4048 	case IB_CM_MRA_REQ_SENT:
4049 	case IB_CM_REP_RCVD:
4050 	case IB_CM_MRA_REP_SENT:
4051 	case IB_CM_REP_SENT:
4052 	case IB_CM_MRA_REP_RCVD:
4053 	case IB_CM_ESTABLISHED:
4054 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
4055 				IB_QP_PKEY_INDEX | IB_QP_PORT;
4056 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
4057 		if (cm_id_priv->responder_resources)
4058 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
4059 						    IB_ACCESS_REMOTE_ATOMIC;
4060 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
4061 		qp_attr->port_num = cm_id_priv->av.port->port_num;
4062 		ret = 0;
4063 		break;
4064 	default:
4065 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4066 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4067 			 cm_id_priv->id.state);
4068 		ret = -EINVAL;
4069 		break;
4070 	}
4071 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4072 	return ret;
4073 }
4074 
4075 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
4076 			       struct ib_qp_attr *qp_attr,
4077 			       int *qp_attr_mask)
4078 {
4079 	unsigned long flags;
4080 	int ret;
4081 
4082 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4083 	switch (cm_id_priv->id.state) {
4084 	case IB_CM_REQ_RCVD:
4085 	case IB_CM_MRA_REQ_SENT:
4086 	case IB_CM_REP_RCVD:
4087 	case IB_CM_MRA_REP_SENT:
4088 	case IB_CM_REP_SENT:
4089 	case IB_CM_MRA_REP_RCVD:
4090 	case IB_CM_ESTABLISHED:
4091 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
4092 				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
4093 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
4094 		qp_attr->path_mtu = cm_id_priv->path_mtu;
4095 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
4096 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
4097 		if (cm_id_priv->qp_type == IB_QPT_RC ||
4098 		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
4099 			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
4100 					 IB_QP_MIN_RNR_TIMER;
4101 			qp_attr->max_dest_rd_atomic =
4102 					cm_id_priv->responder_resources;
4103 			qp_attr->min_rnr_timer = 0;
4104 		}
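		/* Program the alternate path only if an alternate address vector is set. */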
4105 		if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4106 			*qp_attr_mask |= IB_QP_ALT_PATH;
4107 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4108 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4109 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4110 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4111 		}
4112 		ret = 0;
4113 		break;
4114 	default:
4115 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4116 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4117 			 cm_id_priv->id.state);
4118 		ret = -EINVAL;
4119 		break;
4120 	}
4121 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4122 	return ret;
4123 }
4124 
4125 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
4126 			       struct ib_qp_attr *qp_attr,
4127 			       int *qp_attr_mask)
4128 {
4129 	unsigned long flags;
4130 	int ret;
4131 
4132 	spin_lock_irqsave(&cm_id_priv->lock, flags);
4133 	switch (cm_id_priv->id.state) {
4134 	/* Allow transition to RTS before sending REP */
4135 	case IB_CM_REQ_RCVD:
4136 	case IB_CM_MRA_REQ_SENT:
4137 
4138 	case IB_CM_REP_RCVD:
4139 	case IB_CM_MRA_REP_SENT:
4140 	case IB_CM_REP_SENT:
4141 	case IB_CM_MRA_REP_RCVD:
4142 	case IB_CM_ESTABLISHED:
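		/*
		 * LAP_UNINIT means this is the initial move to RTS; otherwise a
		 * LAP exchange has completed and only the alternate path and
		 * path-migration state need to be re-armed.
		 */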
4143 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
4144 			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
4145 			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
4146 			switch (cm_id_priv->qp_type) {
4147 			case IB_QPT_RC:
4148 			case IB_QPT_XRC_INI:
4149 				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4150 						 IB_QP_MAX_QP_RD_ATOMIC;
4151 				qp_attr->retry_cnt = cm_id_priv->retry_count;
4152 				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
4153 				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
4154 				/* fall through */
4155 			case IB_QPT_XRC_TGT:
4156 				*qp_attr_mask |= IB_QP_TIMEOUT;
4157 				qp_attr->timeout = cm_id_priv->av.timeout;
4158 				break;
4159 			default:
4160 				break;
4161 			}
4162 			if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4163 				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
4164 				qp_attr->path_mig_state = IB_MIG_REARM;
4165 			}
4166 		} else {
4167 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
4168 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4169 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4170 			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4171 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4172 			qp_attr->path_mig_state = IB_MIG_REARM;
4173 		}
4174 		ret = 0;
4175 		break;
4176 	default:
4177 		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4178 			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4179 			 cm_id_priv->id.state);
4180 		ret = -EINVAL;
4181 		break;
4182 	}
4183 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4184 	return ret;
4185 }
4186 
4187 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
4188 		       struct ib_qp_attr *qp_attr,
4189 		       int *qp_attr_mask)
4190 {
4191 	struct cm_id_private *cm_id_priv;
4192 	int ret;
4193 
4194 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
4195 	switch (qp_attr->qp_state) {
4196 	case IB_QPS_INIT:
4197 		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
4198 		break;
4199 	case IB_QPS_RTR:
4200 		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
4201 		break;
4202 	case IB_QPS_RTS:
4203 		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
4204 		break;
4205 	default:
4206 		ret = -EINVAL;
4207 		break;
4208 	}
4209 	return ret;
4210 }
4211 EXPORT_SYMBOL(ib_cm_init_qp_attr);
4212 
4213 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
4214 			       char *buf)
4215 {
4216 	struct cm_counter_group *group;
4217 	struct cm_counter_attribute *cm_attr;
4218 
4219 	group = container_of(obj, struct cm_counter_group, obj);
4220 	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
4221 
4222 	return sprintf(buf, "%ld\n",
4223 		       atomic_long_read(&group->counter[cm_attr->index]));
4224 }
4225 
4226 static const struct sysfs_ops cm_counter_ops = {
4227 	.show = cm_show_counter
4228 };
4229 
4230 static struct kobj_type cm_counter_obj_type = {
4231 	.sysfs_ops = &cm_counter_ops,
4232 	.default_attrs = cm_counter_default_attrs
4233 };
4234 
4235 static char *cm_devnode(struct device *dev, umode_t *mode)
4236 {
4237 	if (mode)
4238 		*mode = 0666;
4239 	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
4240 }
4241 
4242 struct class cm_class = {
4243 	.owner   = THIS_MODULE,
4244 	.name    = "infiniband_cm",
4245 	.devnode = cm_devnode,
4246 };
4247 EXPORT_SYMBOL(cm_class);
4248 
4249 static int cm_create_port_fs(struct cm_port *port)
4250 {
4251 	int i, ret;
4252 
4253 	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
4254 		ret = ib_port_register_module_stat(port->cm_dev->ib_device,
4255 						   port->port_num,
4256 						   &port->counter_group[i].obj,
4257 						   &cm_counter_obj_type,
4258 						   counter_group_names[i]);
4259 		if (ret)
4260 			goto error;
4261 	}
4262 
4263 	return 0;
4264 
4265 error:
4266 	while (i--)
4267 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4268 	return ret;
4269 
4270 }
4271 
4272 static void cm_remove_port_fs(struct cm_port *port)
4273 {
4274 	int i;
4275 
4276 	for (i = 0; i < CM_COUNTER_GROUPS; i++)
4277 		ib_port_unregister_module_stat(&port->counter_group[i].obj);
4278 
4279 }
4280 
4281 static void cm_add_one(struct ib_device *ib_device)
4282 {
4283 	struct cm_device *cm_dev;
4284 	struct cm_port *port;
4285 	struct ib_mad_reg_req reg_req = {
4286 		.mgmt_class = IB_MGMT_CLASS_CM,
4287 		.mgmt_class_version = IB_CM_CLASS_VERSION,
4288 	};
4289 	struct ib_port_modify port_modify = {
4290 		.set_port_cap_mask = IB_PORT_CM_SUP
4291 	};
4292 	unsigned long flags;
4293 	int ret;
4294 	int count = 0;
4295 	u8 i;
4296 
4297 	cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
4298 			 GFP_KERNEL);
4299 	if (!cm_dev)
4300 		return;
4301 
4302 	cm_dev->ib_device = ib_device;
4303 	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
4304 	cm_dev->going_down = 0;
4305 
4306 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
4307 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4308 		if (!rdma_cap_ib_cm(ib_device, i))
4309 			continue;
4310 
4311 		port = kzalloc(sizeof *port, GFP_KERNEL);
4312 		if (!port)
4313 			goto error1;
4314 
4315 		cm_dev->port[i-1] = port;
4316 		port->cm_dev = cm_dev;
4317 		port->port_num = i;
4318 
4319 		INIT_LIST_HEAD(&port->cm_priv_prim_list);
4320 		INIT_LIST_HEAD(&port->cm_priv_altr_list);
4321 
4322 		ret = cm_create_port_fs(port);
4323 		if (ret)
4324 			goto error1;
4325 
4326 		port->mad_agent = ib_register_mad_agent(ib_device, i,
4327 							IB_QPT_GSI,
4328 							&reg_req,
4329 							0,
4330 							cm_send_handler,
4331 							cm_recv_handler,
4332 							port,
4333 							0);
4334 		if (IS_ERR(port->mad_agent))
4335 			goto error2;
4336 
4337 		ret = ib_modify_port(ib_device, i, 0, &port_modify);
4338 		if (ret)
4339 			goto error3;
4340 
4341 		count++;
4342 	}
4343 
4344 	if (!count)
4345 		goto free;
4346 
4347 	ib_set_client_data(ib_device, &cm_client, cm_dev);
4348 
4349 	write_lock_irqsave(&cm.device_lock, flags);
4350 	list_add_tail(&cm_dev->list, &cm.device_list);
4351 	write_unlock_irqrestore(&cm.device_lock, flags);
4352 	return;
4353 
4354 error3:
4355 	ib_unregister_mad_agent(port->mad_agent);
4356 error2:
4357 	cm_remove_port_fs(port);
4358 error1:
4359 	port_modify.set_port_cap_mask = 0;
4360 	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4361 	kfree(port);
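	/* The failing port was released above; unwind the earlier ports. */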
4362 	while (--i) {
4363 		if (!rdma_cap_ib_cm(ib_device, i))
4364 			continue;
4365 
4366 		port = cm_dev->port[i-1];
4367 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4368 		ib_unregister_mad_agent(port->mad_agent);
4369 		cm_remove_port_fs(port);
4370 		kfree(port);
4371 	}
4372 free:
4373 	kfree(cm_dev);
4374 }
4375 
4376 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4377 {
4378 	struct cm_device *cm_dev = client_data;
4379 	struct cm_port *port;
4380 	struct cm_id_private *cm_id_priv;
4381 	struct ib_mad_agent *cur_mad_agent;
4382 	struct ib_port_modify port_modify = {
4383 		.clr_port_cap_mask = IB_PORT_CM_SUP
4384 	};
4385 	unsigned long flags;
4386 	int i;
4387 
4388 	if (!cm_dev)
4389 		return;
4390 
4391 	write_lock_irqsave(&cm.device_lock, flags);
4392 	list_del(&cm_dev->list);
4393 	write_unlock_irqrestore(&cm.device_lock, flags);
4394 
4395 	spin_lock_irq(&cm.lock);
4396 	cm_dev->going_down = 1;
4397 	spin_unlock_irq(&cm.lock);
4398 
4399 	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4400 		if (!rdma_cap_ib_cm(ib_device, i))
4401 			continue;
4402 
4403 		port = cm_dev->port[i-1];
4404 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4405 		/* Mark the port as not ready to send for all cm_ids bound to it */
4406 		spin_lock_irq(&cm.lock);
4407 		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4408 			cm_id_priv->altr_send_port_not_ready = 1;
4409 		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4410 			cm_id_priv->prim_send_port_not_ready = 1;
4411 		spin_unlock_irq(&cm.lock);
4412 		/*
4413 		 * Flush the workqueue after going_down is set so that no new
4414 		 * work can be queued by the receive handler; only then is it
4415 		 * safe to unregister the MAD agent.
4416 		 */
4417 		flush_workqueue(cm.wq);
4418 		spin_lock_irq(&cm.state_lock);
4419 		cur_mad_agent = port->mad_agent;
4420 		port->mad_agent = NULL;
4421 		spin_unlock_irq(&cm.state_lock);
4422 		ib_unregister_mad_agent(cur_mad_agent);
4423 		cm_remove_port_fs(port);
4424 		kfree(port);
4425 	}
4426 
4427 	kfree(cm_dev);
4428 }
4429 
4430 static int __init ib_cm_init(void)
4431 {
4432 	int ret;
4433 
4434 	INIT_LIST_HEAD(&cm.device_list);
4435 	rwlock_init(&cm.device_lock);
4436 	spin_lock_init(&cm.lock);
4437 	spin_lock_init(&cm.state_lock);
4438 	cm.listen_service_table = RB_ROOT;
4439 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4440 	cm.remote_id_table = RB_ROOT;
4441 	cm.remote_qp_table = RB_ROOT;
4442 	cm.remote_sidr_table = RB_ROOT;
4443 	xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
4444 	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4445 	INIT_LIST_HEAD(&cm.timewait_list);
4446 
4447 	ret = class_register(&cm_class);
4448 	if (ret) {
4449 		ret = -ENOMEM;
4450 		goto error1;
4451 	}
4452 
4453 	cm.wq = alloc_workqueue("ib_cm", 0, 1);
4454 	if (!cm.wq) {
4455 		ret = -ENOMEM;
4456 		goto error2;
4457 	}
4458 
4459 	ret = ib_register_client(&cm_client);
4460 	if (ret)
4461 		goto error3;
4462 
4463 	return 0;
4464 error3:
4465 	destroy_workqueue(cm.wq);
4466 error2:
4467 	class_unregister(&cm_class);
4468 error1:
4469 	return ret;
4470 }
4471 
4472 static void __exit ib_cm_cleanup(void)
4473 {
4474 	struct cm_timewait_info *timewait_info, *tmp;
4475 
4476 	spin_lock_irq(&cm.lock);
4477 	list_for_each_entry(timewait_info, &cm.timewait_list, list)
4478 		cancel_delayed_work(&timewait_info->work.work);
4479 	spin_unlock_irq(&cm.lock);
4480 
4481 	ib_unregister_client(&cm_client);
4482 	destroy_workqueue(cm.wq);
4483 
4484 	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4485 		list_del(&timewait_info->list);
4486 		kfree(timewait_info);
4487 	}
4488 
4489 	class_unregister(&cm_class);
4490 	WARN_ON(!xa_empty(&cm.local_id_table));
4491 }
4492 
4493 module_init(ib_cm_init);
4494 module_exit(ib_cm_cleanup);
4495