xref: /openbmc/linux/drivers/infiniband/hw/erdma/erdma_cm.c (revision f00093608fa790580da309bb9feb5108fbe7c331)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /*          Fredy Neeser */
9 /*          Greg Joyce <greg@opengridcomputing.com> */
10 /* Copyright (c) 2008-2019, IBM Corporation */
11 /* Copyright (c) 2017, Open Grid Computing, Inc. */
12 
13 #include <linux/workqueue.h>
14 
15 #include "erdma.h"
16 #include "erdma_cm.h"
17 #include "erdma_verbs.h"
18 
/* Socket callbacks installed on CM-owned sockets; defined further below. */
static void erdma_cm_llp_error_report(struct sock *sk);
static void erdma_cm_llp_data_ready(struct sock *sk);
static void erdma_cm_llp_state_change(struct sock *sk);

/* Workqueue used by erdma_cm_queue_work() for all CM work items. */
static struct workqueue_struct *erdma_cm_wq;
24 
25 static void erdma_sk_assign_cm_upcalls(struct sock *sk)
26 {
27 	write_lock_bh(&sk->sk_callback_lock);
28 	sk->sk_state_change = erdma_cm_llp_state_change;
29 	sk->sk_data_ready = erdma_cm_llp_data_ready;
30 	sk->sk_error_report = erdma_cm_llp_error_report;
31 	write_unlock_bh(&sk->sk_callback_lock);
32 }
33 
34 static void erdma_sk_save_upcalls(struct sock *sk)
35 {
36 	struct erdma_cep *cep = sk_to_cep(sk);
37 
38 	write_lock_bh(&sk->sk_callback_lock);
39 	cep->sk_state_change = sk->sk_state_change;
40 	cep->sk_data_ready = sk->sk_data_ready;
41 	cep->sk_error_report = sk->sk_error_report;
42 	write_unlock_bh(&sk->sk_callback_lock);
43 }
44 
45 static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
46 {
47 	sk->sk_state_change = cep->sk_state_change;
48 	sk->sk_data_ready = cep->sk_data_ready;
49 	sk->sk_error_report = cep->sk_error_report;
50 	sk->sk_user_data = NULL;
51 }
52 
53 static void erdma_socket_disassoc(struct socket *s)
54 {
55 	struct sock *sk = s->sk;
56 	struct erdma_cep *cep;
57 
58 	if (sk) {
59 		write_lock_bh(&sk->sk_callback_lock);
60 		cep = sk_to_cep(sk);
61 		if (cep) {
62 			erdma_sk_restore_upcalls(sk, cep);
63 			erdma_cep_put(cep);
64 		} else {
65 			WARN_ON_ONCE(1);
66 		}
67 		write_unlock_bh(&sk->sk_callback_lock);
68 	} else {
69 		WARN_ON_ONCE(1);
70 	}
71 }
72 
73 static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
74 {
75 	cep->sock = s;
76 	erdma_cep_get(cep);
77 	s->sk->sk_user_data = cep;
78 
79 	erdma_sk_save_upcalls(s->sk);
80 	erdma_sk_assign_cm_upcalls(s->sk);
81 }
82 
83 static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
84 {
85 	if (cep->listen_cep) {
86 		erdma_cep_put(cep->listen_cep);
87 		cep->listen_cep = NULL;
88 	}
89 }
90 
91 static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
92 {
93 	struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
94 	unsigned long flags;
95 
96 	if (!cep)
97 		return NULL;
98 
99 	INIT_LIST_HEAD(&cep->listenq);
100 	INIT_LIST_HEAD(&cep->devq);
101 	INIT_LIST_HEAD(&cep->work_freelist);
102 
103 	kref_init(&cep->ref);
104 	cep->state = ERDMA_EPSTATE_IDLE;
105 	init_waitqueue_head(&cep->waitq);
106 	spin_lock_init(&cep->lock);
107 	cep->dev = dev;
108 
109 	spin_lock_irqsave(&dev->lock, flags);
110 	list_add_tail(&cep->devq, &dev->cep_list);
111 	spin_unlock_irqrestore(&dev->lock, flags);
112 
113 	return cep;
114 }
115 
116 static void erdma_cm_free_work(struct erdma_cep *cep)
117 {
118 	struct list_head *w, *tmp;
119 	struct erdma_cm_work *work;
120 
121 	list_for_each_safe(w, tmp, &cep->work_freelist) {
122 		work = list_entry(w, struct erdma_cm_work, list);
123 		list_del(&work->list);
124 		kfree(work);
125 	}
126 }
127 
128 static void erdma_cancel_mpatimer(struct erdma_cep *cep)
129 {
130 	spin_lock_bh(&cep->lock);
131 	if (cep->mpa_timer) {
132 		if (cancel_delayed_work(&cep->mpa_timer->work)) {
133 			erdma_cep_put(cep);
134 			kfree(cep->mpa_timer);
135 		}
136 		cep->mpa_timer = NULL;
137 	}
138 	spin_unlock_bh(&cep->lock);
139 }
140 
141 static void erdma_put_work(struct erdma_cm_work *work)
142 {
143 	INIT_LIST_HEAD(&work->list);
144 	spin_lock_bh(&work->cep->lock);
145 	list_add(&work->list, &work->cep->work_freelist);
146 	spin_unlock_bh(&work->cep->lock);
147 }
148 
149 static void erdma_cep_set_inuse(struct erdma_cep *cep)
150 {
151 	unsigned long flags;
152 
153 	spin_lock_irqsave(&cep->lock, flags);
154 	while (cep->in_use) {
155 		spin_unlock_irqrestore(&cep->lock, flags);
156 		wait_event_interruptible(cep->waitq, !cep->in_use);
157 		if (signal_pending(current))
158 			flush_signals(current);
159 
160 		spin_lock_irqsave(&cep->lock, flags);
161 	}
162 
163 	cep->in_use = 1;
164 	spin_unlock_irqrestore(&cep->lock, flags);
165 }
166 
167 static void erdma_cep_set_free(struct erdma_cep *cep)
168 {
169 	unsigned long flags;
170 
171 	spin_lock_irqsave(&cep->lock, flags);
172 	cep->in_use = 0;
173 	spin_unlock_irqrestore(&cep->lock, flags);
174 
175 	wake_up(&cep->waitq);
176 }
177 
178 static void __erdma_cep_dealloc(struct kref *ref)
179 {
180 	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
181 	struct erdma_dev *dev = cep->dev;
182 	unsigned long flags;
183 
184 	WARN_ON(cep->listen_cep);
185 
186 	kfree(cep->private_data);
187 	kfree(cep->mpa.pdata);
188 	spin_lock_bh(&cep->lock);
189 	if (!list_empty(&cep->work_freelist))
190 		erdma_cm_free_work(cep);
191 	spin_unlock_bh(&cep->lock);
192 
193 	spin_lock_irqsave(&dev->lock, flags);
194 	list_del(&cep->devq);
195 	spin_unlock_irqrestore(&dev->lock, flags);
196 	kfree(cep);
197 }
198 
199 static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
200 {
201 	struct erdma_cm_work *work = NULL;
202 
203 	spin_lock_bh(&cep->lock);
204 	if (!list_empty(&cep->work_freelist)) {
205 		work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
206 				  list);
207 		list_del_init(&work->list);
208 	}
209 
210 	spin_unlock_bh(&cep->lock);
211 	return work;
212 }
213 
214 static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
215 {
216 	struct erdma_cm_work *work;
217 
218 	while (num--) {
219 		work = kmalloc(sizeof(*work), GFP_KERNEL);
220 		if (!work) {
221 			if (!(list_empty(&cep->work_freelist)))
222 				erdma_cm_free_work(cep);
223 			return -ENOMEM;
224 		}
225 		work->cep = cep;
226 		INIT_LIST_HEAD(&work->list);
227 		list_add(&work->list, &cep->work_freelist);
228 	}
229 
230 	return 0;
231 }
232 
233 static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
234 			   int status)
235 {
236 	struct iw_cm_event event;
237 	struct iw_cm_id *cm_id;
238 
239 	memset(&event, 0, sizeof(event));
240 	event.status = status;
241 	event.event = reason;
242 
243 	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
244 		event.provider_data = cep;
245 		cm_id = cep->listen_cep->cm_id;
246 
247 		event.ird = cep->dev->attrs.max_ird;
248 		event.ord = cep->dev->attrs.max_ord;
249 	} else {
250 		cm_id = cep->cm_id;
251 	}
252 
253 	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
254 	    reason == IW_CM_EVENT_CONNECT_REPLY) {
255 		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
256 
257 		if (pd_len && cep->mpa.pdata) {
258 			event.private_data_len = pd_len;
259 			event.private_data = cep->mpa.pdata;
260 		}
261 
262 		getname_local(cep->sock, &event.local_addr);
263 		getname_peer(cep->sock, &event.remote_addr);
264 	}
265 
266 	return cm_id->event_handler(cm_id, &event);
267 }
268 
269 void erdma_qp_cm_drop(struct erdma_qp *qp)
270 {
271 	struct erdma_cep *cep = qp->cep;
272 
273 	if (!qp->cep)
274 		return;
275 
276 	erdma_cep_set_inuse(cep);
277 
278 	/* already closed. */
279 	if (cep->state == ERDMA_EPSTATE_CLOSED)
280 		goto out;
281 
282 	if (cep->cm_id) {
283 		switch (cep->state) {
284 		case ERDMA_EPSTATE_AWAIT_MPAREP:
285 			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
286 					-EINVAL);
287 			break;
288 		case ERDMA_EPSTATE_RDMA_MODE:
289 			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
290 			break;
291 		case ERDMA_EPSTATE_IDLE:
292 		case ERDMA_EPSTATE_LISTENING:
293 		case ERDMA_EPSTATE_CONNECTING:
294 		case ERDMA_EPSTATE_AWAIT_MPAREQ:
295 		case ERDMA_EPSTATE_RECVD_MPAREQ:
296 		case ERDMA_EPSTATE_CLOSED:
297 		default:
298 			break;
299 		}
300 		cep->cm_id->rem_ref(cep->cm_id);
301 		cep->cm_id = NULL;
302 		erdma_cep_put(cep);
303 	}
304 	cep->state = ERDMA_EPSTATE_CLOSED;
305 
306 	if (cep->sock) {
307 		erdma_socket_disassoc(cep->sock);
308 		sock_release(cep->sock);
309 		cep->sock = NULL;
310 	}
311 
312 	if (cep->qp) {
313 		cep->qp = NULL;
314 		erdma_qp_put(qp);
315 	}
316 out:
317 	erdma_cep_set_free(cep);
318 }
319 
320 void erdma_cep_put(struct erdma_cep *cep)
321 {
322 	WARN_ON(kref_read(&cep->ref) < 1);
323 	kref_put(&cep->ref, __erdma_cep_dealloc);
324 }
325 
326 void erdma_cep_get(struct erdma_cep *cep)
327 {
328 	kref_get(&cep->ref);
329 }
330 
331 static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
332 				u8 pd_len)
333 {
334 	struct socket *s = cep->sock;
335 	struct mpa_rr *rr = &cep->mpa.hdr;
336 	struct kvec iov[3];
337 	struct msghdr msg;
338 	int iovec_num = 0;
339 	int ret;
340 	int mpa_len;
341 
342 	memset(&msg, 0, sizeof(msg));
343 
344 	rr->params.pd_len = cpu_to_be16(pd_len);
345 
346 	iov[iovec_num].iov_base = rr;
347 	iov[iovec_num].iov_len = sizeof(*rr);
348 	iovec_num++;
349 	mpa_len = sizeof(*rr);
350 
351 	iov[iovec_num].iov_base = &cep->mpa.ext_data;
352 	iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
353 	iovec_num++;
354 	mpa_len += sizeof(cep->mpa.ext_data);
355 
356 	if (pd_len) {
357 		iov[iovec_num].iov_base = (char *)pdata;
358 		iov[iovec_num].iov_len = pd_len;
359 		mpa_len += pd_len;
360 		iovec_num++;
361 	}
362 
363 	ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);
364 
365 	return ret < 0 ? ret : 0;
366 }
367 
368 static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
369 			     int flags)
370 {
371 	struct kvec iov = { buf, size };
372 	struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
373 
374 	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
375 }
376 
377 static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
378 			  int hdr_size, int *rcvd_out)
379 {
380 	struct socket *s = cep->sock;
381 	int rcvd;
382 
383 	*rcvd_out = 0;
384 	if (hdr_rcvd < hdr_size) {
385 		rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
386 				  MSG_DONTWAIT);
387 		if (rcvd == -EAGAIN)
388 			return -EAGAIN;
389 
390 		if (rcvd <= 0)
391 			return -ECONNABORTED;
392 
393 		hdr_rcvd += rcvd;
394 		*rcvd_out = rcvd;
395 
396 		if (hdr_rcvd < hdr_size)
397 			return -EAGAIN;
398 	}
399 
400 	return 0;
401 }
402 
403 static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
404 {
405 	*bits = (*bits & ~MPA_RR_MASK_REVISION) |
406 		(cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
407 }
408 
409 static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
410 {
411 	__be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
412 
413 	return (u8)be16_to_cpu(rev);
414 }
415 
416 static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
417 {
418 	*bits = (*bits & ~MPA_EXT_FLAG_CC) |
419 		(cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
420 }
421 
422 static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
423 {
424 	__be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;
425 
426 	return (u8)be32_to_cpu(cc);
427 }
428 
429 /*
430  * Receive MPA Request/Reply header.
431  *
432  * Returns 0 if complete MPA Request/Reply haeder including
433  * eventual private data was received. Returns -EAGAIN if
434  * header was partially received or negative error code otherwise.
435  *
436  * Context: May be called in process context only
437  */
438 static int erdma_recv_mpa_rr(struct erdma_cep *cep)
439 {
440 	struct mpa_rr *hdr = &cep->mpa.hdr;
441 	struct socket *s = cep->sock;
442 	u16 pd_len;
443 	int rcvd, to_rcv, ret, pd_rcvd;
444 
445 	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
446 		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
447 				     (char *)&cep->mpa.hdr,
448 				     sizeof(struct mpa_rr), &rcvd);
449 		cep->mpa.bytes_rcvd += rcvd;
450 		if (ret)
451 			return ret;
452 	}
453 
454 	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
455 	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
456 		return -EPROTO;
457 
458 	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
459 	    sizeof(struct erdma_mpa_ext)) {
460 		ret = __recv_mpa_hdr(
461 			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
462 			(char *)&cep->mpa.ext_data,
463 			sizeof(struct erdma_mpa_ext), &rcvd);
464 		cep->mpa.bytes_rcvd += rcvd;
465 		if (ret)
466 			return ret;
467 	}
468 
469 	pd_len = be16_to_cpu(hdr->params.pd_len);
470 	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
471 		  sizeof(struct erdma_mpa_ext);
472 	to_rcv = pd_len - pd_rcvd;
473 
474 	if (!to_rcv) {
475 		/*
476 		 * We have received the whole MPA Request/Reply message.
477 		 * Check against peer protocol violation.
478 		 */
479 		u32 word;
480 
481 		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
482 				     &rcvd);
483 		if (ret == -EAGAIN && rcvd == 0)
484 			return 0;
485 
486 		if (ret)
487 			return ret;
488 
489 		return -EPROTO;
490 	}
491 
492 	/*
493 	 * At this point, MPA header has been fully received, and pd_len != 0.
494 	 * So, begin to receive private data.
495 	 */
496 	if (!cep->mpa.pdata) {
497 		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
498 		if (!cep->mpa.pdata)
499 			return -ENOMEM;
500 	}
501 
502 	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
503 			  MSG_DONTWAIT);
504 	if (rcvd < 0)
505 		return rcvd;
506 
507 	if (rcvd > to_rcv)
508 		return -EPROTO;
509 
510 	cep->mpa.bytes_rcvd += rcvd;
511 
512 	if (to_rcv == rcvd)
513 		return 0;
514 
515 	return -EAGAIN;
516 }
517 
518 /*
519  * erdma_proc_mpareq()
520  *
521  * Read MPA Request from socket and signal new connection to IWCM
522  * if success. Caller must hold lock on corresponding listening CEP.
523  */
524 static int erdma_proc_mpareq(struct erdma_cep *cep)
525 {
526 	struct mpa_rr *req;
527 	int ret;
528 
529 	ret = erdma_recv_mpa_rr(cep);
530 	if (ret)
531 		return ret;
532 
533 	req = &cep->mpa.hdr;
534 
535 	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
536 		return -EPROTO;
537 
538 	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);
539 
540 	/* Currently does not support marker and crc. */
541 	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
542 	    req->params.bits & MPA_RR_FLAG_CRC)
543 		goto reject_conn;
544 
545 	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;
546 
547 	/* Keep reference until IWCM accepts/rejects */
548 	erdma_cep_get(cep);
549 	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
550 	if (ret)
551 		erdma_cep_put(cep);
552 
553 	return ret;
554 
555 reject_conn:
556 	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
557 	req->params.bits |= MPA_RR_FLAG_REJECT;
558 	req->params.bits &= ~MPA_RR_FLAG_CRC;
559 
560 	kfree(cep->mpa.pdata);
561 	cep->mpa.pdata = NULL;
562 	erdma_send_mpareqrep(cep, NULL, 0);
563 
564 	return -EOPNOTSUPP;
565 }
566 
/*
 * Read and process the MPA Reply on the connecting side.
 *
 * On a valid, accepting reply the QP is moved to RTS and the IWCM gets a
 * successful CONNECT_REPLY; the CEP enters RDMA mode if that upcall
 * succeeds. A reject, an unsupported marker/CRC request or any other
 * failure is reported to the IWCM with a failing CONNECT_REPLY.
 *
 * Returns 0 on success, -EAGAIN while the reply is still incomplete (no
 * upcall is made in that case), or another negative error code.
 */
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	struct erdma_qp *qp = cep->qp;
	struct erdma_qp_attrs attrs;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto fail;

	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto fail;
	}

	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* Currently does not support marker and crc. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&attrs, 0, sizeof(attrs));
	attrs.state = ERDMA_QP_STATE_RTS;
	attrs.irq_size = cep->ird;
	attrs.orq_size = cep->ord;

	down_write(&qp->state_lock);

	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto fail;
	}

	qp->attrs.qp_type = ERDMA_QP_ACTIVE;
	/* Fall back to the compromise CC if the peer's value differs. */
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
		qp->attrs.cc = COMPROMISE_CC;

	ret = erdma_modify_qp_internal(qp, &attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_MPA);

	up_write(&qp->state_lock);

	if (ret)
		goto fail;

	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
	if (!ret)
		cep->state = ERDMA_EPSTATE_RDMA_MODE;

	return 0;

fail:
	/* -EAGAIN just means "come back later"; do not report it. */
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}
636 
637 static void erdma_accept_newconn(struct erdma_cep *cep)
638 {
639 	struct socket *s = cep->sock;
640 	struct socket *new_s = NULL;
641 	struct erdma_cep *new_cep = NULL;
642 	int ret = 0;
643 
644 	if (cep->state != ERDMA_EPSTATE_LISTENING)
645 		goto error;
646 
647 	new_cep = erdma_cep_alloc(cep->dev);
648 	if (!new_cep)
649 		goto error;
650 
651 	/*
652 	 * 4: Allocate a sufficient number of work elements
653 	 * to allow concurrent handling of local + peer close
654 	 * events, MPA header processing + MPA timeout.
655 	 */
656 	if (erdma_cm_alloc_work(new_cep, 4) != 0)
657 		goto error;
658 
659 	/*
660 	 * Copy saved socket callbacks from listening CEP
661 	 * and assign new socket with new CEP
662 	 */
663 	new_cep->sk_state_change = cep->sk_state_change;
664 	new_cep->sk_data_ready = cep->sk_data_ready;
665 	new_cep->sk_error_report = cep->sk_error_report;
666 
667 	ret = kernel_accept(s, &new_s, O_NONBLOCK);
668 	if (ret != 0)
669 		goto error;
670 
671 	new_cep->sock = new_s;
672 	erdma_cep_get(new_cep);
673 	new_s->sk->sk_user_data = new_cep;
674 
675 	tcp_sock_set_nodelay(new_s->sk);
676 	new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;
677 
678 	ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
679 	if (ret)
680 		goto error;
681 
682 	new_cep->listen_cep = cep;
683 	erdma_cep_get(cep);
684 
685 	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
686 		/* MPA REQ already queued */
687 		erdma_cep_set_inuse(new_cep);
688 		ret = erdma_proc_mpareq(new_cep);
689 		if (ret != -EAGAIN) {
690 			erdma_cep_put(cep);
691 			new_cep->listen_cep = NULL;
692 			if (ret) {
693 				erdma_cep_set_free(new_cep);
694 				goto error;
695 			}
696 		}
697 		erdma_cep_set_free(new_cep);
698 	}
699 	return;
700 
701 error:
702 	if (new_cep) {
703 		new_cep->state = ERDMA_EPSTATE_CLOSED;
704 		erdma_cancel_mpatimer(new_cep);
705 
706 		erdma_cep_put(new_cep);
707 		new_cep->sock = NULL;
708 	}
709 
710 	if (new_s) {
711 		erdma_socket_disassoc(new_s);
712 		sock_release(new_s);
713 	}
714 }
715 
716 static int erdma_newconn_connected(struct erdma_cep *cep)
717 {
718 	int ret = 0;
719 
720 	cep->mpa.hdr.params.bits = 0;
721 	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
722 
723 	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
724 	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
725 	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
726 
727 	ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
728 	cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
729 	cep->mpa.hdr.params.pd_len = 0;
730 
731 	if (ret >= 0)
732 		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);
733 
734 	return ret;
735 }
736 
/*
 * Workqueue callback for all CM work items queued by
 * erdma_cm_queue_work().
 *
 * The handler runs with the CEP held in-use, dispatches on the work type
 * and, if the event ends the connection (release_cep set), tears the CEP
 * down: timer cancelled, state CLOSED, QP closed and unlinked, socket
 * released and cm_id reference returned. At the end the work element goes
 * back to the CEP's free list and the CEP reference taken when the work
 * was queued is dropped.
 */
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		/* TCP connect completed: stop the connect timeout. */
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			/* This work item was the timer; it has now fired. */
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				/*
				 * The request is processed with the listening
				 * CEP held in-use as well, and only while it
				 * is still listening.
				 */
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				/*
				 * Anything but "incomplete, try again" ends
				 * the link to the listening CEP.
				 */
				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection drop
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 *       to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		/* This work item was the timer; it has now fired. */
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop()
			 */
			erdma_qp_get(qp);
			/* The CEP is not held in-use across the QP close. */
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			/*
			 * NOTE(review): state was set to CLOSED above, so
			 * this test looks always true here - confirm.
			 */
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	/* Recycle the work element, then drop the work's CEP reference. */
	erdma_put_work(work);
	erdma_cep_put(cep);
}
890 
891 int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
892 {
893 	struct erdma_cm_work *work = erdma_get_work(cep);
894 	unsigned long delay = 0;
895 
896 	if (!work)
897 		return -ENOMEM;
898 
899 	work->type = type;
900 	work->cep = cep;
901 
902 	erdma_cep_get(cep);
903 
904 	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);
905 
906 	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
907 		cep->mpa_timer = work;
908 
909 		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
910 			delay = MPAREP_TIMEOUT;
911 		else
912 			delay = MPAREQ_TIMEOUT;
913 	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
914 		cep->mpa_timer = work;
915 
916 		delay = CONNECT_TIMEOUT;
917 	}
918 
919 	queue_delayed_work(erdma_cm_wq, &work->work, delay);
920 
921 	return 0;
922 }
923 
924 static void erdma_cm_llp_data_ready(struct sock *sk)
925 {
926 	struct erdma_cep *cep;
927 
928 	read_lock(&sk->sk_callback_lock);
929 
930 	cep = sk_to_cep(sk);
931 	if (!cep)
932 		goto out;
933 
934 	if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
935 	    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
936 		erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);
937 
938 out:
939 	read_unlock(&sk->sk_callback_lock);
940 }
941 
942 static void erdma_cm_llp_error_report(struct sock *sk)
943 {
944 	struct erdma_cep *cep = sk_to_cep(sk);
945 
946 	if (cep)
947 		cep->sk_error_report(sk);
948 }
949 
950 static void erdma_cm_llp_state_change(struct sock *sk)
951 {
952 	struct erdma_cep *cep;
953 	void (*orig_state_change)(struct sock *sk);
954 
955 	read_lock(&sk->sk_callback_lock);
956 
957 	cep = sk_to_cep(sk);
958 	if (!cep) {
959 		read_unlock(&sk->sk_callback_lock);
960 		return;
961 	}
962 	orig_state_change = cep->sk_state_change;
963 
964 	switch (sk->sk_state) {
965 	case TCP_ESTABLISHED:
966 		if (cep->state == ERDMA_EPSTATE_CONNECTING)
967 			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
968 		else
969 			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
970 		break;
971 	case TCP_CLOSE:
972 	case TCP_CLOSE_WAIT:
973 		if (cep->state != ERDMA_EPSTATE_LISTENING)
974 			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
975 		break;
976 	default:
977 		break;
978 	}
979 	read_unlock(&sk->sk_callback_lock);
980 	orig_state_change(sk);
981 }
982 
983 static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
984 			      int laddrlen, struct sockaddr *raddr,
985 			      int raddrlen, int flags)
986 {
987 	int ret;
988 
989 	sock_set_reuseaddr(s->sk);
990 	ret = s->ops->bind(s, laddr, laddrlen);
991 	if (ret)
992 		return ret;
993 	ret = s->ops->connect(s, raddr, raddrlen, flags);
994 	return ret < 0 ? ret : 0;
995 }
996 
997 int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
998 {
999 	struct erdma_dev *dev = to_edev(id->device);
1000 	struct erdma_qp *qp;
1001 	struct erdma_cep *cep = NULL;
1002 	struct socket *s = NULL;
1003 	struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
1004 	struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
1005 	u16 pd_len = params->private_data_len;
1006 	int ret;
1007 
1008 	if (pd_len > MPA_MAX_PRIVDATA)
1009 		return -EINVAL;
1010 
1011 	if (params->ird > dev->attrs.max_ird ||
1012 	    params->ord > dev->attrs.max_ord)
1013 		return -EINVAL;
1014 
1015 	if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
1016 		return -EAFNOSUPPORT;
1017 
1018 	qp = find_qp_by_qpn(dev, params->qpn);
1019 	if (!qp)
1020 		return -ENOENT;
1021 	erdma_qp_get(qp);
1022 
1023 	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
1024 	if (ret < 0)
1025 		goto error_put_qp;
1026 
1027 	cep = erdma_cep_alloc(dev);
1028 	if (!cep) {
1029 		ret = -ENOMEM;
1030 		goto error_release_sock;
1031 	}
1032 
1033 	erdma_cep_set_inuse(cep);
1034 
1035 	/* Associate QP with CEP */
1036 	erdma_cep_get(cep);
1037 	qp->cep = cep;
1038 	cep->qp = qp;
1039 
1040 	/* Associate cm_id with CEP */
1041 	id->add_ref(id);
1042 	cep->cm_id = id;
1043 
1044 	/*
1045 	 * 6: Allocate a sufficient number of work elements
1046 	 * to allow concurrent handling of local + peer close
1047 	 * events, MPA header processing + MPA timeout, connected event
1048 	 * and connect timeout.
1049 	 */
1050 	ret = erdma_cm_alloc_work(cep, 6);
1051 	if (ret != 0) {
1052 		ret = -ENOMEM;
1053 		goto error_release_cep;
1054 	}
1055 
1056 	cep->ird = params->ird;
1057 	cep->ord = params->ord;
1058 	cep->state = ERDMA_EPSTATE_CONNECTING;
1059 
1060 	erdma_cep_socket_assoc(cep, s);
1061 
1062 	if (pd_len) {
1063 		cep->pd_len = pd_len;
1064 		cep->private_data = kmalloc(pd_len, GFP_KERNEL);
1065 		if (!cep->private_data) {
1066 			ret = -ENOMEM;
1067 			goto error_disassoc;
1068 		}
1069 
1070 		memcpy(cep->private_data, params->private_data,
1071 		       params->private_data_len);
1072 	}
1073 
1074 	ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
1075 				 sizeof(*raddr), O_NONBLOCK);
1076 	if (ret != -EINPROGRESS && ret != 0) {
1077 		goto error_disassoc;
1078 	} else if (ret == 0) {
1079 		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
1080 		if (ret)
1081 			goto error_disassoc;
1082 	} else {
1083 		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
1084 		if (ret)
1085 			goto error_disassoc;
1086 	}
1087 
1088 	erdma_cep_set_free(cep);
1089 	return 0;
1090 
1091 error_disassoc:
1092 	kfree(cep->private_data);
1093 	cep->private_data = NULL;
1094 	cep->pd_len = 0;
1095 
1096 	erdma_socket_disassoc(s);
1097 
1098 error_release_cep:
1099 	/* disassoc with cm_id */
1100 	cep->cm_id = NULL;
1101 	id->rem_ref(id);
1102 
1103 	/* disassoc with qp */
1104 	qp->cep = NULL;
1105 	erdma_cep_put(cep);
1106 	cep->qp = NULL;
1107 
1108 	cep->state = ERDMA_EPSTATE_CLOSED;
1109 
1110 	erdma_cep_set_free(cep);
1111 
1112 	/* release the cep. */
1113 	erdma_cep_put(cep);
1114 
1115 error_release_sock:
1116 	if (s)
1117 		sock_release(s);
1118 error_put_qp:
1119 	erdma_qp_put(qp);
1120 
1121 	return ret;
1122 }
1123 
1124 int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
1125 {
1126 	struct erdma_dev *dev = to_edev(id->device);
1127 	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
1128 	struct erdma_qp *qp;
1129 	struct erdma_qp_attrs qp_attrs;
1130 	int ret;
1131 
1132 	erdma_cep_set_inuse(cep);
1133 	erdma_cep_put(cep);
1134 
1135 	/* Free lingering inbound private data */
1136 	if (cep->mpa.hdr.params.pd_len) {
1137 		cep->mpa.hdr.params.pd_len = 0;
1138 		kfree(cep->mpa.pdata);
1139 		cep->mpa.pdata = NULL;
1140 	}
1141 	erdma_cancel_mpatimer(cep);
1142 
1143 	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
1144 		erdma_cep_set_free(cep);
1145 		erdma_cep_put(cep);
1146 
1147 		return -ECONNRESET;
1148 	}
1149 
1150 	qp = find_qp_by_qpn(dev, params->qpn);
1151 	if (!qp)
1152 		return -ENOENT;
1153 	erdma_qp_get(qp);
1154 
1155 	down_write(&qp->state_lock);
1156 	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
1157 		ret = -EINVAL;
1158 		up_write(&qp->state_lock);
1159 		goto error;
1160 	}
1161 
1162 	if (params->ord > dev->attrs.max_ord ||
1163 	    params->ird > dev->attrs.max_ord) {
1164 		ret = -EINVAL;
1165 		up_write(&qp->state_lock);
1166 		goto error;
1167 	}
1168 
1169 	if (params->private_data_len > MPA_MAX_PRIVDATA) {
1170 		ret = -EINVAL;
1171 		up_write(&qp->state_lock);
1172 		goto error;
1173 	}
1174 
1175 	cep->ird = params->ird;
1176 	cep->ord = params->ord;
1177 
1178 	cep->cm_id = id;
1179 	id->add_ref(id);
1180 
1181 	memset(&qp_attrs, 0, sizeof(qp_attrs));
1182 	qp_attrs.orq_size = params->ord;
1183 	qp_attrs.irq_size = params->ird;
1184 
1185 	qp_attrs.state = ERDMA_QP_STATE_RTS;
1186 
1187 	/* Associate QP with CEP */
1188 	erdma_cep_get(cep);
1189 	qp->cep = cep;
1190 	cep->qp = qp;
1191 
1192 	cep->state = ERDMA_EPSTATE_RDMA_MODE;
1193 
1194 	qp->attrs.qp_type = ERDMA_QP_PASSIVE;
1195 	qp->attrs.pd_len = params->private_data_len;
1196 
1197 	if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
1198 		qp->attrs.cc = COMPROMISE_CC;
1199 
1200 	/* move to rts */
1201 	ret = erdma_modify_qp_internal(qp, &qp_attrs,
1202 				       ERDMA_QP_ATTR_STATE |
1203 				       ERDMA_QP_ATTR_ORD |
1204 				       ERDMA_QP_ATTR_LLP_HANDLE |
1205 				       ERDMA_QP_ATTR_IRD |
1206 				       ERDMA_QP_ATTR_MPA);
1207 	up_write(&qp->state_lock);
1208 
1209 	if (ret)
1210 		goto error;
1211 
1212 	cep->mpa.ext_data.bits = 0;
1213 	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
1214 	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
1215 
1216 	ret = erdma_send_mpareqrep(cep, params->private_data,
1217 				   params->private_data_len);
1218 	if (!ret) {
1219 		ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
1220 		if (ret)
1221 			goto error;
1222 
1223 		erdma_cep_set_free(cep);
1224 
1225 		return 0;
1226 	}
1227 
1228 error:
1229 	erdma_socket_disassoc(cep->sock);
1230 	sock_release(cep->sock);
1231 	cep->sock = NULL;
1232 
1233 	cep->state = ERDMA_EPSTATE_CLOSED;
1234 
1235 	if (cep->cm_id) {
1236 		cep->cm_id->rem_ref(id);
1237 		cep->cm_id = NULL;
1238 	}
1239 
1240 	if (qp->cep) {
1241 		erdma_cep_put(cep);
1242 		qp->cep = NULL;
1243 	}
1244 
1245 	cep->qp = NULL;
1246 	erdma_qp_put(qp);
1247 
1248 	erdma_cep_set_free(cep);
1249 	erdma_cep_put(cep);
1250 
1251 	return ret;
1252 }
1253 
1254 int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
1255 {
1256 	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
1257 
1258 	erdma_cep_set_inuse(cep);
1259 	erdma_cep_put(cep);
1260 
1261 	erdma_cancel_mpatimer(cep);
1262 
1263 	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
1264 		erdma_cep_set_free(cep);
1265 		erdma_cep_put(cep);
1266 
1267 		return -ECONNRESET;
1268 	}
1269 
1270 	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
1271 		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
1272 		erdma_send_mpareqrep(cep, pdata, plen);
1273 	}
1274 
1275 	erdma_socket_disassoc(cep->sock);
1276 	sock_release(cep->sock);
1277 	cep->sock = NULL;
1278 
1279 	cep->state = ERDMA_EPSTATE_CLOSED;
1280 
1281 	erdma_cep_set_free(cep);
1282 	erdma_cep_put(cep);
1283 
1284 	return 0;
1285 }
1286 
1287 int erdma_create_listen(struct iw_cm_id *id, int backlog)
1288 {
1289 	struct socket *s;
1290 	struct erdma_cep *cep = NULL;
1291 	int ret = 0;
1292 	struct erdma_dev *dev = to_edev(id->device);
1293 	int addr_family = id->local_addr.ss_family;
1294 	struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
1295 
1296 	if (addr_family != AF_INET)
1297 		return -EAFNOSUPPORT;
1298 
1299 	ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
1300 	if (ret < 0)
1301 		return ret;
1302 
1303 	sock_set_reuseaddr(s->sk);
1304 
1305 	/* For wildcard addr, limit binding to current device only */
1306 	if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
1307 		s->sk->sk_bound_dev_if = dev->netdev->ifindex;
1308 
1309 	ret = s->ops->bind(s, (struct sockaddr *)laddr,
1310 			   sizeof(struct sockaddr_in));
1311 	if (ret)
1312 		goto error;
1313 
1314 	cep = erdma_cep_alloc(dev);
1315 	if (!cep) {
1316 		ret = -ENOMEM;
1317 		goto error;
1318 	}
1319 	erdma_cep_socket_assoc(cep, s);
1320 
1321 	ret = erdma_cm_alloc_work(cep, backlog);
1322 	if (ret)
1323 		goto error;
1324 
1325 	ret = s->ops->listen(s, backlog);
1326 	if (ret)
1327 		goto error;
1328 
1329 	cep->cm_id = id;
1330 	id->add_ref(id);
1331 
1332 	if (!id->provider_data) {
1333 		id->provider_data =
1334 			kmalloc(sizeof(struct list_head), GFP_KERNEL);
1335 		if (!id->provider_data) {
1336 			ret = -ENOMEM;
1337 			goto error;
1338 		}
1339 		INIT_LIST_HEAD((struct list_head *)id->provider_data);
1340 	}
1341 
1342 	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
1343 	cep->state = ERDMA_EPSTATE_LISTENING;
1344 
1345 	return 0;
1346 
1347 error:
1348 	if (cep) {
1349 		erdma_cep_set_inuse(cep);
1350 
1351 		if (cep->cm_id) {
1352 			cep->cm_id->rem_ref(cep->cm_id);
1353 			cep->cm_id = NULL;
1354 		}
1355 		cep->sock = NULL;
1356 		erdma_socket_disassoc(s);
1357 		cep->state = ERDMA_EPSTATE_CLOSED;
1358 
1359 		erdma_cep_set_free(cep);
1360 		erdma_cep_put(cep);
1361 	}
1362 	sock_release(s);
1363 
1364 	return ret;
1365 }
1366 
1367 static void erdma_drop_listeners(struct iw_cm_id *id)
1368 {
1369 	struct list_head *p, *tmp;
1370 	/*
1371 	 * In case of a wildcard rdma_listen on a multi-homed device,
1372 	 * a listener's IWCM id is associated with more than one listening CEP.
1373 	 */
1374 	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
1375 		struct erdma_cep *cep =
1376 			list_entry(p, struct erdma_cep, listenq);
1377 
1378 		list_del(p);
1379 
1380 		erdma_cep_set_inuse(cep);
1381 
1382 		if (cep->cm_id) {
1383 			cep->cm_id->rem_ref(cep->cm_id);
1384 			cep->cm_id = NULL;
1385 		}
1386 		if (cep->sock) {
1387 			erdma_socket_disassoc(cep->sock);
1388 			sock_release(cep->sock);
1389 			cep->sock = NULL;
1390 		}
1391 		cep->state = ERDMA_EPSTATE_CLOSED;
1392 		erdma_cep_set_free(cep);
1393 		erdma_cep_put(cep);
1394 	}
1395 }
1396 
1397 int erdma_destroy_listen(struct iw_cm_id *id)
1398 {
1399 	if (!id->provider_data)
1400 		return 0;
1401 
1402 	erdma_drop_listeners(id);
1403 	kfree(id->provider_data);
1404 	id->provider_data = NULL;
1405 
1406 	return 0;
1407 }
1408 
1409 int erdma_cm_init(void)
1410 {
1411 	erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
1412 	if (!erdma_cm_wq)
1413 		return -ENOMEM;
1414 
1415 	return 0;
1416 }
1417 
1418 void erdma_cm_exit(void)
1419 {
1420 	if (erdma_cm_wq)
1421 		destroy_workqueue(erdma_cm_wq);
1422 }
1423