// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/*          Fredy Neeser */
/*          Greg Joyce <greg@opengridcomputing.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/net.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

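/*
 * All CM work runs on a single-threaded workqueue (created in
 * erdma_cm_init()), so queued connection events are processed strictly
 * in the order in which they were queued.
 */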
static struct workqueue_struct *erdma_cm_wq;

static void erdma_cm_llp_state_change(struct sock *sk);
static void erdma_cm_llp_data_ready(struct sock *sk);
static void erdma_cm_llp_error_report(struct sock *sk);

static void erdma_sk_assign_cm_upcalls(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = erdma_cm_llp_state_change;
	sk->sk_data_ready = erdma_cm_llp_data_ready;
	sk->sk_error_report = erdma_cm_llp_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_save_upcalls(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	write_lock_bh(&sk->sk_callback_lock);
	cep->sk_state_change = sk->sk_state_change;
	cep->sk_data_ready = sk->sk_data_ready;
	cep->sk_error_report = sk->sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
{
	sk->sk_state_change = cep->sk_state_change;
	sk->sk_data_ready = cep->sk_data_ready;
	sk->sk_error_report = cep->sk_error_report;
	sk->sk_user_data = NULL;
}

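/*
 * Detach a socket from its CEP: restore the original sk upcalls saved
 * at association time, clear sk_user_data, and drop the CEP reference
 * taken in erdma_cep_socket_assoc().
 */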
static void erdma_socket_disassoc(struct socket *s)
{
	struct sock *sk = s->sk;
	struct erdma_cep *cep;

	if (sk) {
		write_lock_bh(&sk->sk_callback_lock);
		cep = sk_to_cep(sk);
		if (cep) {
			erdma_sk_restore_upcalls(sk, cep);
			erdma_cep_put(cep);
		} else {
			WARN_ON_ONCE(1);
		}
		write_unlock_bh(&sk->sk_callback_lock);
	} else {
		WARN_ON_ONCE(1);
	}
}

static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
{
	cep->sock = s;
	erdma_cep_get(cep);
	s->sk->sk_user_data = cep;

	erdma_sk_save_upcalls(s->sk);
	erdma_sk_assign_cm_upcalls(s->sk);
}

static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
{
	if (cep->listen_cep) {
		erdma_cep_put(cep->listen_cep);
		cep->listen_cep = NULL;
	}
}

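/*
 * Allocate a connection endpoint in IDLE state, holding one initial
 * kref, and link it onto the device's CEP list.
 */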
static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
{
	struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	unsigned long flags;

	if (!cep)
		return NULL;

	INIT_LIST_HEAD(&cep->listenq);
	INIT_LIST_HEAD(&cep->devq);
	INIT_LIST_HEAD(&cep->work_freelist);

	kref_init(&cep->ref);
	cep->state = ERDMA_EPSTATE_IDLE;
	init_waitqueue_head(&cep->waitq);
	spin_lock_init(&cep->lock);
	cep->dev = dev;

	spin_lock_irqsave(&dev->lock, flags);
	list_add_tail(&cep->devq, &dev->cep_list);
	spin_unlock_irqrestore(&dev->lock, flags);

	return cep;
}

static void erdma_cm_free_work(struct erdma_cep *cep)
{
	struct list_head *w, *tmp;
	struct erdma_cm_work *work;

	list_for_each_safe(w, tmp, &cep->work_freelist) {
		work = list_entry(w, struct erdma_cm_work, list);
		list_del(&work->list);
		kfree(work);
	}
}

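/*
 * Cancel a pending MPA timeout. The timer's CEP reference and work
 * element are released here only if cancel_delayed_work() succeeds;
 * otherwise the handler is already running and releases them itself.
 */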
static void erdma_cancel_mpatimer(struct erdma_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			erdma_cep_put(cep);
			kfree(cep->mpa_timer);
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}

static void erdma_put_work(struct erdma_cm_work *work)
{
	INIT_LIST_HEAD(&work->list);
	spin_lock_bh(&work->cep->lock);
	list_add(&work->list, &work->cep->work_freelist);
	spin_unlock_bh(&work->cep->lock);
}

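/*
 * erdma_cep_set_inuse()/erdma_cep_set_free() give the caller exclusive
 * ownership of a CEP: contenders sleep on cep->waitq until the current
 * owner clears the in_use flag.
 */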
static void erdma_cep_set_inuse(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	while (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);

		spin_lock_irqsave(&cep->lock, flags);
	}

	cep->in_use = 1;
	spin_unlock_irqrestore(&cep->lock, flags);
}

static void erdma_cep_set_free(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	cep->in_use = 0;
	spin_unlock_irqrestore(&cep->lock, flags);

	wake_up(&cep->waitq);
}

static void __erdma_cep_dealloc(struct kref *ref)
{
	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
	struct erdma_dev *dev = cep->dev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	kfree(cep->private_data);
	kfree(cep->mpa.pdata);
	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		erdma_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	spin_lock_irqsave(&dev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&dev->lock, flags);
	kfree(cep);
}

static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
{
	struct erdma_cm_work *work = NULL;

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist)) {
		work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
				  list);
		list_del_init(&work->list);
	}

	spin_unlock_bh(&cep->lock);
	return work;
}

static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
{
	struct erdma_cm_work *work;

	while (num--) {
		work = kmalloc(sizeof(*work), GFP_KERNEL);
		if (!work) {
			if (!(list_empty(&cep->work_freelist)))
				erdma_cm_free_work(cep);
			return -ENOMEM;
		}
		work->cep = cep;
		INIT_LIST_HEAD(&work->list);
		list_add(&work->list, &cep->work_freelist);
	}

	return 0;
}

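/*
 * Deliver a CM event to the IWCM. A connection request is reported via
 * the listening CEP's cm_id; for CONNECT_REQUEST and CONNECT_REPLY the
 * socket addresses and any received MPA private data are attached to
 * the event.
 */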
static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
			   int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *cm_id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		cm_id = cep->listen_cep->cm_id;

		event.ird = cep->dev->attrs.max_ird;
		event.ord = cep->dev->attrs.max_ord;
	} else {
		cm_id = cep->cm_id;
	}

	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len && cep->mpa.pdata) {
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;
		}

		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}

	return cm_id->event_handler(cm_id, &event);
}

void erdma_qp_cm_drop(struct erdma_qp *qp)
{
	struct erdma_cep *cep = qp->cep;

	if (!qp->cep)
		return;

	erdma_cep_set_inuse(cep);

	/* already closed. */
	if (cep->state == ERDMA_EPSTATE_CLOSED)
		goto out;

	if (cep->cm_id) {
		switch (cep->state) {
		case ERDMA_EPSTATE_AWAIT_MPAREP:
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-EINVAL);
			break;
		case ERDMA_EPSTATE_RDMA_MODE:
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			break;
		case ERDMA_EPSTATE_IDLE:
		case ERDMA_EPSTATE_LISTENING:
		case ERDMA_EPSTATE_CONNECTING:
		case ERDMA_EPSTATE_AWAIT_MPAREQ:
		case ERDMA_EPSTATE_RECVD_MPAREQ:
		case ERDMA_EPSTATE_CLOSED:
		default:
			break;
		}
		cep->cm_id->rem_ref(cep->cm_id);
		cep->cm_id = NULL;
		erdma_cep_put(cep);
	}
	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->sock) {
		erdma_socket_disassoc(cep->sock);
		sock_release(cep->sock);
		cep->sock = NULL;
	}

	if (cep->qp) {
		cep->qp = NULL;
		erdma_qp_put(qp);
	}
out:
	erdma_cep_set_free(cep);
}

void erdma_cep_put(struct erdma_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __erdma_cep_dealloc);
}

void erdma_cep_get(struct erdma_cep *cep)
{
	kref_get(&cep->ref);
}

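/*
 * Send an MPA request or reply: the common MPA RR header, the erdma
 * extended data (cookie and flags word), and up to pd_len bytes of
 * private data, pushed out in a single kernel_sendmsg() call.
 */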
static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
				u8 pd_len)
{
	struct socket *s = cep->sock;
	struct mpa_rr *rr = &cep->mpa.hdr;
	struct kvec iov[3];
	struct msghdr msg;
	int iovec_num = 0;
	int ret;
	int mpa_len;

	memset(&msg, 0, sizeof(msg));

	rr->params.pd_len = cpu_to_be16(pd_len);

	iov[iovec_num].iov_base = rr;
	iov[iovec_num].iov_len = sizeof(*rr);
	iovec_num++;
	mpa_len = sizeof(*rr);

	iov[iovec_num].iov_base = &cep->mpa.ext_data;
	iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
	iovec_num++;
	mpa_len += sizeof(cep->mpa.ext_data);

	if (pd_len) {
		iov[iovec_num].iov_base = (char *)pdata;
		iov[iovec_num].iov_len = pd_len;
		mpa_len += pd_len;
		iovec_num++;
	}

	ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);

	return ret < 0 ? ret : 0;
}

static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
			     int flags)
{
	struct kvec iov = { buf, size };
	struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };

	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}

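/*
 * Nonblocking receive of up to hdr_size bytes into hdr. Returns 0 once
 * the full header is present, -EAGAIN after a partial read, and
 * -ECONNABORTED if the peer closed the connection.
 */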
static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
			  int hdr_size, int *rcvd_out)
{
	struct socket *s = cep->sock;
	int rcvd;

	*rcvd_out = 0;
	if (hdr_rcvd < hdr_size) {
		rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
				  MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return -EAGAIN;

		if (rcvd <= 0)
			return -ECONNABORTED;

		hdr_rcvd += rcvd;
		*rcvd_out = rcvd;

		if (hdr_rcvd < hdr_size)
			return -EAGAIN;
	}

	return 0;
}

static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
{
	*bits = (*bits & ~MPA_RR_MASK_REVISION) |
		(cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
}

static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
{
	__be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;

	return (u8)be16_to_cpu(rev);
}

static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
{
	*bits = (*bits & ~MPA_EXT_FLAG_CC) |
		(cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
}

static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
{
	__be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;

	return (u8)be32_to_cpu(cc);
}

/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if the complete MPA Request/Reply header, including
 * any private data, was received. Returns -EAGAIN if the header
 * was only partially received, or a negative error code otherwise.
 *
 * Context: May be called in process context only.
 */
static int erdma_recv_mpa_rr(struct erdma_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv, ret, pd_rcvd;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
				     (char *)&cep->mpa.hdr,
				     sizeof(struct mpa_rr), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
		return -EPROTO;

	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
	    sizeof(struct erdma_mpa_ext)) {
		ret = __recv_mpa_hdr(
			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
			(char *)&cep->mpa.ext_data,
			sizeof(struct erdma_mpa_ext), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	pd_len = be16_to_cpu(hdr->params.pd_len);
	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
		  sizeof(struct erdma_mpa_ext);
	to_rcv = pd_len - pd_rcvd;

	if (!to_rcv) {
		/*
		 * We have received the whole MPA Request/Reply message.
		 * Check against peer protocol violation.
		 */
		u32 word;

		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
				     &rcvd);
		if (ret == -EAGAIN && rcvd == 0)
			return 0;

		if (ret)
			return ret;

		return -EPROTO;
	}

	/*
	 * At this point, MPA header has been fully received, and pd_len != 0.
	 * So, begin to receive private data.
	 */
	if (!cep->mpa.pdata) {
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}

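	/*
	 * Receive up to four bytes beyond the outstanding private data
	 * (the buffer was sized accordingly above) so that a peer
	 * sending more than pd_len bytes is caught as -EPROTO below.
	 */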
	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
			  MSG_DONTWAIT);
	if (rcvd < 0)
		return rcvd;

	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd)
		return 0;

	return -EAGAIN;
}

/*
 * erdma_proc_mpareq()
 *
 * Read an MPA request from the socket and, on success, signal the new
 * connection to the IWCM. The caller must hold the lock on the
 * corresponding listening CEP.
 */
static int erdma_proc_mpareq(struct erdma_cep *cep)
{
	struct mpa_rr *req;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		return ret;

	req = &cep->mpa.hdr;

	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
		return -EPROTO;

	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);

	/* Markers and CRC are currently not supported. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
	    req->params.bits & MPA_RR_FLAG_CRC)
		goto reject_conn;

	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;

	/* Keep a reference until the IWCM accepts or rejects. */
	erdma_cep_get(cep);
	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (ret)
		erdma_cep_put(cep);

	return ret;

reject_conn:
	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;
	req->params.bits &= ~MPA_RR_FLAG_CRC;

	kfree(cep->mpa.pdata);
	cep->mpa.pdata = NULL;
	erdma_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}

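/*
 * erdma_proc_mpareply()
 *
 * Process the MPA reply on the active side: validate key and flags,
 * move the QP to RTS, and report CONNECT_REPLY to the IWCM.
 */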
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	struct erdma_qp_attrs qp_attrs;
	struct erdma_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto out_err;

	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto out_err;
	}

	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* Markers and CRC are currently not supported. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.state = ERDMA_QP_STATE_RTS;

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}

	qp->attrs.qp_type = ERDMA_QP_ACTIVE;
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
		qp->attrs.cc = COMPROMISE_CC;

	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_MPA);

	up_write(&qp->state_lock);

	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!ret)
			cep->state = ERDMA_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}

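/*
 * Accept a new TCP connection on a listening CEP: allocate a child
 * CEP, take over the socket via kernel_accept(), arm the MPA request
 * timer, and, if data is already queued, process the MPA request
 * immediately.
 */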
static void erdma_accept_newconn(struct erdma_cep *cep)
{
	struct socket *s = cep->sock;
	struct socket *new_s = NULL;
	struct erdma_cep *new_cep = NULL;
	int ret = 0;

	if (cep->state != ERDMA_EPSTATE_LISTENING)
		goto error;

	new_cep = erdma_cep_alloc(cep->dev);
	if (!new_cep)
		goto error;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	if (erdma_cm_alloc_work(new_cep, 4) != 0)
		goto error;

	/*
	 * Copy the saved socket callbacks from the listening CEP
	 * and associate the new socket with the new CEP.
	 */
	new_cep->sk_state_change = cep->sk_state_change;
	new_cep->sk_data_ready = cep->sk_data_ready;
	new_cep->sk_error_report = cep->sk_error_report;

	ret = kernel_accept(s, &new_s, O_NONBLOCK);
	if (ret != 0)
		goto error;

	new_cep->sock = new_s;
	erdma_cep_get(new_cep);
	new_s->sk->sk_user_data = new_cep;

	tcp_sock_set_nodelay(new_s->sk);
	new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;

	ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
	if (ret)
		goto error;

	new_cep->listen_cep = cep;
	erdma_cep_get(cep);

	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
		/* MPA REQ already queued */
		erdma_cep_set_inuse(new_cep);
		ret = erdma_proc_mpareq(new_cep);
		if (ret != -EAGAIN) {
			erdma_cep_put(cep);
			new_cep->listen_cep = NULL;
			if (ret) {
				erdma_cep_set_free(new_cep);
				goto error;
			}
		}
		erdma_cep_set_free(new_cep);
	}
	return;

error:
	if (new_cep) {
		new_cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cancel_mpatimer(new_cep);

		erdma_cep_put(new_cep);
		new_cep->sock = NULL;
	}

	if (new_s) {
		erdma_socket_disassoc(new_s);
		sock_release(new_s);
	}
}

static int erdma_newconn_connected(struct erdma_cep *cep)
{
	int ret = 0;

	cep->mpa.hdr.params.bits = 0;
	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);

	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);

	ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
	cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
	cep->mpa.hdr.params.pd_len = 0;

	if (ret >= 0)
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);

	return ret;
}

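/*
 * Central CM state machine, executed on erdma_cm_wq. Each queued work
 * element carries one event; the handler takes exclusive ownership of
 * the CEP, dispatches on the work type, and tears the endpoint down
 * when release_cep is set.
 */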
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but the
				 * connection was dropped.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 *       to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout.
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop()
			 */
			erdma_qp_get(qp);
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	erdma_put_work(work);
	erdma_cep_put(cep);
}

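/*
 * Queue a CM work element on erdma_cm_wq, taking a CEP reference that
 * the work handler drops. For MPATIMEOUT and CONNECTTIMEOUT the work
 * doubles as the timer and is remembered in cep->mpa_timer.
 */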
int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
{
	struct erdma_cm_work *work = erdma_get_work(cep);
	unsigned long delay = 0;

	if (!work)
		return -ENOMEM;

	work->type = type;
	work->cep = cep;

	erdma_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);

	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
			delay = MPAREP_TIMEOUT;
		else
			delay = MPAREQ_TIMEOUT;
	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
		cep->mpa_timer = work;

		delay = CONNECT_TIMEOUT;
	}

	queue_delayed_work(erdma_cm_wq, &work->work, delay);

	return 0;
}

static void erdma_cm_llp_data_ready(struct sock *sk)
{
	struct erdma_cep *cep;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep)
		goto out;

	if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
	    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
		erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);

out:
	read_unlock(&sk->sk_callback_lock);
}

static void erdma_cm_llp_error_report(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	if (cep)
		cep->sk_error_report(sk);
}

static void erdma_cm_llp_state_change(struct sock *sk)
{
	struct erdma_cep *cep;
	void (*orig_state_change)(struct sock *sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	orig_state_change = cep->sk_state_change;

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		if (cep->state == ERDMA_EPSTATE_CONNECTING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		else
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
		break;
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->state != ERDMA_EPSTATE_LISTENING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
		break;
	default:
		break;
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}

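/*
 * Bind the socket to laddr with SO_REUSEADDR set, then connect to
 * raddr; with O_NONBLOCK the connect typically returns -EINPROGRESS,
 * which the caller treats as a connect still in flight.
 */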
static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
			      int laddrlen, struct sockaddr *raddr,
			      int raddrlen, int flags)
{
	int ret;

	sock_set_reuseaddr(s->sk);
	ret = s->ops->bind(s, laddr, laddrlen);
	if (ret)
		return ret;
	ret = s->ops->connect(s, raddr, raddrlen, flags);
	return ret < 0 ? ret : 0;
}

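/*
 * erdma_connect() - IWCM callback starting an active connection:
 * create a TCP socket, associate it with a fresh CEP and the QP, then
 * bind and connect nonblockingly. The MPA exchange continues from the
 * CONNECTED/CONNECTTIMEOUT work handlers.
 */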
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_qp *qp;
	struct erdma_cep *cep = NULL;
	struct socket *s = NULL;
	struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
	struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
	u16 pd_len = params->private_data_len;
	int ret;

	if (pd_len > MPA_MAX_PRIVDATA)
		return -EINVAL;

	if (params->ird > dev->attrs.max_ird ||
	    params->ord > dev->attrs.max_ord)
		return -EINVAL;

	if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
		return -EAFNOSUPPORT;

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		goto error_put_qp;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error_release_sock;
	}

	erdma_cep_set_inuse(cep);

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	/* Associate cm_id with CEP */
	id->add_ref(id);
	cep->cm_id = id;

	/*
	 * 6: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout, connected event
	 * and connect timeout.
	 */
	ret = erdma_cm_alloc_work(cep, 6);
	if (ret != 0) {
		ret = -ENOMEM;
		goto error_release_cep;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;
	cep->state = ERDMA_EPSTATE_CONNECTING;

	erdma_cep_socket_assoc(cep, s);

	if (pd_len) {
		cep->pd_len = pd_len;
		cep->private_data = kmalloc(pd_len, GFP_KERNEL);
		if (!cep->private_data) {
			ret = -ENOMEM;
			goto error_disassoc;
		}

		memcpy(cep->private_data, params->private_data,
		       params->private_data_len);
	}

	ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
				 sizeof(*raddr), O_NONBLOCK);
	if (ret != -EINPROGRESS && ret != 0) {
		goto error_disassoc;
	} else if (ret == 0) {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		if (ret)
			goto error_disassoc;
	} else {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
		if (ret)
			goto error_disassoc;
	}

	erdma_cep_set_free(cep);
	return 0;

error_disassoc:
	kfree(cep->private_data);
	cep->private_data = NULL;
	cep->pd_len = 0;

	erdma_socket_disassoc(s);

error_release_cep:
	/* disassoc with cm_id */
	cep->cm_id = NULL;
	id->rem_ref(id);

	/* disassoc with qp */
	qp->cep = NULL;
	erdma_cep_put(cep);
	cep->qp = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);

	/* release the cep. */
	erdma_cep_put(cep);

error_release_sock:
	if (s)
		sock_release(s);
error_put_qp:
	erdma_qp_put(qp);

	return ret;
}

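/*
 * erdma_accept() - IWCM callback accepting a pending connection
 * request: validate parameters, move the QP to RTS, send the MPA
 * reply, and report ESTABLISHED to the IWCM.
 */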
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
	struct erdma_qp *qp;
	struct erdma_qp_attrs qp_attrs;
	int ret;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->ord > dev->attrs.max_ord ||
	    params->ird > dev->attrs.max_ird) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->private_data_len > MPA_MAX_PRIVDATA) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;

	cep->cm_id = id;
	id->add_ref(id);

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.orq_size = params->ord;
	qp_attrs.irq_size = params->ird;

	qp_attrs.state = ERDMA_QP_STATE_RTS;

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	cep->state = ERDMA_EPSTATE_RDMA_MODE;

	qp->attrs.qp_type = ERDMA_QP_PASSIVE;
	qp->attrs.pd_len = params->private_data_len;

	if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
		qp->attrs.cc = COMPROMISE_CC;

	/* Move the QP to RTS. */
	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_ORD |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_IRD |
				       ERDMA_QP_ATTR_MPA);
	up_write(&qp->state_lock);

	if (ret)
		goto error;

	cep->mpa.ext_data.bits = 0;
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);

	ret = erdma_send_mpareqrep(cep, params->private_data,
				   params->private_data_len);
	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (ret)
			goto error;

		erdma_cep_set_free(cep);

		return 0;
	}

error:
	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->cm_id) {
		cep->cm_id->rem_ref(id);
		cep->cm_id = NULL;
	}

	if (qp->cep) {
		erdma_cep_put(cep);
		qp->cep = NULL;
	}

	cep->qp = NULL;
	erdma_qp_put(qp);

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return ret;
}

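/*
 * erdma_reject() - IWCM callback rejecting a pending connection
 * request: send an MPA reply with the reject flag set, then tear the
 * socket down.
 */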
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
{
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		erdma_send_mpareqrep(cep, pdata, plen);
	}

	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return 0;
}

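/*
 * erdma_create_listen() - IWCM callback starting a listener: create
 * and bind a TCP socket, associate it with a new CEP, and chain the
 * CEP on the cm_id's provider_data list.
 */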
int erdma_create_listen(struct iw_cm_id *id, int backlog)
{
	struct socket *s;
	struct erdma_cep *cep = NULL;
	int ret = 0;
	struct erdma_dev *dev = to_edev(id->device);
	int addr_family = id->local_addr.ss_family;
	struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);

	if (addr_family != AF_INET)
		return -EAFNOSUPPORT;

	ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		return ret;

	sock_set_reuseaddr(s->sk);

	/* For wildcard addr, limit binding to current device only */
	if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
		s->sk->sk_bound_dev_if = dev->netdev->ifindex;

	ret = s->ops->bind(s, (struct sockaddr *)laddr,
			   sizeof(struct sockaddr_in));
	if (ret)
		goto error;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error;
	}
	erdma_cep_socket_assoc(cep, s);

	ret = erdma_cm_alloc_work(cep, backlog);
	if (ret)
		goto error;

	ret = s->ops->listen(s, backlog);
	if (ret)
		goto error;

	cep->cm_id = id;
	id->add_ref(id);

	if (!id->provider_data) {
		id->provider_data =
			kmalloc(sizeof(struct list_head), GFP_KERNEL);
		if (!id->provider_data) {
			ret = -ENOMEM;
			goto error;
		}
		INIT_LIST_HEAD((struct list_head *)id->provider_data);
	}

	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
	cep->state = ERDMA_EPSTATE_LISTENING;

	return 0;

error:
	if (cep) {
		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		cep->sock = NULL;
		erdma_socket_disassoc(s);
		cep->state = ERDMA_EPSTATE_CLOSED;

		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
	sock_release(s);

	return ret;
}

static void erdma_drop_listeners(struct iw_cm_id *id)
{
	struct list_head *p, *tmp;
	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 */
	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
		struct erdma_cep *cep =
			list_entry(p, struct erdma_cep, listenq);

		list_del(p);

		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
}

int erdma_destroy_listen(struct iw_cm_id *id)
{
	if (!id->provider_data)
		return 0;

	erdma_drop_listeners(id);
	kfree(id->provider_data);
	id->provider_data = NULL;

	return 0;
}

int erdma_cm_init(void)
{
	erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
	if (!erdma_cm_wq)
		return -ENOMEM;

	return 0;
}

void erdma_cm_exit(void)
{
	if (erdma_cm_wq)
		destroy_workqueue(erdma_cm_wq);
}