1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2018 Chelsio Communications, Inc.
4  *
5  * Written by: Atul Gupta (atul.gupta@chelsio.com)
6  */
7 
8 #include <linux/module.h>
9 #include <linux/list.h>
10 #include <linux/workqueue.h>
11 #include <linux/skbuff.h>
12 #include <linux/timer.h>
13 #include <linux/notifier.h>
14 #include <linux/inetdevice.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/sched/signal.h>
18 #include <linux/kallsyms.h>
19 #include <linux/kprobes.h>
20 #include <linux/if_vlan.h>
21 #include <linux/ipv6.h>
22 #include <net/ipv6.h>
23 #include <net/transp_v6.h>
24 #include <net/ip6_route.h>
25 #include <net/inet_common.h>
26 #include <net/tcp.h>
27 #include <net/dst.h>
28 #include <net/tls.h>
29 #include <net/addrconf.h>
30 #include <net/secure_seq.h>
31 
32 #include "chtls.h"
33 #include "chtls_cm.h"
34 #include "clip_tbl.h"
35 
36 /*
37  * State transitions and actions for close.  Note that if we are in SYN_SENT
38  * we remain in that state as we cannot control a connection while it's in
39  * SYN_SENT; such connections are allowed to establish and are then aborted.
40  */
41 static unsigned char new_state[16] = {
42 	/* current state:     new state:      action: */
43 	/* (Invalid)       */ TCP_CLOSE,
44 	/* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
45 	/* TCP_SYN_SENT    */ TCP_SYN_SENT,
46 	/* TCP_SYN_RECV    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
47 	/* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
48 	/* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
49 	/* TCP_TIME_WAIT   */ TCP_CLOSE,
50 	/* TCP_CLOSE       */ TCP_CLOSE,
51 	/* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
52 	/* TCP_LAST_ACK    */ TCP_LAST_ACK,
53 	/* TCP_LISTEN      */ TCP_CLOSE,
54 	/* TCP_CLOSING     */ TCP_CLOSING,
55 };
56 
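/*
 * Allocate and initialize the driver-private state (chtls_sock) for an
 * offloaded connection: reference count, TX queue, work-request list and
 * default TLS hardware-state values, plus a cached skb that
 * alloc_ctrl_skb() later reuses for control messages.
 */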
57 static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
58 {
59 	struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);
60 
61 	if (!csk)
62 		return NULL;
63 
64 	csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
65 	if (!csk->txdata_skb_cache) {
66 		kfree(csk);
67 		return NULL;
68 	}
69 
70 	kref_init(&csk->kref);
71 	csk->cdev = cdev;
72 	skb_queue_head_init(&csk->txq);
73 	csk->wr_skb_head = NULL;
74 	csk->wr_skb_tail = NULL;
75 	csk->mss = MAX_MSS;
76 	csk->tlshws.ofld = 1;
77 	csk->tlshws.txkey = -1;
78 	csk->tlshws.rxkey = -1;
79 	csk->tlshws.mfs = TLS_MFS;
80 	skb_queue_head_init(&csk->tlshws.sk_recv_queue);
81 	return csk;
82 }
83 
84 static void chtls_sock_release(struct kref *ref)
85 {
86 	struct chtls_sock *csk =
87 		container_of(ref, struct chtls_sock, kref);
88 
89 	kfree(csk);
90 }
91 
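/*
 * Map a socket to the net_device it is bound to.  Unbound sockets default
 * to the adapter's first port; bound addresses are resolved through
 * __ip_dev_find() (IPv4) or an ipv6_chk_addr() scan (IPv6), and VLAN
 * devices are translated to their underlying real device.
 */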
92 static struct net_device *chtls_find_netdev(struct chtls_dev *cdev,
93 					    struct sock *sk)
94 {
95 	struct net_device *ndev = cdev->ports[0];
96 #if IS_ENABLED(CONFIG_IPV6)
97 	struct net_device *temp;
98 	int addr_type;
99 #endif
100 
101 	switch (sk->sk_family) {
102 	case PF_INET:
103 		if (likely(!inet_sk(sk)->inet_rcv_saddr))
104 			return ndev;
105 		ndev = __ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr, false);
106 		break;
107 #if IS_ENABLED(CONFIG_IPV6)
108 	case PF_INET6:
109 		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
110 		if (likely(addr_type == IPV6_ADDR_ANY))
111 			return ndev;
112 
113 		for_each_netdev_rcu(&init_net, temp) {
114 			if (ipv6_chk_addr(&init_net, (struct in6_addr *)
115 					  &sk->sk_v6_rcv_saddr, temp, 1)) {
116 				ndev = temp;
117 				break;
118 			}
119 		}
120 		break;
121 #endif
122 	default:
123 		return NULL;
124 	}
125 
126 	if (!ndev)
127 		return NULL;
128 
129 	if (is_vlan_dev(ndev))
130 		return vlan_dev_real_dev(ndev);
131 	return ndev;
132 }
133 
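/*
 * Propagate the TCP options negotiated by the hardware (packed into the
 * CPL option word) into the kernel's tcp_sock: MSS clamp, timestamps and
 * window scaling.  If the peer did not agree to timestamps, the option is
 * also cleared from the connection's opt2.
 */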
134 static void assign_rxopt(struct sock *sk, unsigned int opt)
135 {
136 	const struct chtls_dev *cdev;
137 	struct chtls_sock *csk;
138 	struct tcp_sock *tp;
139 
140 	csk = rcu_dereference_sk_user_data(sk);
141 	tp = tcp_sk(sk);
142 
143 	cdev = csk->cdev;
144 	tp->tcp_header_len           = sizeof(struct tcphdr);
145 	tp->rx_opt.mss_clamp         = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
146 	tp->mss_cache                = tp->rx_opt.mss_clamp;
147 	tp->rx_opt.tstamp_ok         = TCPOPT_TSTAMP_G(opt);
148 	tp->rx_opt.snd_wscale        = TCPOPT_SACK_G(opt);
149 	tp->rx_opt.wscale_ok         = TCPOPT_WSCALE_OK_G(opt);
150 	SND_WSCALE(tp)               = TCPOPT_SND_WSCALE_G(opt);
151 	if (!tp->rx_opt.wscale_ok)
152 		tp->rx_opt.rcv_wscale = 0;
153 	if (tp->rx_opt.tstamp_ok) {
154 		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
155 		tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
156 	} else if (csk->opt2 & TSTAMPS_EN_F) {
157 		csk->opt2 &= ~TSTAMPS_EN_F;
158 		csk->mtu_idx = TCPOPT_MSS_G(opt);
159 	}
160 }
161 
162 static void chtls_purge_receive_queue(struct sock *sk)
163 {
164 	struct sk_buff *skb;
165 
166 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
167 		skb_dst_set(skb, NULL);
168 		kfree_skb(skb);
169 	}
170 }
171 
172 static void chtls_purge_write_queue(struct sock *sk)
173 {
174 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
175 	struct sk_buff *skb;
176 
177 	while ((skb = __skb_dequeue(&csk->txq))) {
178 		sk->sk_wmem_queued -= skb->truesize;
179 		__kfree_skb(skb);
180 	}
181 }
182 
183 static void chtls_purge_recv_queue(struct sock *sk)
184 {
185 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
186 	struct chtls_hws *tlsk = &csk->tlshws;
187 	struct sk_buff *skb;
188 
189 	while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
190 		skb_dst_set(skb, NULL);
191 		kfree_skb(skb);
192 	}
193 }
194 
195 static void abort_arp_failure(void *handle, struct sk_buff *skb)
196 {
197 	struct cpl_abort_req *req = cplhdr(skb);
198 	struct chtls_dev *cdev;
199 
200 	cdev = (struct chtls_dev *)handle;
201 	req->cmd = CPL_ABORT_NO_RST;
202 	cxgb4_ofld_send(cdev->lldi->ports[0], skb);
203 }
204 
205 static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len)
206 {
207 	if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
208 		__skb_trim(skb, 0);
209 		refcount_add(2, &skb->users);
210 	} else {
211 		skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
212 	}
213 	return skb;
214 }
215 
216 static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb)
217 {
218 	struct cpl_abort_req *req;
219 	struct chtls_sock *csk;
220 	struct tcp_sock *tp;
221 
222 	csk = rcu_dereference_sk_user_data(sk);
223 	tp = tcp_sk(sk);
224 
225 	if (!skb)
226 		skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req));
227 
228 	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
229 	INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid);
230 	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
231 	req->rsvd0 = htonl(tp->snd_nxt);
232 	req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT);
233 	req->cmd = mode;
234 	t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure);
235 	send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST);
236 }
237 
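/*
 * Reset an offloaded connection by issuing a CPL_ABORT_REQ.  A flow-control
 * WR is sent first if none has gone out yet and the pending write queue is
 * purged; connections that are already aborting or still in SYN_RECV are
 * only flagged and do not get an ABORT_REQ here.
 */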
238 static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
239 {
240 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
241 
242 	if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) ||
243 		     !csk->cdev)) {
244 		if (sk->sk_state == TCP_SYN_RECV)
245 			csk_set_flag(csk, CSK_RST_ABORTED);
246 		goto out;
247 	}
248 
249 	if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
250 		struct tcp_sock *tp = tcp_sk(sk);
251 
252 		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
253 			WARN_ONCE(1, "send tx flowc error");
254 		csk_set_flag(csk, CSK_TX_DATA_SENT);
255 	}
256 
257 	csk_set_flag(csk, CSK_ABORT_RPL_PENDING);
258 	chtls_purge_write_queue(sk);
259 
260 	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
261 	if (sk->sk_state != TCP_SYN_RECV)
262 		chtls_send_abort(sk, mode, skb);
263 	else
264 		goto out;
265 
266 	return;
267 out:
268 	kfree_skb(skb);
269 }
270 
271 static void release_tcp_port(struct sock *sk)
272 {
273 	if (inet_csk(sk)->icsk_bind_hash)
274 		inet_put_port(sk);
275 }
276 
277 static void tcp_uncork(struct sock *sk)
278 {
279 	struct tcp_sock *tp = tcp_sk(sk);
280 
281 	if (tp->nonagle & TCP_NAGLE_CORK) {
282 		tp->nonagle &= ~TCP_NAGLE_CORK;
283 		chtls_tcp_push(sk, 0);
284 	}
285 }
286 
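/*
 * Queue a CPL_CLOSE_CON_REQ work request so the adapter sends a FIN for
 * this connection, then push any frames still pending in the TX queue.
 */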
287 static void chtls_close_conn(struct sock *sk)
288 {
289 	struct cpl_close_con_req *req;
290 	struct chtls_sock *csk;
291 	struct sk_buff *skb;
292 	unsigned int tid;
293 	unsigned int len;
294 
295 	len = roundup(sizeof(struct cpl_close_con_req), 16);
296 	csk = rcu_dereference_sk_user_data(sk);
297 	tid = csk->tid;
298 
299 	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
300 	req = (struct cpl_close_con_req *)__skb_put(skb, len);
301 	memset(req, 0, len);
302 	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
303 			      FW_WR_IMMDLEN_V(sizeof(*req) -
304 					      sizeof(req->wr)));
305 	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
306 			       FW_WR_FLOWID_V(tid));
307 
308 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
309 
310 	tcp_uncork(sk);
311 	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
312 	if (sk->sk_state != TCP_SYN_SENT)
313 		chtls_push_frames(csk, 1);
314 }
315 
316 /*
317  * Perform a state transition during close and return the actions indicated
318  * for the transition.  Do not make this function inline; the main reason
319  * it exists at all is to avoid multiple inlining of tcp_set_state.
320  */
321 static int make_close_transition(struct sock *sk)
322 {
323 	int next = (int)new_state[sk->sk_state];
324 
325 	tcp_set_state(sk, next & TCP_STATE_MASK);
326 	return next & TCP_ACTION_FIN;
327 }
328 
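/*
 * Offload analogue of tcp_close(): unread receive data or a connection
 * still in SYN_SENT results in an abort (RST), a zero-linger close uses
 * the protocol's disconnect, and otherwise the new_state[] table above
 * decides whether a FIN must be sent via chtls_close_conn().
 */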
329 void chtls_close(struct sock *sk, long timeout)
330 {
331 	int data_lost, prev_state;
332 	struct chtls_sock *csk;
333 
334 	csk = rcu_dereference_sk_user_data(sk);
335 
336 	lock_sock(sk);
337 	sk->sk_shutdown |= SHUTDOWN_MASK;
338 
339 	data_lost = skb_queue_len(&sk->sk_receive_queue);
340 	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
341 	chtls_purge_recv_queue(sk);
342 	chtls_purge_receive_queue(sk);
343 
344 	if (sk->sk_state == TCP_CLOSE) {
345 		goto wait;
346 	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
347 		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
348 		release_tcp_port(sk);
349 		goto unlock;
350 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
351 		sk->sk_prot->disconnect(sk, 0);
352 	} else if (make_close_transition(sk)) {
353 		chtls_close_conn(sk);
354 	}
355 wait:
356 	if (timeout)
357 		sk_stream_wait_close(sk, timeout);
358 
359 unlock:
360 	prev_state = sk->sk_state;
361 	sock_hold(sk);
362 	sock_orphan(sk);
363 
364 	release_sock(sk);
365 
366 	local_bh_disable();
367 	bh_lock_sock(sk);
368 
369 	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
370 		goto out;
371 
372 	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
373 	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
374 		struct sk_buff *skb;
375 
376 		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
377 		if (skb)
378 			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
379 	}
380 
381 	if (sk->sk_state == TCP_CLOSE)
382 		inet_csk_destroy_sock(sk);
383 
384 out:
385 	bh_unlock_sock(sk);
386 	local_bh_enable();
387 	sock_put(sk);
388 }
389 
390 /*
391  * Wait until a socket enters one of the given states.
392  */
393 static int wait_for_states(struct sock *sk, unsigned int states)
394 {
395 	DECLARE_WAITQUEUE(wait, current);
396 	struct socket_wq _sk_wq;
397 	long current_timeo;
398 	int err = 0;
399 
400 	current_timeo = 200;
401 
402 	/*
403 	 * We want this to work even when there's no associated struct socket.
404 	 * In that case we provide a temporary socket_wq on the stack.
405 	 */
406 	if (!sk->sk_wq) {
407 		init_waitqueue_head(&_sk_wq.wait);
408 		_sk_wq.fasync_list = NULL;
409 		init_rcu_head_on_stack(&_sk_wq.rcu);
410 		RCU_INIT_POINTER(sk->sk_wq, &_sk_wq);
411 	}
412 
413 	add_wait_queue(sk_sleep(sk), &wait);
414 	while (!sk_in_state(sk, states)) {
415 		if (!current_timeo) {
416 			err = -EBUSY;
417 			break;
418 		}
419 		if (signal_pending(current)) {
420 			err = sock_intr_errno(current_timeo);
421 			break;
422 		}
423 		set_current_state(TASK_UNINTERRUPTIBLE);
424 		release_sock(sk);
425 		if (!sk_in_state(sk, states))
426 			current_timeo = schedule_timeout(current_timeo);
427 		__set_current_state(TASK_RUNNING);
428 		lock_sock(sk);
429 	}
430 	remove_wait_queue(sk_sleep(sk), &wait);
431 
432 	if (rcu_dereference(sk->sk_wq) == &_sk_wq)
433 		sk->sk_wq = NULL;
434 	return err;
435 }
436 
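/*
 * Disconnect an offloaded socket: purge all queues, abort the connection
 * if it is not already closed, wait for the teardown to complete and then
 * fall back to tcp_disconnect() so the socket can be reused.
 */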
437 int chtls_disconnect(struct sock *sk, int flags)
438 {
439 	struct tcp_sock *tp;
440 	int err;
441 
442 	tp = tcp_sk(sk);
443 	chtls_purge_recv_queue(sk);
444 	chtls_purge_receive_queue(sk);
445 	chtls_purge_write_queue(sk);
446 
447 	if (sk->sk_state != TCP_CLOSE) {
448 		sk->sk_err = ECONNRESET;
449 		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
450 		err = wait_for_states(sk, TCPF_CLOSE);
451 		if (err)
452 			return err;
453 	}
454 	chtls_purge_recv_queue(sk);
455 	chtls_purge_receive_queue(sk);
456 	tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale);
457 	return tcp_disconnect(sk, flags);
458 }
459 
460 #define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \
461 				 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)
462 void chtls_shutdown(struct sock *sk, int how)
463 {
464 	if ((how & SEND_SHUTDOWN) &&
465 	    sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) &&
466 	    make_close_transition(sk))
467 		chtls_close_conn(sk);
468 }
469 
470 void chtls_destroy_sock(struct sock *sk)
471 {
472 	struct chtls_sock *csk;
473 
474 	csk = rcu_dereference_sk_user_data(sk);
475 	chtls_purge_recv_queue(sk);
476 	csk->ulp_mode = ULP_MODE_NONE;
477 	chtls_purge_write_queue(sk);
478 	free_tls_keyid(sk);
479 	kref_put(&csk->kref, chtls_sock_release);
480 	csk->cdev = NULL;
481 	if (sk->sk_family == AF_INET)
482 		sk->sk_prot = &tcp_prot;
483 #if IS_ENABLED(CONFIG_IPV6)
484 	else
485 		sk->sk_prot = &tcpv6_prot;
486 #endif
487 	sk->sk_prot->destroy(sk);
488 }
489 
490 static void reset_listen_child(struct sock *child)
491 {
492 	struct chtls_sock *csk = rcu_dereference_sk_user_data(child);
493 	struct sk_buff *skb;
494 
495 	skb = alloc_ctrl_skb(csk->txdata_skb_cache,
496 			     sizeof(struct cpl_abort_req));
497 
498 	chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
499 	sock_orphan(child);
500 	INC_ORPHAN_COUNT(child);
501 	if (child->sk_state == TCP_CLOSE)
502 		inet_csk_destroy_sock(child);
503 }
504 
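/*
 * Reset every offloaded child connection still parked on the listener's
 * accept queue, i.e. established but not yet accepted by user space.
 */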
505 static void chtls_disconnect_acceptq(struct sock *listen_sk)
506 {
507 	struct request_sock **pprev;
508 
509 	pprev = ACCEPT_QUEUE(listen_sk);
510 	while (*pprev) {
511 		struct request_sock *req = *pprev;
512 
513 		if (req->rsk_ops == &chtls_rsk_ops ||
514 		    req->rsk_ops == &chtls_rsk_opsv6) {
515 			struct sock *child = req->sk;
516 
517 			*pprev = req->dl_next;
518 			sk_acceptq_removed(listen_sk);
519 			reqsk_put(req);
520 			sock_hold(child);
521 			local_bh_disable();
522 			bh_lock_sock(child);
523 			release_tcp_port(child);
524 			reset_listen_child(child);
525 			bh_unlock_sock(child);
526 			local_bh_enable();
527 			sock_put(child);
528 		} else {
529 			pprev = &req->dl_next;
530 		}
531 	}
532 }
533 
534 static int listen_hashfn(const struct sock *sk)
535 {
536 	return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
537 }
538 
539 static struct listen_info *listen_hash_add(struct chtls_dev *cdev,
540 					   struct sock *sk,
541 					   unsigned int stid)
542 {
543 	struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL);
544 
545 	if (p) {
546 		int key = listen_hashfn(sk);
547 
548 		p->sk = sk;
549 		p->stid = stid;
550 		spin_lock(&cdev->listen_lock);
551 		p->next = cdev->listen_hash_tab[key];
552 		cdev->listen_hash_tab[key] = p;
553 		spin_unlock(&cdev->listen_lock);
554 	}
555 	return p;
556 }
557 
558 static int listen_hash_find(struct chtls_dev *cdev,
559 			    struct sock *sk)
560 {
561 	struct listen_info *p;
562 	int stid = -1;
563 	int key;
564 
565 	key = listen_hashfn(sk);
566 
567 	spin_lock(&cdev->listen_lock);
568 	for (p = cdev->listen_hash_tab[key]; p; p = p->next)
569 		if (p->sk == sk) {
570 			stid = p->stid;
571 			break;
572 		}
573 	spin_unlock(&cdev->listen_lock);
574 	return stid;
575 }
576 
577 static int listen_hash_del(struct chtls_dev *cdev,
578 			   struct sock *sk)
579 {
580 	struct listen_info *p, **prev;
581 	int stid = -1;
582 	int key;
583 
584 	key = listen_hashfn(sk);
585 	prev = &cdev->listen_hash_tab[key];
586 
587 	spin_lock(&cdev->listen_lock);
588 	for (p = *prev; p; prev = &p->next, p = p->next)
589 		if (p->sk == sk) {
590 			stid = p->stid;
591 			*prev = p->next;
592 			kfree(p);
593 			break;
594 		}
595 	spin_unlock(&cdev->listen_lock);
596 	return stid;
597 }
598 
599 static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent)
600 {
601 	struct request_sock *req;
602 	struct chtls_sock *csk;
603 
604 	csk = rcu_dereference_sk_user_data(child);
605 	req = csk->passive_reap_next;
606 
607 	reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req);
608 	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
609 	chtls_reqsk_free(req);
610 	csk->passive_reap_next = NULL;
611 }
612 
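/*
 * Abort all embryonic (SYN_RECV) connections still sitting on the listen
 * context's SYN queue when the listener goes away.
 */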
613 static void chtls_reset_synq(struct listen_ctx *listen_ctx)
614 {
615 	struct sock *listen_sk = listen_ctx->lsk;
616 
617 	while (!skb_queue_empty(&listen_ctx->synq)) {
618 		struct chtls_sock *csk =
619 			container_of((struct synq *)__skb_dequeue
620 				(&listen_ctx->synq), struct chtls_sock, synq);
621 		struct sock *child = csk->sk;
622 
623 		cleanup_syn_rcv_conn(child, listen_sk);
624 		sock_hold(child);
625 		local_bh_disable();
626 		bh_lock_sock(child);
627 		release_tcp_port(child);
628 		reset_listen_child(child);
629 		bh_unlock_sock(child);
630 		local_bh_enable();
631 		sock_put(child);
632 	}
633 }
634 
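/*
 * Start hardware listening for a listening socket: resolve the egress
 * device, allocate a server TID (stid), record the stid in the per-device
 * listen hash table and ask the adapter to create the server via
 * cxgb4_create_server()/cxgb4_create_server6() (plus a CLIP entry for a
 * bound IPv6 address).
 */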
635 int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
636 {
637 	struct net_device *ndev;
638 #if IS_ENABLED(CONFIG_IPV6)
639 	bool clip_valid = false;
640 #endif
641 	struct listen_ctx *ctx;
642 	struct adapter *adap;
643 	struct port_info *pi;
644 	int ret = 0;
645 	int stid;
646 
647 	rcu_read_lock();
648 	ndev = chtls_find_netdev(cdev, sk);
649 	rcu_read_unlock();
650 	if (!ndev)
651 		return -EBADF;
652 
653 	pi = netdev_priv(ndev);
654 	adap = pi->adapter;
655 	if (!(adap->flags & CXGB4_FULL_INIT_DONE))
656 		return -EBADF;
657 
658 	if (listen_hash_find(cdev, sk) >= 0)   /* already have it */
659 		return -EADDRINUSE;
660 
661 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
662 	if (!ctx)
663 		return -ENOMEM;
664 
665 	__module_get(THIS_MODULE);
666 	ctx->lsk = sk;
667 	ctx->cdev = cdev;
668 	ctx->state = T4_LISTEN_START_PENDING;
669 	skb_queue_head_init(&ctx->synq);
670 
671 	stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx);
672 	if (stid < 0)
673 		goto free_ctx;
674 
675 	sock_hold(sk);
676 	if (!listen_hash_add(cdev, sk, stid))
677 		goto free_stid;
678 
679 	if (sk->sk_family == PF_INET) {
680 		ret = cxgb4_create_server(ndev, stid,
681 					  inet_sk(sk)->inet_rcv_saddr,
682 					  inet_sk(sk)->inet_sport, 0,
683 					  cdev->lldi->rxq_ids[0]);
684 #if IS_ENABLED(CONFIG_IPV6)
685 	} else {
686 		int addr_type;
687 
688 		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
689 		if (addr_type != IPV6_ADDR_ANY) {
690 			ret = cxgb4_clip_get(ndev, (const u32 *)
691 					     &sk->sk_v6_rcv_saddr, 1);
692 			if (ret)
693 				goto del_hash;
694 			clip_valid = true;
695 		}
696 		ret = cxgb4_create_server6(ndev, stid,
697 					   &sk->sk_v6_rcv_saddr,
698 					   inet_sk(sk)->inet_sport,
699 					   cdev->lldi->rxq_ids[0]);
700 #endif
701 	}
702 	if (ret > 0)
703 		ret = net_xmit_errno(ret);
704 	if (ret)
705 		goto del_hash;
706 	return 0;
707 del_hash:
708 #if IS_ENABLED(CONFIG_IPV6)
709 	if (clip_valid)
710 		cxgb4_clip_release(ndev, (const u32 *)&sk->sk_v6_rcv_saddr, 1);
711 #endif
712 	listen_hash_del(cdev, sk);
713 free_stid:
714 	cxgb4_free_stid(cdev->tids, stid, sk->sk_family);
715 	sock_put(sk);
716 free_ctx:
717 	kfree(ctx);
718 	module_put(THIS_MODULE);
719 	return -EBADF;
720 }
721 
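/*
 * Stop hardware listening: drop the stid from the listen hash table, reset
 * embryonic connections on the SYN queue, remove the server from the
 * adapter (releasing any IPv6 CLIP entry) and reset connections still
 * waiting on the accept queue.
 */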
722 void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
723 {
724 	struct listen_ctx *listen_ctx;
725 	int stid;
726 
727 	stid = listen_hash_del(cdev, sk);
728 	if (stid < 0)
729 		return;
730 
731 	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
732 	chtls_reset_synq(listen_ctx);
733 
734 	cxgb4_remove_server(cdev->lldi->ports[0], stid,
735 			    cdev->lldi->rxq_ids[0], sk->sk_family == PF_INET6);
736 
737 #if IS_ENABLED(CONFIG_IPV6)
738 	if (sk->sk_family == PF_INET6) {
739 		struct chtls_sock *csk;
740 		int addr_type = 0;
741 
742 		csk = rcu_dereference_sk_user_data(sk);
743 		addr_type = ipv6_addr_type((const struct in6_addr *)
744 					  &sk->sk_v6_rcv_saddr);
745 		if (addr_type != IPV6_ADDR_ANY)
746 			cxgb4_clip_release(csk->egress_dev, (const u32 *)
747 					   &sk->sk_v6_rcv_saddr, 1);
748 	}
749 #endif
750 	chtls_disconnect_acceptq(sk);
751 }
752 
753 static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
754 {
755 	struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR;
756 	unsigned int stid = GET_TID(rpl);
757 	struct listen_ctx *listen_ctx;
758 
759 	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
760 	if (!listen_ctx)
761 		return CPL_RET_BUF_DONE;
762 
763 	if (listen_ctx->state == T4_LISTEN_START_PENDING) {
764 		listen_ctx->state = T4_LISTEN_STARTED;
765 		return CPL_RET_BUF_DONE;
766 	}
767 
768 	if (rpl->status != CPL_ERR_NONE) {
769 		pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
770 			rpl->status, stid);
771 		return CPL_RET_BUF_DONE;
772 	}
773 	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
774 	sock_put(listen_ctx->lsk);
775 	kfree(listen_ctx);
776 	module_put(THIS_MODULE);
777 
778 	return 0;
779 }
780 
781 static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
782 {
783 	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR;
784 	struct listen_ctx *listen_ctx;
785 	unsigned int stid;
786 	void *data;
787 
788 	stid = GET_TID(rpl);
789 	data = lookup_stid(cdev->tids, stid);
790 	listen_ctx = (struct listen_ctx *)data;
791 
792 	if (rpl->status != CPL_ERR_NONE) {
793 		pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
794 			rpl->status, stid);
795 		return CPL_RET_BUF_DONE;
796 	}
797 
798 	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
799 	sock_put(listen_ctx->lsk);
800 	kfree(listen_ctx);
801 	module_put(THIS_MODULE);
802 
803 	return 0;
804 }
805 
806 static void chtls_purge_wr_queue(struct sock *sk)
807 {
808 	struct sk_buff *skb;
809 
810 	while ((skb = dequeue_wr(sk)) != NULL)
811 		kfree_skb(skb);
812 }
813 
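/*
 * Return the adapter resources held by a connection: the cached control
 * skb, any outstanding work requests, the L2T entry and, unless the
 * connection never left SYN_SENT, the hardware TID itself.
 */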
814 static void chtls_release_resources(struct sock *sk)
815 {
816 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
817 	struct chtls_dev *cdev = csk->cdev;
818 	unsigned int tid = csk->tid;
819 	struct tid_info *tids;
820 
821 	if (!cdev)
822 		return;
823 
824 	tids = cdev->tids;
825 	kfree_skb(csk->txdata_skb_cache);
826 	csk->txdata_skb_cache = NULL;
827 
828 	if (csk->wr_credits != csk->wr_max_credits) {
829 		chtls_purge_wr_queue(sk);
830 		chtls_reset_wr_list(csk);
831 	}
832 
833 	if (csk->l2t_entry) {
834 		cxgb4_l2t_release(csk->l2t_entry);
835 		csk->l2t_entry = NULL;
836 	}
837 
838 	if (sk->sk_state != TCP_SYN_SENT) {
839 		cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family);
840 		sock_put(sk);
841 	}
842 }
843 
844 static void chtls_conn_done(struct sock *sk)
845 {
846 	if (sock_flag(sk, SOCK_DEAD))
847 		chtls_purge_receive_queue(sk);
848 	sk_wakeup_sleepers(sk, 0);
849 	tcp_done(sk);
850 }
851 
852 static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
853 {
854 	/*
855 	 * If the server is still open we clean up the child connection,
856 	 * otherwise the server already did the clean up as it was purging
857 	 * its SYN queue and the skb was just sitting in its backlog.
858 	 */
859 	if (likely(parent->sk_state == TCP_LISTEN)) {
860 		cleanup_syn_rcv_conn(child, parent);
861 		/* Without the call to sock_orphan() below we leak the
862 		 * socket during the syn_flood test, because
863 		 * inet_csk_destroy_sock() is not called from tcp_done()
864 		 * when the SOCK_DEAD flag is not set.  The kernel handles
865 		 * this differently: it creates the new socket only after
866 		 * the 3-way handshake has completed.
867 		 */
868 		sock_orphan(child);
869 		percpu_counter_inc((child)->sk_prot->orphan_count);
870 		chtls_release_resources(child);
871 		chtls_conn_done(child);
872 	} else {
873 		if (csk_flag(child, CSK_RST_ABORTED)) {
874 			chtls_release_resources(child);
875 			chtls_conn_done(child);
876 		}
877 	}
878 }
879 
880 static void pass_open_abort(struct sock *child, struct sock *parent,
881 			    struct sk_buff *skb)
882 {
883 	do_abort_syn_rcv(child, parent);
884 	kfree_skb(skb);
885 }
886 
887 static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb)
888 {
889 	pass_open_abort(skb->sk, lsk, skb);
890 }
891 
892 static void chtls_pass_open_arp_failure(struct sock *sk,
893 					struct sk_buff *skb)
894 {
895 	const struct request_sock *oreq;
896 	struct chtls_sock *csk;
897 	struct chtls_dev *cdev;
898 	struct sock *parent;
899 	void *data;
900 
901 	csk = rcu_dereference_sk_user_data(sk);
902 	cdev = csk->cdev;
903 
904 	/*
905 	 * If the connection is being aborted due to the parent listening
906 	 * socket going away there's nothing to do, the ABORT_REQ will close
907 	 * the connection.
908 	 */
909 	if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) {
910 		kfree_skb(skb);
911 		return;
912 	}
913 
914 	oreq = csk->passive_reap_next;
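	/* the server stid was stashed in ts_recent by chtls_recv_sock() */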
915 	data = lookup_stid(cdev->tids, oreq->ts_recent);
916 	parent = ((struct listen_ctx *)data)->lsk;
917 
918 	bh_lock_sock(parent);
919 	if (!sock_owned_by_user(parent)) {
920 		pass_open_abort(sk, parent, skb);
921 	} else {
922 		BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort;
923 		__sk_add_backlog(parent, skb);
924 	}
925 	bh_unlock_sock(parent);
926 }
927 
928 static void chtls_accept_rpl_arp_failure(void *handle,
929 					 struct sk_buff *skb)
930 {
931 	struct sock *sk = (struct sock *)handle;
932 
933 	sock_hold(sk);
934 	process_cpl_msg(chtls_pass_open_arp_failure, sk, skb);
935 	sock_put(sk);
936 }
937 
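/*
 * Pick the adapter MTU-table index for a passively opened connection and
 * set tp->advmss from the path MTU, the peer-advertised MSS and the
 * IP/TCP header plus timestamp-option overhead.
 */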
938 static unsigned int chtls_select_mss(const struct chtls_sock *csk,
939 				     unsigned int pmtu,
940 				     struct cpl_pass_accept_req *req)
941 {
942 	struct chtls_dev *cdev;
943 	struct dst_entry *dst;
944 	unsigned int tcpoptsz;
945 	unsigned int iphdrsz;
946 	unsigned int mtu_idx;
947 	struct tcp_sock *tp;
948 	unsigned int mss;
949 	struct sock *sk;
950 
951 	mss = ntohs(req->tcpopt.mss);
952 	sk = csk->sk;
953 	dst = __sk_dst_get(sk);
954 	cdev = csk->cdev;
955 	tp = tcp_sk(sk);
956 	tcpoptsz = 0;
957 
958 #if IS_ENABLED(CONFIG_IPV6)
959 	if (sk->sk_family == AF_INET6)
960 		iphdrsz = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
961 	else
962 #endif
963 		iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
964 	if (req->tcpopt.tstamp)
965 		tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);
966 
967 	tp->advmss = dst_metric_advmss(dst);
968 	if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
969 		tp->advmss = USER_MSS(tp);
970 	if (tp->advmss > pmtu - iphdrsz)
971 		tp->advmss = pmtu - iphdrsz;
972 	if (mss && tp->advmss > mss)
973 		tp->advmss = mss;
974 
975 	tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus,
976 					    iphdrsz + tcpoptsz,
977 					    tp->advmss - tcpoptsz,
978 					    8, &mtu_idx);
979 	tp->advmss -= iphdrsz;
980 
981 	inet_csk(sk)->icsk_pmtu_cookie = pmtu;
982 	return mtu_idx;
983 }
984 
985 static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp)
986 {
987 	int wscale = 0;
988 
989 	if (space > MAX_RCV_WND)
990 		space = MAX_RCV_WND;
991 	if (win_clamp && win_clamp < space)
992 		space = win_clamp;
993 
994 	if (wscale_ok) {
995 		while (wscale < 14 && (65535 << wscale) < space)
996 			wscale++;
997 	}
998 	return wscale;
999 }
1000 
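/*
 * Build and send the CPL_PASS_ACCEPT_RPL that commits the adapter to the
 * new connection, encoding the MSS index, window scaling, ECN/congestion
 * settings and the TLS ULP mode in opt0/opt2.
 */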
1001 static void chtls_pass_accept_rpl(struct sk_buff *skb,
1002 				  struct cpl_pass_accept_req *req,
1003 				  unsigned int tid)
1004 
1005 {
1006 	struct cpl_t5_pass_accept_rpl *rpl5;
1007 	struct cxgb4_lld_info *lldi;
1008 	const struct tcphdr *tcph;
1009 	const struct tcp_sock *tp;
1010 	struct chtls_sock *csk;
1011 	unsigned int len;
1012 	struct sock *sk;
1013 	u32 opt2, hlen;
1014 	u64 opt0;
1015 
1016 	sk = skb->sk;
1017 	tp = tcp_sk(sk);
1018 	csk = sk->sk_user_data;
1019 	csk->tid = tid;
1020 	lldi = csk->cdev->lldi;
1021 	len = roundup(sizeof(*rpl5), 16);
1022 
1023 	rpl5 = __skb_put_zero(skb, len);
1024 	INIT_TP_WR(rpl5, tid);
1025 
1026 	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1027 						     csk->tid));
1028 	csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)),
1029 					req);
1030 	opt0 = TCAM_BYPASS_F |
1031 	       WND_SCALE_V(RCV_WSCALE(tp)) |
1032 	       MSS_IDX_V(csk->mtu_idx) |
1033 	       L2T_IDX_V(csk->l2t_entry->idx) |
1034 	       NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) |
1035 	       TX_CHAN_V(csk->tx_chan) |
1036 	       SMAC_SEL_V(csk->smac_idx) |
1037 	       DSCP_V(csk->tos >> 2) |
1038 	       ULP_MODE_V(ULP_MODE_TLS) |
1039 	       RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M));
1040 
1041 	opt2 = RX_CHANNEL_V(0) |
1042 		RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);
1043 
1044 	if (!is_t5(lldi->adapter_type))
1045 		opt2 |= RX_FC_DISABLE_F;
1046 	if (req->tcpopt.tstamp)
1047 		opt2 |= TSTAMPS_EN_F;
1048 	if (req->tcpopt.sack)
1049 		opt2 |= SACK_EN_F;
1050 	hlen = ntohl(req->hdr_len);
1051 
1052 	tcph = (struct tcphdr *)((u8 *)(req + 1) +
1053 			T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));
1054 	if (tcph->ece && tcph->cwr)
1055 		opt2 |= CCTRL_ECN_V(1);
1056 	opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);
1057 	opt2 |= T5_ISS_F;
1058 	opt2 |= T5_OPT_2_VALID_F;
1059 	opt2 |= WND_SCALE_EN_V(WSCALE_OK(tp));
1060 	rpl5->opt0 = cpu_to_be64(opt0);
1061 	rpl5->opt2 = cpu_to_be32(opt2);
1062 	rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1);
1063 	set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id);
1064 	t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure);
1065 	cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
1066 }
1067 
1068 static void inet_inherit_port(struct inet_hashinfo *hash_info,
1069 			      struct sock *lsk, struct sock *newsk)
1070 {
1071 	local_bh_disable();
1072 	__inet_inherit_port(lsk, newsk);
1073 	local_bh_enable();
1074 }
1075 
1076 static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1077 {
1078 	if (skb->protocol) {
1079 		kfree_skb(skb);
1080 		return 0;
1081 	}
1082 	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
1083 	return 0;
1084 }
1085 
1086 static void chtls_set_tcp_window(struct chtls_sock *csk)
1087 {
1088 	struct net_device *ndev = csk->egress_dev;
1089 	struct port_info *pi = netdev_priv(ndev);
1090 	unsigned int linkspeed;
1091 	u8 scale;
1092 
1093 	linkspeed = pi->link_cfg.speed;
1094 	scale = linkspeed / SPEED_10000;
1095 #define CHTLS_10G_RCVWIN (256 * 1024)
1096 	csk->rcv_win = CHTLS_10G_RCVWIN;
1097 	if (scale)
1098 		csk->rcv_win *= scale;
1099 #define CHTLS_10G_SNDWIN (256 * 1024)
1100 	csk->snd_win = CHTLS_10G_SNDWIN;
1101 	if (scale)
1102 		csk->snd_win *= scale;
1103 }
1104 
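/*
 * Create the child socket for an incoming offloaded connection: clone the
 * listener with tcp_create_openreq_child(), resolve the route and L2T
 * entry towards the peer, and set up the chtls_sock with the queues,
 * windows and TLS ULP mode the connection will use.
 */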
1105 static struct sock *chtls_recv_sock(struct sock *lsk,
1106 				    struct request_sock *oreq,
1107 				    void *network_hdr,
1108 				    const struct cpl_pass_accept_req *req,
1109 				    struct chtls_dev *cdev)
1110 {
1111 	struct neighbour *n = NULL;
1112 	struct inet_sock *newinet;
1113 	const struct iphdr *iph;
1114 	struct tls_context *ctx;
1115 	struct net_device *ndev;
1116 	struct chtls_sock *csk;
1117 	struct dst_entry *dst;
1118 	struct tcp_sock *tp;
1119 	struct sock *newsk;
1120 	u16 port_id;
1121 	int rxq_idx;
1122 	int step;
1123 
1124 	iph = (const struct iphdr *)network_hdr;
1125 	newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
1126 	if (!newsk)
1127 		goto free_oreq;
1128 
1129 	if (lsk->sk_family == AF_INET) {
1130 		dst = inet_csk_route_child_sock(lsk, newsk, oreq);
1131 		if (!dst)
1132 			goto free_sk;
1133 
1134 		n = dst_neigh_lookup(dst, &iph->saddr);
1135 #if IS_ENABLED(CONFIG_IPV6)
1136 	} else {
1137 		const struct ipv6hdr *ip6h;
1138 		struct flowi6 fl6;
1139 
1140 		ip6h = (const struct ipv6hdr *)network_hdr;
1141 		memset(&fl6, 0, sizeof(fl6));
1142 		fl6.flowi6_proto = IPPROTO_TCP;
1143 		fl6.saddr = ip6h->daddr;
1144 		fl6.daddr = ip6h->saddr;
1145 		fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port;
1146 		fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num);
1147 		security_req_classify_flow(oreq, flowi6_to_flowi(&fl6));
1148 		dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL);
1149 		if (IS_ERR(dst))
1150 			goto free_sk;
1151 		n = dst_neigh_lookup(dst, &ip6h->saddr);
1152 #endif
1153 	}
1154 	if (!n)
1155 		goto free_sk;
1156 
1157 	ndev = n->dev;
1158 	if (!ndev)
1159 		goto free_dst;
1160 	port_id = cxgb4_port_idx(ndev);
1161 
1162 	csk = chtls_sock_create(cdev);
1163 	if (!csk)
1164 		goto free_dst;
1165 
1166 	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
1167 	if (!csk->l2t_entry)
1168 		goto free_csk;
1169 
1170 	newsk->sk_user_data = csk;
1171 	newsk->sk_backlog_rcv = chtls_backlog_rcv;
1172 
1173 	tp = tcp_sk(newsk);
1174 	newinet = inet_sk(newsk);
1175 
1176 	if (iph->version == 0x4) {
1177 		newinet->inet_daddr = iph->saddr;
1178 		newinet->inet_rcv_saddr = iph->daddr;
1179 		newinet->inet_saddr = iph->daddr;
1180 #if IS_ENABLED(CONFIG_IPV6)
1181 	} else {
1182 		struct tcp6_sock *newtcp6sk = (struct tcp6_sock *)newsk;
1183 		struct inet_request_sock *treq = inet_rsk(oreq);
1184 		struct ipv6_pinfo *newnp = inet6_sk(newsk);
1185 		struct ipv6_pinfo *np = inet6_sk(lsk);
1186 
1187 		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1188 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1189 		newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
1190 		newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
1191 		inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
1192 		newnp->ipv6_fl_list = NULL;
1193 		newnp->pktoptions = NULL;
1194 		newsk->sk_bound_dev_if = treq->ir_iif;
1195 		newinet->inet_opt = NULL;
1196 		newinet->inet_daddr = LOOPBACK4_IPV6;
1197 		newinet->inet_saddr = LOOPBACK4_IPV6;
1198 #endif
1199 	}
1200 
1201 	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
1202 	sk_setup_caps(newsk, dst);
1203 	ctx = tls_get_ctx(lsk);
1204 	newsk->sk_destruct = ctx->sk_destruct;
1205 	csk->sk = newsk;
1206 	csk->passive_reap_next = oreq;
1207 	csk->tx_chan = cxgb4_port_chan(ndev);
1208 	csk->port_id = port_id;
1209 	csk->egress_dev = ndev;
1210 	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
1211 	chtls_set_tcp_window(csk);
1212 	tp->rcv_wnd = csk->rcv_win;
1213 	csk->sndbuf = csk->snd_win;
1214 	csk->ulp_mode = ULP_MODE_TLS;
1215 	step = cdev->lldi->nrxq / cdev->lldi->nchan;
1216 	csk->rss_qid = cdev->lldi->rxq_ids[port_id * step];
1217 	rxq_idx = port_id * step;
1218 	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
1219 			port_id * step;
1220 	csk->sndbuf = newsk->sk_sndbuf;
1221 	csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
1222 	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
1223 					   sock_net(newsk)->
1224 						ipv4.sysctl_tcp_window_scaling,
1225 					   tp->window_clamp);
1226 	neigh_release(n);
1227 	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
1228 	csk_set_flag(csk, CSK_CONN_INLINE);
1229 	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
1230 
1231 	return newsk;
1232 free_csk:
1233 	chtls_sock_release(&csk->kref);
1234 free_dst:
1235 	dst_release(dst);
1236 free_sk:
1237 	inet_csk_prepare_forced_close(newsk);
1238 	tcp_done(newsk);
1239 free_oreq:
1240 	chtls_reqsk_free(oreq);
1241 	return NULL;
1242 }
1243 
1244 /*
1245  * Populate a TID_RELEASE WR.  The skb must already be properly sized.
1246  */
1247 static  void mk_tid_release(struct sk_buff *skb,
1248 			    unsigned int chan, unsigned int tid)
1249 {
1250 	struct cpl_tid_release *req;
1251 	unsigned int len;
1252 
1253 	len = roundup(sizeof(struct cpl_tid_release), 16);
1254 	req = (struct cpl_tid_release *)__skb_put(skb, len);
1255 	memset(req, 0, len);
1256 	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
1257 	INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid);
1258 }
1259 
1260 static int chtls_get_module(struct sock *sk)
1261 {
1262 	struct inet_connection_sock *icsk = inet_csk(sk);
1263 
1264 	if (!try_module_get(icsk->icsk_ulp_ops->owner))
1265 		return -1;
1266 
1267 	return 0;
1268 }
1269 
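/*
 * Process a CPL_PASS_ACCEPT_REQ on the listening socket: check the SYN and
 * accept queue limits, build a request_sock from the embedded Ethernet/IP/
 * TCP headers, create the child socket and answer with a
 * CPL_PASS_ACCEPT_RPL, or release the TID if the request is rejected.
 */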
1270 static void chtls_pass_accept_request(struct sock *sk,
1271 				      struct sk_buff *skb)
1272 {
1273 	struct cpl_t5_pass_accept_rpl *rpl;
1274 	struct cpl_pass_accept_req *req;
1275 	struct listen_ctx *listen_ctx;
1276 	struct vlan_ethhdr *vlan_eh;
1277 	struct request_sock *oreq;
1278 	struct sk_buff *reply_skb;
1279 	struct chtls_sock *csk;
1280 	struct chtls_dev *cdev;
1281 	struct ipv6hdr *ip6h;
1282 	struct tcphdr *tcph;
1283 	struct sock *newsk;
1284 	struct ethhdr *eh;
1285 	struct iphdr *iph;
1286 	void *network_hdr;
1287 	unsigned int stid;
1288 	unsigned int len;
1289 	unsigned int tid;
1290 	bool th_ecn, ect;
1291 	__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
1292 	u16 eth_hdr_len;
1293 	bool ecn_ok;
1294 
1295 	req = cplhdr(skb) + RSS_HDR;
1296 	tid = GET_TID(req);
1297 	cdev = BLOG_SKB_CB(skb)->cdev;
1298 	newsk = lookup_tid(cdev->tids, tid);
1299 	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
1300 	if (newsk) {
1301 		pr_info("tid (%d) already in use\n", tid);
1302 		return;
1303 	}
1304 
1305 	len = roundup(sizeof(*rpl), 16);
1306 	reply_skb = alloc_skb(len, GFP_ATOMIC);
1307 	if (!reply_skb) {
1308 		cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family);
1309 		kfree_skb(skb);
1310 		return;
1311 	}
1312 
1313 	if (sk->sk_state != TCP_LISTEN)
1314 		goto reject;
1315 
1316 	if (inet_csk_reqsk_queue_is_full(sk))
1317 		goto reject;
1318 
1319 	if (sk_acceptq_is_full(sk))
1320 		goto reject;
1321 
1323 	eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len));
1324 	if (eth_hdr_len == ETH_HLEN) {
1325 		eh = (struct ethhdr *)(req + 1);
1326 		iph = (struct iphdr *)(eh + 1);
1327 		ip6h = (struct ipv6hdr *)(eh + 1);
1328 		network_hdr = (void *)(eh + 1);
1329 	} else {
1330 		vlan_eh = (struct vlan_ethhdr *)(req + 1);
1331 		iph = (struct iphdr *)(vlan_eh + 1);
1332 		ip6h = (struct ipv6hdr *)(vlan_eh + 1);
1333 		network_hdr = (void *)(vlan_eh + 1);
1334 	}
1335 
1336 	if (iph->version == 0x4) {
1337 		tcph = (struct tcphdr *)(iph + 1);
1338 		skb_set_network_header(skb, (void *)iph - (void *)req);
1339 		oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
1340 	} else {
1341 		tcph = (struct tcphdr *)(ip6h + 1);
1342 		skb_set_network_header(skb, (void *)ip6h - (void *)req);
1343 		oreq = inet_reqsk_alloc(&chtls_rsk_opsv6, sk, false);
1344 	}
1345 
1346 	if (!oreq)
1347 		goto reject;
1348 
1349 	oreq->rsk_rcv_wnd = 0;
1350 	oreq->rsk_window_clamp = 0;
1351 	oreq->syncookie = 0;
1352 	oreq->mss = 0;
1353 	oreq->ts_recent = 0;
1354 
1355 	tcp_rsk(oreq)->tfo_listener = false;
1356 	tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
1357 	chtls_set_req_port(oreq, tcph->source, tcph->dest);
1358 	if (iph->version == 0x4) {
1359 		chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
1360 		ip_dsfield = ipv4_get_dsfield(iph);
1361 #if IS_ENABLED(CONFIG_IPV6)
1362 	} else {
1363 		inet_rsk(oreq)->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
1364 		inet_rsk(oreq)->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
1365 		ip_dsfield = ipv6_get_dsfield(ipv6_hdr(skb));
1366 #endif
1367 	}
1368 	if (req->tcpopt.wsf <= 14 &&
1369 	    sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
1370 		inet_rsk(oreq)->wscale_ok = 1;
1371 		inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
1372 	}
1373 	inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if;
1374 	th_ecn = tcph->ece && tcph->cwr;
1375 	if (th_ecn) {
1376 		ect = !INET_ECN_is_not_ect(ip_dsfield);
1377 		ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
1378 		if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
1379 			inet_rsk(oreq)->ecn_ok = 1;
1380 	}
1381 
1382 	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
1383 	if (!newsk)
1384 		goto free_oreq;
1385 
1386 	if (chtls_get_module(newsk))
1387 		goto reject;
1388 	inet_csk_reqsk_queue_added(sk);
1389 	reply_skb->sk = newsk;
1390 	chtls_install_cpl_ops(newsk);
1391 	cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family);
1392 	csk = rcu_dereference_sk_user_data(newsk);
1393 	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
1394 	csk->listen_ctx = listen_ctx;
1395 	__skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq);
1396 	chtls_pass_accept_rpl(reply_skb, req, tid);
1397 	kfree_skb(skb);
1398 	return;
1399 
1400 free_oreq:
1401 	chtls_reqsk_free(oreq);
1402 reject:
1403 	mk_tid_release(reply_skb, 0, tid);
1404 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
1405 	kfree_skb(skb);
1406 }
1407 
1408 /*
1409  * Handle a CPL_PASS_ACCEPT_REQ message.
1410  */
1411 static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
1412 {
1413 	struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR;
1414 	struct listen_ctx *ctx;
1415 	unsigned int stid;
1416 	unsigned int tid;
1417 	struct sock *lsk;
1418 	void *data;
1419 
1420 	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
1421 	tid = GET_TID(req);
1422 
1423 	data = lookup_stid(cdev->tids, stid);
1424 	if (!data)
1425 		return 1;
1426 
1427 	ctx = (struct listen_ctx *)data;
1428 	lsk = ctx->lsk;
1429 
1430 	if (unlikely(tid_out_of_range(cdev->tids, tid))) {
1431 		pr_info("passive open TID %u too large\n", tid);
1432 		return 1;
1433 	}
1434 
1435 	BLOG_SKB_CB(skb)->cdev = cdev;
1436 	process_cpl_msg(chtls_pass_accept_request, lsk, skb);
1437 	return 0;
1438 }
1439 
1440 /*
1441  * Completes some final bits of initialization for just established connections
1442  * and changes their state to TCP_ESTABLISHED.
1443  *
1444  * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
1445  */
1446 static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
1447 {
1448 	struct tcp_sock *tp = tcp_sk(sk);
1449 
1450 	tp->pushed_seq = snd_isn;
1451 	tp->write_seq = snd_isn;
1452 	tp->snd_nxt = snd_isn;
1453 	tp->snd_una = snd_isn;
1454 	inet_sk(sk)->inet_id = prandom_u32();
1455 	assign_rxopt(sk, opt);
1456 
1457 	if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
1458 		tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10);
1459 
1460 	smp_mb();
1461 	tcp_set_state(sk, TCP_ESTABLISHED);
1462 }
1463 
1464 static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb)
1465 {
1466 	struct sk_buff *abort_skb;
1467 
1468 	abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
1469 	if (abort_skb)
1470 		chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb);
1471 }
1472 
1473 static struct sock *reap_list;
1474 static DEFINE_SPINLOCK(reap_list_lock);
1475 
1476 /*
1477  * Process the reap list.
1478  */
1479 DECLARE_TASK_FUNC(process_reap_list, task_param)
1480 {
1481 	spin_lock_bh(&reap_list_lock);
1482 	while (reap_list) {
1483 		struct sock *sk = reap_list;
1484 		struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1485 
1486 		reap_list = csk->passive_reap_next;
1487 		csk->passive_reap_next = NULL;
1488 		spin_unlock(&reap_list_lock);
1489 		sock_hold(sk);
1490 
1491 		bh_lock_sock(sk);
1492 		chtls_abort_conn(sk, NULL);
1493 		sock_orphan(sk);
1494 		if (sk->sk_state == TCP_CLOSE)
1495 			inet_csk_destroy_sock(sk);
1496 		bh_unlock_sock(sk);
1497 		sock_put(sk);
1498 		spin_lock(&reap_list_lock);
1499 	}
1500 	spin_unlock_bh(&reap_list_lock);
1501 }
1502 
1503 static DECLARE_WORK(reap_task, process_reap_list);
1504 
1505 static void add_to_reap_list(struct sock *sk)
1506 {
1507 	struct chtls_sock *csk = sk->sk_user_data;
1508 
1509 	local_bh_disable();
1510 	bh_lock_sock(sk);
1511 	release_tcp_port(sk); /* release the port immediately */
1512 
1513 	spin_lock(&reap_list_lock);
1514 	csk->passive_reap_next = reap_list;
1515 	reap_list = sk;
1516 	if (!csk->passive_reap_next)
1517 		schedule_work(&reap_task);
1518 	spin_unlock(&reap_list_lock);
1519 	bh_unlock_sock(sk);
1520 	local_bh_enable();
1521 }
1522 
1523 static void add_pass_open_to_parent(struct sock *child, struct sock *lsk,
1524 				    struct chtls_dev *cdev)
1525 {
1526 	struct request_sock *oreq;
1527 	struct chtls_sock *csk;
1528 
1529 	if (lsk->sk_state != TCP_LISTEN)
1530 		return;
1531 
1532 	csk = child->sk_user_data;
1533 	oreq = csk->passive_reap_next;
1534 	csk->passive_reap_next = NULL;
1535 
1536 	reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq);
1537 	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
1538 
1539 	if (sk_acceptq_is_full(lsk)) {
1540 		chtls_reqsk_free(oreq);
1541 		add_to_reap_list(child);
1542 	} else {
1543 		refcount_set(&oreq->rsk_refcnt, 1);
1544 		inet_csk_reqsk_queue_add(lsk, oreq, child);
1545 		lsk->sk_data_ready(lsk);
1546 	}
1547 }
1548 
1549 static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb)
1550 {
1551 	struct sock *child = skb->sk;
1552 
1553 	skb->sk = NULL;
1554 	add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev);
1555 	kfree_skb(skb);
1556 }
1557 
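/*
 * Handle CPL_PASS_ESTABLISH: the handshake for a passively opened offloaded
 * connection has completed, so move the child to ESTABLISHED and hand it to
 * the listener's accept queue, either directly or via the listener's
 * backlog if the listener is owned by user context.
 */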
1558 static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
1559 {
1560 	struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR;
1561 	struct chtls_sock *csk;
1562 	struct sock *lsk, *sk;
1563 	unsigned int hwtid;
1564 
1565 	hwtid = GET_TID(req);
1566 	sk = lookup_tid(cdev->tids, hwtid);
1567 	if (!sk)
1568 		return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
1569 
1570 	bh_lock_sock(sk);
1571 	if (unlikely(sock_owned_by_user(sk))) {
1572 		kfree_skb(skb);
1573 	} else {
1574 		unsigned int stid;
1575 		void *data;
1576 
1577 		csk = sk->sk_user_data;
1578 		csk->wr_max_credits = 64;
1579 		csk->wr_credits = 64;
1580 		csk->wr_unacked = 0;
1581 		make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
1582 		stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
1583 		sk->sk_state_change(sk);
1584 		if (unlikely(sk->sk_socket))
1585 			sk_wake_async(sk, 0, POLL_OUT);
1586 
1587 		data = lookup_stid(cdev->tids, stid);
1588 		lsk = ((struct listen_ctx *)data)->lsk;
1589 
1590 		bh_lock_sock(lsk);
1591 		if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) {
1592 			/* removed from synq */
1593 			bh_unlock_sock(lsk);
1594 			kfree_skb(skb);
1595 			goto unlock;
1596 		}
1597 
1598 		if (likely(!sock_owned_by_user(lsk))) {
1599 			kfree_skb(skb);
1600 			add_pass_open_to_parent(sk, lsk, cdev);
1601 		} else {
1602 			skb->sk = sk;
1603 			BLOG_SKB_CB(skb)->cdev = cdev;
1604 			BLOG_SKB_CB(skb)->backlog_rcv =
1605 				bl_add_pass_open_to_parent;
1606 			__sk_add_backlog(lsk, skb);
1607 		}
1608 		bh_unlock_sock(lsk);
1609 	}
1610 unlock:
1611 	bh_unlock_sock(sk);
1612 	return 0;
1613 }
1614 
1615 /*
1616  * Handle receipt of an urgent pointer.
1617  */
1618 static void handle_urg_ptr(struct sock *sk, u32 urg_seq)
1619 {
1620 	struct tcp_sock *tp = tcp_sk(sk);
1621 
1622 	urg_seq--;
1623 	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
1624 		return;	/* duplicate pointer */
1625 
1626 	sk_send_sigurg(sk);
1627 	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
1628 	    !sock_flag(sk, SOCK_URGINLINE) &&
1629 	    tp->copied_seq != tp->rcv_nxt) {
1630 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1631 
1632 		tp->copied_seq++;
1633 		if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len)
1634 			chtls_free_skb(sk, skb);
1635 	}
1636 
1637 	tp->urg_data = TCP_URG_NOTYET;
1638 	tp->urg_seq = urg_seq;
1639 }
1640 
1641 static void check_sk_callbacks(struct chtls_sock *csk)
1642 {
1643 	struct sock *sk = csk->sk;
1644 
1645 	if (unlikely(sk->sk_user_data &&
1646 		     !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD)))
1647 		csk_set_flag(csk, CSK_CALLBACKS_CHKD);
1648 }
1649 
1650 /*
1651  * Handles Rx data that arrives in a state where the socket isn't accepting
1652  * new data.
1653  */
1654 static void handle_excess_rx(struct sock *sk, struct sk_buff *skb)
1655 {
1656 	if (!csk_flag(sk, CSK_ABORT_SHUTDOWN))
1657 		chtls_abort_conn(sk, skb);
1658 
1659 	kfree_skb(skb);
1660 }
1661 
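/*
 * Deliver payload received in a CPL_RX_DATA message for a non-TLS offloaded
 * connection: record the sequence number, strip the CPL header, advance
 * rcv_nxt and queue the data on the socket's receive queue.
 */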
1662 static void chtls_recv_data(struct sock *sk, struct sk_buff *skb)
1663 {
1664 	struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR;
1665 	struct chtls_sock *csk;
1666 	struct tcp_sock *tp;
1667 
1668 	csk = rcu_dereference_sk_user_data(sk);
1669 	tp = tcp_sk(sk);
1670 
1671 	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
1672 		handle_excess_rx(sk, skb);
1673 		return;
1674 	}
1675 
1676 	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
1677 	ULP_SKB_CB(skb)->psh = hdr->psh;
1678 	skb_ulp_mode(skb) = ULP_MODE_NONE;
1679 
1680 	skb_reset_transport_header(skb);
1681 	__skb_pull(skb, sizeof(*hdr) + RSS_HDR);
1682 	if (!skb->data_len)
1683 		__skb_trim(skb, ntohs(hdr->len));
1684 
1685 	if (unlikely(hdr->urg))
1686 		handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg));
1687 	if (unlikely(tp->urg_data == TCP_URG_NOTYET &&
1688 		     tp->urg_seq - tp->rcv_nxt < skb->len))
1689 		tp->urg_data = TCP_URG_VALID |
1690 			       skb->data[tp->urg_seq - tp->rcv_nxt];
1691 
1692 	if (unlikely(hdr->dack_mode != csk->delack_mode)) {
1693 		csk->delack_mode = hdr->dack_mode;
1694 		csk->delack_seq = tp->rcv_nxt;
1695 	}
1696 
1697 	tcp_hdr(skb)->fin = 0;
1698 	tp->rcv_nxt += skb->len;
1699 
1700 	__skb_queue_tail(&sk->sk_receive_queue, skb);
1701 
1702 	if (!sock_flag(sk, SOCK_DEAD)) {
1703 		check_sk_callbacks(csk);
1704 		sk->sk_data_ready(sk);
1705 	}
1706 }
1707 
1708 static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
1709 {
1710 	struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR;
1711 	unsigned int hwtid = GET_TID(req);
1712 	struct sock *sk;
1713 
1714 	sk = lookup_tid(cdev->tids, hwtid);
1715 	if (unlikely(!sk)) {
1716 		pr_err("can't find conn. for hwtid %u.\n", hwtid);
1717 		return -EINVAL;
1718 	}
1719 	skb_dst_set(skb, NULL);
1720 	process_cpl_msg(chtls_recv_data, sk, skb);
1721 	return 0;
1722 }
1723 
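/*
 * Handle CPL_TLS_DATA: buffer the raw TLS record payload on the
 * connection's TLS receive queue until the matching CPL_RX_TLS_CMP
 * completion arrives.
 */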
1724 static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb)
1725 {
1726 	struct cpl_tls_data *hdr = cplhdr(skb);
1727 	struct chtls_sock *csk;
1728 	struct chtls_hws *tlsk;
1729 	struct tcp_sock *tp;
1730 
1731 	csk = rcu_dereference_sk_user_data(sk);
1732 	tlsk = &csk->tlshws;
1733 	tp = tcp_sk(sk);
1734 
1735 	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
1736 		handle_excess_rx(sk, skb);
1737 		return;
1738 	}
1739 
1740 	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
1741 	ULP_SKB_CB(skb)->flags = 0;
1742 	skb_ulp_mode(skb) = ULP_MODE_TLS;
1743 
1744 	skb_reset_transport_header(skb);
1745 	__skb_pull(skb, sizeof(*hdr));
1746 	if (!skb->data_len)
1747 		__skb_trim(skb,
1748 			   CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)));
1749 
1750 	if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq -
1751 		     tp->rcv_nxt < skb->len))
1752 		tp->urg_data = TCP_URG_VALID |
1753 			       skb->data[tp->urg_seq - tp->rcv_nxt];
1754 
1755 	tcp_hdr(skb)->fin = 0;
1756 	tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd));
1757 	__skb_queue_tail(&tlsk->sk_recv_queue, skb);
1758 }
1759 
1760 static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
1761 {
1762 	struct cpl_tls_data *req = cplhdr(skb);
1763 	unsigned int hwtid = GET_TID(req);
1764 	struct sock *sk;
1765 
1766 	sk = lookup_tid(cdev->tids, hwtid);
1767 	if (unlikely(!sk)) {
1768 		pr_err("can't find conn. for hwtid %u.\n", hwtid);
1769 		return -EINVAL;
1770 	}
1771 	skb_dst_set(skb, NULL);
1772 	process_cpl_msg(chtls_recv_pdu, sk, skb);
1773 	return 0;
1774 }
1775 
1776 static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen)
1777 {
1778 	struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb);
1779 
1780 	skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length);
1781 	tls_cmp_hdr->length = ntohs((__force __be16)nlen);
1782 }
1783 
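/*
 * Handle CPL_RX_TLS_CMP, the completion for a received TLS record: queue
 * the completion header on the socket receive queue, followed by the
 * payload buffered by chtls_recv_pdu(), and wake up the receiver.
 */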
1784 static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
1785 {
1786 	struct tlsrx_cmp_hdr *tls_hdr_pkt;
1787 	struct cpl_rx_tls_cmp *cmp_cpl;
1788 	struct sk_buff *skb_rec;
1789 	struct chtls_sock *csk;
1790 	struct chtls_hws *tlsk;
1791 	struct tcp_sock *tp;
1792 
1793 	cmp_cpl = cplhdr(skb);
1794 	csk = rcu_dereference_sk_user_data(sk);
1795 	tlsk = &csk->tlshws;
1796 	tp = tcp_sk(sk);
1797 
1798 	ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq);
1799 	ULP_SKB_CB(skb)->flags = 0;
1800 
1801 	skb_reset_transport_header(skb);
1802 	__skb_pull(skb, sizeof(*cmp_cpl));
1803 	tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data;
1804 	if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M)
1805 		tls_hdr_pkt->type = CONTENT_TYPE_ERROR;
1806 	if (!skb->data_len)
1807 		__skb_trim(skb, TLS_HEADER_LENGTH);
1808 
1809 	tp->rcv_nxt +=
1810 		CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));
1811 
1812 	ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR;
1813 	skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
1814 	if (!skb_rec) {
1815 		__skb_queue_tail(&sk->sk_receive_queue, skb);
1816 	} else {
1817 		chtls_set_hdrlen(skb, tlsk->pldlen);
1818 		tlsk->pldlen = 0;
1819 		__skb_queue_tail(&sk->sk_receive_queue, skb);
1820 		__skb_queue_tail(&sk->sk_receive_queue, skb_rec);
1821 	}
1822 
1823 	if (!sock_flag(sk, SOCK_DEAD)) {
1824 		check_sk_callbacks(csk);
1825 		sk->sk_data_ready(sk);
1826 	}
1827 }
1828 
1829 static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
1830 {
1831 	struct cpl_rx_tls_cmp *req = cplhdr(skb);
1832 	unsigned int hwtid = GET_TID(req);
1833 	struct sock *sk;
1834 
1835 	sk = lookup_tid(cdev->tids, hwtid);
1836 	if (unlikely(!sk)) {
1837 		pr_err("can't find conn. for hwtid %u.\n", hwtid);
1838 		return -EINVAL;
1839 	}
1840 	skb_dst_set(skb, NULL);
1841 	process_cpl_msg(chtls_rx_hdr, sk, skb);
1842 
1843 	return 0;
1844 }
1845 
1846 static void chtls_timewait(struct sock *sk)
1847 {
1848 	struct tcp_sock *tp = tcp_sk(sk);
1849 
1850 	tp->rcv_nxt++;
1851 	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
1852 	tp->srtt_us = 0;
1853 	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
1854 }
1855 
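/*
 * Handle CPL_PEER_CLOSE (the peer sent a FIN): advance the connection state
 * machine and release resources once both directions are shut down.
 */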
1856 static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
1857 {
1858 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1859 
1860 	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
1861 		goto out;
1862 
1863 	sk->sk_shutdown |= RCV_SHUTDOWN;
1864 	sock_set_flag(sk, SOCK_DONE);
1865 
1866 	switch (sk->sk_state) {
1867 	case TCP_SYN_RECV:
1868 	case TCP_ESTABLISHED:
1869 		tcp_set_state(sk, TCP_CLOSE_WAIT);
1870 		break;
1871 	case TCP_FIN_WAIT1:
1872 		tcp_set_state(sk, TCP_CLOSING);
1873 		break;
1874 	case TCP_FIN_WAIT2:
1875 		chtls_release_resources(sk);
1876 		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
1877 			chtls_conn_done(sk);
1878 		else
1879 			chtls_timewait(sk);
1880 		break;
1881 	default:
1882 		pr_info("cpl_peer_close in bad state %d\n", sk->sk_state);
1883 	}
1884 
1885 	if (!sock_flag(sk, SOCK_DEAD)) {
1886 		sk->sk_state_change(sk);
1887 		/* Do not send POLL_HUP for half duplex close. */
1888 
1889 		if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
1890 		    sk->sk_state == TCP_CLOSE)
1891 			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
1892 		else
1893 			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1894 	}
1895 out:
1896 	kfree_skb(skb);
1897 }
1898 
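/*
 * Handle CPL_CLOSE_CON_RPL, the adapter's notification that the local close
 * has completed: advance the state machine and release resources when the
 * close handshake is finished.
 */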
1899 static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
1900 {
1901 	struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR;
1902 	struct chtls_sock *csk;
1903 	struct tcp_sock *tp;
1904 
1905 	csk = rcu_dereference_sk_user_data(sk);
1906 
1907 	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
1908 		goto out;
1909 
1910 	tp = tcp_sk(sk);
1911 
1912 	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
1913 
1914 	switch (sk->sk_state) {
1915 	case TCP_CLOSING:
1916 		chtls_release_resources(sk);
1917 		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
1918 			chtls_conn_done(sk);
1919 		else
1920 			chtls_timewait(sk);
1921 		break;
1922 	case TCP_LAST_ACK:
1923 		chtls_release_resources(sk);
1924 		chtls_conn_done(sk);
1925 		break;
1926 	case TCP_FIN_WAIT1:
1927 		tcp_set_state(sk, TCP_FIN_WAIT2);
1928 		sk->sk_shutdown |= SEND_SHUTDOWN;
1929 
1930 		if (!sock_flag(sk, SOCK_DEAD))
1931 			sk->sk_state_change(sk);
1932 		else if (tcp_sk(sk)->linger2 < 0 &&
1933 			 !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN))
1934 			chtls_abort_conn(sk, skb);
1935 		break;
1936 	default:
1937 		pr_info("close_con_rpl in bad state %d\n", sk->sk_state);
1938 	}
1939 out:
1940 	kfree_skb(skb);
1941 }
1942 
1943 static struct sk_buff *get_cpl_skb(struct sk_buff *skb,
1944 				   size_t len, gfp_t gfp)
1945 {
1946 	if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) {
1947 		WARN_ONCE(skb->len < len, "skb alloc error");
1948 		__skb_trim(skb, len);
1949 		skb_get(skb);
1950 	} else {
1951 		skb = alloc_skb(len, gfp);
1952 		if (skb)
1953 			__skb_put(skb, len);
1954 	}
1955 	return skb;
1956 }
1957 
1958 static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
1959 			     int cmd)
1960 {
1961 	struct cpl_abort_rpl *rpl = cplhdr(skb);
1962 
1963 	INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid);
1964 	rpl->cmd = cmd;
1965 }
1966 
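/*
 * Deferred transmission of an abort reply, run from process context via
 * the device's defer queue, where the __GFP_NOFAIL allocation is allowed.
 */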
1967 static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
1968 {
1969 	struct cpl_abort_req_rss *req = cplhdr(skb);
1970 	struct sk_buff *reply_skb;
1971 
1972 	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
1973 			      GFP_KERNEL | __GFP_NOFAIL);
1974 	__skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
1975 	set_abort_rpl_wr(reply_skb, GET_TID(req),
1976 			 (req->status & CPL_ABORT_NO_RST));
1977 	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1);
1978 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
1979 	kfree_skb(skb);
1980 }
1981 
1982 /*
1983  * Add an skb to the deferred skb queue for processing from process context.
1984  */
1985 static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
1986 			   defer_handler_t handler)
1987 {
1988 	DEFERRED_SKB_CB(skb)->handler = handler;
1989 	spin_lock_bh(&cdev->deferq.lock);
1990 	__skb_queue_tail(&cdev->deferq, skb);
1991 	if (skb_queue_len(&cdev->deferq) == 1)
1992 		schedule_work(&cdev->deferq_task);
1993 	spin_unlock_bh(&cdev->deferq.lock);
1994 }
1995 
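/*
 * Send a reply to an abort request.  If no memory is available for the
 * reply skb, defer the reply to process context rather than dropping it.
 */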
1996 static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
1997 			   struct chtls_dev *cdev, int status, int queue)
1998 {
1999 	struct cpl_abort_req_rss *req = cplhdr(skb);
2000 	struct sk_buff *reply_skb;
2001 	struct chtls_sock *csk;
2002 
2003 	csk = rcu_dereference_sk_user_data(sk);
2004 
2005 	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
2006 			      GFP_KERNEL);
2007 
2008 	if (!reply_skb) {
2009 		req->status = (queue << 1);
2010 		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
2011 		return;
2012 	}
2013 
2014 	set_abort_rpl_wr(reply_skb, GET_TID(req), status);
2015 	kfree_skb(skb);
2016 
2017 	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
2018 	if (csk_conn_inline(csk)) {
2019 		struct l2t_entry *e = csk->l2t_entry;
2020 
2021 		if (e && sk->sk_state != TCP_SYN_RECV) {
2022 			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
2023 			return;
2024 		}
2025 	}
2026 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
2027 }
2028 
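/*
 * Like send_abort_rpl(), but for abort requests that carry an RSS header.
 * Tries to reuse the request skb for the reply and, if the reply must be
 * deferred, encodes the status in the saved request.
 */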
2029 static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
2030 				 struct chtls_dev *cdev,
2031 				 int status, int queue)
2032 {
2033 	struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
2034 	struct sk_buff *reply_skb;
2035 	struct chtls_sock *csk;
2036 	unsigned int tid;
2037 
2038 	csk = rcu_dereference_sk_user_data(sk);
2039 	tid = GET_TID(req);
2040 
2041 	reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any());
2042 	if (!reply_skb) {
2043 		req->status = (queue << 1) | status;
2044 		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
2045 		return;
2046 	}
2047 
2048 	set_abort_rpl_wr(reply_skb, tid, status);
2049 	kfree_skb(skb);
2050 	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
2051 	if (csk_conn_inline(csk)) {
2052 		struct l2t_entry *e = csk->l2t_entry;
2053 
2054 		if (e && sk->sk_state != TCP_SYN_RECV) {
2055 			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
2056 			return;
2057 		}
2058 	}
2059 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
2060 }
2061 
2062 /*
2063  * This is run from a listener's backlog to abort a child connection in
2064  * SYN_RCV state (i.e., one on the listener's SYN queue).
2065  */
2066 static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
2067 {
2068 	struct chtls_sock *csk;
2069 	struct sock *child;
2070 	int queue;
2071 
2072 	child = skb->sk;
2073 	csk = rcu_dereference_sk_user_data(child);
2074 	queue = csk->txq_idx;
2075 
	skb->sk = NULL;
2077 	do_abort_syn_rcv(child, lsk);
2078 	send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
2079 		       CPL_ABORT_NO_RST, queue);
2080 }
2081 
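/*
 * Abort a connection that is still in SYN_RECV.  The cleanup needs the
 * listening socket's lock; if the listener is owned by user context the
 * work is pushed onto its backlog instead.
 */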
2082 static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
2083 {
2084 	const struct request_sock *oreq;
2085 	struct listen_ctx *listen_ctx;
2086 	struct chtls_sock *csk;
2087 	struct chtls_dev *cdev;
2088 	struct sock *psk;
2089 	void *ctx;
2090 
2091 	csk = sk->sk_user_data;
2092 	oreq = csk->passive_reap_next;
2093 	cdev = csk->cdev;
2094 
2095 	if (!oreq)
2096 		return -1;
2097 
2098 	ctx = lookup_stid(cdev->tids, oreq->ts_recent);
2099 	if (!ctx)
2100 		return -1;
2101 
2102 	listen_ctx = (struct listen_ctx *)ctx;
2103 	psk = listen_ctx->lsk;
2104 
2105 	bh_lock_sock(psk);
2106 	if (!sock_owned_by_user(psk)) {
2107 		int queue = csk->txq_idx;
2108 
2109 		do_abort_syn_rcv(sk, psk);
2110 		send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
2111 	} else {
2112 		skb->sk = sk;
2113 		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
2114 		__sk_add_backlog(psk, skb);
2115 	}
2116 	bh_unlock_sock(psk);
2117 	return 0;
2118 }
2119 
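/*
 * Handle an abort request (CPL_ABORT_REQ_RSS) from the hardware.
 * Negative advice is dropped; otherwise report the error to the socket,
 * release the connection's resources and send back an abort reply.
 */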
2120 static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
2121 {
2122 	const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
2123 	struct chtls_sock *csk = sk->sk_user_data;
2124 	int rst_status = CPL_ABORT_NO_RST;
2125 	int queue = csk->txq_idx;
2126 
2127 	if (is_neg_adv(req->status)) {
2128 		if (sk->sk_state == TCP_SYN_RECV)
2129 			chtls_set_tcb_tflag(sk, 0, 0);
2130 
2131 		kfree_skb(skb);
2132 		return;
2133 	}
2134 
2135 	csk_reset_flag(csk, CSK_ABORT_REQ_RCVD);
2136 
2137 	if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) &&
2138 	    !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
2139 		struct tcp_sock *tp = tcp_sk(sk);
2140 
2141 		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
2142 			WARN_ONCE(1, "send_tx_flowc error");
2143 		csk_set_flag(csk, CSK_TX_DATA_SENT);
2144 	}
2145 
2146 	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
2147 
2148 	if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
2149 		sk->sk_err = ETIMEDOUT;
2150 
2151 		if (!sock_flag(sk, SOCK_DEAD))
2152 			sk->sk_error_report(sk);
2153 
2154 		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
2155 			return;
2156 
2157 		chtls_release_resources(sk);
2158 		chtls_conn_done(sk);
2159 	}
2160 
2161 	chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev,
2162 			     rst_status, queue);
2163 }
2164 
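/*
 * Handle an abort reply (CPL_ABORT_RPL_RSS).  Completes a locally
 * initiated abort unless the peer's own abort request has also been
 * received, in which case that path performs the cleanup.
 */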
2165 static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
2166 {
2167 	struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR;
2168 	struct chtls_sock *csk;
2169 	struct chtls_dev *cdev;
2170 
2171 	csk = rcu_dereference_sk_user_data(sk);
2172 	cdev = csk->cdev;
2173 
2174 	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
2175 		csk_reset_flag(csk, CSK_ABORT_RPL_PENDING);
2176 		if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) {
2177 			if (sk->sk_state == TCP_SYN_SENT) {
2178 				cxgb4_remove_tid(cdev->tids,
2179 						 csk->port_id,
2180 						 GET_TID(rpl),
2181 						 sk->sk_family);
2182 				sock_put(sk);
2183 			}
2184 			chtls_release_resources(sk);
2185 			chtls_conn_done(sk);
2186 		}
2187 	}
2188 	kfree_skb(skb);
2189 }
2190 
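/*
 * Demultiplex connection-management CPLs (peer close, close reply and
 * abort request/reply) to the per-socket handlers above.
 */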
2191 static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb)
2192 {
2193 	struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR;
2194 	void (*fn)(struct sock *sk, struct sk_buff *skb);
2195 	unsigned int hwtid = GET_TID(req);
2196 	struct chtls_sock *csk;
2197 	struct sock *sk;
2198 	u8 opcode;
2199 
2200 	opcode = ((const struct rss_header *)cplhdr(skb))->opcode;
2201 
2202 	sk = lookup_tid(cdev->tids, hwtid);
2203 	if (!sk)
2204 		goto rel_skb;
2205 
2206 	csk = sk->sk_user_data;
2207 
2208 	switch (opcode) {
2209 	case CPL_PEER_CLOSE:
2210 		fn = chtls_peer_close;
2211 		break;
2212 	case CPL_CLOSE_CON_RPL:
2213 		fn = chtls_close_con_rpl;
2214 		break;
2215 	case CPL_ABORT_REQ_RSS:
		/*
		 * Save the offload device in the skb; we may process this
		 * message after the socket has closed.
		 */
2220 		BLOG_SKB_CB(skb)->cdev = csk->cdev;
2221 		fn = chtls_abort_req_rss;
2222 		break;
2223 	case CPL_ABORT_RPL_RSS:
2224 		fn = chtls_abort_rpl_rss;
2225 		break;
2226 	default:
2227 		goto rel_skb;
2228 	}
2229 
2230 	process_cpl_msg(fn, sk, skb);
2231 	return 0;
2232 
2233 rel_skb:
2234 	kfree_skb(skb);
2235 	return 0;
2236 }
2237 
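/*
 * Handle a CPL_FW4_ACK: return TX credits for completed work requests
 * (each queued WR skb keeps its credit count in skb->csum), update
 * snd_una when the message carries a valid sequence number, and push any
 * frames still waiting in the tx queue.
 */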
2238 static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
2239 {
2240 	struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
2241 	struct chtls_sock *csk = sk->sk_user_data;
2242 	struct tcp_sock *tp = tcp_sk(sk);
2243 	u32 credits = hdr->credits;
2244 	u32 snd_una;
2245 
2246 	snd_una = ntohl(hdr->snd_una);
2247 	csk->wr_credits += credits;
2248 
2249 	if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits)
2250 		csk->wr_unacked = csk->wr_max_credits - csk->wr_credits;
2251 
2252 	while (credits) {
2253 		struct sk_buff *pskb = csk->wr_skb_head;
2254 		u32 csum;
2255 
2256 		if (unlikely(!pskb)) {
2257 			if (csk->wr_nondata)
2258 				csk->wr_nondata -= credits;
2259 			break;
2260 		}
2261 		csum = (__force u32)pskb->csum;
2262 		if (unlikely(credits < csum)) {
2263 			pskb->csum = (__force __wsum)(csum - credits);
2264 			break;
2265 		}
2266 		dequeue_wr(sk);
2267 		credits -= csum;
2268 		kfree_skb(pskb);
2269 	}
2270 	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
2271 		if (unlikely(before(snd_una, tp->snd_una))) {
2272 			kfree_skb(skb);
2273 			return;
2274 		}
2275 
2276 		if (tp->snd_una != snd_una) {
2277 			tp->snd_una = snd_una;
2278 			tp->rcv_tstamp = tcp_time_stamp(tp);
2279 			if (tp->snd_una == tp->snd_nxt &&
2280 			    !csk_flag_nochk(csk, CSK_TX_FAILOVER))
2281 				csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
2282 		}
2283 	}
2284 
2285 	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) {
2286 		unsigned int fclen16 = roundup(failover_flowc_wr_len, 16);
2287 
2288 		csk->wr_credits -= fclen16;
2289 		csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
2290 		csk_reset_flag(csk, CSK_TX_FAILOVER);
2291 	}
2292 	if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0))
2293 		sk->sk_write_space(sk);
2294 
2295 	kfree_skb(skb);
2296 }
2297 
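/*
 * Look up the socket for a CPL_FW4_ACK and hand the message to
 * chtls_rx_ack() via process_cpl_msg().
 */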
2298 static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb)
2299 {
2300 	struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR;
2301 	unsigned int hwtid = GET_TID(rpl);
2302 	struct sock *sk;
2303 
2304 	sk = lookup_tid(cdev->tids, hwtid);
2305 	if (unlikely(!sk)) {
		pr_err("can't find connection for hwtid %u\n", hwtid);
2307 		return -EINVAL;
2308 	}
2309 	process_cpl_msg(chtls_rx_ack, sk, skb);
2310 
2311 	return 0;
2312 }
2313 
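/*
 * Dispatch table mapping CPL opcodes received from the adapter to the
 * handlers defined above.
 */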
2314 chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = {
2315 	[CPL_PASS_OPEN_RPL]     = chtls_pass_open_rpl,
2316 	[CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl,
2317 	[CPL_PASS_ACCEPT_REQ]   = chtls_pass_accept_req,
2318 	[CPL_PASS_ESTABLISH]    = chtls_pass_establish,
2319 	[CPL_RX_DATA]           = chtls_rx_data,
2320 	[CPL_TLS_DATA]          = chtls_rx_pdu,
2321 	[CPL_RX_TLS_CMP]        = chtls_rx_cmp,
2322 	[CPL_PEER_CLOSE]        = chtls_conn_cpl,
2323 	[CPL_CLOSE_CON_RPL]     = chtls_conn_cpl,
2324 	[CPL_ABORT_REQ_RSS]     = chtls_conn_cpl,
2325 	[CPL_ABORT_RPL_RSS]     = chtls_conn_cpl,
2326 	[CPL_FW4_ACK]           = chtls_wr_ack,
2327 };
2328