xref: /openbmc/linux/drivers/infiniband/hw/cxgb4/cm.c (revision 3e26a691)
1 /*
2  * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *	  copyright notice, this list of conditions and the following
16  *	  disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *	  copyright notice, this list of conditions and the following
20  *	  disclaimer in the documentation and/or other materials
21  *	  provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/module.h>
33 #include <linux/list.h>
34 #include <linux/workqueue.h>
35 #include <linux/skbuff.h>
36 #include <linux/timer.h>
37 #include <linux/notifier.h>
38 #include <linux/inetdevice.h>
39 #include <linux/ip.h>
40 #include <linux/tcp.h>
41 #include <linux/if_vlan.h>
42 
43 #include <net/neighbour.h>
44 #include <net/netevent.h>
45 #include <net/route.h>
46 #include <net/tcp.h>
47 #include <net/ip6_route.h>
48 #include <net/addrconf.h>
49 
50 #include <rdma/ib_addr.h>
51 
52 #include "iw_cxgb4.h"
53 #include "clip_tbl.h"
54 
55 static char *states[] = {
56 	"idle",
57 	"listen",
58 	"connecting",
59 	"mpa_wait_req",
60 	"mpa_req_sent",
61 	"mpa_req_rcvd",
62 	"mpa_rep_sent",
63 	"fpdu_mode",
64 	"aborting",
65 	"closing",
66 	"moribund",
67 	"dead",
68 	NULL,
69 };
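/*
 * Human-readable names for the endpoint states, indexed by
 * enum c4iw_ep_state (defined in iw_cxgb4.h).  They are used only for
 * debug output such as the PDBG() in state_set() below, so the order
 * must track the enum; the trailing NULL simply terminates the table.
 */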
70 
71 static int nocong;
72 module_param(nocong, int, 0644);
73 MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
74 
75 static int enable_ecn;
76 module_param(enable_ecn, int, 0644);
77 MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
78 
79 static int dack_mode = 1;
80 module_param(dack_mode, int, 0644);
81 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
82 
83 uint c4iw_max_read_depth = 32;
84 module_param(c4iw_max_read_depth, uint, 0644);
85 MODULE_PARM_DESC(c4iw_max_read_depth,
86 		 "Per-connection max ORD/IRD (default=32)");
87 
88 static int enable_tcp_timestamps;
89 module_param(enable_tcp_timestamps, int, 0644);
90 MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
91 
92 static int enable_tcp_sack;
93 module_param(enable_tcp_sack, int, 0644);
94 MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
95 
96 static int enable_tcp_window_scaling = 1;
97 module_param(enable_tcp_window_scaling, int, 0644);
98 MODULE_PARM_DESC(enable_tcp_window_scaling,
99 		 "Enable tcp window scaling (default=1)");
100 
101 int c4iw_debug;
102 module_param(c4iw_debug, int, 0644);
103 MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
104 
105 static int peer2peer = 1;
106 module_param(peer2peer, int, 0644);
107 MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
108 
109 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
110 module_param(p2p_type, int, 0644);
111 MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
112 			   "1=RDMA_READ 0=RDMA_WRITE (default 1)");
113 
114 static int ep_timeout_secs = 60;
115 module_param(ep_timeout_secs, int, 0644);
116 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
117 				   "in seconds (default=60)");
118 
119 static int mpa_rev = 2;
120 module_param(mpa_rev, int, 0644);
121 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
122 		"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
123 		" compliant (default=2)");
124 
125 static int markers_enabled;
126 module_param(markers_enabled, int, 0644);
127 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
128 
129 static int crc_enabled = 1;
130 module_param(crc_enabled, int, 0644);
131 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
132 
133 static int rcv_win = 256 * 1024;
134 module_param(rcv_win, int, 0644);
135 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
136 
137 static int snd_win = 128 * 1024;
138 module_param(snd_win, int, 0644);
139 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
140 
141 static struct workqueue_struct *workq;
142 
143 static struct sk_buff_head rxq;
144 
145 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
146 static void ep_timeout(unsigned long arg);
147 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
148 
149 static LIST_HEAD(timeout_list);
150 static spinlock_t timeout_lock;
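/*
 * Endpoints whose ep_timeout() handler fires are queued on timeout_list
 * under timeout_lock and are expected to be processed later in work
 * queue context; the code that drains the list is not part of this
 * excerpt.
 */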
151 
152 static void deref_qp(struct c4iw_ep *ep)
153 {
154 	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
155 	clear_bit(QP_REFERENCED, &ep->com.flags);
156 }
157 
158 static void ref_qp(struct c4iw_ep *ep)
159 {
160 	set_bit(QP_REFERENCED, &ep->com.flags);
161 	c4iw_qp_add_ref(&ep->com.qp->ibqp);
162 }
163 
164 static void start_ep_timer(struct c4iw_ep *ep)
165 {
166 	PDBG("%s ep %p\n", __func__, ep);
167 	if (timer_pending(&ep->timer)) {
168 		pr_err("%s timer already started! ep %p\n",
169 		       __func__, ep);
170 		return;
171 	}
172 	clear_bit(TIMEOUT, &ep->com.flags);
173 	c4iw_get_ep(&ep->com);
174 	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
175 	ep->timer.data = (unsigned long)ep;
176 	ep->timer.function = ep_timeout;
177 	add_timer(&ep->timer);
178 }
179 
180 static int stop_ep_timer(struct c4iw_ep *ep)
181 {
182 	PDBG("%s ep %p stopping\n", __func__, ep);
183 	del_timer_sync(&ep->timer);
184 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
185 		c4iw_put_ep(&ep->com);
186 		return 0;
187 	}
188 	return 1;
189 }
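/*
 * start_ep_timer() takes a reference on the endpoint that is normally
 * dropped by stop_ep_timer().  stop_ep_timer() returns 0 when it
 * dropped that reference (the timer had not expired) and 1 when the
 * TIMEOUT bit was already set, meaning the timeout handler has run and
 * the reference is released on the timeout path instead.
 */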
190 
191 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
192 		  struct l2t_entry *l2e)
193 {
194 	int	error = 0;
195 
196 	if (c4iw_fatal_error(rdev)) {
197 		kfree_skb(skb);
198 		PDBG("%s - device in error state - dropping\n", __func__);
199 		return -EIO;
200 	}
201 	error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
202 	if (error < 0)
203 		kfree_skb(skb);
204 	return error < 0 ? error : 0;
205 }
206 
207 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
208 {
209 	int	error = 0;
210 
211 	if (c4iw_fatal_error(rdev)) {
212 		kfree_skb(skb);
213 		PDBG("%s - device in error state - dropping\n", __func__);
214 		return -EIO;
215 	}
216 	error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
217 	if (error < 0)
218 		kfree_skb(skb);
219 	return error < 0 ? error : 0;
220 }
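/*
 * Both send helpers above consume the skb on failure: if the device is
 * in a fatal error state, or the lower-level send returns an error, the
 * skb is freed here and a negative errno is returned, so callers must
 * not free or reuse the skb after an error.
 */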
221 
222 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
223 {
224 	struct cpl_tid_release *req;
225 
226 	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
227 	if (!skb)
228 		return;
229 	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
230 	INIT_TP_WR(req, hwtid);
231 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
232 	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
233 	c4iw_ofld_send(rdev, skb);
234 	return;
235 }
236 
237 static void set_emss(struct c4iw_ep *ep, u16 opt)
238 {
239 	ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
240 		   ((AF_INET == ep->com.remote_addr.ss_family) ?
241 		    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
242 		   sizeof(struct tcphdr);
243 	ep->mss = ep->emss;
244 	if (TCPOPT_TSTAMP_G(opt))
245 		ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
246 	if (ep->emss < 128)
247 		ep->emss = 128;
248 	if (ep->emss & 7)
249 		PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
250 		     TCPOPT_MSS_G(opt), ep->mss, ep->emss);
251 	PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
252 	     ep->mss, ep->emss);
253 }
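/*
 * set_emss() derives the effective MSS from the negotiated MTU index:
 * the firmware MTU table entry minus the IPv4/IPv6 and TCP headers,
 * further reduced by the padded timestamp option when timestamps were
 * negotiated, and clamped to a minimum of 128 bytes.
 */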
254 
255 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
256 {
257 	enum c4iw_ep_state state;
258 
259 	mutex_lock(&epc->mutex);
260 	state = epc->state;
261 	mutex_unlock(&epc->mutex);
262 	return state;
263 }
264 
265 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
266 {
267 	epc->state = new;
268 }
269 
270 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
271 {
272 	mutex_lock(&epc->mutex);
273 	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
274 	__state_set(epc, new);
275 	mutex_unlock(&epc->mutex);
276 	return;
277 }
278 
279 static void *alloc_ep(int size, gfp_t gfp)
280 {
281 	struct c4iw_ep_common *epc;
282 
283 	epc = kzalloc(size, gfp);
284 	if (epc) {
285 		kref_init(&epc->kref);
286 		mutex_init(&epc->mutex);
287 		c4iw_init_wr_wait(&epc->wr_wait);
288 	}
289 	PDBG("%s alloc ep %p\n", __func__, epc);
290 	return epc;
291 }
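/*
 * Endpoints are reference counted through epc->kref; _c4iw_free_ep()
 * below is the kref release function.  It releases the hardware TID,
 * CLIP entry, dst and L2T entry only when RELEASE_RESOURCES has been
 * set (see release_ep_resources()).
 */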
292 
293 void _c4iw_free_ep(struct kref *kref)
294 {
295 	struct c4iw_ep *ep;
296 
297 	ep = container_of(kref, struct c4iw_ep, com.kref);
298 	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
299 	if (test_bit(QP_REFERENCED, &ep->com.flags))
300 		deref_qp(ep);
301 	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
302 		if (ep->com.remote_addr.ss_family == AF_INET6) {
303 			struct sockaddr_in6 *sin6 =
304 					(struct sockaddr_in6 *)
305 					&ep->com.local_addr;
306 
307 			cxgb4_clip_release(
308 					ep->com.dev->rdev.lldi.ports[0],
309 					(const u32 *)&sin6->sin6_addr.s6_addr,
310 					1);
311 		}
312 		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
313 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
314 		dst_release(ep->dst);
315 		cxgb4_l2t_release(ep->l2t);
316 	}
317 	kfree(ep);
318 }
319 
320 static void release_ep_resources(struct c4iw_ep *ep)
321 {
322 	set_bit(RELEASE_RESOURCES, &ep->com.flags);
323 	c4iw_put_ep(&ep->com);
324 }
325 
326 static int status2errno(int status)
327 {
328 	switch (status) {
329 	case CPL_ERR_NONE:
330 		return 0;
331 	case CPL_ERR_CONN_RESET:
332 		return -ECONNRESET;
333 	case CPL_ERR_ARP_MISS:
334 		return -EHOSTUNREACH;
335 	case CPL_ERR_CONN_TIMEDOUT:
336 		return -ETIMEDOUT;
337 	case CPL_ERR_TCAM_FULL:
338 		return -ENOMEM;
339 	case CPL_ERR_CONN_EXIST:
340 		return -EADDRINUSE;
341 	default:
342 		return -EIO;
343 	}
344 }
345 
346 /*
347  * Try to reuse skbs that were already allocated.
348  */
349 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
350 {
351 	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
352 		skb_trim(skb, 0);
353 		skb_get(skb);
354 		skb_reset_transport_header(skb);
355 	} else {
356 		skb = alloc_skb(len, gfp);
357 	}
358 	t4_set_arp_err_handler(skb, NULL, NULL);
359 	return skb;
360 }
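/*
 * get_skb() reuses the passed-in skb when it is linear and not cloned,
 * trimming it to zero length and taking an extra reference; otherwise
 * it allocates a fresh skb of the requested length.  Callers still need
 * to check for a NULL return from the allocation path.
 */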
361 
362 static struct net_device *get_real_dev(struct net_device *egress_dev)
363 {
364 	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
365 }
366 
367 static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
368 {
369 	int i;
370 
371 	egress_dev = get_real_dev(egress_dev);
372 	for (i = 0; i < dev->rdev.lldi.nports; i++)
373 		if (dev->rdev.lldi.ports[i] == egress_dev)
374 			return 1;
375 	return 0;
376 }
377 
378 static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
379 				     __u8 *peer_ip, __be16 local_port,
380 				     __be16 peer_port, u8 tos,
381 				     __u32 sin6_scope_id)
382 {
383 	struct dst_entry *dst = NULL;
384 
385 	if (IS_ENABLED(CONFIG_IPV6)) {
386 		struct flowi6 fl6;
387 
388 		memset(&fl6, 0, sizeof(fl6));
389 		memcpy(&fl6.daddr, peer_ip, 16);
390 		memcpy(&fl6.saddr, local_ip, 16);
391 		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
392 			fl6.flowi6_oif = sin6_scope_id;
393 		dst = ip6_route_output(&init_net, NULL, &fl6);
394 		if (!dst)
395 			goto out;
396 		if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
397 		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
398 			dst_release(dst);
399 			dst = NULL;
400 		}
401 	}
402 
403 out:
404 	return dst;
405 }
406 
407 static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
408 				 __be32 peer_ip, __be16 local_port,
409 				 __be16 peer_port, u8 tos)
410 {
411 	struct rtable *rt;
412 	struct flowi4 fl4;
413 	struct neighbour *n;
414 
415 	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
416 				   peer_port, local_port, IPPROTO_TCP,
417 				   tos, 0);
418 	if (IS_ERR(rt))
419 		return NULL;
420 	n = dst_neigh_lookup(&rt->dst, &peer_ip);
421 	if (!n)
422 		return NULL;
423 	if (!our_interface(dev, n->dev) &&
424 	    !(n->dev->flags & IFF_LOOPBACK)) {
425 		neigh_release(n);
426 		dst_release(&rt->dst);
427 		return NULL;
428 	}
429 	neigh_release(n);
430 	return &rt->dst;
431 }
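/*
 * find_route() and find_route6() resolve a route for the connection
 * 4-tuple and accept it only if the egress device is one of this
 * adapter's ports or the loopback device; otherwise the dst/neighbour
 * references are dropped and NULL is returned so the connection attempt
 * fails early.
 */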
432 
433 static void arp_failure_discard(void *handle, struct sk_buff *skb)
434 {
435 	PDBG("%s c4iw_dev %p\n", __func__, handle);
436 	kfree_skb(skb);
437 }
438 
439 /*
440  * Handle an ARP failure for an active open.
441  */
442 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
443 {
444 	struct c4iw_ep *ep = handle;
445 
446 	printk(KERN_ERR MOD "ARP failure during connect\n");
447 	kfree_skb(skb);
448 	connect_reply_upcall(ep, -EHOSTUNREACH);
449 	state_set(&ep->com, DEAD);
450 	if (ep->com.remote_addr.ss_family == AF_INET6) {
451 		struct sockaddr_in6 *sin6 =
452 			(struct sockaddr_in6 *)&ep->com.local_addr;
453 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
454 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
455 	}
456 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
457 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
458 	dst_release(ep->dst);
459 	cxgb4_l2t_release(ep->l2t);
460 	c4iw_put_ep(&ep->com);
461 }
462 
463 /*
464  * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
465  * and send it along.
466  */
467 static void abort_arp_failure(void *handle, struct sk_buff *skb)
468 {
469 	struct c4iw_rdev *rdev = handle;
470 	struct cpl_abort_req *req = cplhdr(skb);
471 
472 	PDBG("%s rdev %p\n", __func__, rdev);
473 	req->cmd = CPL_ABORT_NO_RST;
474 	c4iw_ofld_send(rdev, skb);
475 }
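/*
 * Three ARP failure policies are used by this file:
 * arp_failure_discard() silently drops the skb,
 * act_open_req_arp_failure() additionally fails the pending active open
 * and releases its resources, and abort_arp_failure() rewrites an
 * ABORT_REQ into its no-RST variant and resends it via the offload
 * queue.
 */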
476 
477 static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
478 {
479 	unsigned int flowclen = 80;
480 	struct fw_flowc_wr *flowc;
481 	int i;
482 	u16 vlan = ep->l2t->vlan;
483 	int nparams;
484 
485 	if (vlan == CPL_L2T_VLAN_NONE)
486 		nparams = 8;
487 	else
488 		nparams = 9;
489 
490 	skb = get_skb(skb, flowclen, GFP_KERNEL);
491 	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
492 
493 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
494 					   FW_FLOWC_WR_NPARAMS_V(nparams));
495 	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
496 					  16)) | FW_WR_FLOWID_V(ep->hwtid));
497 
498 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
499 	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
500 					    (ep->com.dev->rdev.lldi.pf));
501 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
502 	flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
503 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
504 	flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
505 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
506 	flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
507 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
508 	flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
509 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
510 	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
511 	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
512 	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
513 	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
514 	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
515 	if (nparams == 9) {
516 		u16 pri;
517 
518 		pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
519 		flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
520 		flowc->mnemval[8].val = cpu_to_be32(pri);
521 	} else {
522 		/* Pad WR to 16 byte boundary */
523 		flowc->mnemval[8].mnemonic = 0;
524 		flowc->mnemval[8].val = 0;
525 	}
526 	for (i = 0; i < 9; i++) {
527 		flowc->mnemval[i].r4[0] = 0;
528 		flowc->mnemval[i].r4[1] = 0;
529 		flowc->mnemval[i].r4[2] = 0;
530 	}
531 
532 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
533 	c4iw_ofld_send(&ep->com.dev->rdev, skb);
534 }
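/*
 * The FLOWC work request built above seeds the firmware's per-flow
 * state (channel, ingress queue, send/receive sequence numbers, send
 * buffer and MSS).  It is issued from act_establish() before the MPA
 * request is streamed.  A ninth mnemonic carries the VLAN priority when
 * the L2T entry has a VLAN; otherwise it is zeroed purely to pad the WR
 * to a 16-byte boundary.
 */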
535 
536 static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
537 {
538 	struct cpl_close_con_req *req;
539 	struct sk_buff *skb;
540 	int wrlen = roundup(sizeof *req, 16);
541 
542 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
543 	skb = get_skb(NULL, wrlen, gfp);
544 	if (!skb) {
545 		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
546 		return -ENOMEM;
547 	}
548 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
549 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
550 	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
551 	memset(req, 0, wrlen);
552 	INIT_TP_WR(req, ep->hwtid);
553 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
554 						    ep->hwtid));
555 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
556 }
557 
558 static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
559 {
560 	struct cpl_abort_req *req;
561 	int wrlen = roundup(sizeof *req, 16);
562 
563 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
564 	skb = get_skb(skb, wrlen, gfp);
565 	if (!skb) {
566 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
567 		       __func__);
568 		return -ENOMEM;
569 	}
570 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
571 	t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
572 	req = (struct cpl_abort_req *) skb_put(skb, wrlen);
573 	memset(req, 0, wrlen);
574 	INIT_TP_WR(req, ep->hwtid);
575 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
576 	req->cmd = CPL_ABORT_SEND_RST;
577 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
578 }
579 
580 static void best_mtu(const unsigned short *mtus, unsigned short mtu,
581 		     unsigned int *idx, int use_ts, int ipv6)
582 {
583 	unsigned short hdr_size = (ipv6 ?
584 				   sizeof(struct ipv6hdr) :
585 				   sizeof(struct iphdr)) +
586 				  sizeof(struct tcphdr) +
587 				  (use_ts ?
588 				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
589 	unsigned short data_size = mtu - hdr_size;
590 
591 	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
592 }
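/*
 * best_mtu() asks the LLD for the firmware MTU table index whose
 * payload best aligns to an 8-byte multiple, after accounting for the
 * IPv4/IPv6 plus TCP header overhead and, when TCP timestamps are in
 * use, the padded timestamp option.
 */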
593 
594 static int send_connect(struct c4iw_ep *ep)
595 {
596 	struct cpl_act_open_req *req = NULL;
597 	struct cpl_t5_act_open_req *t5req = NULL;
598 	struct cpl_t6_act_open_req *t6req = NULL;
599 	struct cpl_act_open_req6 *req6 = NULL;
600 	struct cpl_t5_act_open_req6 *t5req6 = NULL;
601 	struct cpl_t6_act_open_req6 *t6req6 = NULL;
602 	struct sk_buff *skb;
603 	u64 opt0;
604 	u32 opt2;
605 	unsigned int mtu_idx;
606 	int wscale;
607 	int win, sizev4, sizev6, wrlen;
608 	struct sockaddr_in *la = (struct sockaddr_in *)
609 				 &ep->com.local_addr;
610 	struct sockaddr_in *ra = (struct sockaddr_in *)
611 				 &ep->com.remote_addr;
612 	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
613 				   &ep->com.local_addr;
614 	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
615 				   &ep->com.remote_addr;
616 	int ret;
617 	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
618 	u32 isn = (prandom_u32() & ~7UL) - 1;
619 
620 	switch (CHELSIO_CHIP_VERSION(adapter_type)) {
621 	case CHELSIO_T4:
622 		sizev4 = sizeof(struct cpl_act_open_req);
623 		sizev6 = sizeof(struct cpl_act_open_req6);
624 		break;
625 	case CHELSIO_T5:
626 		sizev4 = sizeof(struct cpl_t5_act_open_req);
627 		sizev6 = sizeof(struct cpl_t5_act_open_req6);
628 		break;
629 	case CHELSIO_T6:
630 		sizev4 = sizeof(struct cpl_t6_act_open_req);
631 		sizev6 = sizeof(struct cpl_t6_act_open_req6);
632 		break;
633 	default:
634 		pr_err("T%d Chip is not supported\n",
635 		       CHELSIO_CHIP_VERSION(adapter_type));
636 		return -EINVAL;
637 	}
638 
639 	wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
640 			roundup(sizev4, 16) :
641 			roundup(sizev6, 16);
642 
643 	PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
644 
645 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
646 	if (!skb) {
647 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
648 		       __func__);
649 		return -ENOMEM;
650 	}
651 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
652 
653 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
654 		 enable_tcp_timestamps,
655 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
656 	wscale = compute_wscale(rcv_win);
657 
658 	/*
659 	 * Specify the largest window that will fit in opt0. The
660 	 * remainder will be specified in the rx_data_ack.
661 	 */
662 	win = ep->rcv_win >> 10;
663 	if (win > RCV_BUFSIZ_M)
664 		win = RCV_BUFSIZ_M;
665 
666 	opt0 = (nocong ? NO_CONG_F : 0) |
667 	       KEEP_ALIVE_F |
668 	       DELACK_F |
669 	       WND_SCALE_V(wscale) |
670 	       MSS_IDX_V(mtu_idx) |
671 	       L2T_IDX_V(ep->l2t->idx) |
672 	       TX_CHAN_V(ep->tx_chan) |
673 	       SMAC_SEL_V(ep->smac_idx) |
674 	       DSCP_V(ep->tos >> 2) |
675 	       ULP_MODE_V(ULP_MODE_TCPDDP) |
676 	       RCV_BUFSIZ_V(win);
677 	opt2 = RX_CHANNEL_V(0) |
678 	       CCTRL_ECN_V(enable_ecn) |
679 	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
680 	if (enable_tcp_timestamps)
681 		opt2 |= TSTAMPS_EN_F;
682 	if (enable_tcp_sack)
683 		opt2 |= SACK_EN_F;
684 	if (wscale && enable_tcp_window_scaling)
685 		opt2 |= WND_SCALE_EN_F;
686 	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
687 		if (peer2peer)
688 			isn += 4;
689 
690 		opt2 |= T5_OPT_2_VALID_F;
691 		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
692 		opt2 |= T5_ISS_F;
693 	}
694 
695 	if (ep->com.remote_addr.ss_family == AF_INET6)
696 		cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
697 			       (const u32 *)&la6->sin6_addr.s6_addr, 1);
698 
699 	t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
700 
701 	if (ep->com.remote_addr.ss_family == AF_INET) {
702 		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
703 		case CHELSIO_T4:
704 			req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
705 			INIT_TP_WR(req, 0);
706 			break;
707 		case CHELSIO_T5:
708 			t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
709 					wrlen);
710 			INIT_TP_WR(t5req, 0);
711 			req = (struct cpl_act_open_req *)t5req;
712 			break;
713 		case CHELSIO_T6:
714 			t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
715 					wrlen);
716 			INIT_TP_WR(t6req, 0);
717 			req = (struct cpl_act_open_req *)t6req;
718 			t5req = (struct cpl_t5_act_open_req *)t6req;
719 			break;
720 		default:
721 			pr_err("T%d Chip is not supported\n",
722 			       CHELSIO_CHIP_VERSION(adapter_type));
723 			ret = -EINVAL;
724 			goto clip_release;
725 		}
726 
727 		OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
728 					((ep->rss_qid<<14) | ep->atid)));
729 		req->local_port = la->sin_port;
730 		req->peer_port = ra->sin_port;
731 		req->local_ip = la->sin_addr.s_addr;
732 		req->peer_ip = ra->sin_addr.s_addr;
733 		req->opt0 = cpu_to_be64(opt0);
734 
735 		if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
736 			req->params = cpu_to_be32(cxgb4_select_ntuple(
737 						ep->com.dev->rdev.lldi.ports[0],
738 						ep->l2t));
739 			req->opt2 = cpu_to_be32(opt2);
740 		} else {
741 			t5req->params = cpu_to_be64(FILTER_TUPLE_V(
742 						cxgb4_select_ntuple(
743 						ep->com.dev->rdev.lldi.ports[0],
744 						ep->l2t)));
745 			t5req->rsvd = cpu_to_be32(isn);
746 			PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
747 			t5req->opt2 = cpu_to_be32(opt2);
748 		}
749 	} else {
750 		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
751 		case CHELSIO_T4:
752 			req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
753 			INIT_TP_WR(req6, 0);
754 			break;
755 		case CHELSIO_T5:
756 			t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
757 					wrlen);
758 			INIT_TP_WR(t5req6, 0);
759 			req6 = (struct cpl_act_open_req6 *)t5req6;
760 			break;
761 		case CHELSIO_T6:
762 			t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
763 					wrlen);
764 			INIT_TP_WR(t6req6, 0);
765 			req6 = (struct cpl_act_open_req6 *)t6req6;
766 			t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
767 			break;
768 		default:
769 			pr_err("T%d Chip is not supported\n",
770 			       CHELSIO_CHIP_VERSION(adapter_type));
771 			ret = -EINVAL;
772 			goto clip_release;
773 		}
774 
775 		OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
776 					((ep->rss_qid<<14)|ep->atid)));
777 		req6->local_port = la6->sin6_port;
778 		req6->peer_port = ra6->sin6_port;
779 		req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
780 		req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
781 		req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
782 		req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
783 		req6->opt0 = cpu_to_be64(opt0);
784 
785 		if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
786 			req6->params = cpu_to_be32(cxgb4_select_ntuple(
787 						ep->com.dev->rdev.lldi.ports[0],
788 						ep->l2t));
789 			req6->opt2 = cpu_to_be32(opt2);
790 		} else {
791 			t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
792 						cxgb4_select_ntuple(
793 						ep->com.dev->rdev.lldi.ports[0],
794 						ep->l2t)));
795 			t5req6->rsvd = cpu_to_be32(isn);
796 			PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
797 			t5req6->opt2 = cpu_to_be32(opt2);
798 		}
799 	}
800 
801 	set_bit(ACT_OPEN_REQ, &ep->com.history);
802 	ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
803 clip_release:
804 	if (ret && ep->com.remote_addr.ss_family == AF_INET6)
805 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
806 				   (const u32 *)&la6->sin6_addr.s6_addr, 1);
807 	return ret;
808 }
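/*
 * send_connect() builds the chip-specific active open CPL (T4, T5 or T6
 * layouts, for IPv4 or IPv6), encodes the window, MSS index, L2T index
 * and queue selections into opt0/opt2, and hands the request to the L2T
 * send path.  For IPv6 destinations a CLIP entry for the local address
 * is claimed first and released again if the send fails.
 */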
809 
810 static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
811 		u8 mpa_rev_to_use)
812 {
813 	int mpalen, wrlen;
814 	struct fw_ofld_tx_data_wr *req;
815 	struct mpa_message *mpa;
816 	struct mpa_v2_conn_params mpa_v2_params;
817 
818 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
819 
820 	BUG_ON(skb_cloned(skb));
821 
822 	mpalen = sizeof(*mpa) + ep->plen;
823 	if (mpa_rev_to_use == 2)
824 		mpalen += sizeof(struct mpa_v2_conn_params);
825 	wrlen = roundup(mpalen + sizeof *req, 16);
826 	skb = get_skb(skb, wrlen, GFP_KERNEL);
827 	if (!skb) {
828 		connect_reply_upcall(ep, -ENOMEM);
829 		return;
830 	}
831 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
832 
833 	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
834 	memset(req, 0, wrlen);
835 	req->op_to_immdlen = cpu_to_be32(
836 		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
837 		FW_WR_COMPL_F |
838 		FW_WR_IMMDLEN_V(mpalen));
839 	req->flowid_len16 = cpu_to_be32(
840 		FW_WR_FLOWID_V(ep->hwtid) |
841 		FW_WR_LEN16_V(wrlen >> 4));
842 	req->plen = cpu_to_be32(mpalen);
843 	req->tunnel_to_proxy = cpu_to_be32(
844 		FW_OFLD_TX_DATA_WR_FLUSH_F |
845 		FW_OFLD_TX_DATA_WR_SHOVE_F);
846 
847 	mpa = (struct mpa_message *)(req + 1);
848 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
849 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
850 		     (markers_enabled ? MPA_MARKERS : 0) |
851 		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
852 	mpa->private_data_size = htons(ep->plen);
853 	mpa->revision = mpa_rev_to_use;
854 	if (mpa_rev_to_use == 1) {
855 		ep->tried_with_mpa_v1 = 1;
856 		ep->retry_with_mpa_v1 = 0;
857 	}
858 
859 	if (mpa_rev_to_use == 2) {
860 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
861 					       sizeof (struct mpa_v2_conn_params));
862 		PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
863 		     ep->ord);
864 		mpa_v2_params.ird = htons((u16)ep->ird);
865 		mpa_v2_params.ord = htons((u16)ep->ord);
866 
867 		if (peer2peer) {
868 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
869 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
870 				mpa_v2_params.ord |=
871 					htons(MPA_V2_RDMA_WRITE_RTR);
872 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
873 				mpa_v2_params.ord |=
874 					htons(MPA_V2_RDMA_READ_RTR);
875 		}
876 		memcpy(mpa->private_data, &mpa_v2_params,
877 		       sizeof(struct mpa_v2_conn_params));
878 
879 		if (ep->plen)
880 			memcpy(mpa->private_data +
881 			       sizeof(struct mpa_v2_conn_params),
882 			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
883 	} else
884 		if (ep->plen)
885 			memcpy(mpa->private_data,
886 					ep->mpa_pkt + sizeof(*mpa), ep->plen);
887 
888 	/*
889 	 * Reference the mpa skb.  This ensures the data area
890 	 * will remain in memory until the hw acks the tx.
891 	 * Function fw4_ack() will deref it.
892 	 */
893 	skb_get(skb);
894 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
895 	BUG_ON(ep->mpa_skb);
896 	ep->mpa_skb = skb;
897 	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
898 	start_ep_timer(ep);
899 	__state_set(&ep->com, MPA_REQ_SENT);
900 	ep->mpa_attr.initiator = 1;
901 	ep->snd_seq += mpalen;
902 	return;
903 }
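/*
 * The MPA start request above is carried as immediate data in a
 * FW_OFLD_TX_DATA_WR.  For MPA revision 2 an mpa_v2_conn_params block
 * (IRD/ORD and the peer-to-peer RTR bits) is inserted ahead of any
 * private data and private_data_size is grown accordingly.  The skb is
 * kept referenced in ep->mpa_skb until the hardware acks the transmit
 * (see the fw4_ack() note above).
 */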
904 
905 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
906 {
907 	int mpalen, wrlen;
908 	struct fw_ofld_tx_data_wr *req;
909 	struct mpa_message *mpa;
910 	struct sk_buff *skb;
911 	struct mpa_v2_conn_params mpa_v2_params;
912 
913 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
914 
915 	mpalen = sizeof(*mpa) + plen;
916 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
917 		mpalen += sizeof(struct mpa_v2_conn_params);
918 	wrlen = roundup(mpalen + sizeof *req, 16);
919 
920 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
921 	if (!skb) {
922 		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
923 		return -ENOMEM;
924 	}
925 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
926 
927 	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
928 	memset(req, 0, wrlen);
929 	req->op_to_immdlen = cpu_to_be32(
930 		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
931 		FW_WR_COMPL_F |
932 		FW_WR_IMMDLEN_V(mpalen));
933 	req->flowid_len16 = cpu_to_be32(
934 		FW_WR_FLOWID_V(ep->hwtid) |
935 		FW_WR_LEN16_V(wrlen >> 4));
936 	req->plen = cpu_to_be32(mpalen);
937 	req->tunnel_to_proxy = cpu_to_be32(
938 		FW_OFLD_TX_DATA_WR_FLUSH_F |
939 		FW_OFLD_TX_DATA_WR_SHOVE_F);
940 
941 	mpa = (struct mpa_message *)(req + 1);
942 	memset(mpa, 0, sizeof(*mpa));
943 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
944 	mpa->flags = MPA_REJECT;
945 	mpa->revision = ep->mpa_attr.version;
946 	mpa->private_data_size = htons(plen);
947 
948 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
949 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
950 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
951 					       sizeof (struct mpa_v2_conn_params));
952 		mpa_v2_params.ird = htons(((u16)ep->ird) |
953 					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
954 					   0));
955 		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
956 					  (p2p_type ==
957 					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
958 					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
959 					   FW_RI_INIT_P2PTYPE_READ_REQ ?
960 					   MPA_V2_RDMA_READ_RTR : 0) : 0));
961 		memcpy(mpa->private_data, &mpa_v2_params,
962 		       sizeof(struct mpa_v2_conn_params));
963 
964 		if (ep->plen)
965 			memcpy(mpa->private_data +
966 			       sizeof(struct mpa_v2_conn_params), pdata, plen);
967 	} else
968 		if (plen)
969 			memcpy(mpa->private_data, pdata, plen);
970 
971 	/*
972 	 * Reference the mpa skb again.  This ensures the data area
973 	 * will remain in memory until the hw acks the tx.
974 	 * Function fw4_ack() will deref it.
975 	 */
976 	skb_get(skb);
977 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
978 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
979 	BUG_ON(ep->mpa_skb);
980 	ep->mpa_skb = skb;
981 	ep->snd_seq += mpalen;
982 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
983 }
984 
985 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
986 {
987 	int mpalen, wrlen;
988 	struct fw_ofld_tx_data_wr *req;
989 	struct mpa_message *mpa;
990 	struct sk_buff *skb;
991 	struct mpa_v2_conn_params mpa_v2_params;
992 
993 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
994 
995 	mpalen = sizeof(*mpa) + plen;
996 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
997 		mpalen += sizeof(struct mpa_v2_conn_params);
998 	wrlen = roundup(mpalen + sizeof *req, 16);
999 
1000 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1001 	if (!skb) {
1002 		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
1003 		return -ENOMEM;
1004 	}
1005 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1006 
1007 	req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
1008 	memset(req, 0, wrlen);
1009 	req->op_to_immdlen = cpu_to_be32(
1010 		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1011 		FW_WR_COMPL_F |
1012 		FW_WR_IMMDLEN_V(mpalen));
1013 	req->flowid_len16 = cpu_to_be32(
1014 		FW_WR_FLOWID_V(ep->hwtid) |
1015 		FW_WR_LEN16_V(wrlen >> 4));
1016 	req->plen = cpu_to_be32(mpalen);
1017 	req->tunnel_to_proxy = cpu_to_be32(
1018 		FW_OFLD_TX_DATA_WR_FLUSH_F |
1019 		FW_OFLD_TX_DATA_WR_SHOVE_F);
1020 
1021 	mpa = (struct mpa_message *)(req + 1);
1022 	memset(mpa, 0, sizeof(*mpa));
1023 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1024 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1025 		     (markers_enabled ? MPA_MARKERS : 0);
1026 	mpa->revision = ep->mpa_attr.version;
1027 	mpa->private_data_size = htons(plen);
1028 
1029 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1030 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1031 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1032 					       sizeof (struct mpa_v2_conn_params));
1033 		mpa_v2_params.ird = htons((u16)ep->ird);
1034 		mpa_v2_params.ord = htons((u16)ep->ord);
1035 		if (peer2peer && (ep->mpa_attr.p2p_type !=
1036 					FW_RI_INIT_P2PTYPE_DISABLED)) {
1037 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1038 
1039 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1040 				mpa_v2_params.ord |=
1041 					htons(MPA_V2_RDMA_WRITE_RTR);
1042 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1043 				mpa_v2_params.ord |=
1044 					htons(MPA_V2_RDMA_READ_RTR);
1045 		}
1046 
1047 		memcpy(mpa->private_data, &mpa_v2_params,
1048 		       sizeof(struct mpa_v2_conn_params));
1049 
1050 		if (ep->plen)
1051 			memcpy(mpa->private_data +
1052 			       sizeof(struct mpa_v2_conn_params), pdata, plen);
1053 	} else
1054 		if (plen)
1055 			memcpy(mpa->private_data, pdata, plen);
1056 
1057 	/*
1058 	 * Reference the mpa skb.  This ensures the data area
1059 	 * will remain in memory until the hw acks the tx.
1060 	 * Function fw4_ack() will deref it.
1061 	 */
1062 	skb_get(skb);
1063 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1064 	ep->mpa_skb = skb;
1065 	__state_set(&ep->com, MPA_REP_SENT);
1066 	ep->snd_seq += mpalen;
1067 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1068 }
1069 
1070 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1071 {
1072 	struct c4iw_ep *ep;
1073 	struct cpl_act_establish *req = cplhdr(skb);
1074 	unsigned int tid = GET_TID(req);
1075 	unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1076 	struct tid_info *t = dev->rdev.lldi.tids;
1077 
1078 	ep = lookup_atid(t, atid);
1079 
1080 	PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
1081 	     be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1082 
1083 	mutex_lock(&ep->com.mutex);
1084 	dst_confirm(ep->dst);
1085 
1086 	/* setup the hwtid for this connection */
1087 	ep->hwtid = tid;
1088 	cxgb4_insert_tid(t, ep, tid);
1089 	insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
1090 
1091 	ep->snd_seq = be32_to_cpu(req->snd_isn);
1092 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1093 
1094 	set_emss(ep, ntohs(req->tcp_opt));
1095 
1096 	/* dealloc the atid */
1097 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
1098 	cxgb4_free_atid(t, atid);
1099 	set_bit(ACT_ESTAB, &ep->com.history);
1100 
1101 	/* start MPA negotiation */
1102 	send_flowc(ep, NULL);
1103 	if (ep->retry_with_mpa_v1)
1104 		send_mpa_req(ep, skb, 1);
1105 	else
1106 		send_mpa_req(ep, skb, mpa_rev);
1107 	mutex_unlock(&ep->com.mutex);
1108 	return 0;
1109 }
1110 
1111 static void close_complete_upcall(struct c4iw_ep *ep, int status)
1112 {
1113 	struct iw_cm_event event;
1114 
1115 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1116 	memset(&event, 0, sizeof(event));
1117 	event.event = IW_CM_EVENT_CLOSE;
1118 	event.status = status;
1119 	if (ep->com.cm_id) {
1120 		PDBG("close complete delivered ep %p cm_id %p tid %u\n",
1121 		     ep, ep->com.cm_id, ep->hwtid);
1122 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1123 		ep->com.cm_id->rem_ref(ep->com.cm_id);
1124 		ep->com.cm_id = NULL;
1125 		set_bit(CLOSE_UPCALL, &ep->com.history);
1126 	}
1127 }
1128 
1129 static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
1130 {
1131 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1132 	__state_set(&ep->com, ABORTING);
1133 	set_bit(ABORT_CONN, &ep->com.history);
1134 	return send_abort(ep, skb, gfp);
1135 }
1136 
1137 static void peer_close_upcall(struct c4iw_ep *ep)
1138 {
1139 	struct iw_cm_event event;
1140 
1141 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1142 	memset(&event, 0, sizeof(event));
1143 	event.event = IW_CM_EVENT_DISCONNECT;
1144 	if (ep->com.cm_id) {
1145 		PDBG("peer close delivered ep %p cm_id %p tid %u\n",
1146 		     ep, ep->com.cm_id, ep->hwtid);
1147 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1148 		set_bit(DISCONN_UPCALL, &ep->com.history);
1149 	}
1150 }
1151 
1152 static void peer_abort_upcall(struct c4iw_ep *ep)
1153 {
1154 	struct iw_cm_event event;
1155 
1156 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1157 	memset(&event, 0, sizeof(event));
1158 	event.event = IW_CM_EVENT_CLOSE;
1159 	event.status = -ECONNRESET;
1160 	if (ep->com.cm_id) {
1161 		PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
1162 		     ep->com.cm_id, ep->hwtid);
1163 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1164 		ep->com.cm_id->rem_ref(ep->com.cm_id);
1165 		ep->com.cm_id = NULL;
1166 		set_bit(ABORT_UPCALL, &ep->com.history);
1167 	}
1168 }
1169 
1170 static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1171 {
1172 	struct iw_cm_event event;
1173 
1174 	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
1175 	memset(&event, 0, sizeof(event));
1176 	event.event = IW_CM_EVENT_CONNECT_REPLY;
1177 	event.status = status;
1178 	memcpy(&event.local_addr, &ep->com.local_addr,
1179 	       sizeof(ep->com.local_addr));
1180 	memcpy(&event.remote_addr, &ep->com.remote_addr,
1181 	       sizeof(ep->com.remote_addr));
1182 
1183 	if ((status == 0) || (status == -ECONNREFUSED)) {
1184 		if (!ep->tried_with_mpa_v1) {
1185 			/* this means MPA_v2 is used */
1186 			event.ord = ep->ird;
1187 			event.ird = ep->ord;
1188 			event.private_data_len = ep->plen -
1189 				sizeof(struct mpa_v2_conn_params);
1190 			event.private_data = ep->mpa_pkt +
1191 				sizeof(struct mpa_message) +
1192 				sizeof(struct mpa_v2_conn_params);
1193 		} else {
1194 			/* this means MPA_v1 is used */
1195 			event.ord = cur_max_read_depth(ep->com.dev);
1196 			event.ird = cur_max_read_depth(ep->com.dev);
1197 			event.private_data_len = ep->plen;
1198 			event.private_data = ep->mpa_pkt +
1199 				sizeof(struct mpa_message);
1200 		}
1201 	}
1202 
1203 	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
1204 	     ep->hwtid, status);
1205 	set_bit(CONN_RPL_UPCALL, &ep->com.history);
1206 	ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1207 
1208 	if (status < 0) {
1209 		ep->com.cm_id->rem_ref(ep->com.cm_id);
1210 		ep->com.cm_id = NULL;
1211 	}
1212 }
1213 
1214 static int connect_request_upcall(struct c4iw_ep *ep)
1215 {
1216 	struct iw_cm_event event;
1217 	int ret;
1218 
1219 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1220 	memset(&event, 0, sizeof(event));
1221 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1222 	memcpy(&event.local_addr, &ep->com.local_addr,
1223 	       sizeof(ep->com.local_addr));
1224 	memcpy(&event.remote_addr, &ep->com.remote_addr,
1225 	       sizeof(ep->com.remote_addr));
1226 	event.provider_data = ep;
1227 	if (!ep->tried_with_mpa_v1) {
1228 		/* this means MPA_v2 is used */
1229 		event.ord = ep->ord;
1230 		event.ird = ep->ird;
1231 		event.private_data_len = ep->plen -
1232 			sizeof(struct mpa_v2_conn_params);
1233 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1234 			sizeof(struct mpa_v2_conn_params);
1235 	} else {
1236 		/* this means MPA_v1 is used. Send max supported */
1237 		event.ord = cur_max_read_depth(ep->com.dev);
1238 		event.ird = cur_max_read_depth(ep->com.dev);
1239 		event.private_data_len = ep->plen;
1240 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1241 	}
1242 	c4iw_get_ep(&ep->com);
1243 	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1244 						      &event);
1245 	if (ret)
1246 		c4iw_put_ep(&ep->com);
1247 	set_bit(CONNREQ_UPCALL, &ep->com.history);
1248 	c4iw_put_ep(&ep->parent_ep->com);
1249 	return ret;
1250 }
1251 
1252 static void established_upcall(struct c4iw_ep *ep)
1253 {
1254 	struct iw_cm_event event;
1255 
1256 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1257 	memset(&event, 0, sizeof(event));
1258 	event.event = IW_CM_EVENT_ESTABLISHED;
1259 	event.ird = ep->ord;
1260 	event.ord = ep->ird;
1261 	if (ep->com.cm_id) {
1262 		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1263 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1264 		set_bit(ESTAB_UPCALL, &ep->com.history);
1265 	}
1266 }
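/*
 * The *_upcall() helpers above translate driver and hardware events
 * into iw_cm events for the iWARP connection manager.  Upcalls that end
 * the connection (close complete, peer abort, failed connect reply)
 * also drop the cm_id reference and clear ep->com.cm_id so no further
 * events are delivered for that endpoint.
 */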
1267 
1268 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1269 {
1270 	struct cpl_rx_data_ack *req;
1271 	struct sk_buff *skb;
1272 	int wrlen = roundup(sizeof *req, 16);
1273 
1274 	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
1275 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1276 	if (!skb) {
1277 		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
1278 		return 0;
1279 	}
1280 
1281 	/*
1282 	 * If we couldn't specify the entire rcv window at connection setup
1283 	 * due to the limit in the number of bits in the RCV_BUFSIZ field,
1284 	 * then add the overage in to the credits returned.
1285 	 */
1286 	if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1287 		credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1288 
1289 	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
1290 	memset(req, 0, wrlen);
1291 	INIT_TP_WR(req, ep->hwtid);
1292 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1293 						    ep->hwtid));
1294 	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
1295 				       RX_DACK_CHANGE_F |
1296 				       RX_DACK_MODE_V(dack_mode));
1297 	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
1298 	c4iw_ofld_send(&ep->com.dev->rdev, skb);
1299 	return credits;
1300 }
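/*
 * update_rx_credits() returns receive window credits to the hardware
 * with a CPL_RX_DATA_ACK.  Any portion of the receive window that could
 * not be expressed in the RCV_BUFSIZ field at connection setup is added
 * to the credits returned here.
 */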
1301 
1302 #define RELAXED_IRD_NEGOTIATION 1
1303 
1304 static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1305 {
1306 	struct mpa_message *mpa;
1307 	struct mpa_v2_conn_params *mpa_v2_params;
1308 	u16 plen;
1309 	u16 resp_ird, resp_ord;
1310 	u8 rtr_mismatch = 0, insuff_ird = 0;
1311 	struct c4iw_qp_attributes attrs;
1312 	enum c4iw_qp_attr_mask mask;
1313 	int err;
1314 	int disconnect = 0;
1315 
1316 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1317 
1318 	/*
1319 	 * Stop mpa timer.  If it expired, then
1320 	 * we ignore the MPA reply.  process_timeout()
1321 	 * will abort the connection.
1322 	 */
1323 	if (stop_ep_timer(ep))
1324 		return 0;
1325 
1326 	/*
1327 	 * If we get more than the supported amount of private data
1328 	 * then we must fail this connection.
1329 	 */
1330 	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1331 		err = -EINVAL;
1332 		goto err;
1333 	}
1334 
1335 	/*
1336 	 * copy the new data into our accumulation buffer.
1337 	 */
1338 	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1339 				  skb->len);
1340 	ep->mpa_pkt_len += skb->len;
1341 
1342 	/*
1343 	 * if we don't even have the mpa message, then bail.
1344 	 */
1345 	if (ep->mpa_pkt_len < sizeof(*mpa))
1346 		return 0;
1347 	mpa = (struct mpa_message *) ep->mpa_pkt;
1348 
1349 	/* Validate MPA header. */
1350 	if (mpa->revision > mpa_rev) {
1351 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1352 		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
1353 		err = -EPROTO;
1354 		goto err;
1355 	}
1356 	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1357 		err = -EPROTO;
1358 		goto err;
1359 	}
1360 
1361 	plen = ntohs(mpa->private_data_size);
1362 
1363 	/*
1364 	 * Fail if there's too much private data.
1365 	 */
1366 	if (plen > MPA_MAX_PRIVATE_DATA) {
1367 		err = -EPROTO;
1368 		goto err;
1369 	}
1370 
1371 	/*
1372 	 * Fail if plen does not account for the accumulated packet size.
1373 	 */
1374 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1375 		err = -EPROTO;
1376 		goto err;
1377 	}
1378 
1379 	ep->plen = (u8) plen;
1380 
1381 	/*
1382 	 * If we don't have all the pdata yet, then bail.
1383 	 * We'll continue processing when more data arrives.
1384 	 */
1385 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1386 		return 0;
1387 
1388 	if (mpa->flags & MPA_REJECT) {
1389 		err = -ECONNREFUSED;
1390 		goto err;
1391 	}
1392 
1393 	/*
1394 	 * If we get here we have accumulated the entire mpa
1395 	 * start reply message including private data. And
1396 	 * the MPA header is valid.
1397 	 */
1398 	__state_set(&ep->com, FPDU_MODE);
1399 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1400 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1401 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1402 	ep->mpa_attr.version = mpa->revision;
1403 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1404 
1405 	if (mpa->revision == 2) {
1406 		ep->mpa_attr.enhanced_rdma_conn =
1407 			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1408 		if (ep->mpa_attr.enhanced_rdma_conn) {
1409 			mpa_v2_params = (struct mpa_v2_conn_params *)
1410 				(ep->mpa_pkt + sizeof(*mpa));
1411 			resp_ird = ntohs(mpa_v2_params->ird) &
1412 				MPA_V2_IRD_ORD_MASK;
1413 			resp_ord = ntohs(mpa_v2_params->ord) &
1414 				MPA_V2_IRD_ORD_MASK;
1415 			PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
1416 			     __func__, resp_ird, resp_ord, ep->ird, ep->ord);
1417 
1418 			/*
1419 			 * This is a double-check. Ideally, below checks are
1420 			 * not required since ird/ord stuff has been taken
1421 			 * care of in c4iw_accept_cr
1422 			 */
1423 			if (ep->ird < resp_ord) {
1424 				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1425 				    ep->com.dev->rdev.lldi.max_ordird_qp)
1426 					ep->ird = resp_ord;
1427 				else
1428 					insuff_ird = 1;
1429 			} else if (ep->ird > resp_ord) {
1430 				ep->ird = resp_ord;
1431 			}
1432 			if (ep->ord > resp_ird) {
1433 				if (RELAXED_IRD_NEGOTIATION)
1434 					ep->ord = resp_ird;
1435 				else
1436 					insuff_ird = 1;
1437 			}
1438 			if (insuff_ird) {
1439 				err = -ENOMEM;
1440 				ep->ird = resp_ord;
1441 				ep->ord = resp_ird;
1442 			}
1443 
1444 			if (ntohs(mpa_v2_params->ird) &
1445 					MPA_V2_PEER2PEER_MODEL) {
1446 				if (ntohs(mpa_v2_params->ord) &
1447 						MPA_V2_RDMA_WRITE_RTR)
1448 					ep->mpa_attr.p2p_type =
1449 						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1450 				else if (ntohs(mpa_v2_params->ord) &
1451 						MPA_V2_RDMA_READ_RTR)
1452 					ep->mpa_attr.p2p_type =
1453 						FW_RI_INIT_P2PTYPE_READ_REQ;
1454 			}
1455 		}
1456 	} else if (mpa->revision == 1)
1457 		if (peer2peer)
1458 			ep->mpa_attr.p2p_type = p2p_type;
1459 
1460 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1461 	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
1462 	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
1463 	     ep->mpa_attr.recv_marker_enabled,
1464 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1465 	     ep->mpa_attr.p2p_type, p2p_type);
1466 
1467 	/*
1468 	 * If responder's RTR does not match with that of initiator, assign
1469 	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1470 	 * generated when moving QP to RTS state.
1471 	 * A TERM message will be sent after QP has moved to RTS state
1472 	 */
1473 	if ((ep->mpa_attr.version == 2) && peer2peer &&
1474 			(ep->mpa_attr.p2p_type != p2p_type)) {
1475 		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1476 		rtr_mismatch = 1;
1477 	}
1478 
1479 	attrs.mpa_attr = ep->mpa_attr;
1480 	attrs.max_ird = ep->ird;
1481 	attrs.max_ord = ep->ord;
1482 	attrs.llp_stream_handle = ep;
1483 	attrs.next_state = C4IW_QP_STATE_RTS;
1484 
1485 	mask = C4IW_QP_ATTR_NEXT_STATE |
1486 	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1487 	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1488 
1489 	/* bind QP and TID with INIT_WR */
1490 	err = c4iw_modify_qp(ep->com.qp->rhp,
1491 			     ep->com.qp, mask, &attrs, 1);
1492 	if (err)
1493 		goto err;
1494 
1495 	/*
1496 	 * If responder's RTR requirement did not match with what initiator
1497 	 * supports, generate TERM message
1498 	 */
1499 	if (rtr_mismatch) {
1500 		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1501 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1502 		attrs.ecode = MPA_NOMATCH_RTR;
1503 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1504 		attrs.send_term = 1;
1505 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1506 				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1507 		err = -ENOMEM;
1508 		disconnect = 1;
1509 		goto out;
1510 	}
1511 
1512 	/*
1513 	 * Generate TERM if initiator IRD is not sufficient for responder
1514 	 * provided ORD. Currently, we do the same behaviour even when
1515 	 * responder provided IRD is also not sufficient as regards to
1516 	 * initiator ORD.
1517 	 */
1518 	if (insuff_ird) {
1519 		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1520 				__func__);
1521 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1522 		attrs.ecode = MPA_INSUFF_IRD;
1523 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1524 		attrs.send_term = 1;
1525 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1526 				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1527 		err = -ENOMEM;
1528 		disconnect = 1;
1529 		goto out;
1530 	}
1531 	goto out;
1532 err:
1533 	__state_set(&ep->com, ABORTING);
1534 	send_abort(ep, skb, GFP_KERNEL);
1535 out:
1536 	connect_reply_upcall(ep, err);
1537 	return disconnect;
1538 }
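/*
 * process_mpa_reply() accumulates streaming-mode data in ep->mpa_pkt
 * until the complete MPA start reply (header, optional v2 parameters
 * and private data) has arrived, validates it, renegotiates IRD/ORD for
 * MPA v2 and then moves the QP to RTS.  An RTR mismatch or insufficient
 * IRD results in a TERMINATE being generated once the QP is in RTS, and
 * the connect reply upcall reports the final status in every case.
 */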
1539 
1540 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1541 {
1542 	struct mpa_message *mpa;
1543 	struct mpa_v2_conn_params *mpa_v2_params;
1544 	u16 plen;
1545 
1546 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1547 
1548 	/*
1549 	 * If we get more than the supported amount of private data
1550 	 * then we must fail this connection.
1551 	 */
1552 	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1553 		(void)stop_ep_timer(ep);
1554 		abort_connection(ep, skb, GFP_KERNEL);
1555 		return;
1556 	}
1557 
1558 	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1559 
1560 	/*
1561 	 * Copy the new data into our accumulation buffer.
1562 	 */
1563 	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1564 				  skb->len);
1565 	ep->mpa_pkt_len += skb->len;
1566 
1567 	/*
1568 	 * If we don't even have the mpa message, then bail.
1569 	 * We'll continue processing when more data arrives.
1570 	 */
1571 	if (ep->mpa_pkt_len < sizeof(*mpa))
1572 		return;
1573 
1574 	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
1575 	mpa = (struct mpa_message *) ep->mpa_pkt;
1576 
1577 	/*
1578 	 * Validate MPA Header.
1579 	 */
1580 	if (mpa->revision > mpa_rev) {
1581 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1582 		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
1583 		(void)stop_ep_timer(ep);
1584 		abort_connection(ep, skb, GFP_KERNEL);
1585 		return;
1586 	}
1587 
1588 	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
1589 		(void)stop_ep_timer(ep);
1590 		abort_connection(ep, skb, GFP_KERNEL);
1591 		return;
1592 	}
1593 
1594 	plen = ntohs(mpa->private_data_size);
1595 
1596 	/*
1597 	 * Fail if there's too much private data.
1598 	 */
1599 	if (plen > MPA_MAX_PRIVATE_DATA) {
1600 		(void)stop_ep_timer(ep);
1601 		abort_connection(ep, skb, GFP_KERNEL);
1602 		return;
1603 	}
1604 
1605 	/*
1606 	 * Fail if plen does not account for the accumulated packet size.
1607 	 */
1608 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1609 		(void)stop_ep_timer(ep);
1610 		abort_connection(ep, skb, GFP_KERNEL);
1611 		return;
1612 	}
1613 	ep->plen = (u8) plen;
1614 
1615 	/*
1616 	 * If we don't have all the pdata yet, then bail.
1617 	 */
1618 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1619 		return;
1620 
1621 	/*
1622 	 * If we get here we have accumulated the entire MPA
1623 	 * start request message including private data.
1624 	 */
1625 	ep->mpa_attr.initiator = 0;
1626 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1627 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1628 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1629 	ep->mpa_attr.version = mpa->revision;
1630 	if (mpa->revision == 1)
1631 		ep->tried_with_mpa_v1 = 1;
1632 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1633 
1634 	if (mpa->revision == 2) {
1635 		ep->mpa_attr.enhanced_rdma_conn =
1636 			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1637 		if (ep->mpa_attr.enhanced_rdma_conn) {
1638 			mpa_v2_params = (struct mpa_v2_conn_params *)
1639 				(ep->mpa_pkt + sizeof(*mpa));
1640 			ep->ird = ntohs(mpa_v2_params->ird) &
1641 				MPA_V2_IRD_ORD_MASK;
1642 			ep->ord = ntohs(mpa_v2_params->ord) &
1643 				MPA_V2_IRD_ORD_MASK;
1644 			PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
1645 			     ep->ord);
1646 			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1647 				if (peer2peer) {
1648 					if (ntohs(mpa_v2_params->ord) &
1649 							MPA_V2_RDMA_WRITE_RTR)
1650 						ep->mpa_attr.p2p_type =
1651 						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1652 					else if (ntohs(mpa_v2_params->ord) &
1653 							MPA_V2_RDMA_READ_RTR)
1654 						ep->mpa_attr.p2p_type =
1655 						FW_RI_INIT_P2PTYPE_READ_REQ;
1656 				}
1657 		}
1658 	} else if (mpa->revision == 1)
1659 		if (peer2peer)
1660 			ep->mpa_attr.p2p_type = p2p_type;
1661 
1662 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1663 	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
1664 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1665 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1666 	     ep->mpa_attr.p2p_type);
1667 
1668 	/*
1669 	 * If the endpoint timer already expired, then we ignore
1670 	 * the start request.  process_timeout() will abort
1671 	 * the connection.
1672 	 */
1673 	if (!stop_ep_timer(ep)) {
1674 		__state_set(&ep->com, MPA_REQ_RCVD);
1675 
1676 		/* drive upcall */
1677 		mutex_lock_nested(&ep->parent_ep->com.mutex,
1678 				  SINGLE_DEPTH_NESTING);
1679 		if (ep->parent_ep->com.state != DEAD) {
1680 			if (connect_request_upcall(ep))
1681 				abort_connection(ep, skb, GFP_KERNEL);
1682 		} else {
1683 			abort_connection(ep, skb, GFP_KERNEL);
1684 		}
1685 		mutex_unlock(&ep->parent_ep->com.mutex);
1686 	}
1687 	return;
1688 }
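/*
 * process_mpa_request() is the passive-side counterpart: it accumulates
 * and validates the MPA start request, records the peer's IRD/ORD and
 * peer-to-peer preferences for MPA v2 and, provided the endpoint timer
 * has not already expired, delivers the connect request upcall to the
 * listening endpoint's cm_id.
 */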
1689 
1690 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1691 {
1692 	struct c4iw_ep *ep;
1693 	struct cpl_rx_data *hdr = cplhdr(skb);
1694 	unsigned int dlen = ntohs(hdr->len);
1695 	unsigned int tid = GET_TID(hdr);
1696 	struct tid_info *t = dev->rdev.lldi.tids;
1697 	__u8 status = hdr->status;
1698 	int disconnect = 0;
1699 
1700 	ep = lookup_tid(t, tid);
1701 	if (!ep)
1702 		return 0;
1703 	PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
1704 	skb_pull(skb, sizeof(*hdr));
1705 	skb_trim(skb, dlen);
1706 	mutex_lock(&ep->com.mutex);
1707 
1708 	/* update RX credits */
1709 	update_rx_credits(ep, dlen);
1710 
1711 	switch (ep->com.state) {
1712 	case MPA_REQ_SENT:
1713 		ep->rcv_seq += dlen;
1714 		disconnect = process_mpa_reply(ep, skb);
1715 		break;
1716 	case MPA_REQ_WAIT:
1717 		ep->rcv_seq += dlen;
1718 		process_mpa_request(ep, skb);
1719 		break;
1720 	case FPDU_MODE: {
1721 		struct c4iw_qp_attributes attrs;
1722 		BUG_ON(!ep->com.qp);
1723 		if (status)
1724 			pr_err("%s Unexpected streaming data." \
1725 			       " qpid %u ep %p state %d tid %u status %d\n",
1726 			       __func__, ep->com.qp->wq.sq.qid, ep,
1727 			       ep->com.state, ep->hwtid, status);
1728 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1729 		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1730 			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1731 		disconnect = 1;
1732 		break;
1733 	}
1734 	default:
1735 		break;
1736 	}
1737 	mutex_unlock(&ep->com.mutex);
1738 	if (disconnect)
1739 		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1740 	return 0;
1741 }
1742 
1743 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1744 {
1745 	struct c4iw_ep *ep;
1746 	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
1747 	int release = 0;
1748 	unsigned int tid = GET_TID(rpl);
1749 	struct tid_info *t = dev->rdev.lldi.tids;
1750 
1751 	ep = lookup_tid(t, tid);
1752 	if (!ep) {
1753 		printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1754 		return 0;
1755 	}
1756 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1757 	mutex_lock(&ep->com.mutex);
1758 	switch (ep->com.state) {
1759 	case ABORTING:
1760 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1761 		__state_set(&ep->com, DEAD);
1762 		release = 1;
1763 		break;
1764 	default:
1765 		printk(KERN_ERR "%s ep %p state %d\n",
1766 		     __func__, ep, ep->com.state);
1767 		break;
1768 	}
1769 	mutex_unlock(&ep->com.mutex);
1770 
1771 	if (release)
1772 		release_ep_resources(ep);
1773 	return 0;
1774 }
1775 
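     /*
      * Retry a failed active open by handing the connect off to the
      * firmware via a FW_OFLD_CONNECTION_WR.  Called from act_open_rpl()
      * when the hardware TCAM is full and firmware offloaded connections
      * are enabled.
      */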
1776 static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1777 {
1778 	struct sk_buff *skb;
1779 	struct fw_ofld_connection_wr *req;
1780 	unsigned int mtu_idx;
1781 	int wscale;
1782 	struct sockaddr_in *sin;
1783 	int win;
1784 
1785 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1786 	req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
1787 	memset(req, 0, sizeof(*req));
1788 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1789 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1790 	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1791 				     ep->com.dev->rdev.lldi.ports[0],
1792 				     ep->l2t));
1793 	sin = (struct sockaddr_in *)&ep->com.local_addr;
1794 	req->le.lport = sin->sin_port;
1795 	req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1796 	sin = (struct sockaddr_in *)&ep->com.remote_addr;
1797 	req->le.pport = sin->sin_port;
1798 	req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1799 	req->tcb.t_state_to_astid =
1800 			htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1801 			FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1802 	req->tcb.cplrxdataack_cplpassacceptrpl =
1803 			htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1804 	req->tcb.tx_max = (__force __be32) jiffies;
1805 	req->tcb.rcv_adv = htons(1);
1806 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1807 		 enable_tcp_timestamps,
1808 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
1809 	wscale = compute_wscale(rcv_win);
1810 
1811 	/*
1812 	 * Specify the largest window that will fit in opt0. The
1813 	 * remainder will be specified in the rx_data_ack.
1814 	 */
1815 	win = ep->rcv_win >> 10;
1816 	if (win > RCV_BUFSIZ_M)
1817 		win = RCV_BUFSIZ_M;
1818 
1819 	req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
1820 		(nocong ? NO_CONG_F : 0) |
1821 		KEEP_ALIVE_F |
1822 		DELACK_F |
1823 		WND_SCALE_V(wscale) |
1824 		MSS_IDX_V(mtu_idx) |
1825 		L2T_IDX_V(ep->l2t->idx) |
1826 		TX_CHAN_V(ep->tx_chan) |
1827 		SMAC_SEL_V(ep->smac_idx) |
1828 		DSCP_V(ep->tos >> 2) |
1829 		ULP_MODE_V(ULP_MODE_TCPDDP) |
1830 		RCV_BUFSIZ_V(win));
1831 	req->tcb.opt2 = (__force __be32) (PACE_V(1) |
1832 		TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
1833 		RX_CHANNEL_V(0) |
1834 		CCTRL_ECN_V(enable_ecn) |
1835 		RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
1836 	if (enable_tcp_timestamps)
1837 		req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
1838 	if (enable_tcp_sack)
1839 		req->tcb.opt2 |= (__force __be32)SACK_EN_F;
1840 	if (wscale && enable_tcp_window_scaling)
1841 		req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
1842 	req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
1843 	req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
1844 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
1845 	set_bit(ACT_OFLD_CONN, &ep->com.history);
1846 	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1847 }
1848 
1849 /*
1850  * Return whether a failed active open has allocated a TID
1851  */
1852 static inline int act_open_has_tid(int status)
1853 {
1854 	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1855 	       status != CPL_ERR_ARP_MISS;
1856 }
1857 
1858 /* Returns whether a CPL status conveys negative advice.
1859  */
1860 static int is_neg_adv(unsigned int status)
1861 {
1862 	return status == CPL_ERR_RTX_NEG_ADVICE ||
1863 	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
1864 	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
1865 }
1866 
1867 static char *neg_adv_str(unsigned int status)
1868 {
1869 	switch (status) {
1870 	case CPL_ERR_RTX_NEG_ADVICE:
1871 		return "Retransmit timeout";
1872 	case CPL_ERR_PERSIST_NEG_ADVICE:
1873 		return "Persist timeout";
1874 	case CPL_ERR_KEEPALV_NEG_ADVICE:
1875 		return "Keepalive timeout";
1876 	default:
1877 		return "Unknown";
1878 	}
1879 }
1880 
1881 static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
1882 {
1883 	ep->snd_win = snd_win;
1884 	ep->rcv_win = rcv_win;
1885 	PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
1886 }
1887 
1888 #define ACT_OPEN_RETRY_COUNT 2
1889 
1890 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1891 		     struct dst_entry *dst, struct c4iw_dev *cdev,
1892 		     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
1893 {
1894 	struct neighbour *n;
1895 	int err, step;
1896 	struct net_device *pdev;
1897 
1898 	n = dst_neigh_lookup(dst, peer_ip);
1899 	if (!n)
1900 		return -ENODEV;
1901 
1902 	rcu_read_lock();
1903 	err = -ENOMEM;
1904 	if (n->dev->flags & IFF_LOOPBACK) {
1905 		if (iptype == 4)
1906 			pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
1907 		else if (IS_ENABLED(CONFIG_IPV6))
1908 			for_each_netdev(&init_net, pdev) {
1909 				if (ipv6_chk_addr(&init_net,
1910 						  (struct in6_addr *)peer_ip,
1911 						  pdev, 1))
1912 					break;
1913 			}
1914 		else
1915 			pdev = NULL;
1916 
1917 		if (!pdev) {
1918 			err = -ENODEV;
1919 			goto out;
1920 		}
1921 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1922 					n, pdev, rt_tos2priority(tos));
1923 		if (!ep->l2t)
1924 			goto out;
1925 		ep->mtu = pdev->mtu;
1926 		ep->tx_chan = cxgb4_port_chan(pdev);
1927 		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
1928 						cxgb4_port_viid(pdev));
1929 		step = cdev->rdev.lldi.ntxq /
1930 			cdev->rdev.lldi.nchan;
1931 		ep->txq_idx = cxgb4_port_idx(pdev) * step;
1932 		step = cdev->rdev.lldi.nrxq /
1933 			cdev->rdev.lldi.nchan;
1934 		ep->ctrlq_idx = cxgb4_port_idx(pdev);
1935 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1936 			cxgb4_port_idx(pdev) * step];
1937 		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
1938 		dev_put(pdev);
1939 	} else {
1940 		pdev = get_real_dev(n->dev);
1941 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1942 					n, pdev, 0);
1943 		if (!ep->l2t)
1944 			goto out;
1945 		ep->mtu = dst_mtu(dst);
1946 		ep->tx_chan = cxgb4_port_chan(pdev);
1947 		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
1948 						cxgb4_port_viid(pdev));
1949 		step = cdev->rdev.lldi.ntxq /
1950 			cdev->rdev.lldi.nchan;
1951 		ep->txq_idx = cxgb4_port_idx(pdev) * step;
1952 		ep->ctrlq_idx = cxgb4_port_idx(pdev);
1953 		step = cdev->rdev.lldi.nrxq /
1954 			cdev->rdev.lldi.nchan;
1955 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1956 			cxgb4_port_idx(pdev) * step];
1957 		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
1958 
1959 		if (clear_mpa_v1) {
1960 			ep->retry_with_mpa_v1 = 0;
1961 			ep->tried_with_mpa_v1 = 0;
1962 		}
1963 	}
1964 	err = 0;
1965 out:
1966 	rcu_read_unlock();
1967 
1968 	neigh_release(n);
1969 
1970 	return err;
1971 }
1972 
1973 static int c4iw_reconnect(struct c4iw_ep *ep)
1974 {
1975 	int err = 0;
1976 	struct sockaddr_in *laddr = (struct sockaddr_in *)
1977 				    &ep->com.cm_id->m_local_addr;
1978 	struct sockaddr_in *raddr = (struct sockaddr_in *)
1979 				    &ep->com.cm_id->m_remote_addr;
1980 	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
1981 				      &ep->com.cm_id->m_local_addr;
1982 	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
1983 				      &ep->com.cm_id->m_remote_addr;
1984 	int iptype;
1985 	__u8 *ra;
1986 
1987 	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
1988 	init_timer(&ep->timer);
1989 
1990 	/*
1991 	 * Allocate an active TID to initiate a TCP connection.
1992 	 */
1993 	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
1994 	if (ep->atid == -1) {
1995 		pr_err("%s - cannot alloc atid.\n", __func__);
1996 		err = -ENOMEM;
1997 		goto fail2;
1998 	}
1999 	insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
2000 
2001 	/* find a route */
2002 	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2003 		ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
2004 				     raddr->sin_addr.s_addr, laddr->sin_port,
2005 				     raddr->sin_port, ep->com.cm_id->tos);
2006 		iptype = 4;
2007 		ra = (__u8 *)&raddr->sin_addr;
2008 	} else {
2009 		ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
2010 				      raddr6->sin6_addr.s6_addr,
2011 				      laddr6->sin6_port, raddr6->sin6_port, 0,
2012 				      raddr6->sin6_scope_id);
2013 		iptype = 6;
2014 		ra = (__u8 *)&raddr6->sin6_addr;
2015 	}
2016 	if (!ep->dst) {
2017 		pr_err("%s - cannot find route.\n", __func__);
2018 		err = -EHOSTUNREACH;
2019 		goto fail3;
2020 	}
2021 	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2022 			ep->com.dev->rdev.lldi.adapter_type,
2023 			ep->com.cm_id->tos);
2024 	if (err) {
2025 		pr_err("%s - cannot alloc l2e.\n", __func__);
2026 		goto fail4;
2027 	}
2028 
2029 	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2030 	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2031 	     ep->l2t->idx);
2032 
2033 	state_set(&ep->com, CONNECTING);
2034 	ep->tos = ep->com.cm_id->tos;
2035 
2036 	/* send connect request to rnic */
2037 	err = send_connect(ep);
2038 	if (!err)
2039 		goto out;
2040 
2041 	cxgb4_l2t_release(ep->l2t);
2042 fail4:
2043 	dst_release(ep->dst);
2044 fail3:
2045 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
2046 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2047 fail2:
2048 	/*
2049 	 * Remember to send a notification to the upper layer.  We got
2050 	 * here because the upper layer is not aware that this is a
2051 	 * re-connect attempt, so it is still waiting for the response
2052 	 * to its first connect request.
2053 	 */
2054 	connect_reply_upcall(ep, -ECONNRESET);
2055 	c4iw_put_ep(&ep->com);
2056 out:
2057 	return err;
2058 }
2059 
2060 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2061 {
2062 	struct c4iw_ep *ep;
2063 	struct cpl_act_open_rpl *rpl = cplhdr(skb);
2064 	unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2065 				      ntohl(rpl->atid_status)));
2066 	struct tid_info *t = dev->rdev.lldi.tids;
2067 	int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2068 	struct sockaddr_in *la;
2069 	struct sockaddr_in *ra;
2070 	struct sockaddr_in6 *la6;
2071 	struct sockaddr_in6 *ra6;
2072 
2073 	ep = lookup_atid(t, atid);
2074 	la = (struct sockaddr_in *)&ep->com.local_addr;
2075 	ra = (struct sockaddr_in *)&ep->com.remote_addr;
2076 	la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2077 	ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2078 
2079 	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
2080 	     status, status2errno(status));
2081 
2082 	if (is_neg_adv(status)) {
2083 		PDBG("%s Connection problems for atid %u status %u (%s)\n",
2084 		     __func__, atid, status, neg_adv_str(status));
2085 		ep->stats.connect_neg_adv++;
2086 		mutex_lock(&dev->rdev.stats.lock);
2087 		dev->rdev.stats.neg_adv++;
2088 		mutex_unlock(&dev->rdev.stats.lock);
2089 		return 0;
2090 	}
2091 
2092 	set_bit(ACT_OPEN_RPL, &ep->com.history);
2093 
2094 	/*
2095 	 * Log interesting failures.
2096 	 */
2097 	switch (status) {
2098 	case CPL_ERR_CONN_RESET:
2099 	case CPL_ERR_CONN_TIMEDOUT:
2100 		break;
2101 	case CPL_ERR_TCAM_FULL:
2102 		mutex_lock(&dev->rdev.stats.lock);
2103 		dev->rdev.stats.tcam_full++;
2104 		mutex_unlock(&dev->rdev.stats.lock);
2105 		if (ep->com.local_addr.ss_family == AF_INET &&
2106 		    dev->rdev.lldi.enable_fw_ofld_conn) {
2107 			send_fw_act_open_req(ep,
2108 					     TID_TID_G(AOPEN_ATID_G(
2109 					     ntohl(rpl->atid_status))));
2110 			return 0;
2111 		}
2112 		break;
2113 	case CPL_ERR_CONN_EXIST:
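     		/*
     		 * A connection with the same 4-tuple already exists in
     		 * hardware (likely still draining); release this attempt's
     		 * atid, route and L2T entry and retry a bounded number of
     		 * times.
     		 */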
2114 		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2115 			set_bit(ACT_RETRY_INUSE, &ep->com.history);
2116 			if (ep->com.remote_addr.ss_family == AF_INET6) {
2117 				struct sockaddr_in6 *sin6 =
2118 						(struct sockaddr_in6 *)
2119 						&ep->com.local_addr;
2120 				cxgb4_clip_release(
2121 						ep->com.dev->rdev.lldi.ports[0],
2122 						(const u32 *)
2123 						&sin6->sin6_addr.s6_addr, 1);
2124 			}
2125 			remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
2126 					atid);
2127 			cxgb4_free_atid(t, atid);
2128 			dst_release(ep->dst);
2129 			cxgb4_l2t_release(ep->l2t);
2130 			c4iw_reconnect(ep);
2131 			return 0;
2132 		}
2133 		break;
2134 	default:
2135 		if (ep->com.local_addr.ss_family == AF_INET) {
2136 			pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2137 				atid, status, status2errno(status),
2138 				&la->sin_addr.s_addr, ntohs(la->sin_port),
2139 				&ra->sin_addr.s_addr, ntohs(ra->sin_port));
2140 		} else {
2141 			pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2142 				atid, status, status2errno(status),
2143 				la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2144 				ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2145 		}
2146 		break;
2147 	}
2148 
2149 	connect_reply_upcall(ep, status2errno(status));
2150 	state_set(&ep->com, DEAD);
2151 
2152 	if (ep->com.remote_addr.ss_family == AF_INET6) {
2153 		struct sockaddr_in6 *sin6 =
2154 			(struct sockaddr_in6 *)&ep->com.local_addr;
2155 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2156 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2157 	}
2158 	if (status && act_open_has_tid(status))
2159 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
2160 
2161 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
2162 	cxgb4_free_atid(t, atid);
2163 	dst_release(ep->dst);
2164 	cxgb4_l2t_release(ep->l2t);
2165 	c4iw_put_ep(&ep->com);
2166 
2167 	return 0;
2168 }
2169 
2170 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2171 {
2172 	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2173 	struct tid_info *t = dev->rdev.lldi.tids;
2174 	unsigned int stid = GET_TID(rpl);
2175 	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
2176 
2177 	if (!ep) {
2178 		PDBG("%s stid %d lookup failure!\n", __func__, stid);
2179 		goto out;
2180 	}
2181 	PDBG("%s ep %p status %d error %d\n", __func__, ep,
2182 	     rpl->status, status2errno(rpl->status));
2183 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2184 
2185 out:
2186 	return 0;
2187 }
2188 
2189 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2190 {
2191 	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2192 	struct tid_info *t = dev->rdev.lldi.tids;
2193 	unsigned int stid = GET_TID(rpl);
2194 	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
2195 
2196 	PDBG("%s ep %p\n", __func__, ep);
2197 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
2198 	return 0;
2199 }
2200 
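     /*
      * Build and send the CPL_PASS_ACCEPT_RPL for a new passive
      * connection, encoding the MSS index, window scale, receive window
      * and queue selections into the opt0/opt2 TCB fields.
      */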
2201 static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2202 		      struct cpl_pass_accept_req *req)
2203 {
2204 	struct cpl_pass_accept_rpl *rpl;
2205 	unsigned int mtu_idx;
2206 	u64 opt0;
2207 	u32 opt2;
2208 	int wscale;
2209 	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2210 	int win;
2211 	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2212 
2213 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2214 	BUG_ON(skb_cloned(skb));
2215 
2216 	skb_get(skb);
2217 	rpl = cplhdr(skb);
2218 	if (!is_t4(adapter_type)) {
2219 		skb_trim(skb, roundup(sizeof(*rpl5), 16));
2220 		rpl5 = (void *)rpl;
2221 		INIT_TP_WR(rpl5, ep->hwtid);
2222 	} else {
2223 		skb_trim(skb, sizeof(*rpl));
2224 		INIT_TP_WR(rpl, ep->hwtid);
2225 	}
2226 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2227 						    ep->hwtid));
2228 
2229 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2230 		 enable_tcp_timestamps && req->tcpopt.tstamp,
2231 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
2232 	wscale = compute_wscale(rcv_win);
2233 
2234 	/*
2235 	 * Specify the largest window that will fit in opt0. The
2236 	 * remainder will be specified in the rx_data_ack.
2237 	 */
2238 	win = ep->rcv_win >> 10;
2239 	if (win > RCV_BUFSIZ_M)
2240 		win = RCV_BUFSIZ_M;
2241 	opt0 = (nocong ? NO_CONG_F : 0) |
2242 	       KEEP_ALIVE_F |
2243 	       DELACK_F |
2244 	       WND_SCALE_V(wscale) |
2245 	       MSS_IDX_V(mtu_idx) |
2246 	       L2T_IDX_V(ep->l2t->idx) |
2247 	       TX_CHAN_V(ep->tx_chan) |
2248 	       SMAC_SEL_V(ep->smac_idx) |
2249 	       DSCP_V(ep->tos >> 2) |
2250 	       ULP_MODE_V(ULP_MODE_TCPDDP) |
2251 	       RCV_BUFSIZ_V(win);
2252 	opt2 = RX_CHANNEL_V(0) |
2253 	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2254 
2255 	if (enable_tcp_timestamps && req->tcpopt.tstamp)
2256 		opt2 |= TSTAMPS_EN_F;
2257 	if (enable_tcp_sack && req->tcpopt.sack)
2258 		opt2 |= SACK_EN_F;
2259 	if (wscale && enable_tcp_window_scaling)
2260 		opt2 |= WND_SCALE_EN_F;
2261 	if (enable_ecn) {
2262 		const struct tcphdr *tcph;
2263 		u32 hlen = ntohl(req->hdr_len);
2264 
2265 		if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2266 			tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2267 				IP_HDR_LEN_G(hlen);
2268 		else
2269 			tcph = (const void *)(req + 1) +
2270 				T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2271 		if (tcph->ece && tcph->cwr)
2272 			opt2 |= CCTRL_ECN_V(1);
2273 	}
2274 	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2275 		u32 isn = (prandom_u32() & ~7UL) - 1;
2276 		opt2 |= T5_OPT_2_VALID_F;
2277 		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2278 		opt2 |= T5_ISS_F;
2279 		rpl5 = (void *)rpl;
2280 		memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2281 		if (peer2peer)
2282 			isn += 4;
2283 		rpl5->iss = cpu_to_be32(isn);
2284 		PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
2285 	}
2286 
2287 	rpl->opt0 = cpu_to_be64(opt0);
2288 	rpl->opt2 = cpu_to_be32(opt2);
2289 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2290 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
2291 	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2292 
2293 	return;
2294 }
2295 
2296 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2297 {
2298 	PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
2299 	BUG_ON(skb_cloned(skb));
2300 	skb_trim(skb, sizeof(struct cpl_tid_release));
2301 	release_tid(&dev->rdev, hwtid, skb);
2302 	return;
2303 }
2304 
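     /*
      * Extract the 4-tuple from the ingress SYN embedded in the
      * CPL_PASS_ACCEPT_REQ.  The header-length field layout changed on
      * T6, hence the chip-dependent accessors.
      */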
2305 static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
2306 		       int *iptype, __u8 *local_ip, __u8 *peer_ip,
2307 		       __be16 *local_port, __be16 *peer_port)
2308 {
2309 	int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
2310 		      ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
2311 		      T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2312 	int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
2313 		     IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
2314 		     T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2315 	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
2316 	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
2317 	struct tcphdr *tcp = (struct tcphdr *)
2318 			     ((u8 *)(req + 1) + eth_len + ip_len);
2319 
2320 	if (ip->version == 4) {
2321 		PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
2322 		     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
2323 		     ntohs(tcp->dest));
2324 		*iptype = 4;
2325 		memcpy(peer_ip, &ip->saddr, 4);
2326 		memcpy(local_ip, &ip->daddr, 4);
2327 	} else {
2328 		PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
2329 		     ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
2330 		     ntohs(tcp->dest));
2331 		*iptype = 6;
2332 		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
2333 		memcpy(local_ip, ip6->daddr.s6_addr, 16);
2334 	}
2335 	*peer_port = tcp->source;
2336 	*local_port = tcp->dest;
2337 
2338 	return;
2339 }
2340 
2341 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2342 {
2343 	struct c4iw_ep *child_ep = NULL, *parent_ep;
2344 	struct cpl_pass_accept_req *req = cplhdr(skb);
2345 	unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2346 	struct tid_info *t = dev->rdev.lldi.tids;
2347 	unsigned int hwtid = GET_TID(req);
2348 	struct dst_entry *dst;
2349 	__u8 local_ip[16], peer_ip[16];
2350 	__be16 local_port, peer_port;
2351 	struct sockaddr_in6 *sin6;
2352 	int err;
2353 	u16 peer_mss = ntohs(req->tcpopt.mss);
2354 	int iptype;
2355 	unsigned short hdrs;
2356 	u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2357 
2358 	parent_ep = lookup_stid(t, stid);
2359 	if (!parent_ep) {
2360 		PDBG("%s connect request on invalid stid %d\n", __func__, stid);
2361 		goto reject;
2362 	}
2363 
2364 	if (state_read(&parent_ep->com) != LISTEN) {
2365 		PDBG("%s - listening ep not in LISTEN\n", __func__);
2366 		goto reject;
2367 	}
2368 
2369 	get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
2370 		   local_ip, peer_ip, &local_port, &peer_port);
2371 
2372 	/* Find output route */
2373 	if (iptype == 4)  {
2374 		PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2375 		     , __func__, parent_ep, hwtid,
2376 		     local_ip, peer_ip, ntohs(local_port),
2377 		     ntohs(peer_port), peer_mss);
2378 		dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
2379 				 local_port, peer_port,
2380 				 tos);
2381 	} else {
2382 		PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2383 		     , __func__, parent_ep, hwtid,
2384 		     local_ip, peer_ip, ntohs(local_port),
2385 		     ntohs(peer_port), peer_mss);
2386 		dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
2387 				  PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
2388 				  ((struct sockaddr_in6 *)
2389 				  &parent_ep->com.local_addr)->sin6_scope_id);
2390 	}
2391 	if (!dst) {
2392 		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
2393 		       __func__);
2394 		goto reject;
2395 	}
2396 
2397 	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2398 	if (!child_ep) {
2399 		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
2400 		       __func__);
2401 		dst_release(dst);
2402 		goto reject;
2403 	}
2404 
2405 	err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2406 			parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2407 	if (err) {
2408 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
2409 		       __func__);
2410 		dst_release(dst);
2411 		kfree(child_ep);
2412 		goto reject;
2413 	}
2414 
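     	/*
     	 * Clamp our MTU to the peer's advertised MSS plus header
     	 * overhead (IP + TCP, plus 12 bytes of timestamp option when
     	 * enabled) so the MSS we derive from it never exceeds what the
     	 * peer advertised.
     	 */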
2415 	hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
2416 	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2417 	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2418 		child_ep->mtu = peer_mss + hdrs;
2419 
2420 	state_set(&child_ep->com, CONNECTING);
2421 	child_ep->com.dev = dev;
2422 	child_ep->com.cm_id = NULL;
2423 
2424 	if (iptype == 4) {
2425 		struct sockaddr_in *sin = (struct sockaddr_in *)
2426 			&child_ep->com.local_addr;
2427 
2428 		sin->sin_family = PF_INET;
2429 		sin->sin_port = local_port;
2430 		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2431 
2432 		sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2433 		sin->sin_family = PF_INET;
2434 		sin->sin_port = ((struct sockaddr_in *)
2435 				 &parent_ep->com.local_addr)->sin_port;
2436 		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2437 
2438 		sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2439 		sin->sin_family = PF_INET;
2440 		sin->sin_port = peer_port;
2441 		sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2442 	} else {
2443 		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2444 		sin6->sin6_family = PF_INET6;
2445 		sin6->sin6_port = local_port;
2446 		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2447 
2448 		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2449 		sin6->sin6_family = PF_INET6;
2450 		sin6->sin6_port = ((struct sockaddr_in6 *)
2451 				   &parent_ep->com.local_addr)->sin6_port;
2452 		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2453 
2454 		sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2455 		sin6->sin6_family = PF_INET6;
2456 		sin6->sin6_port = peer_port;
2457 		memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2458 	}
2459 
2460 	c4iw_get_ep(&parent_ep->com);
2461 	child_ep->parent_ep = parent_ep;
2462 	child_ep->tos = tos;
2463 	child_ep->dst = dst;
2464 	child_ep->hwtid = hwtid;
2465 
2466 	PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
2467 	     child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2468 
2469 	init_timer(&child_ep->timer);
2470 	cxgb4_insert_tid(t, child_ep, hwtid);
2471 	insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
2472 	accept_cr(child_ep, skb, req);
2473 	set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2474 	if (iptype == 6) {
2475 		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2476 		cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2477 			       (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2478 	}
2479 	goto out;
2480 reject:
2481 	reject_cr(dev, hwtid, skb);
2482 out:
2483 	return 0;
2484 }
2485 
2486 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2487 {
2488 	struct c4iw_ep *ep;
2489 	struct cpl_pass_establish *req = cplhdr(skb);
2490 	struct tid_info *t = dev->rdev.lldi.tids;
2491 	unsigned int tid = GET_TID(req);
2492 
2493 	ep = lookup_tid(t, tid);
2494 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2495 	ep->snd_seq = be32_to_cpu(req->snd_isn);
2496 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2497 
2498 	PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
2499 	     ntohs(req->tcp_opt));
2500 
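     	/* Derive the effective MSS from the TCP options reported in the CPL. */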
2501 	set_emss(ep, ntohs(req->tcp_opt));
2502 
2503 	dst_confirm(ep->dst);
2504 	state_set(&ep->com, MPA_REQ_WAIT);
2505 	start_ep_timer(ep);
2506 	send_flowc(ep, skb);
2507 	set_bit(PASS_ESTAB, &ep->com.history);
2508 
2509 	return 0;
2510 }
2511 
2512 static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2513 {
2514 	struct cpl_peer_close *hdr = cplhdr(skb);
2515 	struct c4iw_ep *ep;
2516 	struct c4iw_qp_attributes attrs;
2517 	int disconnect = 1;
2518 	int release = 0;
2519 	struct tid_info *t = dev->rdev.lldi.tids;
2520 	unsigned int tid = GET_TID(hdr);
2521 	int ret;
2522 
2523 	ep = lookup_tid(t, tid);
2524 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2525 	dst_confirm(ep->dst);
2526 
2527 	set_bit(PEER_CLOSE, &ep->com.history);
2528 	mutex_lock(&ep->com.mutex);
2529 	switch (ep->com.state) {
2530 	case MPA_REQ_WAIT:
2531 		__state_set(&ep->com, CLOSING);
2532 		break;
2533 	case MPA_REQ_SENT:
2534 		__state_set(&ep->com, CLOSING);
2535 		connect_reply_upcall(ep, -ECONNRESET);
2536 		break;
2537 	case MPA_REQ_RCVD:
2538 
2539 		/*
2540 		 * We're gonna mark this puppy DEAD, but keep
2541 		 * the reference on it until the ULP accepts or
2542 		 * rejects the CR. Also wake up anyone waiting
2543 		 * in rdma connection migration (see c4iw_accept_cr()).
2544 		 */
2545 		__state_set(&ep->com, CLOSING);
2546 		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
2547 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2548 		break;
2549 	case MPA_REP_SENT:
2550 		__state_set(&ep->com, CLOSING);
2551 		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
2552 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2553 		break;
2554 	case FPDU_MODE:
2555 		start_ep_timer(ep);
2556 		__state_set(&ep->com, CLOSING);
2557 		attrs.next_state = C4IW_QP_STATE_CLOSING;
2558 		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2559 				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2560 		if (ret != -ECONNRESET) {
2561 			peer_close_upcall(ep);
2562 			disconnect = 1;
2563 		}
2564 		break;
2565 	case ABORTING:
2566 		disconnect = 0;
2567 		break;
2568 	case CLOSING:
2569 		__state_set(&ep->com, MORIBUND);
2570 		disconnect = 0;
2571 		break;
2572 	case MORIBUND:
2573 		(void)stop_ep_timer(ep);
2574 		if (ep->com.cm_id && ep->com.qp) {
2575 			attrs.next_state = C4IW_QP_STATE_IDLE;
2576 			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2577 				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2578 		}
2579 		close_complete_upcall(ep, 0);
2580 		__state_set(&ep->com, DEAD);
2581 		release = 1;
2582 		disconnect = 0;
2583 		break;
2584 	case DEAD:
2585 		disconnect = 0;
2586 		break;
2587 	default:
2588 		BUG_ON(1);
2589 	}
2590 	mutex_unlock(&ep->com.mutex);
2591 	if (disconnect)
2592 		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2593 	if (release)
2594 		release_ep_resources(ep);
2595 	return 0;
2596 }
2597 
2598 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2599 {
2600 	struct cpl_abort_req_rss *req = cplhdr(skb);
2601 	struct c4iw_ep *ep;
2602 	struct cpl_abort_rpl *rpl;
2603 	struct sk_buff *rpl_skb;
2604 	struct c4iw_qp_attributes attrs;
2605 	int ret;
2606 	int release = 0;
2607 	struct tid_info *t = dev->rdev.lldi.tids;
2608 	unsigned int tid = GET_TID(req);
2609 
2610 	ep = lookup_tid(t, tid);
2611 	if (is_neg_adv(req->status)) {
2612 		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
2613 		     __func__, ep->hwtid, req->status,
2614 		     neg_adv_str(req->status));
2615 		ep->stats.abort_neg_adv++;
2616 		mutex_lock(&dev->rdev.stats.lock);
2617 		dev->rdev.stats.neg_adv++;
2618 		mutex_unlock(&dev->rdev.stats.lock);
2619 		return 0;
2620 	}
2621 	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2622 	     ep->com.state);
2623 	set_bit(PEER_ABORT, &ep->com.history);
2624 
2625 	/*
2626 	 * Wake up any threads in rdma_init() or rdma_fini().
2627 	 * However, this is not needed if the com state is just
2628 	 * MPA_REQ_SENT.
2629 	 */
2630 	if (ep->com.state != MPA_REQ_SENT)
2631 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2632 
2633 	mutex_lock(&ep->com.mutex);
2634 	switch (ep->com.state) {
2635 	case CONNECTING:
2636 		break;
2637 	case MPA_REQ_WAIT:
2638 		(void)stop_ep_timer(ep);
2639 		break;
2640 	case MPA_REQ_SENT:
2641 		(void)stop_ep_timer(ep);
2642 		if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
2643 			connect_reply_upcall(ep, -ECONNRESET);
2644 		else {
2645 			/*
2646 			 * We don't send a notification upwards because we
2647 			 * want to retry with MPA v1 without the upper
2648 			 * layers even knowing it.
2649 			 *
2650 			 * Do some housekeeping so we can re-initiate the
2651 			 * connection.
2652 			 */
2653 			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
2654 			     mpa_rev);
2655 			ep->retry_with_mpa_v1 = 1;
2656 		}
2657 		break;
2658 	case MPA_REP_SENT:
2659 		break;
2660 	case MPA_REQ_RCVD:
2661 		break;
2662 	case MORIBUND:
2663 	case CLOSING:
2664 		stop_ep_timer(ep);
2665 		/*FALLTHROUGH*/
2666 	case FPDU_MODE:
2667 		if (ep->com.cm_id && ep->com.qp) {
2668 			attrs.next_state = C4IW_QP_STATE_ERROR;
2669 			ret = c4iw_modify_qp(ep->com.qp->rhp,
2670 				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2671 				     &attrs, 1);
2672 			if (ret)
2673 				printk(KERN_ERR MOD
2674 				       "%s - qp <- error failed!\n",
2675 				       __func__);
2676 		}
2677 		peer_abort_upcall(ep);
2678 		break;
2679 	case ABORTING:
2680 		break;
2681 	case DEAD:
2682 		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2683 		mutex_unlock(&ep->com.mutex);
2684 		return 0;
2685 	default:
2686 		BUG_ON(1);
2687 		break;
2688 	}
2689 	dst_confirm(ep->dst);
2690 	if (ep->com.state != ABORTING) {
2691 		__state_set(&ep->com, DEAD);
2692 		/* we don't release if we want to retry with mpa_v1 */
2693 		if (!ep->retry_with_mpa_v1)
2694 			release = 1;
2695 	}
2696 	mutex_unlock(&ep->com.mutex);
2697 
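     	/*
     	 * Acknowledge the abort with a CPL_ABORT_RPL, asking the
     	 * hardware not to send an RST (CPL_ABORT_NO_RST).
     	 */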
2698 	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
2699 	if (!rpl_skb) {
2700 		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
2701 		       __func__);
2702 		release = 1;
2703 		goto out;
2704 	}
2705 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
2706 	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
2707 	INIT_TP_WR(rpl, ep->hwtid);
2708 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
2709 	rpl->cmd = CPL_ABORT_NO_RST;
2710 	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2711 out:
2712 	if (release)
2713 		release_ep_resources(ep);
2714 	else if (ep->retry_with_mpa_v1) {
2715 		if (ep->com.remote_addr.ss_family == AF_INET6) {
2716 			struct sockaddr_in6 *sin6 =
2717 					(struct sockaddr_in6 *)
2718 					&ep->com.local_addr;
2719 			cxgb4_clip_release(
2720 					ep->com.dev->rdev.lldi.ports[0],
2721 					(const u32 *)&sin6->sin6_addr.s6_addr,
2722 					1);
2723 		}
2724 		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
2725 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
2726 		dst_release(ep->dst);
2727 		cxgb4_l2t_release(ep->l2t);
2728 		c4iw_reconnect(ep);
2729 	}
2730 
2731 	return 0;
2732 }
2733 
2734 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2735 {
2736 	struct c4iw_ep *ep;
2737 	struct c4iw_qp_attributes attrs;
2738 	struct cpl_close_con_rpl *rpl = cplhdr(skb);
2739 	int release = 0;
2740 	struct tid_info *t = dev->rdev.lldi.tids;
2741 	unsigned int tid = GET_TID(rpl);
2742 
2743 	ep = lookup_tid(t, tid);
2744 
2745 	BUG_ON(!ep);
2746 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2747 
2748 	/* The cm_id may be null if we failed to connect */
2749 	mutex_lock(&ep->com.mutex);
2750 	switch (ep->com.state) {
2751 	case CLOSING:
2752 		__state_set(&ep->com, MORIBUND);
2753 		break;
2754 	case MORIBUND:
2755 		(void)stop_ep_timer(ep);
2756 		if ((ep->com.cm_id) && (ep->com.qp)) {
2757 			attrs.next_state = C4IW_QP_STATE_IDLE;
2758 			c4iw_modify_qp(ep->com.qp->rhp,
2759 					     ep->com.qp,
2760 					     C4IW_QP_ATTR_NEXT_STATE,
2761 					     &attrs, 1);
2762 		}
2763 		close_complete_upcall(ep, 0);
2764 		__state_set(&ep->com, DEAD);
2765 		release = 1;
2766 		break;
2767 	case ABORTING:
2768 	case DEAD:
2769 		break;
2770 	default:
2771 		BUG_ON(1);
2772 		break;
2773 	}
2774 	mutex_unlock(&ep->com.mutex);
2775 	if (release)
2776 		release_ep_resources(ep);
2777 	return 0;
2778 }
2779 
2780 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2781 {
2782 	struct cpl_rdma_terminate *rpl = cplhdr(skb);
2783 	struct tid_info *t = dev->rdev.lldi.tids;
2784 	unsigned int tid = GET_TID(rpl);
2785 	struct c4iw_ep *ep;
2786 	struct c4iw_qp_attributes attrs;
2787 
2788 	ep = lookup_tid(t, tid);
2789 	BUG_ON(!ep);
2790 
2791 	if (ep && ep->com.qp) {
2792 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2793 		       ep->com.qp->wq.sq.qid);
2794 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2795 		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2796 			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2797 	} else
2798 		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2799 
2800 	return 0;
2801 }
2802 
2803 /*
2804  * Upcall from the adapter indicating data has been transmitted.
2805  * For us it's just the single MPA request or reply.  We can now free
2806  * the skb holding the mpa message.
2807  */
2808 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2809 {
2810 	struct c4iw_ep *ep;
2811 	struct cpl_fw4_ack *hdr = cplhdr(skb);
2812 	u8 credits = hdr->credits;
2813 	unsigned int tid = GET_TID(hdr);
2814 	struct tid_info *t = dev->rdev.lldi.tids;
2815 
2816 
2817 	ep = lookup_tid(t, tid);
2818 	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
2819 	if (credits == 0) {
2820 		PDBG("%s 0 credit ack ep %p tid %u state %u\n",
2821 		     __func__, ep, ep->hwtid, state_read(&ep->com));
2822 		return 0;
2823 	}
2824 
2825 	dst_confirm(ep->dst);
2826 	if (ep->mpa_skb) {
2827 		PDBG("%s last streaming msg ack ep %p tid %u state %u "
2828 		     "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
2829 		     state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
2830 		kfree_skb(ep->mpa_skb);
2831 		ep->mpa_skb = NULL;
2832 	}
2833 	return 0;
2834 }
2835 
2836 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2837 {
2838 	int err = 0;
2839 	int disconnect = 0;
2840 	struct c4iw_ep *ep = to_ep(cm_id);
2841 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2842 
2843 	mutex_lock(&ep->com.mutex);
2844 	if (ep->com.state == DEAD) {
2845 		mutex_unlock(&ep->com.mutex);
2846 		c4iw_put_ep(&ep->com);
2847 		return -ECONNRESET;
2848 	}
2849 	set_bit(ULP_REJECT, &ep->com.history);
2850 	BUG_ON(ep->com.state != MPA_REQ_RCVD);
2851 	if (mpa_rev == 0)
2852 		abort_connection(ep, NULL, GFP_KERNEL);
2853 	else {
2854 		err = send_mpa_reject(ep, pdata, pdata_len);
2855 		disconnect = 1;
2856 	}
2857 	mutex_unlock(&ep->com.mutex);
2858 	if (disconnect)
2859 		err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2860 	c4iw_put_ep(&ep->com);
2861 	return 0;
2862 }
2863 
2864 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2865 {
2866 	int err;
2867 	struct c4iw_qp_attributes attrs;
2868 	enum c4iw_qp_attr_mask mask;
2869 	struct c4iw_ep *ep = to_ep(cm_id);
2870 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2871 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2872 
2873 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2874 
2875 	mutex_lock(&ep->com.mutex);
2876 	if (ep->com.state == DEAD) {
2877 		err = -ECONNRESET;
2878 		goto err;
2879 	}
2880 
2881 	BUG_ON(ep->com.state != MPA_REQ_RCVD);
2882 	BUG_ON(!qp);
2883 
2884 	set_bit(ULP_ACCEPT, &ep->com.history);
2885 	if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
2886 	    (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
2887 		abort_connection(ep, NULL, GFP_KERNEL);
2888 		err = -EINVAL;
2889 		goto err;
2890 	}
2891 
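     	/*
     	 * MPA v2 IRD/ORD negotiation: our ORD must not exceed the
     	 * peer's advertised IRD, and our IRD must cover the peer's ORD.
     	 * With RELAXED_IRD_NEGOTIATION the values are quietly adjusted;
     	 * otherwise the connection is aborted.
     	 */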
2892 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2893 		if (conn_param->ord > ep->ird) {
2894 			if (RELAXED_IRD_NEGOTIATION) {
2895 				ep->ord = ep->ird;
2896 			} else {
2897 				ep->ird = conn_param->ird;
2898 				ep->ord = conn_param->ord;
2899 				send_mpa_reject(ep, conn_param->private_data,
2900 						conn_param->private_data_len);
2901 				abort_connection(ep, NULL, GFP_KERNEL);
2902 				err = -ENOMEM;
2903 				goto err;
2904 			}
2905 		}
2906 		if (conn_param->ird < ep->ord) {
2907 			if (RELAXED_IRD_NEGOTIATION &&
2908 			    ep->ord <= h->rdev.lldi.max_ordird_qp) {
2909 				conn_param->ird = ep->ord;
2910 			} else {
2911 				abort_connection(ep, NULL, GFP_KERNEL);
2912 				err = -ENOMEM;
2913 				goto err;
2914 			}
2915 		}
2916 	}
2917 	ep->ird = conn_param->ird;
2918 	ep->ord = conn_param->ord;
2919 
2920 	if (ep->mpa_attr.version == 1) {
2921 		if (peer2peer && ep->ird == 0)
2922 			ep->ird = 1;
2923 	} else {
2924 		if (peer2peer &&
2925 		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2926 		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
2927 			ep->ird = 1;
2928 	}
2929 
2930 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2931 
2932 	cm_id->add_ref(cm_id);
2933 	ep->com.cm_id = cm_id;
2934 	ep->com.qp = qp;
2935 	ref_qp(ep);
2936 
2937 	/* bind QP to EP and move to RTS */
2938 	attrs.mpa_attr = ep->mpa_attr;
2939 	attrs.max_ird = ep->ird;
2940 	attrs.max_ord = ep->ord;
2941 	attrs.llp_stream_handle = ep;
2942 	attrs.next_state = C4IW_QP_STATE_RTS;
2943 
2944 	/* bind QP and TID with INIT_WR */
2945 	mask = C4IW_QP_ATTR_NEXT_STATE |
2946 			     C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2947 			     C4IW_QP_ATTR_MPA_ATTR |
2948 			     C4IW_QP_ATTR_MAX_IRD |
2949 			     C4IW_QP_ATTR_MAX_ORD;
2950 
2951 	err = c4iw_modify_qp(ep->com.qp->rhp,
2952 			     ep->com.qp, mask, &attrs, 1);
2953 	if (err)
2954 		goto err1;
2955 	err = send_mpa_reply(ep, conn_param->private_data,
2956 			     conn_param->private_data_len);
2957 	if (err)
2958 		goto err1;
2959 
2960 	__state_set(&ep->com, FPDU_MODE);
2961 	established_upcall(ep);
2962 	mutex_unlock(&ep->com.mutex);
2963 	c4iw_put_ep(&ep->com);
2964 	return 0;
2965 err1:
2966 	ep->com.cm_id = NULL;
2967 	abort_connection(ep, NULL, GFP_KERNEL);
2968 	cm_id->rem_ref(cm_id);
2969 err:
2970 	mutex_unlock(&ep->com.mutex);
2971 	c4iw_put_ep(&ep->com);
2972 	return err;
2973 }
2974 
2975 static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
2976 {
2977 	struct in_device *ind;
2978 	int found = 0;
2979 	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
2980 	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
2981 
2982 	ind = in_dev_get(dev->rdev.lldi.ports[0]);
2983 	if (!ind)
2984 		return -EADDRNOTAVAIL;
2985 	for_primary_ifa(ind) {
2986 		laddr->sin_addr.s_addr = ifa->ifa_address;
2987 		raddr->sin_addr.s_addr = ifa->ifa_address;
2988 		found = 1;
2989 		break;
2990 	}
2991 	endfor_ifa(ind);
2992 	in_dev_put(ind);
2993 	return found ? 0 : -EADDRNOTAVAIL;
2994 }
2995 
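     /*
      * Find a usable link-local IPv6 address on the given netdev,
      * skipping addresses that carry any of the banned flags (e.g.
      * still tentative).
      */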
2996 static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
2997 		      unsigned char banned_flags)
2998 {
2999 	struct inet6_dev *idev;
3000 	int err = -EADDRNOTAVAIL;
3001 
3002 	rcu_read_lock();
3003 	idev = __in6_dev_get(dev);
3004 	if (idev != NULL) {
3005 		struct inet6_ifaddr *ifp;
3006 
3007 		read_lock_bh(&idev->lock);
3008 		list_for_each_entry(ifp, &idev->addr_list, if_list) {
3009 			if (ifp->scope == IFA_LINK &&
3010 			    !(ifp->flags & banned_flags)) {
3011 				memcpy(addr, &ifp->addr, 16);
3012 				err = 0;
3013 				break;
3014 			}
3015 		}
3016 		read_unlock_bh(&idev->lock);
3017 	}
3018 	rcu_read_unlock();
3019 	return err;
3020 }
3021 
3022 static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3023 {
3024 	struct in6_addr uninitialized_var(addr);
3025 	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3026 	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3027 
3028 	if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3029 		memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3030 		memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3031 		return 0;
3032 	}
3033 	return -EADDRNOTAVAIL;
3034 }
3035 
3036 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3037 {
3038 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3039 	struct c4iw_ep *ep;
3040 	int err = 0;
3041 	struct sockaddr_in *laddr;
3042 	struct sockaddr_in *raddr;
3043 	struct sockaddr_in6 *laddr6;
3044 	struct sockaddr_in6 *raddr6;
3045 	__u8 *ra;
3046 	int iptype;
3047 
3048 	if ((conn_param->ord > cur_max_read_depth(dev)) ||
3049 	    (conn_param->ird > cur_max_read_depth(dev))) {
3050 		err = -EINVAL;
3051 		goto out;
3052 	}
3053 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3054 	if (!ep) {
3055 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
3056 		err = -ENOMEM;
3057 		goto out;
3058 	}
3059 	init_timer(&ep->timer);
3060 	ep->plen = conn_param->private_data_len;
3061 	if (ep->plen)
3062 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3063 		       conn_param->private_data, ep->plen);
3064 	ep->ird = conn_param->ird;
3065 	ep->ord = conn_param->ord;
3066 
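     	/*
     	 * With peer2peer enabled the RTR exchange may require an
     	 * outstanding RDMA read, so make sure ORD is at least 1.
     	 */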
3067 	if (peer2peer && ep->ord == 0)
3068 		ep->ord = 1;
3069 
3070 	cm_id->add_ref(cm_id);
3071 	ep->com.dev = dev;
3072 	ep->com.cm_id = cm_id;
3073 	ep->com.qp = get_qhp(dev, conn_param->qpn);
3074 	if (!ep->com.qp) {
3075 		PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3076 		err = -EINVAL;
3077 		goto fail1;
3078 	}
3079 	ref_qp(ep);
3080 	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
3081 	     ep->com.qp, cm_id);
3082 
3083 	/*
3084 	 * Allocate an active TID to initiate a TCP connection.
3085 	 */
3086 	ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3087 	if (ep->atid == -1) {
3088 		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
3089 		err = -ENOMEM;
3090 		goto fail1;
3091 	}
3092 	insert_handle(dev, &dev->atid_idr, ep, ep->atid);
3093 
3094 	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3095 	       sizeof(ep->com.local_addr));
3096 	memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3097 	       sizeof(ep->com.remote_addr));
3098 
3099 	laddr = (struct sockaddr_in *)&ep->com.local_addr;
3100 	raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3101 	laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3102 	raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3103 
3104 	if (cm_id->m_remote_addr.ss_family == AF_INET) {
3105 		iptype = 4;
3106 		ra = (__u8 *)&raddr->sin_addr;
3107 
3108 		/*
3109 		 * Handle loopback requests to INADDR_ANY.
3110 		 */
3111 		if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
3112 			err = pick_local_ipaddrs(dev, cm_id);
3113 			if (err)
3114 				goto fail1;
3115 		}
3116 
3117 		/* find a route */
3118 		PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3119 		     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
3120 		     ra, ntohs(raddr->sin_port));
3121 		ep->dst = find_route(dev, laddr->sin_addr.s_addr,
3122 				     raddr->sin_addr.s_addr, laddr->sin_port,
3123 				     raddr->sin_port, cm_id->tos);
3124 	} else {
3125 		iptype = 6;
3126 		ra = (__u8 *)&raddr6->sin6_addr;
3127 
3128 		/*
3129 		 * Handle loopback requests to INADDR_ANY.
3130 		 */
3131 		if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3132 			err = pick_local_ip6addrs(dev, cm_id);
3133 			if (err)
3134 				goto fail1;
3135 		}
3136 
3137 		/* find a route */
3138 		PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3139 		     __func__, laddr6->sin6_addr.s6_addr,
3140 		     ntohs(laddr6->sin6_port),
3141 		     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3142 		ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
3143 				      raddr6->sin6_addr.s6_addr,
3144 				      laddr6->sin6_port, raddr6->sin6_port, 0,
3145 				      raddr6->sin6_scope_id);
3146 	}
3147 	if (!ep->dst) {
3148 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
3149 		err = -EHOSTUNREACH;
3150 		goto fail2;
3151 	}
3152 
3153 	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3154 			ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3155 	if (err) {
3156 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
3157 		goto fail3;
3158 	}
3159 
3160 	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3161 		__func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3162 		ep->l2t->idx);
3163 
3164 	state_set(&ep->com, CONNECTING);
3165 	ep->tos = cm_id->tos;
3166 
3167 	/* send connect request to rnic */
3168 	err = send_connect(ep);
3169 	if (!err)
3170 		goto out;
3171 
3172 	cxgb4_l2t_release(ep->l2t);
3173 fail3:
3174 	dst_release(ep->dst);
3175 fail2:
3176 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
3177 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3178 fail1:
3179 	cm_id->rem_ref(cm_id);
3180 	c4iw_put_ep(&ep->com);
3181 out:
3182 	return err;
3183 }
3184 
3185 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3186 {
3187 	int err;
3188 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3189 				    &ep->com.local_addr;
3190 
3191 	if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3192 		err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3193 				     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3194 		if (err)
3195 			return err;
3196 	}
3197 	c4iw_init_wr_wait(&ep->com.wr_wait);
3198 	err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3199 				   ep->stid, &sin6->sin6_addr,
3200 				   sin6->sin6_port,
3201 				   ep->com.dev->rdev.lldi.rxq_ids[0]);
3202 	if (!err)
3203 		err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3204 					  &ep->com.wr_wait,
3205 					  0, 0, __func__);
3206 	else if (err > 0)
3207 		err = net_xmit_errno(err);
3208 	if (err) {
3209 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3210 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3211 		pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3212 		       err, ep->stid,
3213 		       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3214 	}
3215 	return err;
3216 }
3217 
3218 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3219 {
3220 	int err;
3221 	struct sockaddr_in *sin = (struct sockaddr_in *)
3222 				  &ep->com.local_addr;
3223 
3224 	if (dev->rdev.lldi.enable_fw_ofld_conn) {
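     		/*
     		 * cxgb4_create_server_filter() may transiently return
     		 * -EBUSY; back off for ~100us and retry unless the
     		 * adapter has hit a fatal error.
     		 */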
3225 		do {
3226 			err = cxgb4_create_server_filter(
3227 				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3228 				sin->sin_addr.s_addr, sin->sin_port, 0,
3229 				ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3230 			if (err == -EBUSY) {
3231 				if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3232 					err = -EIO;
3233 					break;
3234 				}
3235 				set_current_state(TASK_UNINTERRUPTIBLE);
3236 				schedule_timeout(usecs_to_jiffies(100));
3237 			}
3238 		} while (err == -EBUSY);
3239 	} else {
3240 		c4iw_init_wr_wait(&ep->com.wr_wait);
3241 		err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3242 				ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3243 				0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3244 		if (!err)
3245 			err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3246 						  &ep->com.wr_wait,
3247 						  0, 0, __func__);
3248 		else if (err > 0)
3249 			err = net_xmit_errno(err);
3250 	}
3251 	if (err)
3252 		pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
3253 		       , err, ep->stid,
3254 		       &sin->sin_addr, ntohs(sin->sin_port));
3255 	return err;
3256 }
3257 
3258 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3259 {
3260 	int err = 0;
3261 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3262 	struct c4iw_listen_ep *ep;
3263 
3264 	might_sleep();
3265 
3266 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3267 	if (!ep) {
3268 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
3269 		err = -ENOMEM;
3270 		goto fail1;
3271 	}
3272 	PDBG("%s ep %p\n", __func__, ep);
3273 	cm_id->add_ref(cm_id);
3274 	ep->com.cm_id = cm_id;
3275 	ep->com.dev = dev;
3276 	ep->backlog = backlog;
3277 	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3278 	       sizeof(ep->com.local_addr));
3279 
3280 	/*
3281 	 * Allocate a server TID.
3282 	 */
3283 	if (dev->rdev.lldi.enable_fw_ofld_conn &&
3284 	    ep->com.local_addr.ss_family == AF_INET)
3285 		ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3286 					     cm_id->m_local_addr.ss_family, ep);
3287 	else
3288 		ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3289 					    cm_id->m_local_addr.ss_family, ep);
3290 
3291 	if (ep->stid == -1) {
3292 		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
3293 		err = -ENOMEM;
3294 		goto fail2;
3295 	}
3296 	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
3297 
3298 	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3299 	       sizeof(ep->com.local_addr));
3300 
3301 	state_set(&ep->com, LISTEN);
3302 	if (ep->com.local_addr.ss_family == AF_INET)
3303 		err = create_server4(dev, ep);
3304 	else
3305 		err = create_server6(dev, ep);
3306 	if (!err) {
3307 		cm_id->provider_data = ep;
3308 		goto out;
3309 	}
3310 
3311 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3312 			ep->com.local_addr.ss_family);
3313 fail2:
3314 	cm_id->rem_ref(cm_id);
3315 	c4iw_put_ep(&ep->com);
3316 fail1:
3317 out:
3318 	return err;
3319 }
3320 
3321 int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3322 {
3323 	int err;
3324 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3325 
3326 	PDBG("%s ep %p\n", __func__, ep);
3327 
3328 	might_sleep();
3329 	state_set(&ep->com, DEAD);
3330 	if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3331 	    ep->com.local_addr.ss_family == AF_INET) {
3332 		err = cxgb4_remove_server_filter(
3333 			ep->com.dev->rdev.lldi.ports[0], ep->stid,
3334 			ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3335 	} else {
3336 		struct sockaddr_in6 *sin6;
3337 		c4iw_init_wr_wait(&ep->com.wr_wait);
3338 		err = cxgb4_remove_server(
3339 				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3340 				ep->com.dev->rdev.lldi.rxq_ids[0], 0);
3341 		if (err)
3342 			goto done;
3343 		err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
3344 					  0, 0, __func__);
3345 		sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3346 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3347 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3348 	}
3349 	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
3350 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3351 			ep->com.local_addr.ss_family);
3352 done:
3353 	cm_id->rem_ref(cm_id);
3354 	c4iw_put_ep(&ep->com);
3355 	return err;
3356 }
3357 
3358 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3359 {
3360 	int ret = 0;
3361 	int close = 0;
3362 	int fatal = 0;
3363 	struct c4iw_rdev *rdev;
3364 
3365 	mutex_lock(&ep->com.mutex);
3366 
3367 	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
3368 	     states[ep->com.state], abrupt);
3369 
3370 	rdev = &ep->com.dev->rdev;
3371 	if (c4iw_fatal_error(rdev)) {
3372 		fatal = 1;
3373 		close_complete_upcall(ep, -EIO);
3374 		ep->com.state = DEAD;
3375 	}
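     	/*
     	 * A graceful close half-closes the connection and arms the
     	 * endpoint timer; an abrupt close sends an abort immediately.
     	 */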
3376 	switch (ep->com.state) {
3377 	case MPA_REQ_WAIT:
3378 	case MPA_REQ_SENT:
3379 	case MPA_REQ_RCVD:
3380 	case MPA_REP_SENT:
3381 	case FPDU_MODE:
3382 		close = 1;
3383 		if (abrupt)
3384 			ep->com.state = ABORTING;
3385 		else {
3386 			ep->com.state = CLOSING;
3387 			start_ep_timer(ep);
3388 		}
3389 		set_bit(CLOSE_SENT, &ep->com.flags);
3390 		break;
3391 	case CLOSING:
3392 		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3393 			close = 1;
3394 			if (abrupt) {
3395 				(void)stop_ep_timer(ep);
3396 				ep->com.state = ABORTING;
3397 			} else
3398 				ep->com.state = MORIBUND;
3399 		}
3400 		break;
3401 	case MORIBUND:
3402 	case ABORTING:
3403 	case DEAD:
3404 		PDBG("%s ignoring disconnect ep %p state %u\n",
3405 		     __func__, ep, ep->com.state);
3406 		break;
3407 	default:
3408 		BUG();
3409 		break;
3410 	}
3411 
3412 	if (close) {
3413 		if (abrupt) {
3414 			set_bit(EP_DISC_ABORT, &ep->com.history);
3415 			close_complete_upcall(ep, -ECONNRESET);
3416 			ret = send_abort(ep, NULL, gfp);
3417 		} else {
3418 			set_bit(EP_DISC_CLOSE, &ep->com.history);
3419 			ret = send_halfclose(ep, gfp);
3420 		}
3421 		if (ret)
3422 			fatal = 1;
3423 	}
3424 	mutex_unlock(&ep->com.mutex);
3425 	if (fatal)
3426 		release_ep_resources(ep);
3427 	return ret;
3428 }
3429 
3430 static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3431 			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3432 {
3433 	struct c4iw_ep *ep;
3434 	int atid = be32_to_cpu(req->tid);
3435 
3436 	ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3437 					   (__force u32) req->tid);
3438 	if (!ep)
3439 		return;
3440 
3441 	switch (req->retval) {
3442 	case FW_ENOMEM:
3443 		set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3444 		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3445 			send_fw_act_open_req(ep, atid);
3446 			return;
3447 		}
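     		/* fall through */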
3448 	case FW_EADDRINUSE:
3449 		set_bit(ACT_RETRY_INUSE, &ep->com.history);
3450 		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3451 			send_fw_act_open_req(ep, atid);
3452 			return;
3453 		}
3454 		break;
3455 	default:
3456 		pr_info("%s unexpected ofld conn wr retval %d\n",
3457 		       __func__, req->retval);
3458 		break;
3459 	}
3460 	pr_err("active ofld_connect_wr failure %d atid %d\n",
3461 	       req->retval, atid);
3462 	mutex_lock(&dev->rdev.stats.lock);
3463 	dev->rdev.stats.act_ofld_conn_fails++;
3464 	mutex_unlock(&dev->rdev.stats.lock);
3465 	connect_reply_upcall(ep, status2errno(req->retval));
3466 	state_set(&ep->com, DEAD);
3467 	if (ep->com.remote_addr.ss_family == AF_INET6) {
3468 		struct sockaddr_in6 *sin6 =
3469 			(struct sockaddr_in6 *)&ep->com.local_addr;
3470 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3471 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3472 	}
3473 	remove_handle(dev, &dev->atid_idr, atid);
3474 	cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3475 	dst_release(ep->dst);
3476 	cxgb4_l2t_release(ep->l2t);
3477 	c4iw_put_ep(&ep->com);
3478 }
3479 
3480 static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3481 			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3482 {
3483 	struct sk_buff *rpl_skb;
3484 	struct cpl_pass_accept_req *cpl;
3485 	int ret;
3486 
3487 	rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3488 	BUG_ON(!rpl_skb);
3489 	if (req->retval) {
3490 		PDBG("%s passive open failure %d\n", __func__, req->retval);
3491 		mutex_lock(&dev->rdev.stats.lock);
3492 		dev->rdev.stats.pas_ofld_conn_fails++;
3493 		mutex_unlock(&dev->rdev.stats.lock);
3494 		kfree_skb(rpl_skb);
3495 	} else {
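		/*
		 * The firmware allocated a TID for the connection.  Patch
		 * the opcode/TID into the saved cpl_pass_accept_req and
		 * replay it through the normal passive-open path.
		 */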
3496 		cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3497 		OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3498 					(__force u32) htonl(
3499 					(__force u32) req->tid)));
3500 		ret = pass_accept_req(dev, rpl_skb);
3501 		if (!ret)
3502 			kfree_skb(rpl_skb);
3503 	}
3504 	return;
3505 }
3506 
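/*
 * FW6 messages deferred to the work queue by fw6_msg(): CQE notifications
 * are dispatched to the event handler, and offload-connection WR replies
 * are routed by the TCP state recorded in the reply (active opens are in
 * SYN_SENT, passive opens in SYN_RECV).
 */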
3507 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3508 {
3509 	struct cpl_fw6_msg *rpl = cplhdr(skb);
3510 	struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3511 
3512 	switch (rpl->type) {
3513 	case FW6_TYPE_CQE:
3514 		c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3515 		break;
3516 	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3517 		req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3518 		switch (req->t_state) {
3519 		case TCP_SYN_SENT:
3520 			active_ofld_conn_reply(dev, skb, req);
3521 			break;
3522 		case TCP_SYN_RECV:
3523 			passive_ofld_conn_reply(dev, skb, req);
3524 			break;
3525 		default:
3526 			pr_err("%s unexpected ofld conn wr state %d\n",
3527 			       __func__, req->t_state);
3528 			break;
3529 		}
3530 		break;
3531 	}
3532 	return 0;
3533 }
3534 
3535 static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3536 {
3537 	__be32 l2info;
3538 	__be16 hdr_len, vlantag, len;
3539 	u16 eth_hdr_len;
3540 	int tcp_hdr_len, ip_hdr_len;
3541 	u8 intf;
3542 	struct cpl_rx_pkt *cpl = cplhdr(skb);
3543 	struct cpl_pass_accept_req *req;
3544 	struct tcp_options_received tmp_opt;
3545 	struct c4iw_dev *dev;
3546 	enum chip_type type;
3547 
3548 	dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3549 	/* Store values from cpl_rx_pkt in temporary location. */
3550 	vlantag = cpl->vlan;
3551 	len = cpl->len;
3552 	l2info  = cpl->l2info;
3553 	hdr_len = cpl->hdr_len;
3554 	intf = cpl->iff;
3555 
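	/*
	 * Strip the RSS header and make room at the head of the skb so that
	 * a cpl_pass_accept_req can be pushed back on once the TCP options
	 * from the SYN have been parsed.
	 */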
3556 	__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3557 
3558 	/*
3559 	 * Parse the TCP options from the SYN packet in order to
3560 	 * generate the cpl_pass_accept_req.
3561 	 */
3562 	memset(&tmp_opt, 0, sizeof(tmp_opt));
3563 	tcp_clear_options(&tmp_opt);
3564 	tcp_parse_options(skb, &tmp_opt, 0, NULL);
3565 
3566 	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
3567 	memset(req, 0, sizeof(*req));
3568 	req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3569 			 SYN_MAC_IDX_V(RX_MACIDX_G(
3570 			 be32_to_cpu(l2info))) |
3571 			 SYN_XACT_MATCH_F);
3572 	type = dev->rdev.lldi.adapter_type;
3573 	tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3574 	ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3575 	req->hdr_len =
3576 		cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3577 	if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3578 		eth_hdr_len = is_t4(type) ?
3579 				RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3580 				RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3581 		req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3582 					    IP_HDR_LEN_V(ip_hdr_len) |
3583 					    ETH_HDR_LEN_V(eth_hdr_len));
3584 	} else { /* T6 and later */
3585 		eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3586 		req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3587 					    T6_IP_HDR_LEN_V(ip_hdr_len) |
3588 					    T6_ETH_HDR_LEN_V(eth_hdr_len));
3589 	}
3590 	req->vlan = vlantag;
3591 	req->len = len;
3592 	req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3593 				    PASS_OPEN_TOS_V(tos));
3594 	req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3595 	if (tmp_opt.wscale_ok)
3596 		req->tcpopt.wsf = tmp_opt.snd_wscale;
3597 	req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3598 	if (tmp_opt.sack_ok)
3599 		req->tcpopt.sack = 1;
3600 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3601 	return;
3602 }
3603 
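/*
 * Build and post an FW_OFLD_CONNECTION_WR asking the firmware to create an
 * offloaded connection in TCP_SYN_RECV state.  The skb carrying the
 * synthesized cpl_pass_accept_req is stashed in the WR cookie so that
 * passive_ofld_conn_reply() can replay it once the firmware returns a TID.
 */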
3604 static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3605 				  __be32 laddr, __be16 lport,
3606 				  __be32 raddr, __be16 rport,
3607 				  u32 rcv_isn, u32 filter, u16 window,
3608 				  u32 rss_qid, u8 port_id)
3609 {
3610 	struct sk_buff *req_skb;
3611 	struct fw_ofld_connection_wr *req;
3612 	struct cpl_pass_accept_req *cpl = cplhdr(skb);
3613 	int ret;
3614 
3615 	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
	if (!req_skb) {
		kfree_skb(skb);
		return;
	}
3616 	req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
3617 	memset(req, 0, sizeof(*req));
3618 	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
3619 	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
3620 	req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
3621 	req->le.filter = (__force __be32) filter;
3622 	req->le.lport = lport;
3623 	req->le.pport = rport;
3624 	req->le.u.ipv4.lip = laddr;
3625 	req->le.u.ipv4.pip = raddr;
3626 	req->tcb.rcv_nxt = htonl(rcv_isn + 1);
3627 	req->tcb.rcv_adv = htons(window);
3628 	req->tcb.t_state_to_astid =
3629 		 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
3630 			FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
3631 			FW_OFLD_CONNECTION_WR_ASTID_V(
3632 			PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
3633 
3634 	/*
3635 	 * Store the qid in opt2; the firmware will use it to send us the
3636 	 * WR response.
3637 	 */
3638 	req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
3639 
3640 	/*
3641 	 * Initialize the MSS index in the TCB to 0xF so that the TCB
3642 	 * picks up the correct value when the driver sends
3643 	 * cpl_pass_accept_rpl.  If this were left at 0, TP would ignore
3644 	 * any MSS index > 0.
3645 	 */
3646 	req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
3647 	req->cookie = (uintptr_t)skb;
3648 
3649 	set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
3650 	ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
3651 	if (ret < 0) {
3652 		pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
3653 		       ret);
3654 		kfree_skb(skb);
3655 		kfree_skb(req_skb);
3656 	}
3657 }
3658 
3659 /*
3660  * Handler for CPL_RX_PKT messages.  These arrive when a filter, rather
3661  * than a server entry, is used to redirect a SYN packet.  Packets that
3662  * hit the filter are steered to the offload queue, and the driver then
3663  * tries to establish the connection using a firmware work request
3664  * (FW_OFLD_CONNECTION_WR).
3665  */
3666 static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
3667 {
3668 	int stid;
3669 	unsigned int filter;
3670 	struct ethhdr *eh = NULL;
3671 	struct vlan_ethhdr *vlan_eh = NULL;
3672 	struct iphdr *iph;
3673 	struct tcphdr *tcph;
3674 	struct rss_header *rss = (void *)skb->data;
3675 	struct cpl_rx_pkt *cpl = (void *)skb->data;
3676 	struct cpl_pass_accept_req *req = (void *)(rss + 1);
3677 	struct l2t_entry *e;
3678 	struct dst_entry *dst;
3679 	struct c4iw_ep *lep;
3680 	u16 window;
3681 	struct port_info *pi;
3682 	struct net_device *pdev;
3683 	u16 rss_qid, eth_hdr_len;
3684 	int step;
3685 	u32 tx_chan;
3686 	struct neighbour *neigh;
3687 
3688 	/* Drop all non-SYN packets */
3689 	if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
3690 		goto reject;
3691 
3692 	/*
3693 	 * Drop all packets which did not hit the filter.
3694 	 * Unlikely to happen.
3695 	 */
3696 	if (!(rss->filter_hit && rss->filter_tid))
3697 		goto reject;
3698 
3699 	/*
3700 	 * Calculate the server TID from the filter hit index in cpl_rx_pkt.
3701 	 */
3702 	stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
3703 
3704 	lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
3705 	if (!lep) {
3706 		PDBG("%s connect request on invalid stid %d\n", __func__, stid);
3707 		goto reject;
3708 	}
3709 
3710 	switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
3711 	case CHELSIO_T4:
3712 		eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3713 		break;
3714 	case CHELSIO_T5:
3715 		eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3716 		break;
3717 	case CHELSIO_T6:
3718 		eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3719 		break;
3720 	default:
3721 		pr_err("T%d Chip is not supported\n",
3722 		       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
3723 		goto reject;
3724 	}
3725 
3726 	if (eth_hdr_len == ETH_HLEN) {
3727 		eh = (struct ethhdr *)(req + 1);
3728 		iph = (struct iphdr *)(eh + 1);
3729 	} else {
3730 		vlan_eh = (struct vlan_ethhdr *)(req + 1);
3731 		iph = (struct iphdr *)(vlan_eh + 1);
3732 		skb->vlan_tci = ntohs(cpl->vlan);
3733 	}
3734 
3735 	if (iph->version != 0x4)
3736 		goto reject;
3737 
3738 	tcph = (struct tcphdr *)(iph + 1);
3739 	skb_set_network_header(skb, (void *)iph - (void *)rss);
3740 	skb_set_transport_header(skb, (void *)tcph - (void *)rss);
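	/*
	 * Hold an extra reference on the skb: it is rewritten into a
	 * cpl_pass_accept_req and handed to the firmware work request via
	 * its cookie, while the caller's reference is dropped when rx_pkt()
	 * returns 0.
	 */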
3741 	skb_get(skb);
3742 
3743 	PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
3744 	     ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
3745 	     ntohs(tcph->source), iph->tos);
3746 
3747 	dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
3748 			 iph->tos);
3749 	if (!dst) {
3750 		pr_err("%s - failed to find dst entry!\n",
3751 		       __func__);
3752 		goto reject;
3753 	}
3754 	neigh = dst_neigh_lookup_skb(dst, skb);
3755 
3756 	if (!neigh) {
3757 		pr_err("%s - failed to allocate neigh!\n",
3758 		       __func__);
3759 		goto free_dst;
3760 	}
3761 
3762 	if (neigh->dev->flags & IFF_LOOPBACK) {
3763 		pdev = ip_dev_find(&init_net, iph->daddr);
		if (!pdev) {
			pr_err("%s - failed to find device for loopback address!\n",
			       __func__);
			neigh_release(neigh);
			goto free_dst;
		}
3764 		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
3765 				    pdev, 0);
3766 		pi = (struct port_info *)netdev_priv(pdev);
3767 		tx_chan = cxgb4_port_chan(pdev);
3768 		dev_put(pdev);
3769 	} else {
3770 		pdev = get_real_dev(neigh->dev);
3771 		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
3772 					pdev, 0);
3773 		pi = (struct port_info *)netdev_priv(pdev);
3774 		tx_chan = cxgb4_port_chan(pdev);
3775 	}
3776 	neigh_release(neigh);
3777 	if (!e) {
3778 		pr_err("%s - failed to allocate l2t entry!\n",
3779 		       __func__);
3780 		goto free_dst;
3781 	}
3782 
3783 	step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
3784 	rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
3785 	window = (__force u16) htons((__force u16)tcph->window);
3786 
3787 	/* Calculate the filter portion for the LE region. */
3788 	filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
3789 						    dev->rdev.lldi.ports[0],
3790 						    e));
3791 
3792 	/*
3793 	 * Synthesize the cpl_pass_accept_req.  We have everything except the
3794 	 * TID.  Once the firmware replies with the TID, we update the TID
3795 	 * field in the cpl and pass it through the regular cpl_pass_accept_req path.
3796 	 */
3797 	build_cpl_pass_accept_req(skb, stid, iph->tos);
3798 	send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
3799 			      tcph->source, ntohl(tcph->seq), filter, window,
3800 			      rss_qid, pi->port_id);
3801 	cxgb4_l2t_release(e);
3802 free_dst:
3803 	dst_release(dst);
3804 reject:
3805 	return 0;
3806 }
3807 
3808 /*
3809  * These are the real handlers that are called from a
3810  * work queue.
3811  */
3812 static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
3813 	[CPL_ACT_ESTABLISH] = act_establish,
3814 	[CPL_ACT_OPEN_RPL] = act_open_rpl,
3815 	[CPL_RX_DATA] = rx_data,
3816 	[CPL_ABORT_RPL_RSS] = abort_rpl,
3817 	[CPL_ABORT_RPL] = abort_rpl,
3818 	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
3819 	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
3820 	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
3821 	[CPL_PASS_ESTABLISH] = pass_establish,
3822 	[CPL_PEER_CLOSE] = peer_close,
3823 	[CPL_ABORT_REQ_RSS] = peer_abort,
3824 	[CPL_CLOSE_CON_RPL] = close_con_rpl,
3825 	[CPL_RDMA_TERMINATE] = terminate,
3826 	[CPL_FW4_ACK] = fw4_ack,
3827 	[CPL_FW6_MSG] = deferred_fw6_msg,
3828 	[CPL_RX_PKT] = rx_pkt
3829 };
3830 
3831 static void process_timeout(struct c4iw_ep *ep)
3832 {
3833 	struct c4iw_qp_attributes attrs;
3834 	int abort = 1;
3835 
3836 	mutex_lock(&ep->com.mutex);
3837 	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
3838 	     ep->com.state);
3839 	set_bit(TIMEDOUT, &ep->com.history);
3840 	switch (ep->com.state) {
3841 	case MPA_REQ_SENT:
3842 		__state_set(&ep->com, ABORTING);
3843 		connect_reply_upcall(ep, -ETIMEDOUT);
3844 		break;
3845 	case MPA_REQ_WAIT:
3846 		__state_set(&ep->com, ABORTING);
3847 		break;
3848 	case CLOSING:
3849 	case MORIBUND:
3850 		if (ep->com.cm_id && ep->com.qp) {
3851 			attrs.next_state = C4IW_QP_STATE_ERROR;
3852 			c4iw_modify_qp(ep->com.qp->rhp,
3853 				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
3854 				     &attrs, 1);
3855 		}
3856 		__state_set(&ep->com, ABORTING);
3857 		close_complete_upcall(ep, -ETIMEDOUT);
3858 		break;
3859 	case ABORTING:
3860 	case DEAD:
3861 
3862 		/*
3863 		 * These states are expected if the ep timed out at the same
3864 		 * time as another thread was calling stop_ep_timer().
3865 		 * So we silently do nothing for these states.
3866 		 */
3867 		abort = 0;
3868 		break;
3869 	default:
3870 		WARN(1, "%s unexpected state ep %p tid %u state %u\n",
3871 			__func__, ep, ep->hwtid, ep->com.state);
3872 		abort = 0;
3873 	}
3874 	if (abort)
3875 		abort_connection(ep, NULL, GFP_KERNEL);
3876 	mutex_unlock(&ep->com.mutex);
3877 	c4iw_put_ep(&ep->com);
3878 }
3879 
3880 static void process_timedout_eps(void)
3881 {
3882 	struct c4iw_ep *ep;
3883 
3884 	spin_lock_irq(&timeout_lock);
3885 	while (!list_empty(&timeout_list)) {
3886 		struct list_head *tmp;
3887 
3888 		tmp = timeout_list.next;
3889 		list_del(tmp);
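		/*
		 * Clear the list linkage so ep_timeout() sees this ep as no
		 * longer queued, then drop the lock: process_timeout() takes
		 * the ep mutex and may sleep.
		 */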
3890 		tmp->next = NULL;
3891 		tmp->prev = NULL;
3892 		spin_unlock_irq(&timeout_lock);
3893 		ep = list_entry(tmp, struct c4iw_ep, entry);
3894 		process_timeout(ep);
3895 		spin_lock_irq(&timeout_lock);
3896 	}
3897 	spin_unlock_irq(&timeout_lock);
3898 }
3899 
3900 static void process_work(struct work_struct *work)
3901 {
3902 	struct sk_buff *skb = NULL;
3903 	struct c4iw_dev *dev;
3904 	struct cpl_act_establish *rpl;
3905 	unsigned int opcode;
3906 	int ret;
3907 
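	/*
	 * Timed-out endpoints are drained before the receive queue and again
	 * after each message so that timeout handling keeps up even when the
	 * queue is busy.
	 */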
3908 	process_timedout_eps();
3909 	while ((skb = skb_dequeue(&rxq))) {
3910 		rpl = cplhdr(skb);
3911 		dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3912 		opcode = rpl->ot.opcode;
3913 
3914 		BUG_ON(!work_handlers[opcode]);
3915 		ret = work_handlers[opcode](dev, skb);
3916 		if (!ret)
3917 			kfree_skb(skb);
3918 		process_timedout_eps();
3919 	}
3920 }
3921 
3922 static DECLARE_WORK(skb_work, process_work);
3923 
3924 static void ep_timeout(unsigned long arg)
3925 {
3926 	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
3927 	int kickit = 0;
3928 
3929 	spin_lock(&timeout_lock);
3930 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
3931 		/*
3932 		 * Only insert if it is not already on the list.
3933 		 */
3934 		if (!ep->entry.next) {
3935 			list_add_tail(&ep->entry, &timeout_list);
3936 			kickit = 1;
3937 		}
3938 	}
3939 	spin_unlock(&timeout_lock);
3940 	if (kickit)
3941 		queue_work(workq, &skb_work);
3942 }
3943 
3944 /*
3945  * All CM events are handled on a work queue so they run in a safe (sleepable) context.
3946  */
3947 static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
3948 {
3949 
3950 	/*
3951 	 * Save dev in the skb->cb area.
3952 	 */
3953 	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
3954 
3955 	/*
3956 	 * Queue the skb and schedule the worker thread.
3957 	 */
3958 	skb_queue_tail(&rxq, skb);
3959 	queue_work(workq, &skb_work);
3960 	return 0;
3961 }
3962 
3963 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3964 {
3965 	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
3966 
3967 	if (rpl->status != CPL_ERR_NONE) {
3968 		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
3969 		       "for tid %u\n", rpl->status, GET_TID(rpl));
3970 	}
3971 	kfree_skb(skb);
3972 	return 0;
3973 }
3974 
3975 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3976 {
3977 	struct cpl_fw6_msg *rpl = cplhdr(skb);
3978 	struct c4iw_wr_wait *wr_waitp;
3979 	int ret;
3980 
3981 	PDBG("%s type %u\n", __func__, rpl->type);
3982 
3983 	switch (rpl->type) {
3984 	case FW6_TYPE_WR_RPL:
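		/*
		 * Bits 8..15 of data[0] carry the completion status; data[1]
		 * carries the c4iw_wr_wait pointer supplied with the original
		 * work request.
		 */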
3985 		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
3986 		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
3987 		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
3988 		if (wr_waitp)
3989 			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
3990 		kfree_skb(skb);
3991 		break;
3992 	case FW6_TYPE_CQE:
3993 	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3994 		sched(dev, skb);
3995 		break;
3996 	default:
3997 		printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
3998 		       rpl->type);
3999 		kfree_skb(skb);
4000 		break;
4001 	}
4002 	return 0;
4003 }
4004 
4005 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4006 {
4007 	struct cpl_abort_req_rss *req = cplhdr(skb);
4008 	struct c4iw_ep *ep;
4009 	struct tid_info *t = dev->rdev.lldi.tids;
4010 	unsigned int tid = GET_TID(req);
4011 
4012 	ep = lookup_tid(t, tid);
4013 	if (!ep) {
4014 		printk(KERN_WARNING MOD
4015 		       "Abort on non-existent endpoint, tid %d\n", tid);
4016 		kfree_skb(skb);
4017 		return 0;
4018 	}
4019 	if (is_neg_adv(req->status)) {
4020 		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
4021 		     __func__, ep->hwtid, req->status,
4022 		     neg_adv_str(req->status));
4023 		ep->stats.abort_neg_adv++;
4024 		dev->rdev.stats.neg_adv++;
4025 		kfree_skb(skb);
4026 		return 0;
4027 	}
4028 	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
4029 	     ep->com.state);
4030 
4031 	/*
4032 	 * Wake up any threads waiting in rdma_init() or rdma_fini().
4033 	 * However, if we are on MPAv2 and want to retry with MPAv1,
4034 	 * don't wake up yet.
4035 	 */
4036 	if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
4037 		if (ep->com.state != MPA_REQ_SENT)
4038 			c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
4039 	} else
4040 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
4041 	sched(dev, skb);
4042 	return 0;
4043 }
4044 
4045 /*
4046  * Most upcalls from the T4 Core go to sched() to
4047  * schedule the processing on a work queue.
4048  */
4049 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4050 	[CPL_ACT_ESTABLISH] = sched,
4051 	[CPL_ACT_OPEN_RPL] = sched,
4052 	[CPL_RX_DATA] = sched,
4053 	[CPL_ABORT_RPL_RSS] = sched,
4054 	[CPL_ABORT_RPL] = sched,
4055 	[CPL_PASS_OPEN_RPL] = sched,
4056 	[CPL_CLOSE_LISTSRV_RPL] = sched,
4057 	[CPL_PASS_ACCEPT_REQ] = sched,
4058 	[CPL_PASS_ESTABLISH] = sched,
4059 	[CPL_PEER_CLOSE] = sched,
4060 	[CPL_CLOSE_CON_RPL] = sched,
4061 	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
4062 	[CPL_RDMA_TERMINATE] = sched,
4063 	[CPL_FW4_ACK] = sched,
4064 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
4065 	[CPL_FW6_MSG] = fw6_msg,
4066 	[CPL_RX_PKT] = sched
4067 };
4068 
4069 int __init c4iw_cm_init(void)
4070 {
4071 	spin_lock_init(&timeout_lock);
4072 	skb_queue_head_init(&rxq);
4073 
4074 	workq = create_singlethread_workqueue("iw_cxgb4");
4075 	if (!workq)
4076 		return -ENOMEM;
4077 
4078 	return 0;
4079 }
4080 
4081 void c4iw_cm_term(void)
4082 {
4083 	WARN_ON(!list_empty(&timeout_list));
4084 	flush_workqueue(workq);
4085 	destroy_workqueue(workq);
4086 }
4087