xref: /openbmc/linux/drivers/scsi/cxgbi/libcxgbi.c (revision 81d67439)
1 /*
2  * libcxgbi.c: Chelsio common library for T3/T4 iSCSI driver.
3  *
4  * Copyright (c) 2010 Chelsio Communications, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation.
9  *
10  * Written by: Karen Xie (kxie@chelsio.com)
11  * Written by: Rakesh Ranjan (rranjan@chelsio.com)
12  */
13 
14 #define pr_fmt(fmt)	KBUILD_MODNAME ":%s: " fmt, __func__
15 
16 #include <linux/skbuff.h>
17 #include <linux/crypto.h>
18 #include <linux/scatterlist.h>
19 #include <linux/pci.h>
20 #include <scsi/scsi.h>
21 #include <scsi/scsi_cmnd.h>
22 #include <scsi/scsi_host.h>
23 #include <linux/if_vlan.h>
24 #include <linux/inet.h>
25 #include <net/dst.h>
26 #include <net/route.h>
27 #include <linux/inetdevice.h>	/* ip_dev_find */
28 #include <net/tcp.h>
29 
30 static unsigned int dbg_level;
31 
32 #include "libcxgbi.h"
33 
34 #define DRV_MODULE_NAME		"libcxgbi"
35 #define DRV_MODULE_DESC		"Chelsio iSCSI driver library"
36 #define DRV_MODULE_VERSION	"0.9.0"
37 #define DRV_MODULE_RELDATE	"Jun. 2010"
38 
39 MODULE_AUTHOR("Chelsio Communications, Inc.");
40 MODULE_DESCRIPTION(DRV_MODULE_DESC);
41 MODULE_VERSION(DRV_MODULE_VERSION);
42 MODULE_LICENSE("GPL");
43 
44 module_param(dbg_level, uint, 0644);
45 MODULE_PARM_DESC(dbg_level, "libiscsi debug level (default=0)");
46 
47 
48 /*
49  * cxgbi device management
50  * maintains a list of the cxgbi devices
51  */
/* Every registered cxgbi_device is linked here through cdev->list_head. */
static LIST_HEAD(cdev_list);
/* Serialises additions to, removals from and walks over cdev_list. */
static DEFINE_MUTEX(cdev_mutex);
54 
55 int cxgbi_device_portmap_create(struct cxgbi_device *cdev, unsigned int base,
56 				unsigned int max_conn)
57 {
58 	struct cxgbi_ports_map *pmap = &cdev->pmap;
59 
60 	pmap->port_csk = cxgbi_alloc_big_mem(max_conn *
61 					     sizeof(struct cxgbi_sock *),
62 					     GFP_KERNEL);
63 	if (!pmap->port_csk) {
64 		pr_warn("cdev 0x%p, portmap OOM %u.\n", cdev, max_conn);
65 		return -ENOMEM;
66 	}
67 
68 	pmap->max_connect = max_conn;
69 	pmap->sport_base = base;
70 	spin_lock_init(&pmap->lock);
71 	return 0;
72 }
73 EXPORT_SYMBOL_GPL(cxgbi_device_portmap_create);
74 
75 void cxgbi_device_portmap_cleanup(struct cxgbi_device *cdev)
76 {
77 	struct cxgbi_ports_map *pmap = &cdev->pmap;
78 	struct cxgbi_sock *csk;
79 	int i;
80 
81 	for (i = 0; i < pmap->max_connect; i++) {
82 		if (pmap->port_csk[i]) {
83 			csk = pmap->port_csk[i];
84 			pmap->port_csk[i] = NULL;
85 			log_debug(1 << CXGBI_DBG_SOCK,
86 				"csk 0x%p, cdev 0x%p, offload down.\n",
87 				csk, cdev);
88 			spin_lock_bh(&csk->lock);
89 			cxgbi_sock_set_flag(csk, CTPF_OFFLOAD_DOWN);
90 			cxgbi_sock_closed(csk);
91 			spin_unlock_bh(&csk->lock);
92 			cxgbi_sock_put(csk);
93 		}
94 	}
95 }
96 EXPORT_SYMBOL_GPL(cxgbi_device_portmap_cleanup);
97 
/*
 * Release everything owned by @cdev and free it.
 *
 * Both callers in this file (cxgbi_device_unregister and
 * cxgbi_device_unregister_all) unlink the device from cdev_list before
 * calling here.  Teardown order: SCSI hosts, offloaded sockets in the port
 * map, DDP state, the port map array, then the device structure itself.
 */
static inline void cxgbi_device_destroy(struct cxgbi_device *cdev)
{
	log_debug(1 << CXGBI_DBG_DEV,
		"cdev 0x%p, p# %u.\n", cdev, cdev->nports);
	cxgbi_hbas_remove(cdev);
	cxgbi_device_portmap_cleanup(cdev);
	/* prefer the device-specific DDP cleanup hook when one is installed */
	if (cdev->dev_ddp_cleanup)
		cdev->dev_ddp_cleanup(cdev);
	else
		cxgbi_ddp_cleanup(cdev);
	/*
	 * NOTE(review): cxgbi_ddp_cleanup() may run a second time here when
	 * cdev->ddp is still set; presumably it is reference counted or
	 * idempotent -- confirm against its definition.
	 */
	if (cdev->ddp)
		cxgbi_ddp_cleanup(cdev);
	/* max_connect is only non-zero once a port map has been created */
	if (cdev->pmap.max_connect)
		cxgbi_free_big_mem(cdev->pmap.port_csk);
	kfree(cdev);
}
114 
115 struct cxgbi_device *cxgbi_device_register(unsigned int extra,
116 					   unsigned int nports)
117 {
118 	struct cxgbi_device *cdev;
119 
120 	cdev = kzalloc(sizeof(*cdev) + extra + nports *
121 			(sizeof(struct cxgbi_hba *) +
122 			 sizeof(struct net_device *)),
123 			GFP_KERNEL);
124 	if (!cdev) {
125 		pr_warn("nport %d, OOM.\n", nports);
126 		return NULL;
127 	}
128 	cdev->ports = (struct net_device **)(cdev + 1);
129 	cdev->hbas = (struct cxgbi_hba **)(((char*)cdev->ports) + nports *
130 						sizeof(struct net_device *));
131 	if (extra)
132 		cdev->dd_data = ((char *)cdev->hbas) +
133 				nports * sizeof(struct cxgbi_hba *);
134 	spin_lock_init(&cdev->pmap.lock);
135 
136 	mutex_lock(&cdev_mutex);
137 	list_add_tail(&cdev->list_head, &cdev_list);
138 	mutex_unlock(&cdev_mutex);
139 
140 	log_debug(1 << CXGBI_DBG_DEV,
141 		"cdev 0x%p, p# %u.\n", cdev, nports);
142 	return cdev;
143 }
144 EXPORT_SYMBOL_GPL(cxgbi_device_register);
145 
/**
 * cxgbi_device_unregister - unlink one device from cdev_list and destroy it
 * @cdev: device to remove; it is freed by cxgbi_device_destroy()
 */
void cxgbi_device_unregister(struct cxgbi_device *cdev)
{
	log_debug(1 << CXGBI_DBG_DEV,
		"cdev 0x%p, p# %u,%s.\n",
		cdev, cdev->nports, cdev->nports ? cdev->ports[0]->name : "");
	/* only the list manipulation is done under cdev_mutex */
	mutex_lock(&cdev_mutex);
	list_del(&cdev->list_head);
	mutex_unlock(&cdev_mutex);
	cxgbi_device_destroy(cdev);
}
EXPORT_SYMBOL_GPL(cxgbi_device_unregister);
157 
158 void cxgbi_device_unregister_all(unsigned int flag)
159 {
160 	struct cxgbi_device *cdev, *tmp;
161 
162 	mutex_lock(&cdev_mutex);
163 	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
164 		if ((cdev->flags & flag) == flag) {
165 			log_debug(1 << CXGBI_DBG_DEV,
166 				"cdev 0x%p, p# %u,%s.\n",
167 				cdev, cdev->nports, cdev->nports ?
168 				 cdev->ports[0]->name : "");
169 			list_del(&cdev->list_head);
170 			cxgbi_device_destroy(cdev);
171 		}
172 	}
173 	mutex_unlock(&cdev_mutex);
174 }
175 EXPORT_SYMBOL_GPL(cxgbi_device_unregister_all);
176 
177 struct cxgbi_device *cxgbi_device_find_by_lldev(void *lldev)
178 {
179 	struct cxgbi_device *cdev, *tmp;
180 
181 	mutex_lock(&cdev_mutex);
182 	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
183 		if (cdev->lldev == lldev) {
184 			mutex_unlock(&cdev_mutex);
185 			return cdev;
186 		}
187 	}
188 	mutex_unlock(&cdev_mutex);
189 	log_debug(1 << CXGBI_DBG_DEV,
190 		"lldev 0x%p, NO match found.\n", lldev);
191 	return NULL;
192 }
193 EXPORT_SYMBOL_GPL(cxgbi_device_find_by_lldev);
194 
195 static struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev,
196 							int *port)
197 {
198 	struct net_device *vdev = NULL;
199 	struct cxgbi_device *cdev, *tmp;
200 	int i;
201 
202 	if (ndev->priv_flags & IFF_802_1Q_VLAN) {
203 		vdev = ndev;
204 		ndev = vlan_dev_real_dev(ndev);
205 		log_debug(1 << CXGBI_DBG_DEV,
206 			"vlan dev %s -> %s.\n", vdev->name, ndev->name);
207 	}
208 
209 	mutex_lock(&cdev_mutex);
210 	list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) {
211 		for (i = 0; i < cdev->nports; i++) {
212 			if (ndev == cdev->ports[i]) {
213 				cdev->hbas[i]->vdev = vdev;
214 				mutex_unlock(&cdev_mutex);
215 				if (port)
216 					*port = i;
217 				return cdev;
218 			}
219 		}
220 	}
221 	mutex_unlock(&cdev_mutex);
222 	log_debug(1 << CXGBI_DBG_DEV,
223 		"ndev 0x%p, %s, NO match found.\n", ndev, ndev->name);
224 	return NULL;
225 }
226 
227 void cxgbi_hbas_remove(struct cxgbi_device *cdev)
228 {
229 	int i;
230 	struct cxgbi_hba *chba;
231 
232 	log_debug(1 << CXGBI_DBG_DEV,
233 		"cdev 0x%p, p#%u.\n", cdev, cdev->nports);
234 
235 	for (i = 0; i < cdev->nports; i++) {
236 		chba = cdev->hbas[i];
237 		if (chba) {
238 			cdev->hbas[i] = NULL;
239 			iscsi_host_remove(chba->shost);
240 			pci_dev_put(cdev->pdev);
241 			iscsi_host_free(chba->shost);
242 		}
243 	}
244 }
245 EXPORT_SYMBOL_GPL(cxgbi_hbas_remove);
246 
247 int cxgbi_hbas_add(struct cxgbi_device *cdev, unsigned int max_lun,
248 		unsigned int max_id, struct scsi_host_template *sht,
249 		struct scsi_transport_template *stt)
250 {
251 	struct cxgbi_hba *chba;
252 	struct Scsi_Host *shost;
253 	int i, err;
254 
255 	log_debug(1 << CXGBI_DBG_DEV, "cdev 0x%p, p#%u.\n", cdev, cdev->nports);
256 
257 	for (i = 0; i < cdev->nports; i++) {
258 		shost = iscsi_host_alloc(sht, sizeof(*chba), 1);
259 		if (!shost) {
260 			pr_info("0x%p, p%d, %s, host alloc failed.\n",
261 				cdev, i, cdev->ports[i]->name);
262 			err = -ENOMEM;
263 			goto err_out;
264 		}
265 
266 		shost->transportt = stt;
267 		shost->max_lun = max_lun;
268 		shost->max_id = max_id;
269 		shost->max_channel = 0;
270 		shost->max_cmd_len = 16;
271 
272 		chba = iscsi_host_priv(shost);
273 		chba->cdev = cdev;
274 		chba->ndev = cdev->ports[i];
275 		chba->shost = shost;
276 
277 		log_debug(1 << CXGBI_DBG_DEV,
278 			"cdev 0x%p, p#%d %s: chba 0x%p.\n",
279 			cdev, i, cdev->ports[i]->name, chba);
280 
281 		pci_dev_get(cdev->pdev);
282 		err = iscsi_host_add(shost, &cdev->pdev->dev);
283 		if (err) {
284 			pr_info("cdev 0x%p, p#%d %s, host add failed.\n",
285 				cdev, i, cdev->ports[i]->name);
286 			pci_dev_put(cdev->pdev);
287 			scsi_host_put(shost);
288 			goto  err_out;
289 		}
290 
291 		cdev->hbas[i] = chba;
292 	}
293 
294 	return 0;
295 
296 err_out:
297 	cxgbi_hbas_remove(cdev);
298 	return err;
299 }
300 EXPORT_SYMBOL_GPL(cxgbi_hbas_add);
301 
302 /*
303  * iSCSI offload
304  *
305  * - source port management
306  *   To find a free source port in the port allocation map we use a very simple
307  *   rotor scheme to look for the next free port.
308  *
309  *   If a source port has been specified make sure that it doesn't collide with
310  *   our normal source port allocation map.  If it's outside the range of our
311  *   allocation/deallocation scheme just let them use it.
312  *
313  *   If the source port is outside our allocation range, the caller is
314  *   responsible for keeping track of their port usage.
315  */
/*
 * Reserve a free source port from the device's port map for @csk.
 *
 * On success csk->saddr.sin_port is set (network byte order) to
 * sport_base + slot index, the slot points at @csk, and a reference on @csk
 * is taken on behalf of the map.
 *
 * Returns 0 on success, -EADDRINUSE if @csk already has a source port,
 * -EADDRNOTAVAIL if there is no port map or no free slot.
 */
static int sock_get_port(struct cxgbi_sock *csk)
{
	struct cxgbi_device *cdev = csk->cdev;
	struct cxgbi_ports_map *pmap = &cdev->pmap;
	unsigned int start;
	int idx;

	if (!pmap->max_connect) {
		pr_err("cdev 0x%p, p#%u %s, NO port map.\n",
			   cdev, csk->port_id, cdev->ports[csk->port_id]->name);
		return -EADDRNOTAVAIL;
	}

	if (csk->saddr.sin_port) {
		pr_err("source port NON-ZERO %u.\n",
			ntohs(csk->saddr.sin_port));
		return -EADDRINUSE;
	}

	spin_lock_bh(&pmap->lock);
	if (pmap->used >= pmap->max_connect) {
		spin_unlock_bh(&pmap->lock);
		pr_info("cdev 0x%p, p#%u %s, ALL ports used.\n",
			cdev, csk->port_id, cdev->ports[csk->port_id]->name);
		return -EADDRNOTAVAIL;
	}

	/*
	 * Rotor search: begin with the slot after the last one handed out
	 * and wrap around; the slot at 'start' itself is examined last.
	 */
	start = idx = pmap->next;
	do {
		if (++idx >= pmap->max_connect)
			idx = 0;
		if (!pmap->port_csk[idx]) {
			pmap->used++;
			csk->saddr.sin_port =
				htons(pmap->sport_base + idx);
			pmap->next = idx;
			pmap->port_csk[idx] = csk;
			spin_unlock_bh(&pmap->lock);
			/* reference held by the port map slot */
			cxgbi_sock_get(csk);
			log_debug(1 << CXGBI_DBG_SOCK,
				"cdev 0x%p, p#%u %s, p %u, %u.\n",
				cdev, csk->port_id,
				cdev->ports[csk->port_id]->name,
				pmap->sport_base + idx, pmap->next);
			return 0;
		}
	} while (idx != start);
	spin_unlock_bh(&pmap->lock);

	/* should not happen */
	pr_warn("cdev 0x%p, p#%u %s, next %u?\n",
		cdev, csk->port_id, cdev->ports[csk->port_id]->name,
		pmap->next);
	return -EADDRNOTAVAIL;
}
371 
372 static void sock_put_port(struct cxgbi_sock *csk)
373 {
374 	struct cxgbi_device *cdev = csk->cdev;
375 	struct cxgbi_ports_map *pmap = &cdev->pmap;
376 
377 	if (csk->saddr.sin_port) {
378 		int idx = ntohs(csk->saddr.sin_port) - pmap->sport_base;
379 
380 		csk->saddr.sin_port = 0;
381 		if (idx < 0 || idx >= pmap->max_connect) {
382 			pr_err("cdev 0x%p, p#%u %s, port %u OOR.\n",
383 				cdev, csk->port_id,
384 				cdev->ports[csk->port_id]->name,
385 				ntohs(csk->saddr.sin_port));
386 			return;
387 		}
388 
389 		spin_lock_bh(&pmap->lock);
390 		pmap->port_csk[idx] = NULL;
391 		pmap->used--;
392 		spin_unlock_bh(&pmap->lock);
393 
394 		log_debug(1 << CXGBI_DBG_SOCK,
395 			"cdev 0x%p, p#%u %s, release %u.\n",
396 			cdev, csk->port_id, cdev->ports[csk->port_id]->name,
397 			pmap->sport_base + idx);
398 
399 		cxgbi_sock_put(csk);
400 	}
401 }
402 
403 /*
404  * iscsi tcp connection
405  */
406 void cxgbi_sock_free_cpl_skbs(struct cxgbi_sock *csk)
407 {
408 	if (csk->cpl_close) {
409 		kfree_skb(csk->cpl_close);
410 		csk->cpl_close = NULL;
411 	}
412 	if (csk->cpl_abort_req) {
413 		kfree_skb(csk->cpl_abort_req);
414 		csk->cpl_abort_req = NULL;
415 	}
416 	if (csk->cpl_abort_rpl) {
417 		kfree_skb(csk->cpl_abort_rpl);
418 		csk->cpl_abort_rpl = NULL;
419 	}
420 }
421 EXPORT_SYMBOL_GPL(cxgbi_sock_free_cpl_skbs);
422 
423 static struct cxgbi_sock *cxgbi_sock_create(struct cxgbi_device *cdev)
424 {
425 	struct cxgbi_sock *csk = kzalloc(sizeof(*csk), GFP_NOIO);
426 
427 	if (!csk) {
428 		pr_info("alloc csk %zu failed.\n", sizeof(*csk));
429 		return NULL;
430 	}
431 
432 	if (cdev->csk_alloc_cpls(csk) < 0) {
433 		pr_info("csk 0x%p, alloc cpls failed.\n", csk);
434 		kfree(csk);
435 		return NULL;
436 	}
437 
438 	spin_lock_init(&csk->lock);
439 	kref_init(&csk->refcnt);
440 	skb_queue_head_init(&csk->receive_queue);
441 	skb_queue_head_init(&csk->write_queue);
442 	setup_timer(&csk->retry_timer, NULL, (unsigned long)csk);
443 	rwlock_init(&csk->callback_lock);
444 	csk->cdev = cdev;
445 	csk->flags = 0;
446 	cxgbi_sock_set_state(csk, CTP_CLOSED);
447 
448 	log_debug(1 << CXGBI_DBG_SOCK, "cdev 0x%p, new csk 0x%p.\n", cdev, csk);
449 
450 	return csk;
451 }
452 
453 static struct rtable *find_route_ipv4(struct flowi4 *fl4,
454 				      __be32 saddr, __be32 daddr,
455 				      __be16 sport, __be16 dport, u8 tos)
456 {
457 	struct rtable *rt;
458 
459 	rt = ip_route_output_ports(&init_net, fl4, NULL, daddr, saddr,
460 				   dport, sport, IPPROTO_TCP, tos, 0);
461 	if (IS_ERR(rt))
462 		return NULL;
463 
464 	return rt;
465 }
466 
467 static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
468 {
469 	struct sockaddr_in *daddr = (struct sockaddr_in *)dst_addr;
470 	struct dst_entry *dst;
471 	struct net_device *ndev;
472 	struct cxgbi_device *cdev;
473 	struct rtable *rt = NULL;
474 	struct flowi4 fl4;
475 	struct cxgbi_sock *csk = NULL;
476 	unsigned int mtu = 0;
477 	int port = 0xFFFF;
478 	int err = 0;
479 
480 	if (daddr->sin_family != AF_INET) {
481 		pr_info("address family 0x%x NOT supported.\n",
482 			daddr->sin_family);
483 		err = -EAFNOSUPPORT;
484 		goto err_out;
485 	}
486 
487 	rt = find_route_ipv4(&fl4, 0, daddr->sin_addr.s_addr, 0, daddr->sin_port, 0);
488 	if (!rt) {
489 		pr_info("no route to ipv4 0x%x, port %u.\n",
490 			daddr->sin_addr.s_addr, daddr->sin_port);
491 		err = -ENETUNREACH;
492 		goto err_out;
493 	}
494 	dst = &rt->dst;
495 	ndev = dst_get_neighbour(dst)->dev;
496 
497 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
498 		pr_info("multi-cast route %pI4, port %u, dev %s.\n",
499 			&daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
500 			ndev->name);
501 		err = -ENETUNREACH;
502 		goto rel_rt;
503 	}
504 
505 	if (ndev->flags & IFF_LOOPBACK) {
506 		ndev = ip_dev_find(&init_net, daddr->sin_addr.s_addr);
507 		mtu = ndev->mtu;
508 		pr_info("rt dev %s, loopback -> %s, mtu %u.\n",
509 			dst_get_neighbour(dst)->dev->name, ndev->name, mtu);
510 	}
511 
512 	cdev = cxgbi_device_find_by_netdev(ndev, &port);
513 	if (!cdev) {
514 		pr_info("dst %pI4, %s, NOT cxgbi device.\n",
515 			&daddr->sin_addr.s_addr, ndev->name);
516 		err = -ENETUNREACH;
517 		goto rel_rt;
518 	}
519 	log_debug(1 << CXGBI_DBG_SOCK,
520 		"route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n",
521 		&daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
522 			   port, ndev->name, cdev);
523 
524 	csk = cxgbi_sock_create(cdev);
525 	if (!csk) {
526 		err = -ENOMEM;
527 		goto rel_rt;
528 	}
529 	csk->cdev = cdev;
530 	csk->port_id = port;
531 	csk->mtu = mtu;
532 	csk->dst = dst;
533 	csk->daddr.sin_addr.s_addr = daddr->sin_addr.s_addr;
534 	csk->daddr.sin_port = daddr->sin_port;
535 	csk->daddr.sin_family = daddr->sin_family;
536 	csk->saddr.sin_addr.s_addr = fl4.saddr;
537 
538 	return csk;
539 
540 rel_rt:
541 	ip_rt_put(rt);
542 	if (csk)
543 		cxgbi_sock_closed(csk);
544 err_out:
545 	return ERR_PTR(err);
546 }
547 
/**
 * cxgbi_sock_established - move a socket to CTP_ESTABLISHED
 * @csk: socket whose connection has come up
 * @snd_isn: initial send sequence number; seeds write_seq, snd_nxt, snd_una
 * @opt: not used by this function
 */
void cxgbi_sock_established(struct cxgbi_sock *csk, unsigned int snd_isn,
			unsigned int opt)
{
	csk->write_seq = csk->snd_nxt = csk->snd_una = snd_isn;
	dst_confirm(csk->dst);
	/* order the sequence-number stores before the state change */
	smp_mb();
	cxgbi_sock_set_state(csk, CTP_ESTABLISHED);
}
EXPORT_SYMBOL_GPL(cxgbi_sock_established);
557 
558 static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk)
559 {
560 	log_debug(1 << CXGBI_DBG_SOCK,
561 		"csk 0x%p, state %u, flags 0x%lx, conn 0x%p.\n",
562 		csk, csk->state, csk->flags, csk->user_data);
563 
564 	if (csk->state != CTP_ESTABLISHED) {
565 		read_lock_bh(&csk->callback_lock);
566 		if (csk->user_data)
567 			iscsi_conn_failure(csk->user_data,
568 					ISCSI_ERR_CONN_FAILED);
569 		read_unlock_bh(&csk->callback_lock);
570 	}
571 }
572 
/**
 * cxgbi_sock_closed - release a socket's resources and mark it closed
 * @csk: socket being closed
 *
 * CTPF_ACTIVE_CLOSE_NEEDED is always set.  If the socket is in
 * CTP_ACTIVE_OPEN or already CTP_CLOSED nothing else happens.  Otherwise the
 * source port, the dst entry and the offload resources are released, the
 * state becomes CTP_CLOSED, the iSCSI layer is informed, and one reference
 * on @csk is dropped.
 *
 * Most callers visible in this file invoke this with csk->lock held.
 */
void cxgbi_sock_closed(struct cxgbi_sock *csk)
{
	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
	/* open still in flight, or already torn down: only the flag is set */
	if (csk->state == CTP_ACTIVE_OPEN || csk->state == CTP_CLOSED)
		return;
	if (csk->saddr.sin_port)
		sock_put_port(csk);
	if (csk->dst)
		dst_release(csk->dst);
	csk->cdev->csk_release_offload_resources(csk);
	/* state must be CLOSED before informing iSCSI, which checks it */
	cxgbi_sock_set_state(csk, CTP_CLOSED);
	cxgbi_inform_iscsi_conn_closing(csk);
	cxgbi_sock_put(csk);
}
EXPORT_SYMBOL_GPL(cxgbi_sock_closed);
590 
591 static void need_active_close(struct cxgbi_sock *csk)
592 {
593 	int data_lost;
594 	int close_req = 0;
595 
596 	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
597 		csk, (csk)->state, (csk)->flags, (csk)->tid);
598 	spin_lock_bh(&csk->lock);
599 	dst_confirm(csk->dst);
600 	data_lost = skb_queue_len(&csk->receive_queue);
601 	__skb_queue_purge(&csk->receive_queue);
602 
603 	if (csk->state == CTP_ACTIVE_OPEN)
604 		cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
605 	else if (csk->state == CTP_ESTABLISHED) {
606 		close_req = 1;
607 		cxgbi_sock_set_state(csk, CTP_ACTIVE_CLOSE);
608 	} else if (csk->state == CTP_PASSIVE_CLOSE) {
609 		close_req = 1;
610 		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
611 	}
612 
613 	if (close_req) {
614 		if (data_lost)
615 			csk->cdev->csk_send_abort_req(csk);
616 		else
617 			csk->cdev->csk_send_close_req(csk);
618 	}
619 
620 	spin_unlock_bh(&csk->lock);
621 }
622 
623 void cxgbi_sock_fail_act_open(struct cxgbi_sock *csk, int errno)
624 {
625 	pr_info("csk 0x%p,%u,%lx, %pI4:%u-%pI4:%u, err %d.\n",
626 			csk, csk->state, csk->flags,
627 			&csk->saddr.sin_addr.s_addr, csk->saddr.sin_port,
628 			&csk->daddr.sin_addr.s_addr, csk->daddr.sin_port,
629 			errno);
630 
631 	cxgbi_sock_set_state(csk, CTP_CONNECTING);
632 	csk->err = errno;
633 	cxgbi_sock_closed(csk);
634 }
635 EXPORT_SYMBOL_GPL(cxgbi_sock_fail_act_open);
636 
/**
 * cxgbi_sock_act_open_req_arp_failure - ARP failed for an active open request
 * @handle: not used by this function
 * @skb: the request skb; skb->sk carries the cxgbi_sock pointer
 *
 * If the socket is still in CTP_ACTIVE_OPEN the open is failed with
 * -EHOSTUNREACH.  @skb is always freed.
 */
void cxgbi_sock_act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
	struct cxgbi_sock *csk = (struct cxgbi_sock *)skb->sk;

	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
		csk, (csk)->state, (csk)->flags, (csk)->tid);
	/* hold a reference across the close, which drops one itself */
	cxgbi_sock_get(csk);
	spin_lock_bh(&csk->lock);
	if (csk->state == CTP_ACTIVE_OPEN)
		cxgbi_sock_fail_act_open(csk, -EHOSTUNREACH);
	spin_unlock_bh(&csk->lock);
	cxgbi_sock_put(csk);
	__kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(cxgbi_sock_act_open_req_arp_failure);
652 
653 void cxgbi_sock_rcv_abort_rpl(struct cxgbi_sock *csk)
654 {
655 	cxgbi_sock_get(csk);
656 	spin_lock_bh(&csk->lock);
657 	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) {
658 		if (!cxgbi_sock_flag(csk, CTPF_ABORT_RPL_RCVD))
659 			cxgbi_sock_set_flag(csk, CTPF_ABORT_RPL_RCVD);
660 		else {
661 			cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_RCVD);
662 			cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_PENDING);
663 			if (cxgbi_sock_flag(csk, CTPF_ABORT_REQ_RCVD))
664 				pr_err("csk 0x%p,%u,0x%lx,%u,ABT_RPL_RSS.\n",
665 					csk, csk->state, csk->flags, csk->tid);
666 			cxgbi_sock_closed(csk);
667 		}
668 	}
669 	spin_unlock_bh(&csk->lock);
670 	cxgbi_sock_put(csk);
671 }
672 EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_abort_rpl);
673 
674 void cxgbi_sock_rcv_peer_close(struct cxgbi_sock *csk)
675 {
676 	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
677 		csk, (csk)->state, (csk)->flags, (csk)->tid);
678 	cxgbi_sock_get(csk);
679 	spin_lock_bh(&csk->lock);
680 
681 	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
682 		goto done;
683 
684 	switch (csk->state) {
685 	case CTP_ESTABLISHED:
686 		cxgbi_sock_set_state(csk, CTP_PASSIVE_CLOSE);
687 		break;
688 	case CTP_ACTIVE_CLOSE:
689 		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
690 		break;
691 	case CTP_CLOSE_WAIT_1:
692 		cxgbi_sock_closed(csk);
693 		break;
694 	case CTP_ABORTING:
695 		break;
696 	default:
697 		pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
698 			csk, csk->state, csk->flags, csk->tid);
699 	}
700 	cxgbi_inform_iscsi_conn_closing(csk);
701 done:
702 	spin_unlock_bh(&csk->lock);
703 	cxgbi_sock_put(csk);
704 }
705 EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_peer_close);
706 
707 void cxgbi_sock_rcv_close_conn_rpl(struct cxgbi_sock *csk, u32 snd_nxt)
708 {
709 	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
710 		csk, (csk)->state, (csk)->flags, (csk)->tid);
711 	cxgbi_sock_get(csk);
712 	spin_lock_bh(&csk->lock);
713 
714 	csk->snd_una = snd_nxt - 1;
715 	if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
716 		goto done;
717 
718 	switch (csk->state) {
719 	case CTP_ACTIVE_CLOSE:
720 		cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_1);
721 		break;
722 	case CTP_CLOSE_WAIT_1:
723 	case CTP_CLOSE_WAIT_2:
724 		cxgbi_sock_closed(csk);
725 		break;
726 	case CTP_ABORTING:
727 		break;
728 	default:
729 		pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
730 			csk, csk->state, csk->flags, csk->tid);
731 	}
732 done:
733 	spin_unlock_bh(&csk->lock);
734 	cxgbi_sock_put(csk);
735 }
736 EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_close_conn_rpl);
737 
738 void cxgbi_sock_rcv_wr_ack(struct cxgbi_sock *csk, unsigned int credits,
739 			   unsigned int snd_una, int seq_chk)
740 {
741 	log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
742 			"csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, snd_una %u,%d.\n",
743 			csk, csk->state, csk->flags, csk->tid, credits,
744 			csk->wr_cred, csk->wr_una_cred, snd_una, seq_chk);
745 
746 	spin_lock_bh(&csk->lock);
747 
748 	csk->wr_cred += credits;
749 	if (csk->wr_una_cred > csk->wr_max_cred - csk->wr_cred)
750 		csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred;
751 
752 	while (credits) {
753 		struct sk_buff *p = cxgbi_sock_peek_wr(csk);
754 
755 		if (unlikely(!p)) {
756 			pr_err("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, empty.\n",
757 				csk, csk->state, csk->flags, csk->tid, credits,
758 				csk->wr_cred, csk->wr_una_cred);
759 			break;
760 		}
761 
762 		if (unlikely(credits < p->csum)) {
763 			pr_warn("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, < %u.\n",
764 				csk, csk->state, csk->flags, csk->tid,
765 				credits, csk->wr_cred, csk->wr_una_cred,
766 				p->csum);
767 			p->csum -= credits;
768 			break;
769 		} else {
770 			cxgbi_sock_dequeue_wr(csk);
771 			credits -= p->csum;
772 			kfree_skb(p);
773 		}
774 	}
775 
776 	cxgbi_sock_check_wr_invariants(csk);
777 
778 	if (seq_chk) {
779 		if (unlikely(before(snd_una, csk->snd_una))) {
780 			pr_warn("csk 0x%p,%u,0x%lx,%u, snd_una %u/%u.",
781 				csk, csk->state, csk->flags, csk->tid, snd_una,
782 				csk->snd_una);
783 			goto done;
784 		}
785 
786 		if (csk->snd_una != snd_una) {
787 			csk->snd_una = snd_una;
788 			dst_confirm(csk->dst);
789 		}
790 	}
791 
792 	if (skb_queue_len(&csk->write_queue)) {
793 		if (csk->cdev->csk_push_tx_frames(csk, 0))
794 			cxgbi_conn_tx_open(csk);
795 	} else
796 		cxgbi_conn_tx_open(csk);
797 done:
798 	spin_unlock_bh(&csk->lock);
799 }
800 EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_wr_ack);
801 
802 static unsigned int cxgbi_sock_find_best_mtu(struct cxgbi_sock *csk,
803 					     unsigned short mtu)
804 {
805 	int i = 0;
806 
807 	while (i < csk->cdev->nmtus - 1 && csk->cdev->mtus[i + 1] <= mtu)
808 		++i;
809 
810 	return i;
811 }
812 
813 unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu)
814 {
815 	unsigned int idx;
816 	struct dst_entry *dst = csk->dst;
817 
818 	csk->advmss = dst_metric_advmss(dst);
819 
820 	if (csk->advmss > pmtu - 40)
821 		csk->advmss = pmtu - 40;
822 	if (csk->advmss < csk->cdev->mtus[0] - 40)
823 		csk->advmss = csk->cdev->mtus[0] - 40;
824 	idx = cxgbi_sock_find_best_mtu(csk, csk->advmss + 40);
825 
826 	return idx;
827 }
828 EXPORT_SYMBOL_GPL(cxgbi_sock_select_mss);
829 
/**
 * cxgbi_sock_skb_entail - append an skb to the socket's write queue
 * @csk: socket owning the write queue
 * @skb: skb to queue; stamped with the socket's current write_seq
 *
 * Uses the unlocked __skb_queue_tail(); the caller visible in this file
 * (cxgbi_sock_send_pdus) holds csk->lock.
 */
void cxgbi_sock_skb_entail(struct cxgbi_sock *csk, struct sk_buff *skb)
{
	cxgbi_skcb_tcp_seq(skb) = csk->write_seq;
	__skb_queue_tail(&csk->write_queue, skb);
}
EXPORT_SYMBOL_GPL(cxgbi_sock_skb_entail);
836 
837 void cxgbi_sock_purge_wr_queue(struct cxgbi_sock *csk)
838 {
839 	struct sk_buff *skb;
840 
841 	while ((skb = cxgbi_sock_dequeue_wr(csk)) != NULL)
842 		kfree_skb(skb);
843 }
844 EXPORT_SYMBOL_GPL(cxgbi_sock_purge_wr_queue);
845 
846 void cxgbi_sock_check_wr_invariants(const struct cxgbi_sock *csk)
847 {
848 	int pending = cxgbi_sock_count_pending_wrs(csk);
849 
850 	if (unlikely(csk->wr_cred + pending != csk->wr_max_cred))
851 		pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n",
852 			csk, csk->tid, csk->wr_cred, pending, csk->wr_max_cred);
853 }
854 EXPORT_SYMBOL_GPL(cxgbi_sock_check_wr_invariants);
855 
/*
 * Queue a chain of PDU skbs (linked through skb->next) on the socket's write
 * queue and push them to the hardware.
 *
 * Returns the number of payload bytes queued (sum of skb->len), or a
 * negative error:
 *   -EAGAIN   socket is not in CTP_ESTABLISHED
 *   csk->err  (or -EPIPE) when the socket has a recorded error
 *   -ENOBUFS  the send window is full
 *   -EINVAL   an skb lacks the reserved headroom or has too many fragments
 *
 * Every error path also falls through the 'done' label, so any frames
 * already on the write queue are still pushed.  When an skb in the middle of
 * the chain is rejected, the skbs queued before it stay queued but the
 * return value is the error, not the byte count.
 */
static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb)
{
	struct cxgbi_device *cdev = csk->cdev;
	struct sk_buff *next;
	int err, copied = 0;

	spin_lock_bh(&csk->lock);

	if (csk->state != CTP_ESTABLISHED) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, EAGAIN.\n",
			csk, csk->state, csk->flags, csk->tid);
		err = -EAGAIN;
		goto out_err;
	}

	if (csk->err) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, EPIPE %d.\n",
			csk, csk->state, csk->flags, csk->tid, csk->err);
		err = -EPIPE;
		goto out_err;
	}

	/* bytes written but not yet acknowledged fill the send window */
	if (csk->write_seq - csk->snd_una >= cdev->snd_win) {
		log_debug(1 << CXGBI_DBG_PDU_TX,
			"csk 0x%p,%u,0x%lx,%u, FULL %u-%u >= %u.\n",
			csk, csk->state, csk->flags, csk->tid, csk->write_seq,
			csk->snd_una, cdev->snd_win);
		err = -ENOBUFS;
		goto out_err;
	}

	while (skb) {
		/* page fragments, plus one if the skb has linear data */
		int frags = skb_shinfo(skb)->nr_frags +
				(skb->len != skb->data_len);

		if (unlikely(skb_headroom(skb) < cdev->skb_tx_rsvd)) {
			pr_err("csk 0x%p, skb head %u < %u.\n",
				csk, skb_headroom(skb), cdev->skb_tx_rsvd);
			err = -EINVAL;
			goto out_err;
		}

		if (frags >= SKB_WR_LIST_SIZE) {
			pr_err("csk 0x%p, frags %d, %u,%u >%u.\n",
				csk, skb_shinfo(skb)->nr_frags, skb->len,
				skb->data_len, (uint)(SKB_WR_LIST_SIZE));
			err = -EINVAL;
			goto out_err;
		}

		/* detach the skb from the chain before queueing it */
		next = skb->next;
		skb->next = NULL;
		cxgbi_skcb_set_flag(skb, SKCBF_TX_NEED_HDR);
		cxgbi_sock_skb_entail(csk, skb);
		copied += skb->len;
		/* sequence space also covers the ULP extra length */
		csk->write_seq += skb->len +
				cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
		skb = next;
	}
done:
	if (likely(skb_queue_len(&csk->write_queue)))
		cdev->csk_push_tx_frames(csk, 1);
	spin_unlock_bh(&csk->lock);
	return copied;

out_err:
	/* prefer the socket's recorded error over a bare -EPIPE */
	if (copied == 0 && err == -EPIPE)
		copied = csk->err ? csk->err : -EPIPE;
	else
		copied = err;
	goto done;
}
930 
931 /*
932  * Direct Data Placement -
933  * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted
934  * final destination host-memory buffers based on the Initiator Task Tag (ITT)
935  * in Data-In or Target Task Tag (TTT) in Data-Out PDUs.
936  * The host memory address is programmed into h/w in the format of pagepod
937  * entries.
938  * The location of the pagepod entry is encoded into ddp tag which is used as
939  * the base for ITT/TTT.
940  */
941 
/*
 * Page sizes supported for DDP, indexed by page-size index.  ddp_page_shift
 * holds log2 of each size (defaults: 4K, 8K, 16K, 64K); ddp_page_order is
 * the matching value handed out by cxgbi_ddp_page_size_factor().  Both
 * tables are rewritten by ddp_adjust_page_table().
 */
static unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4};
static unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16};
/* index of the host PAGE_SIZE in the tables; DDP_PGIDX_MAX means "not set" */
static unsigned char page_idx = DDP_PGIDX_MAX;

/* NOTE(review): not referenced in the visible part of the file */
static unsigned char sw_tag_idx_bits;
static unsigned char sw_tag_age_bits;
948 
949 /*
950  * Direct-Data Placement page size adjustment
951  */
952 static int ddp_adjust_page_table(void)
953 {
954 	int i;
955 	unsigned int base_order, order;
956 
957 	if (PAGE_SIZE < (1UL << ddp_page_shift[0])) {
958 		pr_info("PAGE_SIZE 0x%lx too small, min 0x%lx\n",
959 			PAGE_SIZE, 1UL << ddp_page_shift[0]);
960 		return -EINVAL;
961 	}
962 
963 	base_order = get_order(1UL << ddp_page_shift[0]);
964 	order = get_order(1UL << PAGE_SHIFT);
965 
966 	for (i = 0; i < DDP_PGIDX_MAX; i++) {
967 		/* first is the kernel page size, then just doubling */
968 		ddp_page_order[i] = order - base_order + i;
969 		ddp_page_shift[i] = PAGE_SHIFT + i;
970 	}
971 	return 0;
972 }
973 
974 static int ddp_find_page_index(unsigned long pgsz)
975 {
976 	int i;
977 
978 	for (i = 0; i < DDP_PGIDX_MAX; i++) {
979 		if (pgsz == (1UL << ddp_page_shift[i]))
980 			return i;
981 	}
982 	pr_info("ddp page size %lu not supported.\n", pgsz);
983 	return DDP_PGIDX_MAX;
984 }
985 
986 static void ddp_setup_host_page_size(void)
987 {
988 	if (page_idx == DDP_PGIDX_MAX) {
989 		page_idx = ddp_find_page_index(PAGE_SIZE);
990 
991 		if (page_idx == DDP_PGIDX_MAX) {
992 			pr_info("system PAGE %lu, update hw.\n", PAGE_SIZE);
993 			if (ddp_adjust_page_table() < 0) {
994 				pr_info("PAGE %lu, disable ddp.\n", PAGE_SIZE);
995 				return;
996 			}
997 			page_idx = ddp_find_page_index(PAGE_SIZE);
998 		}
999 		pr_info("system PAGE %lu, ddp idx %u.\n", PAGE_SIZE, page_idx);
1000 	}
1001 }
1002 
1003 void cxgbi_ddp_page_size_factor(int *pgsz_factor)
1004 {
1005 	int i;
1006 
1007 	for (i = 0; i < DDP_PGIDX_MAX; i++)
1008 		pgsz_factor[i] = ddp_page_order[i];
1009 }
1010 EXPORT_SYMBOL_GPL(cxgbi_ddp_page_size_factor);
1011 
1012 /*
1013  * DDP setup & teardown
1014  */
1015 
1016 void cxgbi_ddp_ppod_set(struct cxgbi_pagepod *ppod,
1017 			struct cxgbi_pagepod_hdr *hdr,
1018 			struct cxgbi_gather_list *gl, unsigned int gidx)
1019 {
1020 	int i;
1021 
1022 	memcpy(ppod, hdr, sizeof(*hdr));
1023 	for (i = 0; i < (PPOD_PAGES_MAX + 1); i++, gidx++) {
1024 		ppod->addr[i] = gidx < gl->nelem ?
1025 				cpu_to_be64(gl->phys_addr[gidx]) : 0ULL;
1026 	}
1027 }
1028 EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_set);
1029 
/**
 * cxgbi_ddp_ppod_clear - zero an entire pagepod, header included
 * @ppod: pagepod to clear
 */
void cxgbi_ddp_ppod_clear(struct cxgbi_pagepod *ppod)
{
	memset(ppod, 0, sizeof(*ppod));
}
EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_clear);
1035 
/*
 * Reserve @count consecutive free entries of ddp->gl_map within
 * [@start, @max) and point each of them at @gl.
 *
 * Returns the index of the first reserved entry, or -EBUSY when the range is
 * too small or no free run of @count entries is found.  The search and the
 * reservation happen under ddp->map_lock.
 */
static inline int ddp_find_unused_entries(struct cxgbi_ddp_info *ddp,
					unsigned int start, unsigned int max,
					unsigned int count,
					struct cxgbi_gather_list *gl)
{
	unsigned int i, j, k;

	/*  not enough entries */
	if ((max - start) < count) {
		log_debug(1 << CXGBI_DBG_DDP,
			"NOT enough entries %u+%u < %u.\n", start, count, max);
		return -EBUSY;
	}

	/*
	 * From here on 'max' bounds the candidate start positions.
	 * NOTE(review): with 'i < max' the start position original-max - count
	 * is never tried, so a run ending at the last entry is not used --
	 * confirm whether that is intentional.
	 */
	max -= count;
	spin_lock(&ddp->map_lock);
	for (i = start; i < max;) {
		/* j counts the free entries found in the window starting at i */
		for (j = 0, k = i; j < count; j++, k++) {
			if (ddp->gl_map[k])
				break;
		}
		if (j == count) {
			for (j = 0, k = i; j < count; j++, k++)
				ddp->gl_map[k] = gl;
			spin_unlock(&ddp->map_lock);
			return i;
		}
		/* skip past the occupied entry that ended the window */
		i += j + 1;
	}
	spin_unlock(&ddp->map_lock);
	log_debug(1 << CXGBI_DBG_DDP,
		"NO suitable entries %u available.\n", count);
	return -EBUSY;
}
1070 
/*
 * Mark @count entries of ddp->gl_map, beginning at @start, as free again by
 * clearing them to NULL under ddp->map_lock.
 */
static inline void ddp_unmark_entries(struct cxgbi_ddp_info *ddp,
						int start, int count)
{
	spin_lock(&ddp->map_lock);
	memset(&ddp->gl_map[start], 0,
		count * sizeof(struct cxgbi_gather_list *));
	spin_unlock(&ddp->map_lock);
}
1079 
/*
 * Undo the DMA mappings of the first gl->nelem pages of a gather list.
 * Each page was mapped as one full PAGE_SIZE, device-to-memory.
 */
static inline void ddp_gl_unmap(struct pci_dev *pdev,
					struct cxgbi_gather_list *gl)
{
	int i;

	for (i = 0; i < gl->nelem; i++)
		dma_unmap_page(&pdev->dev, gl->phys_addr[i], PAGE_SIZE,
				PCI_DMA_FROMDEVICE);
}
1089 
1090 static inline int ddp_gl_map(struct pci_dev *pdev,
1091 				    struct cxgbi_gather_list *gl)
1092 {
1093 	int i;
1094 
1095 	for (i = 0; i < gl->nelem; i++) {
1096 		gl->phys_addr[i] = dma_map_page(&pdev->dev, gl->pages[i], 0,
1097 						PAGE_SIZE,
1098 						PCI_DMA_FROMDEVICE);
1099 		if (unlikely(dma_mapping_error(&pdev->dev, gl->phys_addr[i]))) {
1100 			log_debug(1 << CXGBI_DBG_DDP,
1101 				"page %d 0x%p, 0x%p dma mapping err.\n",
1102 				i, gl->pages[i], pdev);
1103 			goto unmap;
1104 		}
1105 	}
1106 	return i;
1107 unmap:
1108 	if (i) {
1109 		unsigned int nelem = gl->nelem;
1110 
1111 		gl->nelem = i;
1112 		ddp_gl_unmap(pdev, gl);
1113 		gl->nelem = nelem;
1114 	}
1115 	return -EINVAL;
1116 }
1117 
1118 static void ddp_release_gl(struct cxgbi_gather_list *gl,
1119 				  struct pci_dev *pdev)
1120 {
1121 	ddp_gl_unmap(pdev, gl);
1122 	kfree(gl);
1123 }
1124 
1125 static struct cxgbi_gather_list *ddp_make_gl(unsigned int xferlen,
1126 						    struct scatterlist *sgl,
1127 						    unsigned int sgcnt,
1128 						    struct pci_dev *pdev,
1129 						    gfp_t gfp)
1130 {
1131 	struct cxgbi_gather_list *gl;
1132 	struct scatterlist *sg = sgl;
1133 	struct page *sgpage = sg_page(sg);
1134 	unsigned int sglen = sg->length;
1135 	unsigned int sgoffset = sg->offset;
1136 	unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >>
1137 				PAGE_SHIFT;
1138 	int i = 1, j = 0;
1139 
1140 	if (xferlen < DDP_THRESHOLD) {
1141 		log_debug(1 << CXGBI_DBG_DDP,
1142 			"xfer %u < threshold %u, no ddp.\n",
1143 			xferlen, DDP_THRESHOLD);
1144 		return NULL;
1145 	}
1146 
1147 	gl = kzalloc(sizeof(struct cxgbi_gather_list) +
1148 		     npages * (sizeof(dma_addr_t) +
1149 		     sizeof(struct page *)), gfp);
1150 	if (!gl) {
1151 		log_debug(1 << CXGBI_DBG_DDP,
1152 			"xfer %u, %u pages, OOM.\n", xferlen, npages);
1153 		return NULL;
1154 	}
1155 
1156 	 log_debug(1 << CXGBI_DBG_DDP,
1157 		"xfer %u, sgl %u, gl max %u.\n", xferlen, sgcnt, npages);
1158 
1159 	gl->pages = (struct page **)&gl->phys_addr[npages];
1160 	gl->nelem = npages;
1161 	gl->length = xferlen;
1162 	gl->offset = sgoffset;
1163 	gl->pages[0] = sgpage;
1164 
1165 	for (i = 1, sg = sg_next(sgl), j = 0; i < sgcnt;
1166 		i++, sg = sg_next(sg)) {
1167 		struct page *page = sg_page(sg);
1168 
1169 		if (sgpage == page && sg->offset == sgoffset + sglen)
1170 			sglen += sg->length;
1171 		else {
1172 			/*  make sure the sgl is fit for ddp:
1173 			 *  each has the same page size, and
1174 			 *  all of the middle pages are used completely
1175 			 */
1176 			if ((j && sgoffset) || ((i != sgcnt - 1) &&
1177 			    ((sglen + sgoffset) & ~PAGE_MASK))) {
1178 				log_debug(1 << CXGBI_DBG_DDP,
1179 					"page %d/%u, %u + %u.\n",
1180 					i, sgcnt, sgoffset, sglen);
1181 				goto error_out;
1182 			}
1183 
1184 			j++;
1185 			if (j == gl->nelem || sg->offset) {
1186 				log_debug(1 << CXGBI_DBG_DDP,
1187 					"page %d/%u, offset %u.\n",
1188 					j, gl->nelem, sg->offset);
1189 				goto error_out;
1190 			}
1191 			gl->pages[j] = page;
1192 			sglen = sg->length;
1193 			sgoffset = sg->offset;
1194 			sgpage = page;
1195 		}
1196 	}
1197 	gl->nelem = ++j;
1198 
1199 	if (ddp_gl_map(pdev, gl) < 0)
1200 		goto error_out;
1201 
1202 	return gl;
1203 
1204 error_out:
1205 	kfree(gl);
1206 	return NULL;
1207 }
1208 
1209 static void ddp_tag_release(struct cxgbi_hba *chba, u32 tag)
1210 {
1211 	struct cxgbi_device *cdev = chba->cdev;
1212 	struct cxgbi_ddp_info *ddp = cdev->ddp;
1213 	u32 idx;
1214 
1215 	idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
1216 	if (idx < ddp->nppods) {
1217 		struct cxgbi_gather_list *gl = ddp->gl_map[idx];
1218 		unsigned int npods;
1219 
1220 		if (!gl || !gl->nelem) {
1221 			pr_warn("tag 0x%x, idx %u, gl 0x%p, %u.\n",
1222 				tag, idx, gl, gl ? gl->nelem : 0);
1223 			return;
1224 		}
1225 		npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
1226 		log_debug(1 << CXGBI_DBG_DDP,
1227 			"tag 0x%x, release idx %u, npods %u.\n",
1228 			tag, idx, npods);
1229 		cdev->csk_ddp_clear(chba, tag, idx, npods);
1230 		ddp_unmark_entries(ddp, idx, npods);
1231 		ddp_release_gl(gl, ddp->pdev);
1232 	} else
1233 		pr_warn("tag 0x%x, idx %u > max %u.\n", tag, idx, ddp->nppods);
1234 }
1235 
1236 static int ddp_tag_reserve(struct cxgbi_sock *csk, unsigned int tid,
1237 			   u32 sw_tag, u32 *tagp, struct cxgbi_gather_list *gl,
1238 			   gfp_t gfp)
1239 {
1240 	struct cxgbi_device *cdev = csk->cdev;
1241 	struct cxgbi_ddp_info *ddp = cdev->ddp;
1242 	struct cxgbi_tag_format *tformat = &cdev->tag_format;
1243 	struct cxgbi_pagepod_hdr hdr;
1244 	unsigned int npods;
1245 	int idx = -1;
1246 	int err = -ENOMEM;
1247 	u32 tag;
1248 
1249 	npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
1250 	if (ddp->idx_last == ddp->nppods)
1251 		idx = ddp_find_unused_entries(ddp, 0, ddp->nppods,
1252 							npods, gl);
1253 	else {
1254 		idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1,
1255 							ddp->nppods, npods,
1256 							gl);
1257 		if (idx < 0 && ddp->idx_last >= npods) {
1258 			idx = ddp_find_unused_entries(ddp, 0,
1259 				min(ddp->idx_last + npods, ddp->nppods),
1260 							npods, gl);
1261 		}
1262 	}
1263 	if (idx < 0) {
1264 		log_debug(1 << CXGBI_DBG_DDP,
1265 			"xferlen %u, gl %u, npods %u NO DDP.\n",
1266 			gl->length, gl->nelem, npods);
1267 		return idx;
1268 	}
1269 
1270 	tag = cxgbi_ddp_tag_base(tformat, sw_tag);
1271 	tag |= idx << PPOD_IDX_SHIFT;
1272 
1273 	hdr.rsvd = 0;
1274 	hdr.vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
1275 	hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
1276 	hdr.max_offset = htonl(gl->length);
1277 	hdr.page_offset = htonl(gl->offset);
1278 
1279 	err = cdev->csk_ddp_set(csk, &hdr, idx, npods, gl);
1280 	if (err < 0)
1281 		goto unmark_entries;
1282 
1283 	ddp->idx_last = idx;
1284 	log_debug(1 << CXGBI_DBG_DDP,
1285 		"xfer %u, gl %u,%u, tid 0x%x, tag 0x%x->0x%x(%u,%u).\n",
1286 		gl->length, gl->nelem, gl->offset, tid, sw_tag, tag, idx,
1287 		npods);
1288 	*tagp = tag;
1289 	return 0;
1290 
1291 unmark_entries:
1292 	ddp_unmark_entries(ddp, idx, npods);
1293 	return err;
1294 }
1295 
1296 int cxgbi_ddp_reserve(struct cxgbi_sock *csk, unsigned int *tagp,
1297 			unsigned int sw_tag, unsigned int xferlen,
1298 			struct scatterlist *sgl, unsigned int sgcnt, gfp_t gfp)
1299 {
1300 	struct cxgbi_device *cdev = csk->cdev;
1301 	struct cxgbi_tag_format *tformat = &cdev->tag_format;
1302 	struct cxgbi_gather_list *gl;
1303 	int err;
1304 
1305 	if (page_idx >= DDP_PGIDX_MAX || !cdev->ddp ||
1306 	    xferlen < DDP_THRESHOLD) {
1307 		log_debug(1 << CXGBI_DBG_DDP,
1308 			"pgidx %u, xfer %u, NO ddp.\n", page_idx, xferlen);
1309 		return -EINVAL;
1310 	}
1311 
1312 	if (!cxgbi_sw_tag_usable(tformat, sw_tag)) {
1313 		log_debug(1 << CXGBI_DBG_DDP,
1314 			"sw_tag 0x%x NOT usable.\n", sw_tag);
1315 		return -EINVAL;
1316 	}
1317 
1318 	gl = ddp_make_gl(xferlen, sgl, sgcnt, cdev->pdev, gfp);
1319 	if (!gl)
1320 		return -ENOMEM;
1321 
1322 	err = ddp_tag_reserve(csk, csk->tid, sw_tag, tagp, gl, gfp);
1323 	if (err < 0)
1324 		ddp_release_gl(gl, cdev->pdev);
1325 
1326 	return err;
1327 }
1328 
1329 static void ddp_destroy(struct kref *kref)
1330 {
1331 	struct cxgbi_ddp_info *ddp = container_of(kref,
1332 						struct cxgbi_ddp_info,
1333 						refcnt);
1334 	struct cxgbi_device *cdev = ddp->cdev;
1335 	int i = 0;
1336 
1337 	pr_info("kref 0, destroy ddp 0x%p, cdev 0x%p.\n", ddp, cdev);
1338 
1339 	while (i < ddp->nppods) {
1340 		struct cxgbi_gather_list *gl = ddp->gl_map[i];
1341 
1342 		if (gl) {
1343 			int npods = (gl->nelem + PPOD_PAGES_MAX - 1)
1344 					>> PPOD_PAGES_SHIFT;
1345 			pr_info("cdev 0x%p, ddp %d + %d.\n", cdev, i, npods);
1346 			kfree(gl);
1347 			i += npods;
1348 		} else
1349 			i++;
1350 	}
1351 	cxgbi_free_big_mem(ddp);
1352 }
1353 
1354 int cxgbi_ddp_cleanup(struct cxgbi_device *cdev)
1355 {
1356 	struct cxgbi_ddp_info *ddp = cdev->ddp;
1357 
1358 	log_debug(1 << CXGBI_DBG_DDP,
1359 		"cdev 0x%p, release ddp 0x%p.\n", cdev, ddp);
1360 	cdev->ddp = NULL;
1361 	if (ddp)
1362 		return kref_put(&ddp->refcnt, ddp_destroy);
1363 	return 0;
1364 }
1365 EXPORT_SYMBOL_GPL(cxgbi_ddp_cleanup);
1366 
1367 int cxgbi_ddp_init(struct cxgbi_device *cdev,
1368 		   unsigned int llimit, unsigned int ulimit,
1369 		   unsigned int max_txsz, unsigned int max_rxsz)
1370 {
1371 	struct cxgbi_ddp_info *ddp;
1372 	unsigned int ppmax, bits;
1373 
1374 	ppmax = (ulimit - llimit + 1) >> PPOD_SIZE_SHIFT;
1375 	bits = __ilog2_u32(ppmax) + 1;
1376 	if (bits > PPOD_IDX_MAX_SIZE)
1377 		bits = PPOD_IDX_MAX_SIZE;
1378 	ppmax = (1 << (bits - 1)) - 1;
1379 
1380 	ddp = cxgbi_alloc_big_mem(sizeof(struct cxgbi_ddp_info) +
1381 				ppmax * (sizeof(struct cxgbi_gather_list *) +
1382 					 sizeof(struct sk_buff *)),
1383 				GFP_KERNEL);
1384 	if (!ddp) {
1385 		pr_warn("cdev 0x%p, ddp ppmax %u OOM.\n", cdev, ppmax);
1386 		return -ENOMEM;
1387 	}
1388 	ddp->gl_map = (struct cxgbi_gather_list **)(ddp + 1);
1389 	cdev->ddp = ddp;
1390 
1391 	spin_lock_init(&ddp->map_lock);
1392 	kref_init(&ddp->refcnt);
1393 
1394 	ddp->cdev = cdev;
1395 	ddp->pdev = cdev->pdev;
1396 	ddp->llimit = llimit;
1397 	ddp->ulimit = ulimit;
1398 	ddp->max_txsz = min_t(unsigned int, max_txsz, ULP2_MAX_PKT_SIZE);
1399 	ddp->max_rxsz = min_t(unsigned int, max_rxsz, ULP2_MAX_PKT_SIZE);
1400 	ddp->nppods = ppmax;
1401 	ddp->idx_last = ppmax;
1402 	ddp->idx_bits = bits;
1403 	ddp->idx_mask = (1 << bits) - 1;
1404 	ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
1405 
1406 	cdev->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
1407 	cdev->tag_format.rsvd_bits = ddp->idx_bits;
1408 	cdev->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1409 	cdev->tag_format.rsvd_mask = (1 << cdev->tag_format.rsvd_bits) - 1;
1410 
1411 	pr_info("%s tag format, sw %u, rsvd %u,%u, mask 0x%x.\n",
1412 		cdev->ports[0]->name, cdev->tag_format.sw_bits,
1413 		cdev->tag_format.rsvd_bits, cdev->tag_format.rsvd_shift,
1414 		cdev->tag_format.rsvd_mask);
1415 
1416 	cdev->tx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
1417 				ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN);
1418 	cdev->rx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
1419 				ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN);
1420 
1421 	log_debug(1 << CXGBI_DBG_DDP,
1422 		"%s max payload size: %u/%u, %u/%u.\n",
1423 		cdev->ports[0]->name, cdev->tx_max_size, ddp->max_txsz,
1424 		cdev->rx_max_size, ddp->max_rxsz);
1425 	return 0;
1426 }
1427 EXPORT_SYMBOL_GPL(cxgbi_ddp_init);
1428 
1429 /*
1430  * APIs interacting with open-iscsi libraries
1431  */
1432 
/*
 * Static storage, hence zero-initialised.  cxgbi_conn_init_pdu() attaches
 * the page holding this array as an skb fragment when a tx pdu needs pad
 * bytes.
 */
static unsigned char padding[4];
1434 
1435 static void task_release_itt(struct iscsi_task *task, itt_t hdr_itt)
1436 {
1437 	struct scsi_cmnd *sc = task->sc;
1438 	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
1439 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1440 	struct cxgbi_hba *chba = cconn->chba;
1441 	struct cxgbi_tag_format *tformat = &chba->cdev->tag_format;
1442 	u32 tag = ntohl((__force u32)hdr_itt);
1443 
1444 	log_debug(1 << CXGBI_DBG_DDP,
1445 		   "cdev 0x%p, release tag 0x%x.\n", chba->cdev, tag);
1446 	if (sc &&
1447 	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE) &&
1448 	    cxgbi_is_ddp_tag(tformat, tag))
1449 		ddp_tag_release(chba, tag);
1450 }
1451 
1452 static int task_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
1453 {
1454 	struct scsi_cmnd *sc = task->sc;
1455 	struct iscsi_conn *conn = task->conn;
1456 	struct iscsi_session *sess = conn->session;
1457 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1458 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1459 	struct cxgbi_hba *chba = cconn->chba;
1460 	struct cxgbi_tag_format *tformat = &chba->cdev->tag_format;
1461 	u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
1462 	u32 tag = 0;
1463 	int err = -EINVAL;
1464 
1465 	if (sc &&
1466 	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE)) {
1467 		err = cxgbi_ddp_reserve(cconn->cep->csk, &tag, sw_tag,
1468 					scsi_in(sc)->length,
1469 					scsi_in(sc)->table.sgl,
1470 					scsi_in(sc)->table.nents,
1471 					GFP_ATOMIC);
1472 		if (err < 0)
1473 			log_debug(1 << CXGBI_DBG_DDP,
1474 				"csk 0x%p, R task 0x%p, %u,%u, no ddp.\n",
1475 				cconn->cep->csk, task, scsi_in(sc)->length,
1476 				scsi_in(sc)->table.nents);
1477 	}
1478 
1479 	if (err < 0)
1480 		tag = cxgbi_set_non_ddp_tag(tformat, sw_tag);
1481 	/*  the itt need to sent in big-endian order */
1482 	*hdr_itt = (__force itt_t)htonl(tag);
1483 
1484 	log_debug(1 << CXGBI_DBG_DDP,
1485 		"cdev 0x%p, task 0x%p, 0x%x(0x%x,0x%x)->0x%x/0x%x.\n",
1486 		chba->cdev, task, sw_tag, task->itt, sess->age, tag, *hdr_itt);
1487 	return 0;
1488 }
1489 
1490 void cxgbi_parse_pdu_itt(struct iscsi_conn *conn, itt_t itt, int *idx, int *age)
1491 {
1492 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1493 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1494 	struct cxgbi_device *cdev = cconn->chba->cdev;
1495 	u32 tag = ntohl((__force u32) itt);
1496 	u32 sw_bits;
1497 
1498 	sw_bits = cxgbi_tag_nonrsvd_bits(&cdev->tag_format, tag);
1499 	if (idx)
1500 		*idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
1501 	if (age)
1502 		*age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
1503 
1504 	log_debug(1 << CXGBI_DBG_DDP,
1505 		"cdev 0x%p, tag 0x%x/0x%x, -> 0x%x(0x%x,0x%x).\n",
1506 		cdev, tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
1507 		age ? *age : 0xFF);
1508 }
1509 EXPORT_SYMBOL_GPL(cxgbi_parse_pdu_itt);
1510 
1511 void cxgbi_conn_tx_open(struct cxgbi_sock *csk)
1512 {
1513 	struct iscsi_conn *conn = csk->user_data;
1514 
1515 	if (conn) {
1516 		log_debug(1 << CXGBI_DBG_SOCK,
1517 			"csk 0x%p, cid %d.\n", csk, conn->id);
1518 		iscsi_conn_queue_work(conn);
1519 	}
1520 }
1521 EXPORT_SYMBOL_GPL(cxgbi_conn_tx_open);
1522 
1523 /*
1524  * pdu receive, interact with libiscsi_tcp
1525  */
1526 static inline int read_pdu_skb(struct iscsi_conn *conn,
1527 			       struct sk_buff *skb,
1528 			       unsigned int offset,
1529 			       int offloaded)
1530 {
1531 	int status = 0;
1532 	int bytes_read;
1533 
1534 	bytes_read = iscsi_tcp_recv_skb(conn, skb, offset, offloaded, &status);
1535 	switch (status) {
1536 	case ISCSI_TCP_CONN_ERR:
1537 		pr_info("skb 0x%p, off %u, %d, TCP_ERR.\n",
1538 			  skb, offset, offloaded);
1539 		return -EIO;
1540 	case ISCSI_TCP_SUSPENDED:
1541 		log_debug(1 << CXGBI_DBG_PDU_RX,
1542 			"skb 0x%p, off %u, %d, TCP_SUSPEND, rc %d.\n",
1543 			skb, offset, offloaded, bytes_read);
1544 		/* no transfer - just have caller flush queue */
1545 		return bytes_read;
1546 	case ISCSI_TCP_SKB_DONE:
1547 		pr_info("skb 0x%p, off %u, %d, TCP_SKB_DONE.\n",
1548 			skb, offset, offloaded);
1549 		/*
1550 		 * pdus should always fit in the skb and we should get
1551 		 * segment done notifcation.
1552 		 */
1553 		iscsi_conn_printk(KERN_ERR, conn, "Invalid pdu or skb.");
1554 		return -EFAULT;
1555 	case ISCSI_TCP_SEGMENT_DONE:
1556 		log_debug(1 << CXGBI_DBG_PDU_RX,
1557 			"skb 0x%p, off %u, %d, TCP_SEG_DONE, rc %d.\n",
1558 			skb, offset, offloaded, bytes_read);
1559 		return bytes_read;
1560 	default:
1561 		pr_info("skb 0x%p, off %u, %d, invalid status %d.\n",
1562 			skb, offset, offloaded, status);
1563 		return -EINVAL;
1564 	}
1565 }
1566 
1567 static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
1568 {
1569 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1570 
1571 	log_debug(1 << CXGBI_DBG_PDU_RX,
1572 		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
1573 		conn, skb, skb->len, cxgbi_skcb_flags(skb));
1574 
1575 	if (!iscsi_tcp_recv_segment_is_hdr(tcp_conn)) {
1576 		pr_info("conn 0x%p, skb 0x%p, not hdr.\n", conn, skb);
1577 		iscsi_conn_failure(conn, ISCSI_ERR_PROTO);
1578 		return -EIO;
1579 	}
1580 
1581 	if (conn->hdrdgst_en &&
1582 	    cxgbi_skcb_test_flag(skb, SKCBF_RX_HCRC_ERR)) {
1583 		pr_info("conn 0x%p, skb 0x%p, hcrc.\n", conn, skb);
1584 		iscsi_conn_failure(conn, ISCSI_ERR_HDR_DGST);
1585 		return -EIO;
1586 	}
1587 
1588 	return read_pdu_skb(conn, skb, 0, 0);
1589 }
1590 
1591 static int skb_read_pdu_data(struct iscsi_conn *conn, struct sk_buff *lskb,
1592 			     struct sk_buff *skb, unsigned int offset)
1593 {
1594 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1595 	bool offloaded = 0;
1596 	int opcode = tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK;
1597 
1598 	log_debug(1 << CXGBI_DBG_PDU_RX,
1599 		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
1600 		conn, skb, skb->len, cxgbi_skcb_flags(skb));
1601 
1602 	if (conn->datadgst_en &&
1603 	    cxgbi_skcb_test_flag(lskb, SKCBF_RX_DCRC_ERR)) {
1604 		pr_info("conn 0x%p, skb 0x%p, dcrc 0x%lx.\n",
1605 			conn, lskb, cxgbi_skcb_flags(lskb));
1606 		iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
1607 		return -EIO;
1608 	}
1609 
1610 	if (iscsi_tcp_recv_segment_is_hdr(tcp_conn))
1611 		return 0;
1612 
1613 	/* coalesced, add header digest length */
1614 	if (lskb == skb && conn->hdrdgst_en)
1615 		offset += ISCSI_DIGEST_SIZE;
1616 
1617 	if (cxgbi_skcb_test_flag(lskb, SKCBF_RX_DATA_DDPD))
1618 		offloaded = 1;
1619 
1620 	if (opcode == ISCSI_OP_SCSI_DATA_IN)
1621 		log_debug(1 << CXGBI_DBG_PDU_RX,
1622 			"skb 0x%p, op 0x%x, itt 0x%x, %u %s ddp'ed.\n",
1623 			skb, opcode, ntohl(tcp_conn->in.hdr->itt),
1624 			tcp_conn->in.datalen, offloaded ? "is" : "not");
1625 
1626 	return read_pdu_skb(conn, skb, offset, offloaded);
1627 }
1628 
1629 static void csk_return_rx_credits(struct cxgbi_sock *csk, int copied)
1630 {
1631 	struct cxgbi_device *cdev = csk->cdev;
1632 	int must_send;
1633 	u32 credits;
1634 
1635 	log_debug(1 << CXGBI_DBG_PDU_RX,
1636 		"csk 0x%p,%u,0x%lu,%u, seq %u, wup %u, thre %u, %u.\n",
1637 		csk, csk->state, csk->flags, csk->tid, csk->copied_seq,
1638 		csk->rcv_wup, cdev->rx_credit_thres,
1639 		cdev->rcv_win);
1640 
1641 	if (csk->state != CTP_ESTABLISHED)
1642 		return;
1643 
1644 	credits = csk->copied_seq - csk->rcv_wup;
1645 	if (unlikely(!credits))
1646 		return;
1647 	if (unlikely(cdev->rx_credit_thres == 0))
1648 		return;
1649 
1650 	must_send = credits + 16384 >= cdev->rcv_win;
1651 	if (must_send || credits >= cdev->rx_credit_thres)
1652 		csk->rcv_wup += cdev->csk_send_rx_credits(csk, credits);
1653 }
1654 
1655 void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
1656 {
1657 	struct cxgbi_device *cdev = csk->cdev;
1658 	struct iscsi_conn *conn = csk->user_data;
1659 	struct sk_buff *skb;
1660 	unsigned int read = 0;
1661 	int err = 0;
1662 
1663 	log_debug(1 << CXGBI_DBG_PDU_RX,
1664 		"csk 0x%p, conn 0x%p.\n", csk, conn);
1665 
1666 	if (unlikely(!conn || conn->suspend_rx)) {
1667 		log_debug(1 << CXGBI_DBG_PDU_RX,
1668 			"csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n",
1669 			csk, conn, conn ? conn->id : 0xFF,
1670 			conn ? conn->suspend_rx : 0xFF);
1671 		return;
1672 	}
1673 
1674 	while (!err) {
1675 		skb = skb_peek(&csk->receive_queue);
1676 		if (!skb ||
1677 		    !(cxgbi_skcb_test_flag(skb, SKCBF_RX_STATUS))) {
1678 			if (skb)
1679 				log_debug(1 << CXGBI_DBG_PDU_RX,
1680 					"skb 0x%p, NOT ready 0x%lx.\n",
1681 					skb, cxgbi_skcb_flags(skb));
1682 			break;
1683 		}
1684 		__skb_unlink(skb, &csk->receive_queue);
1685 
1686 		read += cxgbi_skcb_rx_pdulen(skb);
1687 		log_debug(1 << CXGBI_DBG_PDU_RX,
1688 			"csk 0x%p, skb 0x%p,%u,f 0x%lx, pdu len %u.\n",
1689 			csk, skb, skb->len, cxgbi_skcb_flags(skb),
1690 			cxgbi_skcb_rx_pdulen(skb));
1691 
1692 		if (cxgbi_skcb_test_flag(skb, SKCBF_RX_COALESCED)) {
1693 			err = skb_read_pdu_bhs(conn, skb);
1694 			if (err < 0) {
1695 				pr_err("coalesced bhs, csk 0x%p, skb 0x%p,%u, "
1696 					"f 0x%lx, plen %u.\n",
1697 					csk, skb, skb->len,
1698 					cxgbi_skcb_flags(skb),
1699 					cxgbi_skcb_rx_pdulen(skb));
1700 				goto skb_done;
1701 			}
1702 			err = skb_read_pdu_data(conn, skb, skb,
1703 						err + cdev->skb_rx_extra);
1704 			if (err < 0)
1705 				pr_err("coalesced data, csk 0x%p, skb 0x%p,%u, "
1706 					"f 0x%lx, plen %u.\n",
1707 					csk, skb, skb->len,
1708 					cxgbi_skcb_flags(skb),
1709 					cxgbi_skcb_rx_pdulen(skb));
1710 		} else {
1711 			err = skb_read_pdu_bhs(conn, skb);
1712 			if (err < 0) {
1713 				pr_err("bhs, csk 0x%p, skb 0x%p,%u, "
1714 					"f 0x%lx, plen %u.\n",
1715 					csk, skb, skb->len,
1716 					cxgbi_skcb_flags(skb),
1717 					cxgbi_skcb_rx_pdulen(skb));
1718 				goto skb_done;
1719 			}
1720 
1721 			if (cxgbi_skcb_test_flag(skb, SKCBF_RX_DATA)) {
1722 				struct sk_buff *dskb;
1723 
1724 				dskb = skb_peek(&csk->receive_queue);
1725 				if (!dskb) {
1726 					pr_err("csk 0x%p, skb 0x%p,%u, f 0x%lx,"
1727 						" plen %u, NO data.\n",
1728 						csk, skb, skb->len,
1729 						cxgbi_skcb_flags(skb),
1730 						cxgbi_skcb_rx_pdulen(skb));
1731 					err = -EIO;
1732 					goto skb_done;
1733 				}
1734 				__skb_unlink(dskb, &csk->receive_queue);
1735 
1736 				err = skb_read_pdu_data(conn, skb, dskb, 0);
1737 				if (err < 0)
1738 					pr_err("data, csk 0x%p, skb 0x%p,%u, "
1739 						"f 0x%lx, plen %u, dskb 0x%p,"
1740 						"%u.\n",
1741 						csk, skb, skb->len,
1742 						cxgbi_skcb_flags(skb),
1743 						cxgbi_skcb_rx_pdulen(skb),
1744 						dskb, dskb->len);
1745 				__kfree_skb(dskb);
1746 			} else
1747 				err = skb_read_pdu_data(conn, skb, skb, 0);
1748 		}
1749 skb_done:
1750 		__kfree_skb(skb);
1751 
1752 		if (err < 0)
1753 			break;
1754 	}
1755 
1756 	log_debug(1 << CXGBI_DBG_PDU_RX, "csk 0x%p, read %u.\n", csk, read);
1757 	if (read) {
1758 		csk->copied_seq += read;
1759 		csk_return_rx_credits(csk, read);
1760 		conn->rxdata_octets += read;
1761 	}
1762 
1763 	if (err < 0) {
1764 		pr_info("csk 0x%p, 0x%p, rx failed %d, read %u.\n",
1765 			csk, conn, err, read);
1766 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1767 	}
1768 }
1769 EXPORT_SYMBOL_GPL(cxgbi_conn_pdu_ready);
1770 
1771 static int sgl_seek_offset(struct scatterlist *sgl, unsigned int sgcnt,
1772 				unsigned int offset, unsigned int *off,
1773 				struct scatterlist **sgp)
1774 {
1775 	int i;
1776 	struct scatterlist *sg;
1777 
1778 	for_each_sg(sgl, sg, sgcnt, i) {
1779 		if (offset < sg->length) {
1780 			*off = offset;
1781 			*sgp = sg;
1782 			return 0;
1783 		}
1784 		offset -= sg->length;
1785 	}
1786 	return -EFAULT;
1787 }
1788 
1789 static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset,
1790 				unsigned int dlen, skb_frag_t *frags,
1791 				int frag_max)
1792 {
1793 	unsigned int datalen = dlen;
1794 	unsigned int sglen = sg->length - sgoffset;
1795 	struct page *page = sg_page(sg);
1796 	int i;
1797 
1798 	i = 0;
1799 	do {
1800 		unsigned int copy;
1801 
1802 		if (!sglen) {
1803 			sg = sg_next(sg);
1804 			if (!sg) {
1805 				pr_warn("sg %d NULL, len %u/%u.\n",
1806 					i, datalen, dlen);
1807 				return -EINVAL;
1808 			}
1809 			sgoffset = 0;
1810 			sglen = sg->length;
1811 			page = sg_page(sg);
1812 
1813 		}
1814 		copy = min(datalen, sglen);
1815 		if (i && page == frags[i - 1].page &&
1816 		    sgoffset + sg->offset ==
1817 			frags[i - 1].page_offset + frags[i - 1].size) {
1818 			frags[i - 1].size += copy;
1819 		} else {
1820 			if (i >= frag_max) {
1821 				pr_warn("too many pages %u, dlen %u.\n",
1822 					frag_max, dlen);
1823 				return -EINVAL;
1824 			}
1825 
1826 			frags[i].page = page;
1827 			frags[i].page_offset = sg->offset + sgoffset;
1828 			frags[i].size = copy;
1829 			i++;
1830 		}
1831 		datalen -= copy;
1832 		sgoffset += copy;
1833 		sglen -= copy;
1834 	} while (datalen);
1835 
1836 	return i;
1837 }
1838 
1839 int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
1840 {
1841 	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
1842 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
1843 	struct cxgbi_device *cdev = cconn->chba->cdev;
1844 	struct iscsi_conn *conn = task->conn;
1845 	struct iscsi_tcp_task *tcp_task = task->dd_data;
1846 	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
1847 	struct scsi_cmnd *sc = task->sc;
1848 	int headroom = SKB_TX_ISCSI_PDU_HEADER_MAX;
1849 
1850 	tcp_task->dd_data = tdata;
1851 	task->hdr = NULL;
1852 
1853 	if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) &&
1854 	    (opcode == ISCSI_OP_SCSI_DATA_OUT ||
1855 	     (opcode == ISCSI_OP_SCSI_CMD &&
1856 	      (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_TO_DEVICE))))
1857 		/* data could goes into skb head */
1858 		headroom += min_t(unsigned int,
1859 				SKB_MAX_HEAD(cdev->skb_tx_rsvd),
1860 				conn->max_xmit_dlength);
1861 
1862 	tdata->skb = alloc_skb(cdev->skb_tx_rsvd + headroom, GFP_ATOMIC);
1863 	if (!tdata->skb) {
1864 		pr_warn("alloc skb %u+%u, opcode 0x%x failed.\n",
1865 			cdev->skb_tx_rsvd, headroom, opcode);
1866 		return -ENOMEM;
1867 	}
1868 
1869 	skb_reserve(tdata->skb, cdev->skb_tx_rsvd);
1870 	task->hdr = (struct iscsi_hdr *)tdata->skb->data;
1871 	task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */
1872 
1873 	/* data_out uses scsi_cmd's itt */
1874 	if (opcode != ISCSI_OP_SCSI_DATA_OUT)
1875 		task_reserve_itt(task, &task->hdr->itt);
1876 
1877 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
1878 		"task 0x%p, op 0x%x, skb 0x%p,%u+%u/%u, itt 0x%x.\n",
1879 		task, opcode, tdata->skb, cdev->skb_tx_rsvd, headroom,
1880 		conn->max_xmit_dlength, ntohl(task->hdr->itt));
1881 
1882 	return 0;
1883 }
1884 EXPORT_SYMBOL_GPL(cxgbi_conn_alloc_pdu);
1885 
1886 static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc)
1887 {
1888 	if (hcrc || dcrc) {
1889 		u8 submode = 0;
1890 
1891 		if (hcrc)
1892 			submode |= 1;
1893 		if (dcrc)
1894 			submode |= 2;
1895 		cxgbi_skcb_ulp_mode(skb) = (ULP2_MODE_ISCSI << 4) | submode;
1896 	} else
1897 		cxgbi_skcb_ulp_mode(skb) = 0;
1898 }
1899 
1900 int cxgbi_conn_init_pdu(struct iscsi_task *task, unsigned int offset,
1901 			      unsigned int count)
1902 {
1903 	struct iscsi_conn *conn = task->conn;
1904 	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
1905 	struct sk_buff *skb = tdata->skb;
1906 	unsigned int datalen = count;
1907 	int i, padlen = iscsi_padding(count);
1908 	struct page *pg;
1909 
1910 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
1911 		"task 0x%p,0x%p, skb 0x%p, 0x%x,0x%x,0x%x, %u+%u.\n",
1912 		task, task->sc, skb, (*skb->data) & ISCSI_OPCODE_MASK,
1913 		ntohl(task->cmdsn), ntohl(task->hdr->itt), offset, count);
1914 
1915 	skb_put(skb, task->hdr_len);
1916 	tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? conn->datadgst_en : 0);
1917 	if (!count)
1918 		return 0;
1919 
1920 	if (task->sc) {
1921 		struct scsi_data_buffer *sdb = scsi_out(task->sc);
1922 		struct scatterlist *sg = NULL;
1923 		int err;
1924 
1925 		tdata->offset = offset;
1926 		tdata->count = count;
1927 		err = sgl_seek_offset(
1928 					sdb->table.sgl, sdb->table.nents,
1929 					tdata->offset, &tdata->sgoffset, &sg);
1930 		if (err < 0) {
1931 			pr_warn("tpdu, sgl %u, bad offset %u/%u.\n",
1932 				sdb->table.nents, tdata->offset, sdb->length);
1933 			return err;
1934 		}
1935 		err = sgl_read_to_frags(sg, tdata->sgoffset, tdata->count,
1936 					tdata->frags, MAX_PDU_FRAGS);
1937 		if (err < 0) {
1938 			pr_warn("tpdu, sgl %u, bad offset %u + %u.\n",
1939 				sdb->table.nents, tdata->offset, tdata->count);
1940 			return err;
1941 		}
1942 		tdata->nr_frags = err;
1943 
1944 		if (tdata->nr_frags > MAX_SKB_FRAGS ||
1945 		    (padlen && tdata->nr_frags == MAX_SKB_FRAGS)) {
1946 			char *dst = skb->data + task->hdr_len;
1947 			skb_frag_t *frag = tdata->frags;
1948 
1949 			/* data fits in the skb's headroom */
1950 			for (i = 0; i < tdata->nr_frags; i++, frag++) {
1951 				char *src = kmap_atomic(frag->page,
1952 							KM_SOFTIRQ0);
1953 
1954 				memcpy(dst, src+frag->page_offset, frag->size);
1955 				dst += frag->size;
1956 				kunmap_atomic(src, KM_SOFTIRQ0);
1957 			}
1958 			if (padlen) {
1959 				memset(dst, 0, padlen);
1960 				padlen = 0;
1961 			}
1962 			skb_put(skb, count + padlen);
1963 		} else {
1964 			/* data fit into frag_list */
1965 			for (i = 0; i < tdata->nr_frags; i++)
1966 				get_page(tdata->frags[i].page);
1967 
1968 			memcpy(skb_shinfo(skb)->frags, tdata->frags,
1969 				sizeof(skb_frag_t) * tdata->nr_frags);
1970 			skb_shinfo(skb)->nr_frags = tdata->nr_frags;
1971 			skb->len += count;
1972 			skb->data_len += count;
1973 			skb->truesize += count;
1974 		}
1975 
1976 	} else {
1977 		pg = virt_to_page(task->data);
1978 
1979 		get_page(pg);
1980 		skb_fill_page_desc(skb, 0, pg, offset_in_page(task->data),
1981 					count);
1982 		skb->len += count;
1983 		skb->data_len += count;
1984 		skb->truesize += count;
1985 	}
1986 
1987 	if (padlen) {
1988 		i = skb_shinfo(skb)->nr_frags;
1989 		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1990 				virt_to_page(padding), offset_in_page(padding),
1991 				padlen);
1992 
1993 		skb->data_len += padlen;
1994 		skb->truesize += padlen;
1995 		skb->len += padlen;
1996 	}
1997 
1998 	return 0;
1999 }
2000 EXPORT_SYMBOL_GPL(cxgbi_conn_init_pdu);
2001 
2002 int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
2003 {
2004 	struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
2005 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2006 	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2007 	struct sk_buff *skb = tdata->skb;
2008 	unsigned int datalen;
2009 	int err;
2010 
2011 	if (!skb) {
2012 		log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2013 			"task 0x%p, skb NULL.\n", task);
2014 		return 0;
2015 	}
2016 
2017 	datalen = skb->data_len;
2018 	tdata->skb = NULL;
2019 	err = cxgbi_sock_send_pdus(cconn->cep->csk, skb);
2020 	if (err > 0) {
2021 		int pdulen = err;
2022 
2023 		log_debug(1 << CXGBI_DBG_PDU_TX,
2024 			"task 0x%p,0x%p, skb 0x%p, len %u/%u, rv %d.\n",
2025 			task, task->sc, skb, skb->len, skb->data_len, err);
2026 
2027 		if (task->conn->hdrdgst_en)
2028 			pdulen += ISCSI_DIGEST_SIZE;
2029 
2030 		if (datalen && task->conn->datadgst_en)
2031 			pdulen += ISCSI_DIGEST_SIZE;
2032 
2033 		task->conn->txdata_octets += pdulen;
2034 		return 0;
2035 	}
2036 
2037 	if (err == -EAGAIN || err == -ENOBUFS) {
2038 		log_debug(1 << CXGBI_DBG_PDU_TX,
2039 			"task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n",
2040 			task, skb, skb->len, skb->data_len, err);
2041 		/* reset skb to send when we are called again */
2042 		tdata->skb = skb;
2043 		return err;
2044 	}
2045 
2046 	kfree_skb(skb);
2047 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
2048 		"itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
2049 		task->itt, skb, skb->len, skb->data_len, err);
2050 	iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
2051 	iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
2052 	return err;
2053 }
2054 EXPORT_SYMBOL_GPL(cxgbi_conn_xmit_pdu);
2055 
2056 void cxgbi_cleanup_task(struct iscsi_task *task)
2057 {
2058 	struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
2059 
2060 	log_debug(1 << CXGBI_DBG_ISCSI,
2061 		"task 0x%p, skb 0x%p, itt 0x%x.\n",
2062 		task, tdata->skb, task->hdr_itt);
2063 
2064 	/*  never reached the xmit task callout */
2065 	if (tdata->skb)
2066 		__kfree_skb(tdata->skb);
2067 	memset(tdata, 0, sizeof(*tdata));
2068 
2069 	task_release_itt(task, task->hdr_itt);
2070 	iscsi_tcp_cleanup_task(task);
2071 }
2072 EXPORT_SYMBOL_GPL(cxgbi_cleanup_task);
2073 
2074 void cxgbi_get_conn_stats(struct iscsi_cls_conn *cls_conn,
2075 				struct iscsi_stats *stats)
2076 {
2077 	struct iscsi_conn *conn = cls_conn->dd_data;
2078 
2079 	stats->txdata_octets = conn->txdata_octets;
2080 	stats->rxdata_octets = conn->rxdata_octets;
2081 	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
2082 	stats->dataout_pdus = conn->dataout_pdus_cnt;
2083 	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
2084 	stats->datain_pdus = conn->datain_pdus_cnt;
2085 	stats->r2t_pdus = conn->r2t_pdus_cnt;
2086 	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
2087 	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
2088 	stats->digest_err = 0;
2089 	stats->timeout_err = 0;
2090 	stats->custom_length = 1;
2091 	strcpy(stats->custom[0].desc, "eh_abort_cnt");
2092 	stats->custom[0].value = conn->eh_abort_cnt;
2093 }
2094 EXPORT_SYMBOL_GPL(cxgbi_get_conn_stats);
2095 
2096 static int cxgbi_conn_max_xmit_dlength(struct iscsi_conn *conn)
2097 {
2098 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2099 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2100 	struct cxgbi_device *cdev = cconn->chba->cdev;
2101 	unsigned int headroom = SKB_MAX_HEAD(cdev->skb_tx_rsvd);
2102 	unsigned int max_def = 512 * MAX_SKB_FRAGS;
2103 	unsigned int max = max(max_def, headroom);
2104 
2105 	max = min(cconn->chba->cdev->tx_max_size, max);
2106 	if (conn->max_xmit_dlength)
2107 		conn->max_xmit_dlength = min(conn->max_xmit_dlength, max);
2108 	else
2109 		conn->max_xmit_dlength = max;
2110 	cxgbi_align_pdu_size(conn->max_xmit_dlength);
2111 
2112 	return 0;
2113 }
2114 
2115 static int cxgbi_conn_max_recv_dlength(struct iscsi_conn *conn)
2116 {
2117 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2118 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2119 	unsigned int max = cconn->chba->cdev->rx_max_size;
2120 
2121 	cxgbi_align_pdu_size(max);
2122 
2123 	if (conn->max_recv_dlength) {
2124 		if (conn->max_recv_dlength > max) {
2125 			pr_err("MaxRecvDataSegmentLength %u > %u.\n",
2126 				conn->max_recv_dlength, max);
2127 			return -EINVAL;
2128 		}
2129 		conn->max_recv_dlength = min(conn->max_recv_dlength, max);
2130 		cxgbi_align_pdu_size(conn->max_recv_dlength);
2131 	} else
2132 		conn->max_recv_dlength = max;
2133 
2134 	return 0;
2135 }
2136 
2137 int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
2138 			enum iscsi_param param, char *buf, int buflen)
2139 {
2140 	struct iscsi_conn *conn = cls_conn->dd_data;
2141 	struct iscsi_session *session = conn->session;
2142 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2143 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2144 	struct cxgbi_sock *csk = cconn->cep->csk;
2145 	int value, err = 0;
2146 
2147 	log_debug(1 << CXGBI_DBG_ISCSI,
2148 		"cls_conn 0x%p, param %d, buf(%d) %s.\n",
2149 		cls_conn, param, buflen, buf);
2150 
2151 	switch (param) {
2152 	case ISCSI_PARAM_HDRDGST_EN:
2153 		err = iscsi_set_param(cls_conn, param, buf, buflen);
2154 		if (!err && conn->hdrdgst_en)
2155 			err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid,
2156 							conn->hdrdgst_en,
2157 							conn->datadgst_en, 0);
2158 		break;
2159 	case ISCSI_PARAM_DATADGST_EN:
2160 		err = iscsi_set_param(cls_conn, param, buf, buflen);
2161 		if (!err && conn->datadgst_en)
2162 			err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid,
2163 							conn->hdrdgst_en,
2164 							conn->datadgst_en, 0);
2165 		break;
2166 	case ISCSI_PARAM_MAX_R2T:
2167 		sscanf(buf, "%d", &value);
2168 		if (value <= 0 || !is_power_of_2(value))
2169 			return -EINVAL;
2170 		if (session->max_r2t == value)
2171 			break;
2172 		iscsi_tcp_r2tpool_free(session);
2173 		err = iscsi_set_param(cls_conn, param, buf, buflen);
2174 		if (!err && iscsi_tcp_r2tpool_alloc(session))
2175 			return -ENOMEM;
2176 	case ISCSI_PARAM_MAX_RECV_DLENGTH:
2177 		err = iscsi_set_param(cls_conn, param, buf, buflen);
2178 		if (!err)
2179 			err = cxgbi_conn_max_recv_dlength(conn);
2180 		break;
2181 	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
2182 		err = iscsi_set_param(cls_conn, param, buf, buflen);
2183 		if (!err)
2184 			err = cxgbi_conn_max_xmit_dlength(conn);
2185 		break;
2186 	default:
2187 		return iscsi_set_param(cls_conn, param, buf, buflen);
2188 	}
2189 	return err;
2190 }
2191 EXPORT_SYMBOL_GPL(cxgbi_set_conn_param);
2192 
2193 int cxgbi_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param,
2194 		       char *buf)
2195 {
2196 	struct cxgbi_endpoint *cep = ep->dd_data;
2197 	struct cxgbi_sock *csk;
2198 	int len;
2199 
2200 	log_debug(1 << CXGBI_DBG_ISCSI,
2201 		"cls_conn 0x%p, param %d.\n", ep, param);
2202 
2203 	switch (param) {
2204 	case ISCSI_PARAM_CONN_PORT:
2205 	case ISCSI_PARAM_CONN_ADDRESS:
2206 		if (!cep)
2207 			return -ENOTCONN;
2208 
2209 		csk = cep->csk;
2210 		if (!csk)
2211 			return -ENOTCONN;
2212 
2213 		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
2214 						 &csk->daddr, param, buf);
2215 	default:
2216 		return -ENOSYS;
2217 	}
2218 	return len;
2219 }
2220 EXPORT_SYMBOL_GPL(cxgbi_get_ep_param);
2221 
2222 struct iscsi_cls_conn *
2223 cxgbi_create_conn(struct iscsi_cls_session *cls_session, u32 cid)
2224 {
2225 	struct iscsi_cls_conn *cls_conn;
2226 	struct iscsi_conn *conn;
2227 	struct iscsi_tcp_conn *tcp_conn;
2228 	struct cxgbi_conn *cconn;
2229 
2230 	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*cconn), cid);
2231 	if (!cls_conn)
2232 		return NULL;
2233 
2234 	conn = cls_conn->dd_data;
2235 	tcp_conn = conn->dd_data;
2236 	cconn = tcp_conn->dd_data;
2237 	cconn->iconn = conn;
2238 
2239 	log_debug(1 << CXGBI_DBG_ISCSI,
2240 		"cid %u(0x%x), cls 0x%p,0x%p, conn 0x%p,0x%p,0x%p.\n",
2241 		cid, cid, cls_session, cls_conn, conn, tcp_conn, cconn);
2242 
2243 	return cls_conn;
2244 }
2245 EXPORT_SYMBOL_GPL(cxgbi_create_conn);
2246 
2247 int cxgbi_bind_conn(struct iscsi_cls_session *cls_session,
2248 				struct iscsi_cls_conn *cls_conn,
2249 				u64 transport_eph, int is_leading)
2250 {
2251 	struct iscsi_conn *conn = cls_conn->dd_data;
2252 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
2253 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
2254 	struct iscsi_endpoint *ep;
2255 	struct cxgbi_endpoint *cep;
2256 	struct cxgbi_sock *csk;
2257 	int err;
2258 
2259 	ep = iscsi_lookup_endpoint(transport_eph);
2260 	if (!ep)
2261 		return -EINVAL;
2262 
2263 	/*  setup ddp pagesize */
2264 	cep = ep->dd_data;
2265 	csk = cep->csk;
2266 	err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, page_idx, 0);
2267 	if (err < 0)
2268 		return err;
2269 
2270 	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
2271 	if (err)
2272 		return -EINVAL;
2273 
2274 	/*  calculate the tag idx bits needed for this conn based on cmds_max */
2275 	cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
2276 
2277 	write_lock_bh(&csk->callback_lock);
2278 	csk->user_data = conn;
2279 	cconn->chba = cep->chba;
2280 	cconn->cep = cep;
2281 	cep->cconn = cconn;
2282 	write_unlock_bh(&csk->callback_lock);
2283 
2284 	cxgbi_conn_max_xmit_dlength(conn);
2285 	cxgbi_conn_max_recv_dlength(conn);
2286 
2287 	log_debug(1 << CXGBI_DBG_ISCSI,
2288 		"cls 0x%p,0x%p, ep 0x%p, cconn 0x%p, csk 0x%p.\n",
2289 		cls_session, cls_conn, ep, cconn, csk);
2290 	/*  init recv engine */
2291 	iscsi_tcp_hdr_recv_prep(tcp_conn);
2292 
2293 	return 0;
2294 }
2295 EXPORT_SYMBOL_GPL(cxgbi_bind_conn);
2296 
2297 struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
2298 						u16 cmds_max, u16 qdepth,
2299 						u32 initial_cmdsn)
2300 {
2301 	struct cxgbi_endpoint *cep;
2302 	struct cxgbi_hba *chba;
2303 	struct Scsi_Host *shost;
2304 	struct iscsi_cls_session *cls_session;
2305 	struct iscsi_session *session;
2306 
2307 	if (!ep) {
2308 		pr_err("missing endpoint.\n");
2309 		return NULL;
2310 	}
2311 
2312 	cep = ep->dd_data;
2313 	chba = cep->chba;
2314 	shost = chba->shost;
2315 
2316 	BUG_ON(chba != iscsi_host_priv(shost));
2317 
2318 	cls_session = iscsi_session_setup(chba->cdev->itp, shost,
2319 					cmds_max, 0,
2320 					sizeof(struct iscsi_tcp_task) +
2321 					sizeof(struct cxgbi_task_data),
2322 					initial_cmdsn, ISCSI_MAX_TARGET);
2323 	if (!cls_session)
2324 		return NULL;
2325 
2326 	session = cls_session->dd_data;
2327 	if (iscsi_tcp_r2tpool_alloc(session))
2328 		goto remove_session;
2329 
2330 	log_debug(1 << CXGBI_DBG_ISCSI,
2331 		"ep 0x%p, cls sess 0x%p.\n", ep, cls_session);
2332 	return cls_session;
2333 
2334 remove_session:
2335 	iscsi_session_teardown(cls_session);
2336 	return NULL;
2337 }
2338 EXPORT_SYMBOL_GPL(cxgbi_create_session);
2339 
2340 void cxgbi_destroy_session(struct iscsi_cls_session *cls_session)
2341 {
2342 	log_debug(1 << CXGBI_DBG_ISCSI,
2343 		"cls sess 0x%p.\n", cls_session);
2344 
2345 	iscsi_tcp_r2tpool_free(cls_session->dd_data);
2346 	iscsi_session_teardown(cls_session);
2347 }
2348 EXPORT_SYMBOL_GPL(cxgbi_destroy_session);
2349 
2350 int cxgbi_set_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
2351 			char *buf, int buflen)
2352 {
2353 	struct cxgbi_hba *chba = iscsi_host_priv(shost);
2354 
2355 	if (!chba->ndev) {
2356 		shost_printk(KERN_ERR, shost, "Could not get host param. "
2357 				"netdev for host not set.\n");
2358 		return -ENODEV;
2359 	}
2360 
2361 	log_debug(1 << CXGBI_DBG_ISCSI,
2362 		"shost 0x%p, hba 0x%p,%s, param %d, buf(%d) %s.\n",
2363 		shost, chba, chba->ndev->name, param, buflen, buf);
2364 
2365 	switch (param) {
2366 	case ISCSI_HOST_PARAM_IPADDRESS:
2367 	{
2368 		__be32 addr = in_aton(buf);
2369 		log_debug(1 << CXGBI_DBG_ISCSI,
2370 			"hba %s, req. ipv4 %pI4.\n", chba->ndev->name, &addr);
2371 		cxgbi_set_iscsi_ipv4(chba, addr);
2372 		return 0;
2373 	}
2374 	case ISCSI_HOST_PARAM_HWADDRESS:
2375 	case ISCSI_HOST_PARAM_NETDEV_NAME:
2376 		return 0;
2377 	default:
2378 		return iscsi_host_set_param(shost, param, buf, buflen);
2379 	}
2380 }
2381 EXPORT_SYMBOL_GPL(cxgbi_set_host_param);
2382 
2383 int cxgbi_get_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
2384 			char *buf)
2385 {
2386 	struct cxgbi_hba *chba = iscsi_host_priv(shost);
2387 	int len = 0;
2388 
2389 	if (!chba->ndev) {
2390 		shost_printk(KERN_ERR, shost, "Could not get host param. "
2391 				"netdev for host not set.\n");
2392 		return -ENODEV;
2393 	}
2394 
2395 	log_debug(1 << CXGBI_DBG_ISCSI,
2396 		"shost 0x%p, hba 0x%p,%s, param %d.\n",
2397 		shost, chba, chba->ndev->name, param);
2398 
2399 	switch (param) {
2400 	case ISCSI_HOST_PARAM_HWADDRESS:
2401 		len = sysfs_format_mac(buf, chba->ndev->dev_addr, 6);
2402 		break;
2403 	case ISCSI_HOST_PARAM_NETDEV_NAME:
2404 		len = sprintf(buf, "%s\n", chba->ndev->name);
2405 		break;
2406 	case ISCSI_HOST_PARAM_IPADDRESS:
2407 	{
2408 		__be32 addr;
2409 
2410 		addr = cxgbi_get_iscsi_ipv4(chba);
2411 		len = sprintf(buf, "%pI4", &addr);
2412 		log_debug(1 << CXGBI_DBG_ISCSI,
2413 			"hba %s, ipv4 %pI4.\n", chba->ndev->name, &addr);
2414 		break;
2415 	}
2416 	default:
2417 		return iscsi_host_get_param(shost, param, buf);
2418 	}
2419 
2420 	return len;
2421 }
2422 EXPORT_SYMBOL_GPL(cxgbi_get_host_param);
2423 
2424 struct iscsi_endpoint *cxgbi_ep_connect(struct Scsi_Host *shost,
2425 					struct sockaddr *dst_addr,
2426 					int non_blocking)
2427 {
2428 	struct iscsi_endpoint *ep;
2429 	struct cxgbi_endpoint *cep;
2430 	struct cxgbi_hba *hba = NULL;
2431 	struct cxgbi_sock *csk;
2432 	int err = -EINVAL;
2433 
2434 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2435 		"shost 0x%p, non_blocking %d, dst_addr 0x%p.\n",
2436 		shost, non_blocking, dst_addr);
2437 
2438 	if (shost) {
2439 		hba = iscsi_host_priv(shost);
2440 		if (!hba) {
2441 			pr_info("shost 0x%p, priv NULL.\n", shost);
2442 			goto err_out;
2443 		}
2444 	}
2445 
2446 	csk = cxgbi_check_route(dst_addr);
2447 	if (IS_ERR(csk))
2448 		return (struct iscsi_endpoint *)csk;
2449 	cxgbi_sock_get(csk);
2450 
2451 	if (!hba)
2452 		hba = csk->cdev->hbas[csk->port_id];
2453 	else if (hba != csk->cdev->hbas[csk->port_id]) {
2454 		pr_info("Could not connect through requested host %u"
2455 			"hba 0x%p != 0x%p (%u).\n",
2456 			shost->host_no, hba,
2457 			csk->cdev->hbas[csk->port_id], csk->port_id);
2458 		err = -ENOSPC;
2459 		goto release_conn;
2460 	}
2461 
2462 	err = sock_get_port(csk);
2463 	if (err)
2464 		goto release_conn;
2465 
2466 	cxgbi_sock_set_state(csk, CTP_CONNECTING);
2467 	err = csk->cdev->csk_init_act_open(csk);
2468 	if (err)
2469 		goto release_conn;
2470 
2471 	if (cxgbi_sock_is_closing(csk)) {
2472 		err = -ENOSPC;
2473 		pr_info("csk 0x%p is closing.\n", csk);
2474 		goto release_conn;
2475 	}
2476 
2477 	ep = iscsi_create_endpoint(sizeof(*cep));
2478 	if (!ep) {
2479 		err = -ENOMEM;
2480 		pr_info("iscsi alloc ep, OOM.\n");
2481 		goto release_conn;
2482 	}
2483 
2484 	cep = ep->dd_data;
2485 	cep->csk = csk;
2486 	cep->chba = hba;
2487 
2488 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2489 		"ep 0x%p, cep 0x%p, csk 0x%p, hba 0x%p,%s.\n",
2490 		ep, cep, csk, hba, hba->ndev->name);
2491 	return ep;
2492 
2493 release_conn:
2494 	cxgbi_sock_put(csk);
2495 	cxgbi_sock_closed(csk);
2496 err_out:
2497 	return ERR_PTR(err);
2498 }
2499 EXPORT_SYMBOL_GPL(cxgbi_ep_connect);
2500 
2501 int cxgbi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
2502 {
2503 	struct cxgbi_endpoint *cep = ep->dd_data;
2504 	struct cxgbi_sock *csk = cep->csk;
2505 
2506 	if (!cxgbi_sock_is_established(csk))
2507 		return 0;
2508 	return 1;
2509 }
2510 EXPORT_SYMBOL_GPL(cxgbi_ep_poll);
2511 
2512 void cxgbi_ep_disconnect(struct iscsi_endpoint *ep)
2513 {
2514 	struct cxgbi_endpoint *cep = ep->dd_data;
2515 	struct cxgbi_conn *cconn = cep->cconn;
2516 	struct cxgbi_sock *csk = cep->csk;
2517 
2518 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
2519 		"ep 0x%p, cep 0x%p, cconn 0x%p, csk 0x%p,%u,0x%lx.\n",
2520 		ep, cep, cconn, csk, csk->state, csk->flags);
2521 
2522 	if (cconn && cconn->iconn) {
2523 		iscsi_suspend_tx(cconn->iconn);
2524 		write_lock_bh(&csk->callback_lock);
2525 		cep->csk->user_data = NULL;
2526 		cconn->cep = NULL;
2527 		write_unlock_bh(&csk->callback_lock);
2528 	}
2529 	iscsi_destroy_endpoint(ep);
2530 
2531 	if (likely(csk->state >= CTP_ESTABLISHED))
2532 		need_active_close(csk);
2533 	else
2534 		cxgbi_sock_closed(csk);
2535 
2536 	cxgbi_sock_put(csk);
2537 }
2538 EXPORT_SYMBOL_GPL(cxgbi_ep_disconnect);
2539 
2540 int cxgbi_iscsi_init(struct iscsi_transport *itp,
2541 			struct scsi_transport_template **stt)
2542 {
2543 	*stt = iscsi_register_transport(itp);
2544 	if (*stt == NULL) {
2545 		pr_err("unable to register %s transport 0x%p.\n",
2546 			itp->name, itp);
2547 		return -ENODEV;
2548 	}
2549 	log_debug(1 << CXGBI_DBG_ISCSI,
2550 		"%s, registered iscsi transport 0x%p.\n",
2551 		itp->name, stt);
2552 	return 0;
2553 }
2554 EXPORT_SYMBOL_GPL(cxgbi_iscsi_init);
2555 
2556 void cxgbi_iscsi_cleanup(struct iscsi_transport *itp,
2557 			struct scsi_transport_template **stt)
2558 {
2559 	if (*stt) {
2560 		log_debug(1 << CXGBI_DBG_ISCSI,
2561 			"de-register transport 0x%p, %s, stt 0x%p.\n",
2562 			itp, itp->name, *stt);
2563 		*stt = NULL;
2564 		iscsi_unregister_transport(itp);
2565 	}
2566 }
2567 EXPORT_SYMBOL_GPL(cxgbi_iscsi_cleanup);
2568 
2569 static int __init libcxgbi_init_module(void)
2570 {
2571 	sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
2572 	sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
2573 
2574 	pr_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
2575 		ISCSI_ITT_MASK, sw_tag_idx_bits,
2576 		ISCSI_AGE_MASK, sw_tag_age_bits);
2577 
2578 	ddp_setup_host_page_size();
2579 	return 0;
2580 }
2581 
2582 static void __exit libcxgbi_exit_module(void)
2583 {
2584 	cxgbi_device_unregister_all(0xFF);
2585 	return;
2586 }
2587 
2588 module_init(libcxgbi_init_module);
2589 module_exit(libcxgbi_exit_module);
2590