1 /* 2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 
31 */ 32 #include <linux/module.h> 33 #include <linux/list.h> 34 #include <linux/workqueue.h> 35 #include <linux/skbuff.h> 36 #include <linux/timer.h> 37 #include <linux/notifier.h> 38 #include <linux/inetdevice.h> 39 #include <linux/ip.h> 40 #include <linux/tcp.h> 41 #include <linux/if_vlan.h> 42 43 #include <net/neighbour.h> 44 #include <net/netevent.h> 45 #include <net/route.h> 46 #include <net/tcp.h> 47 #include <net/ip6_route.h> 48 #include <net/addrconf.h> 49 50 #include <rdma/ib_addr.h> 51 52 #include <libcxgb_cm.h> 53 #include "iw_cxgb4.h" 54 #include "clip_tbl.h" 55 56 static char *states[] = { 57 "idle", 58 "listen", 59 "connecting", 60 "mpa_wait_req", 61 "mpa_req_sent", 62 "mpa_req_rcvd", 63 "mpa_rep_sent", 64 "fpdu_mode", 65 "aborting", 66 "closing", 67 "moribund", 68 "dead", 69 NULL, 70 }; 71 72 static int nocong; 73 module_param(nocong, int, 0644); 74 MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); 75 76 static int enable_ecn; 77 module_param(enable_ecn, int, 0644); 78 MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); 79 80 static int dack_mode = 1; 81 module_param(dack_mode, int, 0644); 82 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); 83 84 uint c4iw_max_read_depth = 32; 85 module_param(c4iw_max_read_depth, int, 0644); 86 MODULE_PARM_DESC(c4iw_max_read_depth, 87 "Per-connection max ORD/IRD (default=32)"); 88 89 static int enable_tcp_timestamps; 90 module_param(enable_tcp_timestamps, int, 0644); 91 MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)"); 92 93 static int enable_tcp_sack; 94 module_param(enable_tcp_sack, int, 0644); 95 MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)"); 96 97 static int enable_tcp_window_scaling = 1; 98 module_param(enable_tcp_window_scaling, int, 0644); 99 MODULE_PARM_DESC(enable_tcp_window_scaling, 100 "Enable tcp window scaling (default=1)"); 101 102 static int peer2peer = 1; 103 module_param(peer2peer, int, 0644); 104 
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); 105 106 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; 107 module_param(p2p_type, int, 0644); 108 MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: " 109 "1=RDMA_READ 0=RDMA_WRITE (default 1)"); 110 111 static int ep_timeout_secs = 60; 112 module_param(ep_timeout_secs, int, 0644); 113 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " 114 "in seconds (default=60)"); 115 116 static int mpa_rev = 2; 117 module_param(mpa_rev, int, 0644); 118 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 119 "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft" 120 " compliant (default=2)"); 121 122 static int markers_enabled; 123 module_param(markers_enabled, int, 0644); 124 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); 125 126 static int crc_enabled = 1; 127 module_param(crc_enabled, int, 0644); 128 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); 129 130 static int rcv_win = 256 * 1024; 131 module_param(rcv_win, int, 0644); 132 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); 133 134 static int snd_win = 128 * 1024; 135 module_param(snd_win, int, 0644); 136 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); 137 138 static struct workqueue_struct *workq; 139 140 static struct sk_buff_head rxq; 141 142 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); 143 static void ep_timeout(struct timer_list *t); 144 static void connect_reply_upcall(struct c4iw_ep *ep, int status); 145 static int sched(struct c4iw_dev *dev, struct sk_buff *skb); 146 147 static LIST_HEAD(timeout_list); 148 static spinlock_t timeout_lock; 149 150 static void deref_cm_id(struct c4iw_ep_common *epc) 151 { 152 epc->cm_id->rem_ref(epc->cm_id); 153 epc->cm_id = NULL; 154 set_bit(CM_ID_DEREFED, &epc->history); 155 } 156 157 static void ref_cm_id(struct c4iw_ep_common 
*epc) 158 { 159 set_bit(CM_ID_REFED, &epc->history); 160 epc->cm_id->add_ref(epc->cm_id); 161 } 162 163 static void deref_qp(struct c4iw_ep *ep) 164 { 165 c4iw_qp_rem_ref(&ep->com.qp->ibqp); 166 clear_bit(QP_REFERENCED, &ep->com.flags); 167 set_bit(QP_DEREFED, &ep->com.history); 168 } 169 170 static void ref_qp(struct c4iw_ep *ep) 171 { 172 set_bit(QP_REFERENCED, &ep->com.flags); 173 set_bit(QP_REFED, &ep->com.history); 174 c4iw_qp_add_ref(&ep->com.qp->ibqp); 175 } 176 177 static void start_ep_timer(struct c4iw_ep *ep) 178 { 179 pr_debug("ep %p\n", ep); 180 if (timer_pending(&ep->timer)) { 181 pr_err("%s timer already started! ep %p\n", 182 __func__, ep); 183 return; 184 } 185 clear_bit(TIMEOUT, &ep->com.flags); 186 c4iw_get_ep(&ep->com); 187 ep->timer.expires = jiffies + ep_timeout_secs * HZ; 188 add_timer(&ep->timer); 189 } 190 191 static int stop_ep_timer(struct c4iw_ep *ep) 192 { 193 pr_debug("ep %p stopping\n", ep); 194 del_timer_sync(&ep->timer); 195 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 196 c4iw_put_ep(&ep->com); 197 return 0; 198 } 199 return 1; 200 } 201 202 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, 203 struct l2t_entry *l2e) 204 { 205 int error = 0; 206 207 if (c4iw_fatal_error(rdev)) { 208 kfree_skb(skb); 209 pr_err("%s - device in error state - dropping\n", __func__); 210 return -EIO; 211 } 212 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); 213 if (error < 0) 214 kfree_skb(skb); 215 else if (error == NET_XMIT_DROP) 216 return -ENOMEM; 217 return error < 0 ? error : 0; 218 } 219 220 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) 221 { 222 int error = 0; 223 224 if (c4iw_fatal_error(rdev)) { 225 kfree_skb(skb); 226 pr_err("%s - device in error state - dropping\n", __func__); 227 return -EIO; 228 } 229 error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); 230 if (error < 0) 231 kfree_skb(skb); 232 return error < 0 ? 
error : 0; 233 } 234 235 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) 236 { 237 u32 len = roundup(sizeof(struct cpl_tid_release), 16); 238 239 skb = get_skb(skb, len, GFP_KERNEL); 240 if (!skb) 241 return; 242 243 cxgb_mk_tid_release(skb, len, hwtid, 0); 244 c4iw_ofld_send(rdev, skb); 245 return; 246 } 247 248 static void set_emss(struct c4iw_ep *ep, u16 opt) 249 { 250 ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] - 251 ((AF_INET == ep->com.remote_addr.ss_family) ? 252 sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - 253 sizeof(struct tcphdr); 254 ep->mss = ep->emss; 255 if (TCPOPT_TSTAMP_G(opt)) 256 ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4); 257 if (ep->emss < 128) 258 ep->emss = 128; 259 if (ep->emss & 7) 260 pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n", 261 TCPOPT_MSS_G(opt), ep->mss, ep->emss); 262 pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, 263 ep->emss); 264 } 265 266 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) 267 { 268 enum c4iw_ep_state state; 269 270 mutex_lock(&epc->mutex); 271 state = epc->state; 272 mutex_unlock(&epc->mutex); 273 return state; 274 } 275 276 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 277 { 278 epc->state = new; 279 } 280 281 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 282 { 283 mutex_lock(&epc->mutex); 284 pr_debug("%s -> %s\n", states[epc->state], states[new]); 285 __state_set(epc, new); 286 mutex_unlock(&epc->mutex); 287 return; 288 } 289 290 static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) 291 { 292 struct sk_buff *skb; 293 unsigned int i; 294 size_t len; 295 296 len = roundup(sizeof(union cpl_wr_size), 16); 297 for (i = 0; i < size; i++) { 298 skb = alloc_skb(len, GFP_KERNEL); 299 if (!skb) 300 goto fail; 301 skb_queue_tail(ep_skb_list, skb); 302 } 303 return 0; 304 fail: 305 skb_queue_purge(ep_skb_list); 306 return -ENOMEM; 307 } 308 309 
static void *alloc_ep(int size, gfp_t gfp) 310 { 311 struct c4iw_ep_common *epc; 312 313 epc = kzalloc(size, gfp); 314 if (epc) { 315 epc->wr_waitp = c4iw_alloc_wr_wait(gfp); 316 if (!epc->wr_waitp) { 317 kfree(epc); 318 epc = NULL; 319 goto out; 320 } 321 kref_init(&epc->kref); 322 mutex_init(&epc->mutex); 323 c4iw_init_wr_wait(epc->wr_waitp); 324 } 325 pr_debug("alloc ep %p\n", epc); 326 out: 327 return epc; 328 } 329 330 static void remove_ep_tid(struct c4iw_ep *ep) 331 { 332 unsigned long flags; 333 334 xa_lock_irqsave(&ep->com.dev->hwtids, flags); 335 __xa_erase(&ep->com.dev->hwtids, ep->hwtid); 336 if (xa_empty(&ep->com.dev->hwtids)) 337 wake_up(&ep->com.dev->wait); 338 xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); 339 } 340 341 static int insert_ep_tid(struct c4iw_ep *ep) 342 { 343 unsigned long flags; 344 int err; 345 346 xa_lock_irqsave(&ep->com.dev->hwtids, flags); 347 err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL); 348 xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); 349 350 return err; 351 } 352 353 /* 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep. 355 */ 356 static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) 357 { 358 struct c4iw_ep *ep; 359 unsigned long flags; 360 361 xa_lock_irqsave(&dev->hwtids, flags); 362 ep = xa_load(&dev->hwtids, tid); 363 if (ep) 364 c4iw_get_ep(&ep->com); 365 xa_unlock_irqrestore(&dev->hwtids, flags); 366 return ep; 367 } 368 369 /* 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep. 
371 */ 372 static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, 373 unsigned int stid) 374 { 375 struct c4iw_listen_ep *ep; 376 unsigned long flags; 377 378 xa_lock_irqsave(&dev->stids, flags); 379 ep = xa_load(&dev->stids, stid); 380 if (ep) 381 c4iw_get_ep(&ep->com); 382 xa_unlock_irqrestore(&dev->stids, flags); 383 return ep; 384 } 385 386 void _c4iw_free_ep(struct kref *kref) 387 { 388 struct c4iw_ep *ep; 389 390 ep = container_of(kref, struct c4iw_ep, com.kref); 391 pr_debug("ep %p state %s\n", ep, states[ep->com.state]); 392 if (test_bit(QP_REFERENCED, &ep->com.flags)) 393 deref_qp(ep); 394 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { 395 if (ep->com.remote_addr.ss_family == AF_INET6) { 396 struct sockaddr_in6 *sin6 = 397 (struct sockaddr_in6 *) 398 &ep->com.local_addr; 399 400 cxgb4_clip_release( 401 ep->com.dev->rdev.lldi.ports[0], 402 (const u32 *)&sin6->sin6_addr.s6_addr, 403 1); 404 } 405 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, 406 ep->com.local_addr.ss_family); 407 dst_release(ep->dst); 408 cxgb4_l2t_release(ep->l2t); 409 kfree_skb(ep->mpa_skb); 410 } 411 if (!skb_queue_empty(&ep->com.ep_skb_list)) 412 skb_queue_purge(&ep->com.ep_skb_list); 413 c4iw_put_wr_wait(ep->com.wr_waitp); 414 kfree(ep); 415 } 416 417 static void release_ep_resources(struct c4iw_ep *ep) 418 { 419 set_bit(RELEASE_RESOURCES, &ep->com.flags); 420 421 /* 422 * If we have a hwtid, then remove it from the idr table 423 * so lookups will no longer find this endpoint. Otherwise 424 * we have a race where one thread finds the ep ptr just 425 * before the other thread is freeing the ep memory. 
426 */ 427 if (ep->hwtid != -1) 428 remove_ep_tid(ep); 429 c4iw_put_ep(&ep->com); 430 } 431 432 static int status2errno(int status) 433 { 434 switch (status) { 435 case CPL_ERR_NONE: 436 return 0; 437 case CPL_ERR_CONN_RESET: 438 return -ECONNRESET; 439 case CPL_ERR_ARP_MISS: 440 return -EHOSTUNREACH; 441 case CPL_ERR_CONN_TIMEDOUT: 442 return -ETIMEDOUT; 443 case CPL_ERR_TCAM_FULL: 444 return -ENOMEM; 445 case CPL_ERR_CONN_EXIST: 446 return -EADDRINUSE; 447 default: 448 return -EIO; 449 } 450 } 451 452 /* 453 * Try and reuse skbs already allocated... 454 */ 455 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) 456 { 457 if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { 458 skb_trim(skb, 0); 459 skb_get(skb); 460 skb_reset_transport_header(skb); 461 } else { 462 skb = alloc_skb(len, gfp); 463 if (!skb) 464 return NULL; 465 } 466 t4_set_arp_err_handler(skb, NULL, NULL); 467 return skb; 468 } 469 470 static struct net_device *get_real_dev(struct net_device *egress_dev) 471 { 472 return rdma_vlan_dev_real_dev(egress_dev) ? 
: egress_dev; 473 } 474 475 static void arp_failure_discard(void *handle, struct sk_buff *skb) 476 { 477 pr_err("ARP failure\n"); 478 kfree_skb(skb); 479 } 480 481 static void mpa_start_arp_failure(void *handle, struct sk_buff *skb) 482 { 483 pr_err("ARP failure during MPA Negotiation - Closing Connection\n"); 484 } 485 486 enum { 487 NUM_FAKE_CPLS = 2, 488 FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, 489 FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1, 490 }; 491 492 static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 493 { 494 struct c4iw_ep *ep; 495 496 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 497 release_ep_resources(ep); 498 kfree_skb(skb); 499 return 0; 500 } 501 502 static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 503 { 504 struct c4iw_ep *ep; 505 506 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 507 c4iw_put_ep(&ep->parent_ep->com); 508 release_ep_resources(ep); 509 kfree_skb(skb); 510 return 0; 511 } 512 513 /* 514 * Fake up a special CPL opcode and call sched() so process_work() will call 515 * _put_ep_safe() in a safe context to free the ep resources. This is needed 516 * because ARP error handlers are called in an ATOMIC context, and 517 * _c4iw_free_ep() needs to block. 518 */ 519 static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb, 520 int cpl) 521 { 522 struct cpl_act_establish *rpl = cplhdr(skb); 523 524 /* Set our special ARP_FAILURE opcode */ 525 rpl->ot.opcode = cpl; 526 527 /* 528 * Save ep in the skb->cb area, after where sched() will save the dev 529 * ptr. 
530 */ 531 *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep; 532 sched(ep->com.dev, skb); 533 } 534 535 /* Handle an ARP failure for an accept */ 536 static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) 537 { 538 struct c4iw_ep *ep = handle; 539 540 pr_err("ARP failure during accept - tid %u - dropping connection\n", 541 ep->hwtid); 542 543 __state_set(&ep->com, DEAD); 544 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE); 545 } 546 547 /* 548 * Handle an ARP failure for an active open. 549 */ 550 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) 551 { 552 struct c4iw_ep *ep = handle; 553 554 pr_err("ARP failure during connect\n"); 555 connect_reply_upcall(ep, -EHOSTUNREACH); 556 __state_set(&ep->com, DEAD); 557 if (ep->com.remote_addr.ss_family == AF_INET6) { 558 struct sockaddr_in6 *sin6 = 559 (struct sockaddr_in6 *)&ep->com.local_addr; 560 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 561 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 562 } 563 xa_erase_irq(&ep->com.dev->atids, ep->atid); 564 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 565 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 566 } 567 568 /* 569 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 570 * and send it along. 
571 */ 572 static void abort_arp_failure(void *handle, struct sk_buff *skb) 573 { 574 int ret; 575 struct c4iw_ep *ep = handle; 576 struct c4iw_rdev *rdev = &ep->com.dev->rdev; 577 struct cpl_abort_req *req = cplhdr(skb); 578 579 pr_debug("rdev %p\n", rdev); 580 req->cmd = CPL_ABORT_NO_RST; 581 skb_get(skb); 582 ret = c4iw_ofld_send(rdev, skb); 583 if (ret) { 584 __state_set(&ep->com, DEAD); 585 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 586 } else 587 kfree_skb(skb); 588 } 589 590 static int send_flowc(struct c4iw_ep *ep) 591 { 592 struct fw_flowc_wr *flowc; 593 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); 594 u16 vlan = ep->l2t->vlan; 595 int nparams; 596 int flowclen, flowclen16; 597 598 if (WARN_ON(!skb)) 599 return -ENOMEM; 600 601 if (vlan == CPL_L2T_VLAN_NONE) 602 nparams = 9; 603 else 604 nparams = 10; 605 606 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); 607 flowclen16 = DIV_ROUND_UP(flowclen, 16); 608 flowclen = flowclen16 * 16; 609 610 flowc = __skb_put(skb, flowclen); 611 memset(flowc, 0, flowclen); 612 613 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 614 FW_FLOWC_WR_NPARAMS_V(nparams)); 615 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | 616 FW_WR_FLOWID_V(ep->hwtid)); 617 618 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 619 flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V 620 (ep->com.dev->rdev.lldi.pf)); 621 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 622 flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); 623 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 624 flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan); 625 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 626 flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid); 627 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; 628 flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq); 629 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 630 flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); 631 flowc->mnemval[6].mnemonic = 
FW_FLOWC_MNEM_SNDBUF; 632 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); 633 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 634 flowc->mnemval[7].val = cpu_to_be32(ep->emss); 635 flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; 636 flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); 637 if (nparams == 10) { 638 u16 pri; 639 pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 640 flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 641 flowc->mnemval[9].val = cpu_to_be32(pri); 642 } 643 644 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 645 return c4iw_ofld_send(&ep->com.dev->rdev, skb); 646 } 647 648 static int send_halfclose(struct c4iw_ep *ep) 649 { 650 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); 651 u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); 652 653 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 654 if (WARN_ON(!skb)) 655 return -ENOMEM; 656 657 cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, 658 NULL, arp_failure_discard); 659 660 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 661 } 662 663 static void read_tcb(struct c4iw_ep *ep) 664 { 665 struct sk_buff *skb; 666 struct cpl_get_tcb *req; 667 int wrlen = roundup(sizeof(*req), 16); 668 669 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); 670 if (WARN_ON(!skb)) 671 return; 672 673 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 674 req = (struct cpl_get_tcb *) skb_put(skb, wrlen); 675 memset(req, 0, wrlen); 676 INIT_TP_WR(req, ep->hwtid); 677 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid)); 678 req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid)); 679 680 /* 681 * keep a ref on the ep so the tcb is not unlocked before this 682 * cpl completes. The ref is released in read_tcb_rpl(). 
683 */ 684 c4iw_get_ep(&ep->com); 685 if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb))) 686 c4iw_put_ep(&ep->com); 687 } 688 689 static int send_abort_req(struct c4iw_ep *ep) 690 { 691 u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); 692 struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); 693 694 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 695 if (WARN_ON(!req_skb)) 696 return -ENOMEM; 697 698 cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, 699 ep, abort_arp_failure); 700 701 return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); 702 } 703 704 static int send_abort(struct c4iw_ep *ep) 705 { 706 if (!ep->com.qp || !ep->com.qp->srq) { 707 send_abort_req(ep); 708 return 0; 709 } 710 set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags); 711 read_tcb(ep); 712 return 0; 713 } 714 715 static int send_connect(struct c4iw_ep *ep) 716 { 717 struct cpl_act_open_req *req = NULL; 718 struct cpl_t5_act_open_req *t5req = NULL; 719 struct cpl_t6_act_open_req *t6req = NULL; 720 struct cpl_act_open_req6 *req6 = NULL; 721 struct cpl_t5_act_open_req6 *t5req6 = NULL; 722 struct cpl_t6_act_open_req6 *t6req6 = NULL; 723 struct sk_buff *skb; 724 u64 opt0; 725 u32 opt2; 726 unsigned int mtu_idx; 727 u32 wscale; 728 int win, sizev4, sizev6, wrlen; 729 struct sockaddr_in *la = (struct sockaddr_in *) 730 &ep->com.local_addr; 731 struct sockaddr_in *ra = (struct sockaddr_in *) 732 &ep->com.remote_addr; 733 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) 734 &ep->com.local_addr; 735 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) 736 &ep->com.remote_addr; 737 int ret; 738 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; 739 u32 isn = (prandom_u32() & ~7UL) - 1; 740 struct net_device *netdev; 741 u64 params; 742 743 netdev = ep->com.dev->rdev.lldi.ports[0]; 744 745 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 746 case CHELSIO_T4: 747 sizev4 = sizeof(struct cpl_act_open_req); 748 sizev6 = sizeof(struct cpl_act_open_req6); 749 break; 750 case 
CHELSIO_T5: 751 sizev4 = sizeof(struct cpl_t5_act_open_req); 752 sizev6 = sizeof(struct cpl_t5_act_open_req6); 753 break; 754 case CHELSIO_T6: 755 sizev4 = sizeof(struct cpl_t6_act_open_req); 756 sizev6 = sizeof(struct cpl_t6_act_open_req6); 757 break; 758 default: 759 pr_err("T%d Chip is not supported\n", 760 CHELSIO_CHIP_VERSION(adapter_type)); 761 return -EINVAL; 762 } 763 764 wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? 765 roundup(sizev4, 16) : 766 roundup(sizev6, 16); 767 768 pr_debug("ep %p atid %u\n", ep, ep->atid); 769 770 skb = get_skb(NULL, wrlen, GFP_KERNEL); 771 if (!skb) { 772 pr_err("%s - failed to alloc skb\n", __func__); 773 return -ENOMEM; 774 } 775 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 776 777 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 778 enable_tcp_timestamps, 779 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 780 wscale = cxgb_compute_wscale(rcv_win); 781 782 /* 783 * Specify the largest window that will fit in opt0. The 784 * remainder will be specified in the rx_data_ack. 785 */ 786 win = ep->rcv_win >> 10; 787 if (win > RCV_BUFSIZ_M) 788 win = RCV_BUFSIZ_M; 789 790 opt0 = (nocong ? 
NO_CONG_F : 0) | 791 KEEP_ALIVE_F | 792 DELACK_F | 793 WND_SCALE_V(wscale) | 794 MSS_IDX_V(mtu_idx) | 795 L2T_IDX_V(ep->l2t->idx) | 796 TX_CHAN_V(ep->tx_chan) | 797 SMAC_SEL_V(ep->smac_idx) | 798 DSCP_V(ep->tos >> 2) | 799 ULP_MODE_V(ULP_MODE_TCPDDP) | 800 RCV_BUFSIZ_V(win); 801 opt2 = RX_CHANNEL_V(0) | 802 CCTRL_ECN_V(enable_ecn) | 803 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); 804 if (enable_tcp_timestamps) 805 opt2 |= TSTAMPS_EN_F; 806 if (enable_tcp_sack) 807 opt2 |= SACK_EN_F; 808 if (wscale && enable_tcp_window_scaling) 809 opt2 |= WND_SCALE_EN_F; 810 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { 811 if (peer2peer) 812 isn += 4; 813 814 opt2 |= T5_OPT_2_VALID_F; 815 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); 816 opt2 |= T5_ISS_F; 817 } 818 819 params = cxgb4_select_ntuple(netdev, ep->l2t); 820 821 if (ep->com.remote_addr.ss_family == AF_INET6) 822 cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], 823 (const u32 *)&la6->sin6_addr.s6_addr, 1); 824 825 t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); 826 827 if (ep->com.remote_addr.ss_family == AF_INET) { 828 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 829 case CHELSIO_T4: 830 req = skb_put(skb, wrlen); 831 INIT_TP_WR(req, 0); 832 break; 833 case CHELSIO_T5: 834 t5req = skb_put(skb, wrlen); 835 INIT_TP_WR(t5req, 0); 836 req = (struct cpl_act_open_req *)t5req; 837 break; 838 case CHELSIO_T6: 839 t6req = skb_put(skb, wrlen); 840 INIT_TP_WR(t6req, 0); 841 req = (struct cpl_act_open_req *)t6req; 842 t5req = (struct cpl_t5_act_open_req *)t6req; 843 break; 844 default: 845 pr_err("T%d Chip is not supported\n", 846 CHELSIO_CHIP_VERSION(adapter_type)); 847 ret = -EINVAL; 848 goto clip_release; 849 } 850 851 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, 852 ((ep->rss_qid<<14) | ep->atid))); 853 req->local_port = la->sin_port; 854 req->peer_port = ra->sin_port; 855 req->local_ip = la->sin_addr.s_addr; 856 req->peer_ip = ra->sin_addr.s_addr; 857 req->opt0 = cpu_to_be64(opt0); 858 859 if 
(is_t4(ep->com.dev->rdev.lldi.adapter_type)) { 860 req->params = cpu_to_be32(params); 861 req->opt2 = cpu_to_be32(opt2); 862 } else { 863 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 864 t5req->params = 865 cpu_to_be64(FILTER_TUPLE_V(params)); 866 t5req->rsvd = cpu_to_be32(isn); 867 pr_debug("snd_isn %u\n", t5req->rsvd); 868 t5req->opt2 = cpu_to_be32(opt2); 869 } else { 870 t6req->params = 871 cpu_to_be64(FILTER_TUPLE_V(params)); 872 t6req->rsvd = cpu_to_be32(isn); 873 pr_debug("snd_isn %u\n", t6req->rsvd); 874 t6req->opt2 = cpu_to_be32(opt2); 875 } 876 } 877 } else { 878 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 879 case CHELSIO_T4: 880 req6 = skb_put(skb, wrlen); 881 INIT_TP_WR(req6, 0); 882 break; 883 case CHELSIO_T5: 884 t5req6 = skb_put(skb, wrlen); 885 INIT_TP_WR(t5req6, 0); 886 req6 = (struct cpl_act_open_req6 *)t5req6; 887 break; 888 case CHELSIO_T6: 889 t6req6 = skb_put(skb, wrlen); 890 INIT_TP_WR(t6req6, 0); 891 req6 = (struct cpl_act_open_req6 *)t6req6; 892 t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; 893 break; 894 default: 895 pr_err("T%d Chip is not supported\n", 896 CHELSIO_CHIP_VERSION(adapter_type)); 897 ret = -EINVAL; 898 goto clip_release; 899 } 900 901 OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, 902 ((ep->rss_qid<<14)|ep->atid))); 903 req6->local_port = la6->sin6_port; 904 req6->peer_port = ra6->sin6_port; 905 req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); 906 req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); 907 req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); 908 req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); 909 req6->opt0 = cpu_to_be64(opt0); 910 911 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { 912 req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev, 913 ep->l2t)); 914 req6->opt2 = cpu_to_be32(opt2); 915 } else { 916 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 917 t5req6->params = 918 cpu_to_be64(FILTER_TUPLE_V(params)); 919 t5req6->rsvd = 
cpu_to_be32(isn); 920 pr_debug("snd_isn %u\n", t5req6->rsvd); 921 t5req6->opt2 = cpu_to_be32(opt2); 922 } else { 923 t6req6->params = 924 cpu_to_be64(FILTER_TUPLE_V(params)); 925 t6req6->rsvd = cpu_to_be32(isn); 926 pr_debug("snd_isn %u\n", t6req6->rsvd); 927 t6req6->opt2 = cpu_to_be32(opt2); 928 } 929 930 } 931 } 932 933 set_bit(ACT_OPEN_REQ, &ep->com.history); 934 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 935 clip_release: 936 if (ret && ep->com.remote_addr.ss_family == AF_INET6) 937 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 938 (const u32 *)&la6->sin6_addr.s6_addr, 1); 939 return ret; 940 } 941 942 static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 943 u8 mpa_rev_to_use) 944 { 945 int mpalen, wrlen, ret; 946 struct fw_ofld_tx_data_wr *req; 947 struct mpa_message *mpa; 948 struct mpa_v2_conn_params mpa_v2_params; 949 950 pr_debug("ep %p tid %u pd_len %d\n", 951 ep, ep->hwtid, ep->plen); 952 953 mpalen = sizeof(*mpa) + ep->plen; 954 if (mpa_rev_to_use == 2) 955 mpalen += sizeof(struct mpa_v2_conn_params); 956 wrlen = roundup(mpalen + sizeof *req, 16); 957 skb = get_skb(skb, wrlen, GFP_KERNEL); 958 if (!skb) { 959 connect_reply_upcall(ep, -ENOMEM); 960 return -ENOMEM; 961 } 962 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 963 964 req = skb_put_zero(skb, wrlen); 965 req->op_to_immdlen = cpu_to_be32( 966 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 967 FW_WR_COMPL_F | 968 FW_WR_IMMDLEN_V(mpalen)); 969 req->flowid_len16 = cpu_to_be32( 970 FW_WR_FLOWID_V(ep->hwtid) | 971 FW_WR_LEN16_V(wrlen >> 4)); 972 req->plen = cpu_to_be32(mpalen); 973 req->tunnel_to_proxy = cpu_to_be32( 974 FW_OFLD_TX_DATA_WR_FLUSH_F | 975 FW_OFLD_TX_DATA_WR_SHOVE_F); 976 977 mpa = (struct mpa_message *)(req + 1); 978 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 979 980 mpa->flags = 0; 981 if (crc_enabled) 982 mpa->flags |= MPA_CRC; 983 if (markers_enabled) { 984 mpa->flags |= MPA_MARKERS; 985 ep->mpa_attr.recv_marker_enabled = 1; 986 } else { 987 
ep->mpa_attr.recv_marker_enabled = 0; 988 } 989 if (mpa_rev_to_use == 2) 990 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 991 992 mpa->private_data_size = htons(ep->plen); 993 mpa->revision = mpa_rev_to_use; 994 if (mpa_rev_to_use == 1) { 995 ep->tried_with_mpa_v1 = 1; 996 ep->retry_with_mpa_v1 = 0; 997 } 998 999 if (mpa_rev_to_use == 2) { 1000 mpa->private_data_size = htons(ntohs(mpa->private_data_size) + 1001 sizeof (struct mpa_v2_conn_params)); 1002 pr_debug("initiator ird %u ord %u\n", ep->ird, 1003 ep->ord); 1004 mpa_v2_params.ird = htons((u16)ep->ird); 1005 mpa_v2_params.ord = htons((u16)ep->ord); 1006 1007 if (peer2peer) { 1008 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1009 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) 1010 mpa_v2_params.ord |= 1011 htons(MPA_V2_RDMA_WRITE_RTR); 1012 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) 1013 mpa_v2_params.ord |= 1014 htons(MPA_V2_RDMA_READ_RTR); 1015 } 1016 memcpy(mpa->private_data, &mpa_v2_params, 1017 sizeof(struct mpa_v2_conn_params)); 1018 1019 if (ep->plen) 1020 memcpy(mpa->private_data + 1021 sizeof(struct mpa_v2_conn_params), 1022 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1023 } else 1024 if (ep->plen) 1025 memcpy(mpa->private_data, 1026 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1027 1028 /* 1029 * Reference the mpa skb. This ensures the data area 1030 * will remain in memory until the hw acks the tx. 1031 * Function fw4_ack() will deref it. 
1032 */ 1033 skb_get(skb); 1034 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1035 ep->mpa_skb = skb; 1036 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1037 if (ret) 1038 return ret; 1039 start_ep_timer(ep); 1040 __state_set(&ep->com, MPA_REQ_SENT); 1041 ep->mpa_attr.initiator = 1; 1042 ep->snd_seq += mpalen; 1043 return ret; 1044 } 1045 1046 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1047 { 1048 int mpalen, wrlen; 1049 struct fw_ofld_tx_data_wr *req; 1050 struct mpa_message *mpa; 1051 struct sk_buff *skb; 1052 struct mpa_v2_conn_params mpa_v2_params; 1053 1054 pr_debug("ep %p tid %u pd_len %d\n", 1055 ep, ep->hwtid, ep->plen); 1056 1057 mpalen = sizeof(*mpa) + plen; 1058 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) 1059 mpalen += sizeof(struct mpa_v2_conn_params); 1060 wrlen = roundup(mpalen + sizeof *req, 16); 1061 1062 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1063 if (!skb) { 1064 pr_err("%s - cannot alloc skb!\n", __func__); 1065 return -ENOMEM; 1066 } 1067 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1068 1069 req = skb_put_zero(skb, wrlen); 1070 req->op_to_immdlen = cpu_to_be32( 1071 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 1072 FW_WR_COMPL_F | 1073 FW_WR_IMMDLEN_V(mpalen)); 1074 req->flowid_len16 = cpu_to_be32( 1075 FW_WR_FLOWID_V(ep->hwtid) | 1076 FW_WR_LEN16_V(wrlen >> 4)); 1077 req->plen = cpu_to_be32(mpalen); 1078 req->tunnel_to_proxy = cpu_to_be32( 1079 FW_OFLD_TX_DATA_WR_FLUSH_F | 1080 FW_OFLD_TX_DATA_WR_SHOVE_F); 1081 1082 mpa = (struct mpa_message *)(req + 1); 1083 memset(mpa, 0, sizeof(*mpa)); 1084 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1085 mpa->flags = MPA_REJECT; 1086 mpa->revision = ep->mpa_attr.version; 1087 mpa->private_data_size = htons(plen); 1088 1089 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1090 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1091 mpa->private_data_size = htons(ntohs(mpa->private_data_size) + 1092 sizeof (struct 
mpa_v2_conn_params)); 1093 mpa_v2_params.ird = htons(((u16)ep->ird) | 1094 (peer2peer ? MPA_V2_PEER2PEER_MODEL : 1095 0)); 1096 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? 1097 (p2p_type == 1098 FW_RI_INIT_P2PTYPE_RDMA_WRITE ? 1099 MPA_V2_RDMA_WRITE_RTR : p2p_type == 1100 FW_RI_INIT_P2PTYPE_READ_REQ ? 1101 MPA_V2_RDMA_READ_RTR : 0) : 0)); 1102 memcpy(mpa->private_data, &mpa_v2_params, 1103 sizeof(struct mpa_v2_conn_params)); 1104 1105 if (ep->plen) 1106 memcpy(mpa->private_data + 1107 sizeof(struct mpa_v2_conn_params), pdata, plen); 1108 } else 1109 if (plen) 1110 memcpy(mpa->private_data, pdata, plen); 1111 1112 /* 1113 * Reference the mpa skb again. This ensures the data area 1114 * will remain in memory until the hw acks the tx. 1115 * Function fw4_ack() will deref it. 1116 */ 1117 skb_get(skb); 1118 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1119 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1120 ep->mpa_skb = skb; 1121 ep->snd_seq += mpalen; 1122 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1123 } 1124 1125 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) 1126 { 1127 int mpalen, wrlen; 1128 struct fw_ofld_tx_data_wr *req; 1129 struct mpa_message *mpa; 1130 struct sk_buff *skb; 1131 struct mpa_v2_conn_params mpa_v2_params; 1132 1133 pr_debug("ep %p tid %u pd_len %d\n", 1134 ep, ep->hwtid, ep->plen); 1135 1136 mpalen = sizeof(*mpa) + plen; 1137 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) 1138 mpalen += sizeof(struct mpa_v2_conn_params); 1139 wrlen = roundup(mpalen + sizeof *req, 16); 1140 1141 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1142 if (!skb) { 1143 pr_err("%s - cannot alloc skb!\n", __func__); 1144 return -ENOMEM; 1145 } 1146 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1147 1148 req = skb_put_zero(skb, wrlen); 1149 req->op_to_immdlen = cpu_to_be32( 1150 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 1151 FW_WR_COMPL_F | 1152 FW_WR_IMMDLEN_V(mpalen)); 1153 req->flowid_len16 = 
cpu_to_be32( 1154 FW_WR_FLOWID_V(ep->hwtid) | 1155 FW_WR_LEN16_V(wrlen >> 4)); 1156 req->plen = cpu_to_be32(mpalen); 1157 req->tunnel_to_proxy = cpu_to_be32( 1158 FW_OFLD_TX_DATA_WR_FLUSH_F | 1159 FW_OFLD_TX_DATA_WR_SHOVE_F); 1160 1161 mpa = (struct mpa_message *)(req + 1); 1162 memset(mpa, 0, sizeof(*mpa)); 1163 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1164 mpa->flags = 0; 1165 if (ep->mpa_attr.crc_enabled) 1166 mpa->flags |= MPA_CRC; 1167 if (ep->mpa_attr.recv_marker_enabled) 1168 mpa->flags |= MPA_MARKERS; 1169 mpa->revision = ep->mpa_attr.version; 1170 mpa->private_data_size = htons(plen); 1171 1172 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1173 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1174 mpa->private_data_size = htons(ntohs(mpa->private_data_size) + 1175 sizeof (struct mpa_v2_conn_params)); 1176 mpa_v2_params.ird = htons((u16)ep->ird); 1177 mpa_v2_params.ord = htons((u16)ep->ord); 1178 if (peer2peer && (ep->mpa_attr.p2p_type != 1179 FW_RI_INIT_P2PTYPE_DISABLED)) { 1180 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1181 1182 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) 1183 mpa_v2_params.ord |= 1184 htons(MPA_V2_RDMA_WRITE_RTR); 1185 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) 1186 mpa_v2_params.ord |= 1187 htons(MPA_V2_RDMA_READ_RTR); 1188 } 1189 1190 memcpy(mpa->private_data, &mpa_v2_params, 1191 sizeof(struct mpa_v2_conn_params)); 1192 1193 if (ep->plen) 1194 memcpy(mpa->private_data + 1195 sizeof(struct mpa_v2_conn_params), pdata, plen); 1196 } else 1197 if (plen) 1198 memcpy(mpa->private_data, pdata, plen); 1199 1200 /* 1201 * Reference the mpa skb. This ensures the data area 1202 * will remain in memory until the hw acks the tx. 1203 * Function fw4_ack() will deref it. 
1204 */ 1205 skb_get(skb); 1206 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1207 ep->mpa_skb = skb; 1208 __state_set(&ep->com, MPA_REP_SENT); 1209 ep->snd_seq += mpalen; 1210 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1211 } 1212 1213 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) 1214 { 1215 struct c4iw_ep *ep; 1216 struct cpl_act_establish *req = cplhdr(skb); 1217 unsigned short tcp_opt = ntohs(req->tcp_opt); 1218 unsigned int tid = GET_TID(req); 1219 unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); 1220 struct tid_info *t = dev->rdev.lldi.tids; 1221 int ret; 1222 1223 ep = lookup_atid(t, atid); 1224 1225 pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid, 1226 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); 1227 1228 mutex_lock(&ep->com.mutex); 1229 dst_confirm(ep->dst); 1230 1231 /* setup the hwtid for this connection */ 1232 ep->hwtid = tid; 1233 cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family); 1234 insert_ep_tid(ep); 1235 1236 ep->snd_seq = be32_to_cpu(req->snd_isn); 1237 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 1238 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); 1239 1240 set_emss(ep, tcp_opt); 1241 1242 /* dealloc the atid */ 1243 xa_erase_irq(&ep->com.dev->atids, atid); 1244 cxgb4_free_atid(t, atid); 1245 set_bit(ACT_ESTAB, &ep->com.history); 1246 1247 /* start MPA negotiation */ 1248 ret = send_flowc(ep); 1249 if (ret) 1250 goto err; 1251 if (ep->retry_with_mpa_v1) 1252 ret = send_mpa_req(ep, skb, 1); 1253 else 1254 ret = send_mpa_req(ep, skb, mpa_rev); 1255 if (ret) 1256 goto err; 1257 mutex_unlock(&ep->com.mutex); 1258 return 0; 1259 err: 1260 mutex_unlock(&ep->com.mutex); 1261 connect_reply_upcall(ep, -ENOMEM); 1262 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 1263 return 0; 1264 } 1265 1266 static void close_complete_upcall(struct c4iw_ep *ep, int status) 1267 { 1268 struct iw_cm_event event; 1269 1270 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1271 memset(&event, 0, sizeof(event)); 
1272 event.event = IW_CM_EVENT_CLOSE; 1273 event.status = status; 1274 if (ep->com.cm_id) { 1275 pr_debug("close complete delivered ep %p cm_id %p tid %u\n", 1276 ep, ep->com.cm_id, ep->hwtid); 1277 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1278 deref_cm_id(&ep->com); 1279 set_bit(CLOSE_UPCALL, &ep->com.history); 1280 } 1281 } 1282 1283 static void peer_close_upcall(struct c4iw_ep *ep) 1284 { 1285 struct iw_cm_event event; 1286 1287 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1288 memset(&event, 0, sizeof(event)); 1289 event.event = IW_CM_EVENT_DISCONNECT; 1290 if (ep->com.cm_id) { 1291 pr_debug("peer close delivered ep %p cm_id %p tid %u\n", 1292 ep, ep->com.cm_id, ep->hwtid); 1293 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1294 set_bit(DISCONN_UPCALL, &ep->com.history); 1295 } 1296 } 1297 1298 static void peer_abort_upcall(struct c4iw_ep *ep) 1299 { 1300 struct iw_cm_event event; 1301 1302 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1303 memset(&event, 0, sizeof(event)); 1304 event.event = IW_CM_EVENT_CLOSE; 1305 event.status = -ECONNRESET; 1306 if (ep->com.cm_id) { 1307 pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep, 1308 ep->com.cm_id, ep->hwtid); 1309 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1310 deref_cm_id(&ep->com); 1311 set_bit(ABORT_UPCALL, &ep->com.history); 1312 } 1313 } 1314 1315 static void connect_reply_upcall(struct c4iw_ep *ep, int status) 1316 { 1317 struct iw_cm_event event; 1318 1319 pr_debug("ep %p tid %u status %d\n", 1320 ep, ep->hwtid, status); 1321 memset(&event, 0, sizeof(event)); 1322 event.event = IW_CM_EVENT_CONNECT_REPLY; 1323 event.status = status; 1324 memcpy(&event.local_addr, &ep->com.local_addr, 1325 sizeof(ep->com.local_addr)); 1326 memcpy(&event.remote_addr, &ep->com.remote_addr, 1327 sizeof(ep->com.remote_addr)); 1328 1329 if ((status == 0) || (status == -ECONNREFUSED)) { 1330 if (!ep->tried_with_mpa_v1) { 1331 /* this means MPA_v2 is used */ 1332 event.ord = ep->ird; 1333 event.ird = 
ep->ord; 1334 event.private_data_len = ep->plen - 1335 sizeof(struct mpa_v2_conn_params); 1336 event.private_data = ep->mpa_pkt + 1337 sizeof(struct mpa_message) + 1338 sizeof(struct mpa_v2_conn_params); 1339 } else { 1340 /* this means MPA_v1 is used */ 1341 event.ord = cur_max_read_depth(ep->com.dev); 1342 event.ird = cur_max_read_depth(ep->com.dev); 1343 event.private_data_len = ep->plen; 1344 event.private_data = ep->mpa_pkt + 1345 sizeof(struct mpa_message); 1346 } 1347 } 1348 1349 pr_debug("ep %p tid %u status %d\n", ep, 1350 ep->hwtid, status); 1351 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1352 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1353 1354 if (status < 0) 1355 deref_cm_id(&ep->com); 1356 } 1357 1358 static int connect_request_upcall(struct c4iw_ep *ep) 1359 { 1360 struct iw_cm_event event; 1361 int ret; 1362 1363 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1364 memset(&event, 0, sizeof(event)); 1365 event.event = IW_CM_EVENT_CONNECT_REQUEST; 1366 memcpy(&event.local_addr, &ep->com.local_addr, 1367 sizeof(ep->com.local_addr)); 1368 memcpy(&event.remote_addr, &ep->com.remote_addr, 1369 sizeof(ep->com.remote_addr)); 1370 event.provider_data = ep; 1371 if (!ep->tried_with_mpa_v1) { 1372 /* this means MPA_v2 is used */ 1373 event.ord = ep->ord; 1374 event.ird = ep->ird; 1375 event.private_data_len = ep->plen - 1376 sizeof(struct mpa_v2_conn_params); 1377 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + 1378 sizeof(struct mpa_v2_conn_params); 1379 } else { 1380 /* this means MPA_v1 is used. 
Send max supported */ 1381 event.ord = cur_max_read_depth(ep->com.dev); 1382 event.ird = cur_max_read_depth(ep->com.dev); 1383 event.private_data_len = ep->plen; 1384 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 1385 } 1386 c4iw_get_ep(&ep->com); 1387 ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, 1388 &event); 1389 if (ret) 1390 c4iw_put_ep(&ep->com); 1391 set_bit(CONNREQ_UPCALL, &ep->com.history); 1392 c4iw_put_ep(&ep->parent_ep->com); 1393 return ret; 1394 } 1395 1396 static void established_upcall(struct c4iw_ep *ep) 1397 { 1398 struct iw_cm_event event; 1399 1400 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1401 memset(&event, 0, sizeof(event)); 1402 event.event = IW_CM_EVENT_ESTABLISHED; 1403 event.ird = ep->ord; 1404 event.ord = ep->ird; 1405 if (ep->com.cm_id) { 1406 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1407 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1408 set_bit(ESTAB_UPCALL, &ep->com.history); 1409 } 1410 } 1411 1412 static int update_rx_credits(struct c4iw_ep *ep, u32 credits) 1413 { 1414 struct sk_buff *skb; 1415 u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); 1416 u32 credit_dack; 1417 1418 pr_debug("ep %p tid %u credits %u\n", 1419 ep, ep->hwtid, credits); 1420 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1421 if (!skb) { 1422 pr_err("update_rx_credits - cannot alloc skb!\n"); 1423 return 0; 1424 } 1425 1426 /* 1427 * If we couldn't specify the entire rcv window at connection setup 1428 * due to the limit in the number of bits in the RCV_BUFSIZ field, 1429 * then add the overage in to the credits returned. 
/*
 * Compile-time switch used by process_mpa_reply(): when non-zero, IRD/ORD
 * values that do not match the responder's are adjusted where possible
 * instead of being treated as insufficient.
 */
#define RELAXED_IRD_NEGOTIATION 1

/*
 * process_mpa_reply - process streaming mode MPA reply
 * @ep:  endpoint in MPA_REQ_SENT state that received data
 * @skb: received payload; its linear data is appended to ep->mpa_pkt
 *
 * Accumulates the reply in ep->mpa_pkt, validates the MPA header, derives
 * the negotiated MPA attributes and IRD/ORD, moves the QP to RTS and, unless
 * the reply is still incomplete or the timer already fired, reports the
 * outcome through connect_reply_upcall().
 *
 * Returns:
 *
 * 0 when the reply is incomplete but valid so far, when the MPA timer had
 * already expired, or when the connect reply was delivered to the ULP
 * without error.
 *
 * 1 if a failure requires the caller to close the connection (RTR mismatch
 * or insufficient IRD; a TERM is requested first).
 *
 * 2 if a failure requires the caller to abort the connection.
 */
static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;
	u16 resp_ird, resp_ord;
	u8 rtr_mismatch = 0, insuff_ird = 0;
	struct c4iw_qp_attributes attrs;
	enum c4iw_qp_attr_mask mask;
	int err;
	int disconnect = 0;

	pr_debug("ep %p tid %u\n", ep, ep->hwtid);

	/*
	 * If we get more than the supported amount of private data
	 * then we must fail this connection.
	 */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		err = -EINVAL;
		goto err_stop_timer;
	}

	/*
	 * copy the new data into our accumulation buffer.
	 */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return 0;
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision > mpa_rev) {
		pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
		       __func__, mpa_rev, mpa->revision);
		err = -EPROTO;
		goto err_stop_timer;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		err = -EPROTO;
		goto err_stop_timer;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		err = -EPROTO;
		goto err_stop_timer;
	}

	/*
	 * Fail if more bytes have arrived than the header plus the
	 * advertised private data length account for.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		err = -EPROTO;
		goto err_stop_timer;
	}

	/*
	 * NOTE(review): plen was only bounded by MPA_MAX_PRIVATE_DATA above;
	 * if that limit exceeds 255 this cast truncates - confirm against
	 * the constant and the type of ep->plen.
	 */
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return 0;

	if (mpa->flags & MPA_REJECT) {
		err = -ECONNREFUSED;
		goto err_stop_timer;
	}

	/*
	 * Stop mpa timer.  If it expired, then
	 * we ignore the MPA reply.  process_timeout()
	 * will abort the connection.
	 */
	if (stop_ep_timer(ep))
		return 0;

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	__state_set(&ep->com, FPDU_MODE);
	/*
	 * '|' binds tighter than '?:', so CRC is enabled when either the
	 * peer's MPA_CRC flag or the local crc_enabled setting is non-zero.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	/* Start from "disabled"; the branches below may select an RTR type. */
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			/*
			 * The v2 parameters sit right after the MPA header.
			 * NOTE(review): nothing here checks that plen is at
			 * least sizeof(struct mpa_v2_conn_params) before
			 * they are read - confirm whether a short reply can
			 * reach this point.
			 */
			mpa_v2_params = (struct mpa_v2_conn_params *)
				(ep->mpa_pkt + sizeof(*mpa));
			resp_ird = ntohs(mpa_v2_params->ird) &
				MPA_V2_IRD_ORD_MASK;
			resp_ord = ntohs(mpa_v2_params->ord) &
				MPA_V2_IRD_ORD_MASK;
			pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
				 resp_ird, resp_ord, ep->ird, ep->ord);

			/*
			 * This is a double-check. Ideally, below checks are
			 * not required since ird/ord stuff has been taken
			 * care of in c4iw_accept_cr
			 */
			if (ep->ird < resp_ord) {
				/*
				 * Our IRD is too small for the responder's
				 * ORD: grow it if the relaxed mode is on and
				 * the device limit allows, else flag it.
				 */
				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
				    ep->com.dev->rdev.lldi.max_ordird_qp)
					ep->ird = resp_ord;
				else
					insuff_ird = 1;
			} else if (ep->ird > resp_ord) {
				ep->ird = resp_ord;
			}
			if (ep->ord > resp_ird) {
				/* Shrink our ORD to what the peer accepts. */
				if (RELAXED_IRD_NEGOTIATION)
					ep->ord = resp_ird;
				else
					insuff_ird = 1;
			}
			if (insuff_ird) {
				/*
				 * This err value is overwritten by the
				 * c4iw_modify_qp() call further down; it is
				 * set again where the TERM is generated.
				 */
				err = -ENOMEM;
				ep->ird = resp_ord;
				ep->ord = resp_ird;
			}

			/*
			 * The peer-to-peer flag rides in the ird word and
			 * the RTR type flags in the ord word.
			 */
			if (ntohs(mpa_v2_params->ird) &
					MPA_V2_PEER2PEER_MODEL) {
				if (ntohs(mpa_v2_params->ord) &
						MPA_V2_RDMA_WRITE_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
				else if (ntohs(mpa_v2_params->ord) &
						MPA_V2_RDMA_READ_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_READ_REQ;
			}
		}
	} else if (mpa->revision == 1)
		if (peer2peer)
			ep->mpa_attr.p2p_type = p2p_type;

	pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
		 ep->mpa_attr.crc_enabled,
		 ep->mpa_attr.recv_marker_enabled,
		 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
		 ep->mpa_attr.p2p_type, p2p_type);

	/*
	 * If responder's RTR does not match with that of initiator, assign
	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
	 * generated when moving QP to RTS state.
	 * A TERM message will be sent after QP has moved to RTS state
	 */
	if ((ep->mpa_attr.version == 2) && peer2peer &&
	    (ep->mpa_attr.p2p_type != p2p_type)) {
		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
		rtr_mismatch = 1;
	}

	/* Only the fields named in 'mask' below are filled in here. */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = C4IW_QP_STATE_RTS;

	mask = C4IW_QP_ATTR_NEXT_STATE |
	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = c4iw_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (err)
		goto err;

	/*
	 * If responder's RTR requirement did not match with what initiator
	 * supports, generate TERM message
	 */
	if (rtr_mismatch) {
		pr_err("%s: RTR mismatch, sending TERM\n", __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_NOMATCH_RTR;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		attrs.send_term = 1;
		/* The result of this transition is discarded on the next line. */
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				     C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		err = -ENOMEM;
		disconnect = 1;
		goto out;
	}

	/*
	 * Generate TERM if initiator IRD is not sufficient for responder
	 * provided ORD. Currently, we do the same behaviour even when
	 * responder provided IRD is also not sufficient as regards to
	 * initiator ORD.
	 */
	if (insuff_ird) {
		pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_INSUFF_IRD;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		attrs.send_term = 1;
		/* As above, the upcall always reports -ENOMEM for this case. */
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				     C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		err = -ENOMEM;
		disconnect = 1;
		goto out;
	}
	/* Success: err is 0 from the RTS transition, disconnect is still 0. */
	goto out;
err_stop_timer:
	stop_ep_timer(ep);
err:
	disconnect = 2;
out:
	connect_reply_upcall(ep, err);
	return disconnect;
}
1731 */ 1732 if (ep->mpa_pkt_len < sizeof(*mpa)) 1733 return 0; 1734 1735 pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); 1736 mpa = (struct mpa_message *) ep->mpa_pkt; 1737 1738 /* 1739 * Validate MPA Header. 1740 */ 1741 if (mpa->revision > mpa_rev) { 1742 pr_err("%s MPA version mismatch. Local = %d, Received = %d\n", 1743 __func__, mpa_rev, mpa->revision); 1744 goto err_stop_timer; 1745 } 1746 1747 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1748 goto err_stop_timer; 1749 1750 plen = ntohs(mpa->private_data_size); 1751 1752 /* 1753 * Fail if there's too much private data. 1754 */ 1755 if (plen > MPA_MAX_PRIVATE_DATA) 1756 goto err_stop_timer; 1757 1758 /* 1759 * If plen does not account for pkt size 1760 */ 1761 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1762 goto err_stop_timer; 1763 ep->plen = (u8) plen; 1764 1765 /* 1766 * If we don't have all the pdata yet, then bail. 1767 */ 1768 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1769 return 0; 1770 1771 /* 1772 * If we get here we have accumulated the entire mpa 1773 * start reply message including private data. 1774 */ 1775 ep->mpa_attr.initiator = 0; 1776 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1777 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1778 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1779 ep->mpa_attr.version = mpa->revision; 1780 if (mpa->revision == 1) 1781 ep->tried_with_mpa_v1 = 1; 1782 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1783 1784 if (mpa->revision == 2) { 1785 ep->mpa_attr.enhanced_rdma_conn = 1786 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 
1 : 0; 1787 if (ep->mpa_attr.enhanced_rdma_conn) { 1788 mpa_v2_params = (struct mpa_v2_conn_params *) 1789 (ep->mpa_pkt + sizeof(*mpa)); 1790 ep->ird = ntohs(mpa_v2_params->ird) & 1791 MPA_V2_IRD_ORD_MASK; 1792 ep->ird = min_t(u32, ep->ird, 1793 cur_max_read_depth(ep->com.dev)); 1794 ep->ord = ntohs(mpa_v2_params->ord) & 1795 MPA_V2_IRD_ORD_MASK; 1796 ep->ord = min_t(u32, ep->ord, 1797 cur_max_read_depth(ep->com.dev)); 1798 pr_debug("initiator ird %u ord %u\n", 1799 ep->ird, ep->ord); 1800 if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) 1801 if (peer2peer) { 1802 if (ntohs(mpa_v2_params->ord) & 1803 MPA_V2_RDMA_WRITE_RTR) 1804 ep->mpa_attr.p2p_type = 1805 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1806 else if (ntohs(mpa_v2_params->ord) & 1807 MPA_V2_RDMA_READ_RTR) 1808 ep->mpa_attr.p2p_type = 1809 FW_RI_INIT_P2PTYPE_READ_REQ; 1810 } 1811 } 1812 } else if (mpa->revision == 1) 1813 if (peer2peer) 1814 ep->mpa_attr.p2p_type = p2p_type; 1815 1816 pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n", 1817 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1818 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1819 ep->mpa_attr.p2p_type); 1820 1821 __state_set(&ep->com, MPA_REQ_RCVD); 1822 1823 /* drive upcall */ 1824 mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING); 1825 if (ep->parent_ep->com.state != DEAD) { 1826 if (connect_request_upcall(ep)) 1827 goto err_unlock_parent; 1828 } else { 1829 goto err_unlock_parent; 1830 } 1831 mutex_unlock(&ep->parent_ep->com.mutex); 1832 return 0; 1833 1834 err_unlock_parent: 1835 mutex_unlock(&ep->parent_ep->com.mutex); 1836 goto err_out; 1837 err_stop_timer: 1838 (void)stop_ep_timer(ep); 1839 err_out: 1840 return 2; 1841 } 1842 1843 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) 1844 { 1845 struct c4iw_ep *ep; 1846 struct cpl_rx_data *hdr = cplhdr(skb); 1847 unsigned int dlen = ntohs(hdr->len); 1848 unsigned int tid = GET_TID(hdr); 1849 __u8 
status = hdr->status; 1850 int disconnect = 0; 1851 1852 ep = get_ep_from_tid(dev, tid); 1853 if (!ep) 1854 return 0; 1855 pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen); 1856 skb_pull(skb, sizeof(*hdr)); 1857 skb_trim(skb, dlen); 1858 mutex_lock(&ep->com.mutex); 1859 1860 switch (ep->com.state) { 1861 case MPA_REQ_SENT: 1862 update_rx_credits(ep, dlen); 1863 ep->rcv_seq += dlen; 1864 disconnect = process_mpa_reply(ep, skb); 1865 break; 1866 case MPA_REQ_WAIT: 1867 update_rx_credits(ep, dlen); 1868 ep->rcv_seq += dlen; 1869 disconnect = process_mpa_request(ep, skb); 1870 break; 1871 case FPDU_MODE: { 1872 struct c4iw_qp_attributes attrs; 1873 1874 update_rx_credits(ep, dlen); 1875 if (status) 1876 pr_err("%s Unexpected streaming data." \ 1877 " qpid %u ep %p state %d tid %u status %d\n", 1878 __func__, ep->com.qp->wq.sq.qid, ep, 1879 ep->com.state, ep->hwtid, status); 1880 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1881 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1882 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 1883 disconnect = 1; 1884 break; 1885 } 1886 default: 1887 break; 1888 } 1889 mutex_unlock(&ep->com.mutex); 1890 if (disconnect) 1891 c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); 1892 c4iw_put_ep(&ep->com); 1893 return 0; 1894 } 1895 1896 static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx) 1897 { 1898 enum chip_type adapter_type; 1899 1900 adapter_type = ep->com.dev->rdev.lldi.adapter_type; 1901 1902 /* 1903 * If this TCB had a srq buffer cached, then we must complete 1904 * it. For user mode, that means saving the srqidx in the 1905 * user/kernel status page for this qp. For kernel mode, just 1906 * synthesize the CQE now. 
1907 */ 1908 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { 1909 if (ep->com.qp->ibqp.uobject) 1910 t4_set_wq_in_error(&ep->com.qp->wq, srqidx); 1911 else 1912 c4iw_flush_srqidx(ep->com.qp, srqidx); 1913 } 1914 } 1915 1916 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 1917 { 1918 u32 srqidx; 1919 struct c4iw_ep *ep; 1920 struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); 1921 int release = 0; 1922 unsigned int tid = GET_TID(rpl); 1923 1924 ep = get_ep_from_tid(dev, tid); 1925 if (!ep) { 1926 pr_warn("Abort rpl to freed endpoint\n"); 1927 return 0; 1928 } 1929 1930 if (ep->com.qp && ep->com.qp->srq) { 1931 srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status)); 1932 complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx); 1933 } 1934 1935 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1936 mutex_lock(&ep->com.mutex); 1937 switch (ep->com.state) { 1938 case ABORTING: 1939 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 1940 __state_set(&ep->com, DEAD); 1941 release = 1; 1942 break; 1943 default: 1944 pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); 1945 break; 1946 } 1947 mutex_unlock(&ep->com.mutex); 1948 1949 if (release) { 1950 close_complete_upcall(ep, -ECONNRESET); 1951 release_ep_resources(ep); 1952 } 1953 c4iw_put_ep(&ep->com); 1954 return 0; 1955 } 1956 1957 static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) 1958 { 1959 struct sk_buff *skb; 1960 struct fw_ofld_connection_wr *req; 1961 unsigned int mtu_idx; 1962 u32 wscale; 1963 struct sockaddr_in *sin; 1964 int win; 1965 1966 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); 1967 req = __skb_put_zero(skb, sizeof(*req)); 1968 req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); 1969 req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); 1970 req->le.filter = cpu_to_be32(cxgb4_select_ntuple( 1971 ep->com.dev->rdev.lldi.ports[0], 1972 ep->l2t)); 1973 sin = (struct sockaddr_in *)&ep->com.local_addr; 1974 req->le.lport = 
sin->sin_port; 1975 req->le.u.ipv4.lip = sin->sin_addr.s_addr; 1976 sin = (struct sockaddr_in *)&ep->com.remote_addr; 1977 req->le.pport = sin->sin_port; 1978 req->le.u.ipv4.pip = sin->sin_addr.s_addr; 1979 req->tcb.t_state_to_astid = 1980 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) | 1981 FW_OFLD_CONNECTION_WR_ASTID_V(atid)); 1982 req->tcb.cplrxdataack_cplpassacceptrpl = 1983 htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); 1984 req->tcb.tx_max = (__force __be32) jiffies; 1985 req->tcb.rcv_adv = htons(1); 1986 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 1987 enable_tcp_timestamps, 1988 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 1989 wscale = cxgb_compute_wscale(rcv_win); 1990 1991 /* 1992 * Specify the largest window that will fit in opt0. The 1993 * remainder will be specified in the rx_data_ack. 1994 */ 1995 win = ep->rcv_win >> 10; 1996 if (win > RCV_BUFSIZ_M) 1997 win = RCV_BUFSIZ_M; 1998 1999 req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F | 2000 (nocong ? 
NO_CONG_F : 0) | 2001 KEEP_ALIVE_F | 2002 DELACK_F | 2003 WND_SCALE_V(wscale) | 2004 MSS_IDX_V(mtu_idx) | 2005 L2T_IDX_V(ep->l2t->idx) | 2006 TX_CHAN_V(ep->tx_chan) | 2007 SMAC_SEL_V(ep->smac_idx) | 2008 DSCP_V(ep->tos >> 2) | 2009 ULP_MODE_V(ULP_MODE_TCPDDP) | 2010 RCV_BUFSIZ_V(win)); 2011 req->tcb.opt2 = (__force __be32) (PACE_V(1) | 2012 TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | 2013 RX_CHANNEL_V(0) | 2014 CCTRL_ECN_V(enable_ecn) | 2015 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid)); 2016 if (enable_tcp_timestamps) 2017 req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F; 2018 if (enable_tcp_sack) 2019 req->tcb.opt2 |= (__force __be32)SACK_EN_F; 2020 if (wscale && enable_tcp_window_scaling) 2021 req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F; 2022 req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0); 2023 req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); 2024 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 2025 set_bit(ACT_OFLD_CONN, &ep->com.history); 2026 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2027 } 2028 2029 /* 2030 * Some of the error codes above implicitly indicate that there is no TID 2031 * allocated with the result of an ACT_OPEN. We use this predicate to make 2032 * that explicit. 
2033 */ 2034 static inline int act_open_has_tid(int status) 2035 { 2036 return (status != CPL_ERR_TCAM_PARITY && 2037 status != CPL_ERR_TCAM_MISS && 2038 status != CPL_ERR_TCAM_FULL && 2039 status != CPL_ERR_CONN_EXIST_SYNRECV && 2040 status != CPL_ERR_CONN_EXIST); 2041 } 2042 2043 static char *neg_adv_str(unsigned int status) 2044 { 2045 switch (status) { 2046 case CPL_ERR_RTX_NEG_ADVICE: 2047 return "Retransmit timeout"; 2048 case CPL_ERR_PERSIST_NEG_ADVICE: 2049 return "Persist timeout"; 2050 case CPL_ERR_KEEPALV_NEG_ADVICE: 2051 return "Keepalive timeout"; 2052 default: 2053 return "Unknown"; 2054 } 2055 } 2056 2057 static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) 2058 { 2059 ep->snd_win = snd_win; 2060 ep->rcv_win = rcv_win; 2061 pr_debug("snd_win %d rcv_win %d\n", 2062 ep->snd_win, ep->rcv_win); 2063 } 2064 2065 #define ACT_OPEN_RETRY_COUNT 2 2066 2067 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, 2068 struct dst_entry *dst, struct c4iw_dev *cdev, 2069 bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) 2070 { 2071 struct neighbour *n; 2072 int err, step; 2073 struct net_device *pdev; 2074 2075 n = dst_neigh_lookup(dst, peer_ip); 2076 if (!n) 2077 return -ENODEV; 2078 2079 rcu_read_lock(); 2080 err = -ENOMEM; 2081 if (n->dev->flags & IFF_LOOPBACK) { 2082 if (iptype == 4) 2083 pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); 2084 else if (IS_ENABLED(CONFIG_IPV6)) 2085 for_each_netdev(&init_net, pdev) { 2086 if (ipv6_chk_addr(&init_net, 2087 (struct in6_addr *)peer_ip, 2088 pdev, 1)) 2089 break; 2090 } 2091 else 2092 pdev = NULL; 2093 2094 if (!pdev) { 2095 err = -ENODEV; 2096 goto out; 2097 } 2098 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 2099 n, pdev, rt_tos2priority(tos)); 2100 if (!ep->l2t) { 2101 dev_put(pdev); 2102 goto out; 2103 } 2104 ep->mtu = pdev->mtu; 2105 ep->tx_chan = cxgb4_port_chan(pdev); 2106 ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; 2107 step = cdev->rdev.lldi.ntxq / 
2108 cdev->rdev.lldi.nchan; 2109 ep->txq_idx = cxgb4_port_idx(pdev) * step; 2110 step = cdev->rdev.lldi.nrxq / 2111 cdev->rdev.lldi.nchan; 2112 ep->ctrlq_idx = cxgb4_port_idx(pdev); 2113 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 2114 cxgb4_port_idx(pdev) * step]; 2115 set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 2116 dev_put(pdev); 2117 } else { 2118 pdev = get_real_dev(n->dev); 2119 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 2120 n, pdev, rt_tos2priority(tos)); 2121 if (!ep->l2t) 2122 goto out; 2123 ep->mtu = dst_mtu(dst); 2124 ep->tx_chan = cxgb4_port_chan(pdev); 2125 ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; 2126 step = cdev->rdev.lldi.ntxq / 2127 cdev->rdev.lldi.nchan; 2128 ep->txq_idx = cxgb4_port_idx(pdev) * step; 2129 ep->ctrlq_idx = cxgb4_port_idx(pdev); 2130 step = cdev->rdev.lldi.nrxq / 2131 cdev->rdev.lldi.nchan; 2132 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 2133 cxgb4_port_idx(pdev) * step]; 2134 set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 2135 2136 if (clear_mpa_v1) { 2137 ep->retry_with_mpa_v1 = 0; 2138 ep->tried_with_mpa_v1 = 0; 2139 } 2140 } 2141 err = 0; 2142 out: 2143 rcu_read_unlock(); 2144 2145 neigh_release(n); 2146 2147 return err; 2148 } 2149 2150 static int c4iw_reconnect(struct c4iw_ep *ep) 2151 { 2152 int err = 0; 2153 int size = 0; 2154 struct sockaddr_in *laddr = (struct sockaddr_in *) 2155 &ep->com.cm_id->m_local_addr; 2156 struct sockaddr_in *raddr = (struct sockaddr_in *) 2157 &ep->com.cm_id->m_remote_addr; 2158 struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) 2159 &ep->com.cm_id->m_local_addr; 2160 struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) 2161 &ep->com.cm_id->m_remote_addr; 2162 int iptype; 2163 __u8 *ra; 2164 2165 pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id); 2166 c4iw_init_wr_wait(ep->com.wr_waitp); 2167 2168 /* When MPA revision is different on nodes, the node with MPA_rev=2 2169 * tries to reconnect with MPA_rev 1 for the same EP through 2170 * 
c4iw_reconnect(), where the same EP is assigned with new tid for 2171 * further connection establishment. As we are using the same EP pointer 2172 * for reconnect, few skbs are used during the previous c4iw_connect(), 2173 * which leaves the EP with inadequate skbs for further 2174 * c4iw_reconnect(), Further causing a crash due to an empty 2175 * skb_list() during peer_abort(). Allocate skbs which is already used. 2176 */ 2177 size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); 2178 if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { 2179 err = -ENOMEM; 2180 goto fail1; 2181 } 2182 2183 /* 2184 * Allocate an active TID to initiate a TCP connection. 2185 */ 2186 ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); 2187 if (ep->atid == -1) { 2188 pr_err("%s - cannot alloc atid\n", __func__); 2189 err = -ENOMEM; 2190 goto fail2; 2191 } 2192 err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); 2193 if (err) 2194 goto fail2a; 2195 2196 /* find a route */ 2197 if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { 2198 ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, 2199 laddr->sin_addr.s_addr, 2200 raddr->sin_addr.s_addr, 2201 laddr->sin_port, 2202 raddr->sin_port, ep->com.cm_id->tos); 2203 iptype = 4; 2204 ra = (__u8 *)&raddr->sin_addr; 2205 } else { 2206 ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, 2207 get_real_dev, 2208 laddr6->sin6_addr.s6_addr, 2209 raddr6->sin6_addr.s6_addr, 2210 laddr6->sin6_port, 2211 raddr6->sin6_port, 2212 ep->com.cm_id->tos, 2213 raddr6->sin6_scope_id); 2214 iptype = 6; 2215 ra = (__u8 *)&raddr6->sin6_addr; 2216 } 2217 if (!ep->dst) { 2218 pr_err("%s - cannot find route\n", __func__); 2219 err = -EHOSTUNREACH; 2220 goto fail3; 2221 } 2222 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, 2223 ep->com.dev->rdev.lldi.adapter_type, 2224 ep->com.cm_id->tos); 2225 if (err) { 2226 pr_err("%s - cannot alloc l2e\n", __func__); 2227 goto fail4; 2228 } 2229 2230 pr_debug("txq_idx 
%u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", 2231 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, 2232 ep->l2t->idx); 2233 2234 state_set(&ep->com, CONNECTING); 2235 ep->tos = ep->com.cm_id->tos; 2236 2237 /* send connect request to rnic */ 2238 err = send_connect(ep); 2239 if (!err) 2240 goto out; 2241 2242 cxgb4_l2t_release(ep->l2t); 2243 fail4: 2244 dst_release(ep->dst); 2245 fail3: 2246 xa_erase_irq(&ep->com.dev->atids, ep->atid); 2247 fail2a: 2248 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 2249 fail2: 2250 /* 2251 * remember to send notification to upper layer. 2252 * We are in here so the upper layer is not aware that this is 2253 * re-connect attempt and so, upper layer is still waiting for 2254 * response of 1st connect request. 2255 */ 2256 connect_reply_upcall(ep, -ECONNRESET); 2257 fail1: 2258 c4iw_put_ep(&ep->com); 2259 out: 2260 return err; 2261 } 2262 2263 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2264 { 2265 struct c4iw_ep *ep; 2266 struct cpl_act_open_rpl *rpl = cplhdr(skb); 2267 unsigned int atid = TID_TID_G(AOPEN_ATID_G( 2268 ntohl(rpl->atid_status))); 2269 struct tid_info *t = dev->rdev.lldi.tids; 2270 int status = AOPEN_STATUS_G(ntohl(rpl->atid_status)); 2271 struct sockaddr_in *la; 2272 struct sockaddr_in *ra; 2273 struct sockaddr_in6 *la6; 2274 struct sockaddr_in6 *ra6; 2275 int ret = 0; 2276 2277 ep = lookup_atid(t, atid); 2278 la = (struct sockaddr_in *)&ep->com.local_addr; 2279 ra = (struct sockaddr_in *)&ep->com.remote_addr; 2280 la6 = (struct sockaddr_in6 *)&ep->com.local_addr; 2281 ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; 2282 2283 pr_debug("ep %p atid %u status %u errno %d\n", ep, atid, 2284 status, status2errno(status)); 2285 2286 if (cxgb_is_neg_adv(status)) { 2287 pr_debug("Connection problems for atid %u status %u (%s)\n", 2288 atid, status, neg_adv_str(status)); 2289 ep->stats.connect_neg_adv++; 2290 mutex_lock(&dev->rdev.stats.lock); 2291 dev->rdev.stats.neg_adv++; 2292 
mutex_unlock(&dev->rdev.stats.lock); 2293 return 0; 2294 } 2295 2296 set_bit(ACT_OPEN_RPL, &ep->com.history); 2297 2298 /* 2299 * Log interesting failures. 2300 */ 2301 switch (status) { 2302 case CPL_ERR_CONN_RESET: 2303 case CPL_ERR_CONN_TIMEDOUT: 2304 break; 2305 case CPL_ERR_TCAM_FULL: 2306 mutex_lock(&dev->rdev.stats.lock); 2307 dev->rdev.stats.tcam_full++; 2308 mutex_unlock(&dev->rdev.stats.lock); 2309 if (ep->com.local_addr.ss_family == AF_INET && 2310 dev->rdev.lldi.enable_fw_ofld_conn) { 2311 ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G( 2312 ntohl(rpl->atid_status)))); 2313 if (ret) 2314 goto fail; 2315 return 0; 2316 } 2317 break; 2318 case CPL_ERR_CONN_EXIST: 2319 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 2320 set_bit(ACT_RETRY_INUSE, &ep->com.history); 2321 if (ep->com.remote_addr.ss_family == AF_INET6) { 2322 struct sockaddr_in6 *sin6 = 2323 (struct sockaddr_in6 *) 2324 &ep->com.local_addr; 2325 cxgb4_clip_release( 2326 ep->com.dev->rdev.lldi.ports[0], 2327 (const u32 *) 2328 &sin6->sin6_addr.s6_addr, 1); 2329 } 2330 xa_erase_irq(&ep->com.dev->atids, atid); 2331 cxgb4_free_atid(t, atid); 2332 dst_release(ep->dst); 2333 cxgb4_l2t_release(ep->l2t); 2334 c4iw_reconnect(ep); 2335 return 0; 2336 } 2337 break; 2338 default: 2339 if (ep->com.local_addr.ss_family == AF_INET) { 2340 pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n", 2341 atid, status, status2errno(status), 2342 &la->sin_addr.s_addr, ntohs(la->sin_port), 2343 &ra->sin_addr.s_addr, ntohs(ra->sin_port)); 2344 } else { 2345 pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n", 2346 atid, status, status2errno(status), 2347 la6->sin6_addr.s6_addr, ntohs(la6->sin6_port), 2348 ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port)); 2349 } 2350 break; 2351 } 2352 2353 fail: 2354 connect_reply_upcall(ep, status2errno(status)); 2355 state_set(&ep->com, DEAD); 2356 2357 if (ep->com.remote_addr.ss_family == AF_INET6) { 2358 struct sockaddr_in6 
*sin6 = 2359 (struct sockaddr_in6 *)&ep->com.local_addr; 2360 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 2361 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2362 } 2363 if (status && act_open_has_tid(status)) 2364 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl), 2365 ep->com.local_addr.ss_family); 2366 2367 xa_erase_irq(&ep->com.dev->atids, atid); 2368 cxgb4_free_atid(t, atid); 2369 dst_release(ep->dst); 2370 cxgb4_l2t_release(ep->l2t); 2371 c4iw_put_ep(&ep->com); 2372 2373 return 0; 2374 } 2375 2376 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2377 { 2378 struct cpl_pass_open_rpl *rpl = cplhdr(skb); 2379 unsigned int stid = GET_TID(rpl); 2380 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2381 2382 if (!ep) { 2383 pr_warn("%s stid %d lookup failure!\n", __func__, stid); 2384 goto out; 2385 } 2386 pr_debug("ep %p status %d error %d\n", ep, 2387 rpl->status, status2errno(rpl->status)); 2388 c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); 2389 c4iw_put_ep(&ep->com); 2390 out: 2391 return 0; 2392 } 2393 2394 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2395 { 2396 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); 2397 unsigned int stid = GET_TID(rpl); 2398 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2399 2400 if (!ep) { 2401 pr_warn("%s stid %d lookup failure!\n", __func__, stid); 2402 goto out; 2403 } 2404 pr_debug("ep %p\n", ep); 2405 c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); 2406 c4iw_put_ep(&ep->com); 2407 out: 2408 return 0; 2409 } 2410 2411 static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, 2412 struct cpl_pass_accept_req *req) 2413 { 2414 struct cpl_pass_accept_rpl *rpl; 2415 unsigned int mtu_idx; 2416 u64 opt0; 2417 u32 opt2; 2418 u32 wscale; 2419 struct cpl_t5_pass_accept_rpl *rpl5 = NULL; 2420 int win; 2421 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; 2422 2423 pr_debug("ep %p tid %u\n", ep, 
ep->hwtid); 2424 2425 skb_get(skb); 2426 rpl = cplhdr(skb); 2427 if (!is_t4(adapter_type)) { 2428 skb_trim(skb, roundup(sizeof(*rpl5), 16)); 2429 rpl5 = (void *)rpl; 2430 INIT_TP_WR(rpl5, ep->hwtid); 2431 } else { 2432 skb_trim(skb, sizeof(*rpl)); 2433 INIT_TP_WR(rpl, ep->hwtid); 2434 } 2435 OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, 2436 ep->hwtid)); 2437 2438 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 2439 enable_tcp_timestamps && req->tcpopt.tstamp, 2440 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 2441 wscale = cxgb_compute_wscale(rcv_win); 2442 2443 /* 2444 * Specify the largest window that will fit in opt0. The 2445 * remainder will be specified in the rx_data_ack. 2446 */ 2447 win = ep->rcv_win >> 10; 2448 if (win > RCV_BUFSIZ_M) 2449 win = RCV_BUFSIZ_M; 2450 opt0 = (nocong ? NO_CONG_F : 0) | 2451 KEEP_ALIVE_F | 2452 DELACK_F | 2453 WND_SCALE_V(wscale) | 2454 MSS_IDX_V(mtu_idx) | 2455 L2T_IDX_V(ep->l2t->idx) | 2456 TX_CHAN_V(ep->tx_chan) | 2457 SMAC_SEL_V(ep->smac_idx) | 2458 DSCP_V(ep->tos >> 2) | 2459 ULP_MODE_V(ULP_MODE_TCPDDP) | 2460 RCV_BUFSIZ_V(win); 2461 opt2 = RX_CHANNEL_V(0) | 2462 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); 2463 2464 if (enable_tcp_timestamps && req->tcpopt.tstamp) 2465 opt2 |= TSTAMPS_EN_F; 2466 if (enable_tcp_sack && req->tcpopt.sack) 2467 opt2 |= SACK_EN_F; 2468 if (wscale && enable_tcp_window_scaling) 2469 opt2 |= WND_SCALE_EN_F; 2470 if (enable_ecn) { 2471 const struct tcphdr *tcph; 2472 u32 hlen = ntohl(req->hdr_len); 2473 2474 if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) 2475 tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + 2476 IP_HDR_LEN_G(hlen); 2477 else 2478 tcph = (const void *)(req + 1) + 2479 T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); 2480 if (tcph->ece && tcph->cwr) 2481 opt2 |= CCTRL_ECN_V(1); 2482 } 2483 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { 2484 u32 isn = (prandom_u32() & ~7UL) - 1; 2485 opt2 |= T5_OPT_2_VALID_F; 2486 opt2 |= 
CONG_CNTRL_V(CONG_ALG_TAHOE); 2487 opt2 |= T5_ISS_F; 2488 rpl5 = (void *)rpl; 2489 memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); 2490 if (peer2peer) 2491 isn += 4; 2492 rpl5->iss = cpu_to_be32(isn); 2493 pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); 2494 } 2495 2496 rpl->opt0 = cpu_to_be64(opt0); 2497 rpl->opt2 = cpu_to_be32(opt2); 2498 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 2499 t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure); 2500 2501 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2502 } 2503 2504 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) 2505 { 2506 pr_debug("c4iw_dev %p tid %u\n", dev, hwtid); 2507 skb_trim(skb, sizeof(struct cpl_tid_release)); 2508 release_tid(&dev->rdev, hwtid, skb); 2509 return; 2510 } 2511 2512 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) 2513 { 2514 struct c4iw_ep *child_ep = NULL, *parent_ep; 2515 struct cpl_pass_accept_req *req = cplhdr(skb); 2516 unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); 2517 struct tid_info *t = dev->rdev.lldi.tids; 2518 unsigned int hwtid = GET_TID(req); 2519 struct dst_entry *dst; 2520 __u8 local_ip[16], peer_ip[16]; 2521 __be16 local_port, peer_port; 2522 struct sockaddr_in6 *sin6; 2523 int err; 2524 u16 peer_mss = ntohs(req->tcpopt.mss); 2525 int iptype; 2526 unsigned short hdrs; 2527 u8 tos; 2528 2529 parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 2530 if (!parent_ep) { 2531 pr_err("%s connect request on invalid stid %d\n", 2532 __func__, stid); 2533 goto reject; 2534 } 2535 2536 if (state_read(&parent_ep->com) != LISTEN) { 2537 pr_err("%s - listening ep not in LISTEN\n", __func__); 2538 goto reject; 2539 } 2540 2541 if (parent_ep->com.cm_id->tos_set) 2542 tos = parent_ep->com.cm_id->tos; 2543 else 2544 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); 2545 2546 cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, 2547 &iptype, local_ip, peer_ip, &local_port, 
&peer_port); 2548 2549 /* Find output route */ 2550 if (iptype == 4) { 2551 pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" 2552 , parent_ep, hwtid, 2553 local_ip, peer_ip, ntohs(local_port), 2554 ntohs(peer_port), peer_mss); 2555 dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 2556 *(__be32 *)local_ip, *(__be32 *)peer_ip, 2557 local_port, peer_port, tos); 2558 } else { 2559 pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" 2560 , parent_ep, hwtid, 2561 local_ip, peer_ip, ntohs(local_port), 2562 ntohs(peer_port), peer_mss); 2563 dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, 2564 local_ip, peer_ip, local_port, peer_port, 2565 tos, 2566 ((struct sockaddr_in6 *) 2567 &parent_ep->com.local_addr)->sin6_scope_id); 2568 } 2569 if (!dst) { 2570 pr_err("%s - failed to find dst entry!\n", __func__); 2571 goto reject; 2572 } 2573 2574 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); 2575 if (!child_ep) { 2576 pr_err("%s - failed to allocate ep entry!\n", __func__); 2577 dst_release(dst); 2578 goto reject; 2579 } 2580 2581 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, 2582 parent_ep->com.dev->rdev.lldi.adapter_type, tos); 2583 if (err) { 2584 pr_err("%s - failed to allocate l2t entry!\n", __func__); 2585 dst_release(dst); 2586 kfree(child_ep); 2587 goto reject; 2588 } 2589 2590 hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + 2591 sizeof(struct tcphdr) + 2592 ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 
12 : 0); 2593 if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) 2594 child_ep->mtu = peer_mss + hdrs; 2595 2596 skb_queue_head_init(&child_ep->com.ep_skb_list); 2597 if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) 2598 goto fail; 2599 2600 state_set(&child_ep->com, CONNECTING); 2601 child_ep->com.dev = dev; 2602 child_ep->com.cm_id = NULL; 2603 2604 if (iptype == 4) { 2605 struct sockaddr_in *sin = (struct sockaddr_in *) 2606 &child_ep->com.local_addr; 2607 2608 sin->sin_family = AF_INET; 2609 sin->sin_port = local_port; 2610 sin->sin_addr.s_addr = *(__be32 *)local_ip; 2611 2612 sin = (struct sockaddr_in *)&child_ep->com.local_addr; 2613 sin->sin_family = AF_INET; 2614 sin->sin_port = ((struct sockaddr_in *) 2615 &parent_ep->com.local_addr)->sin_port; 2616 sin->sin_addr.s_addr = *(__be32 *)local_ip; 2617 2618 sin = (struct sockaddr_in *)&child_ep->com.remote_addr; 2619 sin->sin_family = AF_INET; 2620 sin->sin_port = peer_port; 2621 sin->sin_addr.s_addr = *(__be32 *)peer_ip; 2622 } else { 2623 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2624 sin6->sin6_family = PF_INET6; 2625 sin6->sin6_port = local_port; 2626 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2627 2628 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2629 sin6->sin6_family = PF_INET6; 2630 sin6->sin6_port = ((struct sockaddr_in6 *) 2631 &parent_ep->com.local_addr)->sin6_port; 2632 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2633 2634 sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; 2635 sin6->sin6_family = PF_INET6; 2636 sin6->sin6_port = peer_port; 2637 memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); 2638 } 2639 2640 c4iw_get_ep(&parent_ep->com); 2641 child_ep->parent_ep = parent_ep; 2642 child_ep->tos = tos; 2643 child_ep->dst = dst; 2644 child_ep->hwtid = hwtid; 2645 2646 pr_debug("tx_chan %u smac_idx %u rss_qid %u\n", 2647 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); 2648 2649 timer_setup(&child_ep->timer, ep_timeout, 0); 2650 
cxgb4_insert_tid(t, child_ep, hwtid, 2651 child_ep->com.local_addr.ss_family); 2652 insert_ep_tid(child_ep); 2653 if (accept_cr(child_ep, skb, req)) { 2654 c4iw_put_ep(&parent_ep->com); 2655 release_ep_resources(child_ep); 2656 } else { 2657 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); 2658 } 2659 if (iptype == 6) { 2660 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2661 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], 2662 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2663 } 2664 goto out; 2665 fail: 2666 c4iw_put_ep(&child_ep->com); 2667 reject: 2668 reject_cr(dev, hwtid, skb); 2669 out: 2670 if (parent_ep) 2671 c4iw_put_ep(&parent_ep->com); 2672 return 0; 2673 } 2674 2675 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) 2676 { 2677 struct c4iw_ep *ep; 2678 struct cpl_pass_establish *req = cplhdr(skb); 2679 unsigned int tid = GET_TID(req); 2680 int ret; 2681 u16 tcp_opt = ntohs(req->tcp_opt); 2682 2683 ep = get_ep_from_tid(dev, tid); 2684 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2685 ep->snd_seq = be32_to_cpu(req->snd_isn); 2686 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 2687 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); 2688 2689 pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); 2690 2691 set_emss(ep, tcp_opt); 2692 2693 dst_confirm(ep->dst); 2694 mutex_lock(&ep->com.mutex); 2695 ep->com.state = MPA_REQ_WAIT; 2696 start_ep_timer(ep); 2697 set_bit(PASS_ESTAB, &ep->com.history); 2698 ret = send_flowc(ep); 2699 mutex_unlock(&ep->com.mutex); 2700 if (ret) 2701 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 2702 c4iw_put_ep(&ep->com); 2703 2704 return 0; 2705 } 2706 2707 static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) 2708 { 2709 struct cpl_peer_close *hdr = cplhdr(skb); 2710 struct c4iw_ep *ep; 2711 struct c4iw_qp_attributes attrs; 2712 int disconnect = 1; 2713 int release = 0; 2714 unsigned int tid = GET_TID(hdr); 2715 int ret; 2716 2717 ep = get_ep_from_tid(dev, tid); 2718 if (!ep) 2719 return 0; 2720 
2721 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2722 dst_confirm(ep->dst); 2723 2724 set_bit(PEER_CLOSE, &ep->com.history); 2725 mutex_lock(&ep->com.mutex); 2726 switch (ep->com.state) { 2727 case MPA_REQ_WAIT: 2728 __state_set(&ep->com, CLOSING); 2729 break; 2730 case MPA_REQ_SENT: 2731 __state_set(&ep->com, CLOSING); 2732 connect_reply_upcall(ep, -ECONNRESET); 2733 break; 2734 case MPA_REQ_RCVD: 2735 2736 /* 2737 * We're gonna mark this puppy DEAD, but keep 2738 * the reference on it until the ULP accepts or 2739 * rejects the CR. Also wake up anyone waiting 2740 * in rdma connection migration (see c4iw_accept_cr()). 2741 */ 2742 __state_set(&ep->com, CLOSING); 2743 pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); 2744 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2745 break; 2746 case MPA_REP_SENT: 2747 __state_set(&ep->com, CLOSING); 2748 pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); 2749 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2750 break; 2751 case FPDU_MODE: 2752 start_ep_timer(ep); 2753 __state_set(&ep->com, CLOSING); 2754 attrs.next_state = C4IW_QP_STATE_CLOSING; 2755 ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2756 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2757 if (ret != -ECONNRESET) { 2758 peer_close_upcall(ep); 2759 disconnect = 1; 2760 } 2761 break; 2762 case ABORTING: 2763 disconnect = 0; 2764 break; 2765 case CLOSING: 2766 __state_set(&ep->com, MORIBUND); 2767 disconnect = 0; 2768 break; 2769 case MORIBUND: 2770 (void)stop_ep_timer(ep); 2771 if (ep->com.cm_id && ep->com.qp) { 2772 attrs.next_state = C4IW_QP_STATE_IDLE; 2773 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2774 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2775 } 2776 close_complete_upcall(ep, 0); 2777 __state_set(&ep->com, DEAD); 2778 release = 1; 2779 disconnect = 0; 2780 break; 2781 case DEAD: 2782 disconnect = 0; 2783 break; 2784 default: 2785 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 2786 } 2787 mutex_unlock(&ep->com.mutex); 2788 if (disconnect) 2789 
c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 2790 if (release) 2791 release_ep_resources(ep); 2792 c4iw_put_ep(&ep->com); 2793 return 0; 2794 } 2795 2796 static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep) 2797 { 2798 complete_cached_srq_buffers(ep, ep->srqe_idx); 2799 if (ep->com.cm_id && ep->com.qp) { 2800 struct c4iw_qp_attributes attrs; 2801 2802 attrs.next_state = C4IW_QP_STATE_ERROR; 2803 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2804 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2805 } 2806 peer_abort_upcall(ep); 2807 release_ep_resources(ep); 2808 c4iw_put_ep(&ep->com); 2809 } 2810 2811 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) 2812 { 2813 struct cpl_abort_req_rss6 *req = cplhdr(skb); 2814 struct c4iw_ep *ep; 2815 struct sk_buff *rpl_skb; 2816 struct c4iw_qp_attributes attrs; 2817 int ret; 2818 int release = 0; 2819 unsigned int tid = GET_TID(req); 2820 u8 status; 2821 u32 srqidx; 2822 2823 u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); 2824 2825 ep = get_ep_from_tid(dev, tid); 2826 if (!ep) 2827 return 0; 2828 2829 status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); 2830 2831 if (cxgb_is_neg_adv(status)) { 2832 pr_debug("Negative advice on abort- tid %u status %d (%s)\n", 2833 ep->hwtid, status, neg_adv_str(status)); 2834 ep->stats.abort_neg_adv++; 2835 mutex_lock(&dev->rdev.stats.lock); 2836 dev->rdev.stats.neg_adv++; 2837 mutex_unlock(&dev->rdev.stats.lock); 2838 goto deref_ep; 2839 } 2840 2841 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, 2842 ep->com.state); 2843 set_bit(PEER_ABORT, &ep->com.history); 2844 2845 /* 2846 * Wake up any threads in rdma_init() or rdma_fini(). 
2847 * However, this is not needed if com state is just 2848 * MPA_REQ_SENT 2849 */ 2850 if (ep->com.state != MPA_REQ_SENT) 2851 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2852 2853 mutex_lock(&ep->com.mutex); 2854 switch (ep->com.state) { 2855 case CONNECTING: 2856 c4iw_put_ep(&ep->parent_ep->com); 2857 break; 2858 case MPA_REQ_WAIT: 2859 (void)stop_ep_timer(ep); 2860 break; 2861 case MPA_REQ_SENT: 2862 (void)stop_ep_timer(ep); 2863 if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || 2864 (mpa_rev == 2 && ep->tried_with_mpa_v1)) 2865 connect_reply_upcall(ep, -ECONNRESET); 2866 else { 2867 /* 2868 * we just don't send notification upwards because we 2869 * want to retry with mpa_v1 without upper layers even 2870 * knowing it. 2871 * 2872 * do some housekeeping so as to re-initiate the 2873 * connection 2874 */ 2875 pr_info("%s: mpa_rev=%d. Retrying with mpav1\n", 2876 __func__, mpa_rev); 2877 ep->retry_with_mpa_v1 = 1; 2878 } 2879 break; 2880 case MPA_REP_SENT: 2881 break; 2882 case MPA_REQ_RCVD: 2883 break; 2884 case MORIBUND: 2885 case CLOSING: 2886 stop_ep_timer(ep); 2887 /*FALLTHROUGH*/ 2888 case FPDU_MODE: 2889 if (ep->com.qp && ep->com.qp->srq) { 2890 srqidx = ABORT_RSS_SRQIDX_G( 2891 be32_to_cpu(req->srqidx_status)); 2892 if (srqidx) { 2893 complete_cached_srq_buffers(ep, 2894 req->srqidx_status); 2895 } else { 2896 /* Hold ep ref until finish_peer_abort() */ 2897 c4iw_get_ep(&ep->com); 2898 __state_set(&ep->com, ABORTING); 2899 set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags); 2900 read_tcb(ep); 2901 break; 2902 2903 } 2904 } 2905 2906 if (ep->com.cm_id && ep->com.qp) { 2907 attrs.next_state = C4IW_QP_STATE_ERROR; 2908 ret = c4iw_modify_qp(ep->com.qp->rhp, 2909 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 2910 &attrs, 1); 2911 if (ret) 2912 pr_err("%s - qp <- error failed!\n", __func__); 2913 } 2914 peer_abort_upcall(ep); 2915 break; 2916 case ABORTING: 2917 break; 2918 case DEAD: 2919 pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 2920 
mutex_unlock(&ep->com.mutex); 2921 goto deref_ep; 2922 default: 2923 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 2924 break; 2925 } 2926 dst_confirm(ep->dst); 2927 if (ep->com.state != ABORTING) { 2928 __state_set(&ep->com, DEAD); 2929 /* we don't release if we want to retry with mpa_v1 */ 2930 if (!ep->retry_with_mpa_v1) 2931 release = 1; 2932 } 2933 mutex_unlock(&ep->com.mutex); 2934 2935 rpl_skb = skb_dequeue(&ep->com.ep_skb_list); 2936 if (WARN_ON(!rpl_skb)) { 2937 release = 1; 2938 goto out; 2939 } 2940 2941 cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); 2942 2943 c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); 2944 out: 2945 if (release) 2946 release_ep_resources(ep); 2947 else if (ep->retry_with_mpa_v1) { 2948 if (ep->com.remote_addr.ss_family == AF_INET6) { 2949 struct sockaddr_in6 *sin6 = 2950 (struct sockaddr_in6 *) 2951 &ep->com.local_addr; 2952 cxgb4_clip_release( 2953 ep->com.dev->rdev.lldi.ports[0], 2954 (const u32 *)&sin6->sin6_addr.s6_addr, 2955 1); 2956 } 2957 xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid); 2958 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, 2959 ep->com.local_addr.ss_family); 2960 dst_release(ep->dst); 2961 cxgb4_l2t_release(ep->l2t); 2962 c4iw_reconnect(ep); 2963 } 2964 2965 deref_ep: 2966 c4iw_put_ep(&ep->com); 2967 /* Dereferencing ep, referenced in peer_abort_intr() */ 2968 c4iw_put_ep(&ep->com); 2969 return 0; 2970 } 2971 2972 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2973 { 2974 struct c4iw_ep *ep; 2975 struct c4iw_qp_attributes attrs; 2976 struct cpl_close_con_rpl *rpl = cplhdr(skb); 2977 int release = 0; 2978 unsigned int tid = GET_TID(rpl); 2979 2980 ep = get_ep_from_tid(dev, tid); 2981 if (!ep) 2982 return 0; 2983 2984 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2985 2986 /* The cm_id may be null if we failed to connect */ 2987 mutex_lock(&ep->com.mutex); 2988 set_bit(CLOSE_CON_RPL, &ep->com.history); 2989 switch (ep->com.state) { 2990 case CLOSING: 2991 
__state_set(&ep->com, MORIBUND); 2992 break; 2993 case MORIBUND: 2994 (void)stop_ep_timer(ep); 2995 if ((ep->com.cm_id) && (ep->com.qp)) { 2996 attrs.next_state = C4IW_QP_STATE_IDLE; 2997 c4iw_modify_qp(ep->com.qp->rhp, 2998 ep->com.qp, 2999 C4IW_QP_ATTR_NEXT_STATE, 3000 &attrs, 1); 3001 } 3002 close_complete_upcall(ep, 0); 3003 __state_set(&ep->com, DEAD); 3004 release = 1; 3005 break; 3006 case ABORTING: 3007 case DEAD: 3008 break; 3009 default: 3010 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 3011 break; 3012 } 3013 mutex_unlock(&ep->com.mutex); 3014 if (release) 3015 release_ep_resources(ep); 3016 c4iw_put_ep(&ep->com); 3017 return 0; 3018 } 3019 3020 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) 3021 { 3022 struct cpl_rdma_terminate *rpl = cplhdr(skb); 3023 unsigned int tid = GET_TID(rpl); 3024 struct c4iw_ep *ep; 3025 struct c4iw_qp_attributes attrs; 3026 3027 ep = get_ep_from_tid(dev, tid); 3028 3029 if (ep) { 3030 if (ep->com.qp) { 3031 pr_warn("TERM received tid %u qpid %u\n", tid, 3032 ep->com.qp->wq.sq.qid); 3033 attrs.next_state = C4IW_QP_STATE_TERMINATE; 3034 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 3035 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 3036 } 3037 3038 c4iw_put_ep(&ep->com); 3039 } else 3040 pr_warn("TERM received tid %u no ep/qp\n", tid); 3041 3042 return 0; 3043 } 3044 3045 /* 3046 * Upcall from the adapter indicating data has been transmitted. 3047 * For us its just the single MPA request or reply. We can now free 3048 * the skb holding the mpa message. 
3049 */ 3050 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) 3051 { 3052 struct c4iw_ep *ep; 3053 struct cpl_fw4_ack *hdr = cplhdr(skb); 3054 u8 credits = hdr->credits; 3055 unsigned int tid = GET_TID(hdr); 3056 3057 3058 ep = get_ep_from_tid(dev, tid); 3059 if (!ep) 3060 return 0; 3061 pr_debug("ep %p tid %u credits %u\n", 3062 ep, ep->hwtid, credits); 3063 if (credits == 0) { 3064 pr_debug("0 credit ack ep %p tid %u state %u\n", 3065 ep, ep->hwtid, state_read(&ep->com)); 3066 goto out; 3067 } 3068 3069 dst_confirm(ep->dst); 3070 if (ep->mpa_skb) { 3071 pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n", 3072 ep, ep->hwtid, state_read(&ep->com), 3073 ep->mpa_attr.initiator ? 1 : 0); 3074 mutex_lock(&ep->com.mutex); 3075 kfree_skb(ep->mpa_skb); 3076 ep->mpa_skb = NULL; 3077 if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) 3078 stop_ep_timer(ep); 3079 mutex_unlock(&ep->com.mutex); 3080 } 3081 out: 3082 c4iw_put_ep(&ep->com); 3083 return 0; 3084 } 3085 3086 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 3087 { 3088 int abort; 3089 struct c4iw_ep *ep = to_ep(cm_id); 3090 3091 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 3092 3093 mutex_lock(&ep->com.mutex); 3094 if (ep->com.state != MPA_REQ_RCVD) { 3095 mutex_unlock(&ep->com.mutex); 3096 c4iw_put_ep(&ep->com); 3097 return -ECONNRESET; 3098 } 3099 set_bit(ULP_REJECT, &ep->com.history); 3100 if (mpa_rev == 0) 3101 abort = 1; 3102 else 3103 abort = send_mpa_reject(ep, pdata, pdata_len); 3104 mutex_unlock(&ep->com.mutex); 3105 3106 stop_ep_timer(ep); 3107 c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); 3108 c4iw_put_ep(&ep->com); 3109 return 0; 3110 } 3111 3112 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 3113 { 3114 int err; 3115 struct c4iw_qp_attributes attrs; 3116 enum c4iw_qp_attr_mask mask; 3117 struct c4iw_ep *ep = to_ep(cm_id); 3118 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 3119 struct c4iw_qp *qp = 
get_qhp(h, conn_param->qpn); 3120 int abort = 0; 3121 3122 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 3123 3124 mutex_lock(&ep->com.mutex); 3125 if (ep->com.state != MPA_REQ_RCVD) { 3126 err = -ECONNRESET; 3127 goto err_out; 3128 } 3129 3130 if (!qp) { 3131 err = -EINVAL; 3132 goto err_out; 3133 } 3134 3135 set_bit(ULP_ACCEPT, &ep->com.history); 3136 if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || 3137 (conn_param->ird > cur_max_read_depth(ep->com.dev))) { 3138 err = -EINVAL; 3139 goto err_abort; 3140 } 3141 3142 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 3143 if (conn_param->ord > ep->ird) { 3144 if (RELAXED_IRD_NEGOTIATION) { 3145 conn_param->ord = ep->ird; 3146 } else { 3147 ep->ird = conn_param->ird; 3148 ep->ord = conn_param->ord; 3149 send_mpa_reject(ep, conn_param->private_data, 3150 conn_param->private_data_len); 3151 err = -ENOMEM; 3152 goto err_abort; 3153 } 3154 } 3155 if (conn_param->ird < ep->ord) { 3156 if (RELAXED_IRD_NEGOTIATION && 3157 ep->ord <= h->rdev.lldi.max_ordird_qp) { 3158 conn_param->ird = ep->ord; 3159 } else { 3160 err = -ENOMEM; 3161 goto err_abort; 3162 } 3163 } 3164 } 3165 ep->ird = conn_param->ird; 3166 ep->ord = conn_param->ord; 3167 3168 if (ep->mpa_attr.version == 1) { 3169 if (peer2peer && ep->ird == 0) 3170 ep->ird = 1; 3171 } else { 3172 if (peer2peer && 3173 (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && 3174 (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) 3175 ep->ird = 1; 3176 } 3177 3178 pr_debug("ird %d ord %d\n", ep->ird, ep->ord); 3179 3180 ep->com.cm_id = cm_id; 3181 ref_cm_id(&ep->com); 3182 ep->com.qp = qp; 3183 ref_qp(ep); 3184 3185 /* bind QP to EP and move to RTS */ 3186 attrs.mpa_attr = ep->mpa_attr; 3187 attrs.max_ird = ep->ird; 3188 attrs.max_ord = ep->ord; 3189 attrs.llp_stream_handle = ep; 3190 attrs.next_state = C4IW_QP_STATE_RTS; 3191 3192 /* bind QP and TID with INIT_WR */ 3193 mask = C4IW_QP_ATTR_NEXT_STATE | 3194 C4IW_QP_ATTR_LLP_STREAM_HANDLE | 
3195 C4IW_QP_ATTR_MPA_ATTR | 3196 C4IW_QP_ATTR_MAX_IRD | 3197 C4IW_QP_ATTR_MAX_ORD; 3198 3199 err = c4iw_modify_qp(ep->com.qp->rhp, 3200 ep->com.qp, mask, &attrs, 1); 3201 if (err) 3202 goto err_deref_cm_id; 3203 3204 set_bit(STOP_MPA_TIMER, &ep->com.flags); 3205 err = send_mpa_reply(ep, conn_param->private_data, 3206 conn_param->private_data_len); 3207 if (err) 3208 goto err_deref_cm_id; 3209 3210 __state_set(&ep->com, FPDU_MODE); 3211 established_upcall(ep); 3212 mutex_unlock(&ep->com.mutex); 3213 c4iw_put_ep(&ep->com); 3214 return 0; 3215 err_deref_cm_id: 3216 deref_cm_id(&ep->com); 3217 err_abort: 3218 abort = 1; 3219 err_out: 3220 mutex_unlock(&ep->com.mutex); 3221 if (abort) 3222 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 3223 c4iw_put_ep(&ep->com); 3224 return err; 3225 } 3226 3227 static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) 3228 { 3229 struct in_device *ind; 3230 int found = 0; 3231 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; 3232 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; 3233 3234 ind = in_dev_get(dev->rdev.lldi.ports[0]); 3235 if (!ind) 3236 return -EADDRNOTAVAIL; 3237 for_primary_ifa(ind) { 3238 laddr->sin_addr.s_addr = ifa->ifa_address; 3239 raddr->sin_addr.s_addr = ifa->ifa_address; 3240 found = 1; 3241 break; 3242 } 3243 endfor_ifa(ind); 3244 in_dev_put(ind); 3245 return found ? 
0 : -EADDRNOTAVAIL; 3246 } 3247 3248 static int get_lladdr(struct net_device *dev, struct in6_addr *addr, 3249 unsigned char banned_flags) 3250 { 3251 struct inet6_dev *idev; 3252 int err = -EADDRNOTAVAIL; 3253 3254 rcu_read_lock(); 3255 idev = __in6_dev_get(dev); 3256 if (idev != NULL) { 3257 struct inet6_ifaddr *ifp; 3258 3259 read_lock_bh(&idev->lock); 3260 list_for_each_entry(ifp, &idev->addr_list, if_list) { 3261 if (ifp->scope == IFA_LINK && 3262 !(ifp->flags & banned_flags)) { 3263 memcpy(addr, &ifp->addr, 16); 3264 err = 0; 3265 break; 3266 } 3267 } 3268 read_unlock_bh(&idev->lock); 3269 } 3270 rcu_read_unlock(); 3271 return err; 3272 } 3273 3274 static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) 3275 { 3276 struct in6_addr uninitialized_var(addr); 3277 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; 3278 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; 3279 3280 if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { 3281 memcpy(la6->sin6_addr.s6_addr, &addr, 16); 3282 memcpy(ra6->sin6_addr.s6_addr, &addr, 16); 3283 return 0; 3284 } 3285 return -EADDRNOTAVAIL; 3286 } 3287 3288 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 3289 { 3290 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 3291 struct c4iw_ep *ep; 3292 int err = 0; 3293 struct sockaddr_in *laddr; 3294 struct sockaddr_in *raddr; 3295 struct sockaddr_in6 *laddr6; 3296 struct sockaddr_in6 *raddr6; 3297 __u8 *ra; 3298 int iptype; 3299 3300 if ((conn_param->ord > cur_max_read_depth(dev)) || 3301 (conn_param->ird > cur_max_read_depth(dev))) { 3302 err = -EINVAL; 3303 goto out; 3304 } 3305 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 3306 if (!ep) { 3307 pr_err("%s - cannot alloc ep\n", __func__); 3308 err = -ENOMEM; 3309 goto out; 3310 } 3311 3312 skb_queue_head_init(&ep->com.ep_skb_list); 3313 if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { 3314 err = -ENOMEM; 3315 goto 
fail1; 3316 } 3317 3318 timer_setup(&ep->timer, ep_timeout, 0); 3319 ep->plen = conn_param->private_data_len; 3320 if (ep->plen) 3321 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 3322 conn_param->private_data, ep->plen); 3323 ep->ird = conn_param->ird; 3324 ep->ord = conn_param->ord; 3325 3326 if (peer2peer && ep->ord == 0) 3327 ep->ord = 1; 3328 3329 ep->com.cm_id = cm_id; 3330 ref_cm_id(&ep->com); 3331 cm_id->provider_data = ep; 3332 ep->com.dev = dev; 3333 ep->com.qp = get_qhp(dev, conn_param->qpn); 3334 if (!ep->com.qp) { 3335 pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); 3336 err = -EINVAL; 3337 goto fail2; 3338 } 3339 ref_qp(ep); 3340 pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn, 3341 ep->com.qp, cm_id); 3342 3343 /* 3344 * Allocate an active TID to initiate a TCP connection. 3345 */ 3346 ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep); 3347 if (ep->atid == -1) { 3348 pr_err("%s - cannot alloc atid\n", __func__); 3349 err = -ENOMEM; 3350 goto fail2; 3351 } 3352 err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL); 3353 if (err) 3354 goto fail5; 3355 3356 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, 3357 sizeof(ep->com.local_addr)); 3358 memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, 3359 sizeof(ep->com.remote_addr)); 3360 3361 laddr = (struct sockaddr_in *)&ep->com.local_addr; 3362 raddr = (struct sockaddr_in *)&ep->com.remote_addr; 3363 laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr; 3364 raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr; 3365 3366 if (cm_id->m_remote_addr.ss_family == AF_INET) { 3367 iptype = 4; 3368 ra = (__u8 *)&raddr->sin_addr; 3369 3370 /* 3371 * Handle loopback requests to INADDR_ANY. 
3372 */ 3373 if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { 3374 err = pick_local_ipaddrs(dev, cm_id); 3375 if (err) 3376 goto fail2; 3377 } 3378 3379 /* find a route */ 3380 pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", 3381 &laddr->sin_addr, ntohs(laddr->sin_port), 3382 ra, ntohs(raddr->sin_port)); 3383 ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 3384 laddr->sin_addr.s_addr, 3385 raddr->sin_addr.s_addr, 3386 laddr->sin_port, 3387 raddr->sin_port, cm_id->tos); 3388 } else { 3389 iptype = 6; 3390 ra = (__u8 *)&raddr6->sin6_addr; 3391 3392 /* 3393 * Handle loopback requests to INADDR_ANY. 3394 */ 3395 if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { 3396 err = pick_local_ip6addrs(dev, cm_id); 3397 if (err) 3398 goto fail2; 3399 } 3400 3401 /* find a route */ 3402 pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", 3403 laddr6->sin6_addr.s6_addr, 3404 ntohs(laddr6->sin6_port), 3405 raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); 3406 ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, 3407 laddr6->sin6_addr.s6_addr, 3408 raddr6->sin6_addr.s6_addr, 3409 laddr6->sin6_port, 3410 raddr6->sin6_port, cm_id->tos, 3411 raddr6->sin6_scope_id); 3412 } 3413 if (!ep->dst) { 3414 pr_err("%s - cannot find route\n", __func__); 3415 err = -EHOSTUNREACH; 3416 goto fail3; 3417 } 3418 3419 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, 3420 ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); 3421 if (err) { 3422 pr_err("%s - cannot alloc l2e\n", __func__); 3423 goto fail4; 3424 } 3425 3426 pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", 3427 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, 3428 ep->l2t->idx); 3429 3430 state_set(&ep->com, CONNECTING); 3431 ep->tos = cm_id->tos; 3432 3433 /* send connect request to rnic */ 3434 err = send_connect(ep); 3435 if (!err) 3436 goto out; 3437 3438 cxgb4_l2t_release(ep->l2t); 3439 fail4: 3440 dst_release(ep->dst); 3441 fail3: 3442 
xa_erase_irq(&ep->com.dev->atids, ep->atid); 3443 fail5: 3444 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 3445 fail2: 3446 skb_queue_purge(&ep->com.ep_skb_list); 3447 deref_cm_id(&ep->com); 3448 fail1: 3449 c4iw_put_ep(&ep->com); 3450 out: 3451 return err; 3452 } 3453 3454 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) 3455 { 3456 int err; 3457 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) 3458 &ep->com.local_addr; 3459 3460 if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { 3461 err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], 3462 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3463 if (err) 3464 return err; 3465 } 3466 c4iw_init_wr_wait(ep->com.wr_waitp); 3467 err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], 3468 ep->stid, &sin6->sin6_addr, 3469 sin6->sin6_port, 3470 ep->com.dev->rdev.lldi.rxq_ids[0]); 3471 if (!err) 3472 err = c4iw_wait_for_reply(&ep->com.dev->rdev, 3473 ep->com.wr_waitp, 3474 0, 0, __func__); 3475 else if (err > 0) 3476 err = net_xmit_errno(err); 3477 if (err) { 3478 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3479 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3480 pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", 3481 err, ep->stid, 3482 sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); 3483 } 3484 return err; 3485 } 3486 3487 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) 3488 { 3489 int err; 3490 struct sockaddr_in *sin = (struct sockaddr_in *) 3491 &ep->com.local_addr; 3492 3493 if (dev->rdev.lldi.enable_fw_ofld_conn) { 3494 do { 3495 err = cxgb4_create_server_filter( 3496 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3497 sin->sin_addr.s_addr, sin->sin_port, 0, 3498 ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); 3499 if (err == -EBUSY) { 3500 if (c4iw_fatal_error(&ep->com.dev->rdev)) { 3501 err = -EIO; 3502 break; 3503 } 3504 set_current_state(TASK_UNINTERRUPTIBLE); 3505 schedule_timeout(usecs_to_jiffies(100)); 3506 } 
3507 } while (err == -EBUSY); 3508 } else { 3509 c4iw_init_wr_wait(ep->com.wr_waitp); 3510 err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], 3511 ep->stid, sin->sin_addr.s_addr, sin->sin_port, 3512 0, ep->com.dev->rdev.lldi.rxq_ids[0]); 3513 if (!err) 3514 err = c4iw_wait_for_reply(&ep->com.dev->rdev, 3515 ep->com.wr_waitp, 3516 0, 0, __func__); 3517 else if (err > 0) 3518 err = net_xmit_errno(err); 3519 } 3520 if (err) 3521 pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" 3522 , err, ep->stid, 3523 &sin->sin_addr, ntohs(sin->sin_port)); 3524 return err; 3525 } 3526 3527 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) 3528 { 3529 int err = 0; 3530 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 3531 struct c4iw_listen_ep *ep; 3532 3533 might_sleep(); 3534 3535 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 3536 if (!ep) { 3537 pr_err("%s - cannot alloc ep\n", __func__); 3538 err = -ENOMEM; 3539 goto fail1; 3540 } 3541 skb_queue_head_init(&ep->com.ep_skb_list); 3542 pr_debug("ep %p\n", ep); 3543 ep->com.cm_id = cm_id; 3544 ref_cm_id(&ep->com); 3545 ep->com.dev = dev; 3546 ep->backlog = backlog; 3547 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, 3548 sizeof(ep->com.local_addr)); 3549 3550 /* 3551 * Allocate a server TID. 
3552 */ 3553 if (dev->rdev.lldi.enable_fw_ofld_conn && 3554 ep->com.local_addr.ss_family == AF_INET) 3555 ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, 3556 cm_id->m_local_addr.ss_family, ep); 3557 else 3558 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, 3559 cm_id->m_local_addr.ss_family, ep); 3560 3561 if (ep->stid == -1) { 3562 pr_err("%s - cannot alloc stid\n", __func__); 3563 err = -ENOMEM; 3564 goto fail2; 3565 } 3566 err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL); 3567 if (err) 3568 goto fail3; 3569 3570 state_set(&ep->com, LISTEN); 3571 if (ep->com.local_addr.ss_family == AF_INET) 3572 err = create_server4(dev, ep); 3573 else 3574 err = create_server6(dev, ep); 3575 if (!err) { 3576 cm_id->provider_data = ep; 3577 goto out; 3578 } 3579 xa_erase_irq(&ep->com.dev->stids, ep->stid); 3580 fail3: 3581 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3582 ep->com.local_addr.ss_family); 3583 fail2: 3584 deref_cm_id(&ep->com); 3585 c4iw_put_ep(&ep->com); 3586 fail1: 3587 out: 3588 return err; 3589 } 3590 3591 int c4iw_destroy_listen(struct iw_cm_id *cm_id) 3592 { 3593 int err; 3594 struct c4iw_listen_ep *ep = to_listen_ep(cm_id); 3595 3596 pr_debug("ep %p\n", ep); 3597 3598 might_sleep(); 3599 state_set(&ep->com, DEAD); 3600 if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn && 3601 ep->com.local_addr.ss_family == AF_INET) { 3602 err = cxgb4_remove_server_filter( 3603 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3604 ep->com.dev->rdev.lldi.rxq_ids[0], 0); 3605 } else { 3606 struct sockaddr_in6 *sin6; 3607 c4iw_init_wr_wait(ep->com.wr_waitp); 3608 err = cxgb4_remove_server( 3609 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3610 ep->com.dev->rdev.lldi.rxq_ids[0], 0); 3611 if (err) 3612 goto done; 3613 err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp, 3614 0, 0, __func__); 3615 sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; 3616 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3617 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3618 } 
3619 xa_erase_irq(&ep->com.dev->stids, ep->stid); 3620 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3621 ep->com.local_addr.ss_family); 3622 done: 3623 deref_cm_id(&ep->com); 3624 c4iw_put_ep(&ep->com); 3625 return err; 3626 } 3627 3628 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) 3629 { 3630 int ret = 0; 3631 int close = 0; 3632 int fatal = 0; 3633 struct c4iw_rdev *rdev; 3634 3635 mutex_lock(&ep->com.mutex); 3636 3637 pr_debug("ep %p state %s, abrupt %d\n", ep, 3638 states[ep->com.state], abrupt); 3639 3640 /* 3641 * Ref the ep here in case we have fatal errors causing the 3642 * ep to be released and freed. 3643 */ 3644 c4iw_get_ep(&ep->com); 3645 3646 rdev = &ep->com.dev->rdev; 3647 if (c4iw_fatal_error(rdev)) { 3648 fatal = 1; 3649 close_complete_upcall(ep, -EIO); 3650 ep->com.state = DEAD; 3651 } 3652 switch (ep->com.state) { 3653 case MPA_REQ_WAIT: 3654 case MPA_REQ_SENT: 3655 case MPA_REQ_RCVD: 3656 case MPA_REP_SENT: 3657 case FPDU_MODE: 3658 case CONNECTING: 3659 close = 1; 3660 if (abrupt) 3661 ep->com.state = ABORTING; 3662 else { 3663 ep->com.state = CLOSING; 3664 3665 /* 3666 * if we close before we see the fw4_ack() then we fix 3667 * up the timer state since we're reusing it. 
3668 */ 3669 if (ep->mpa_skb && 3670 test_bit(STOP_MPA_TIMER, &ep->com.flags)) { 3671 clear_bit(STOP_MPA_TIMER, &ep->com.flags); 3672 stop_ep_timer(ep); 3673 } 3674 start_ep_timer(ep); 3675 } 3676 set_bit(CLOSE_SENT, &ep->com.flags); 3677 break; 3678 case CLOSING: 3679 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { 3680 close = 1; 3681 if (abrupt) { 3682 (void)stop_ep_timer(ep); 3683 ep->com.state = ABORTING; 3684 } else 3685 ep->com.state = MORIBUND; 3686 } 3687 break; 3688 case MORIBUND: 3689 case ABORTING: 3690 case DEAD: 3691 pr_debug("ignoring disconnect ep %p state %u\n", 3692 ep, ep->com.state); 3693 break; 3694 default: 3695 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 3696 break; 3697 } 3698 3699 if (close) { 3700 if (abrupt) { 3701 set_bit(EP_DISC_ABORT, &ep->com.history); 3702 ret = send_abort(ep); 3703 } else { 3704 set_bit(EP_DISC_CLOSE, &ep->com.history); 3705 ret = send_halfclose(ep); 3706 } 3707 if (ret) { 3708 set_bit(EP_DISC_FAIL, &ep->com.history); 3709 if (!abrupt) { 3710 stop_ep_timer(ep); 3711 close_complete_upcall(ep, -EIO); 3712 } 3713 if (ep->com.qp) { 3714 struct c4iw_qp_attributes attrs; 3715 3716 attrs.next_state = C4IW_QP_STATE_ERROR; 3717 ret = c4iw_modify_qp(ep->com.qp->rhp, 3718 ep->com.qp, 3719 C4IW_QP_ATTR_NEXT_STATE, 3720 &attrs, 1); 3721 if (ret) 3722 pr_err("%s - qp <- error failed!\n", 3723 __func__); 3724 } 3725 fatal = 1; 3726 } 3727 } 3728 mutex_unlock(&ep->com.mutex); 3729 c4iw_put_ep(&ep->com); 3730 if (fatal) 3731 release_ep_resources(ep); 3732 return ret; 3733 } 3734 3735 static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, 3736 struct cpl_fw6_msg_ofld_connection_wr_rpl *req) 3737 { 3738 struct c4iw_ep *ep; 3739 int atid = be32_to_cpu(req->tid); 3740 3741 ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, 3742 (__force u32) req->tid); 3743 if (!ep) 3744 return; 3745 3746 switch (req->retval) { 3747 case FW_ENOMEM: 3748 set_bit(ACT_RETRY_NOMEM, &ep->com.history); 3749 if 
(ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 3750 send_fw_act_open_req(ep, atid); 3751 return; 3752 } 3753 /* fall through */ 3754 case FW_EADDRINUSE: 3755 set_bit(ACT_RETRY_INUSE, &ep->com.history); 3756 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 3757 send_fw_act_open_req(ep, atid); 3758 return; 3759 } 3760 break; 3761 default: 3762 pr_info("%s unexpected ofld conn wr retval %d\n", 3763 __func__, req->retval); 3764 break; 3765 } 3766 pr_err("active ofld_connect_wr failure %d atid %d\n", 3767 req->retval, atid); 3768 mutex_lock(&dev->rdev.stats.lock); 3769 dev->rdev.stats.act_ofld_conn_fails++; 3770 mutex_unlock(&dev->rdev.stats.lock); 3771 connect_reply_upcall(ep, status2errno(req->retval)); 3772 state_set(&ep->com, DEAD); 3773 if (ep->com.remote_addr.ss_family == AF_INET6) { 3774 struct sockaddr_in6 *sin6 = 3775 (struct sockaddr_in6 *)&ep->com.local_addr; 3776 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3777 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3778 } 3779 xa_erase_irq(&dev->atids, atid); 3780 cxgb4_free_atid(dev->rdev.lldi.tids, atid); 3781 dst_release(ep->dst); 3782 cxgb4_l2t_release(ep->l2t); 3783 c4iw_put_ep(&ep->com); 3784 } 3785 3786 static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, 3787 struct cpl_fw6_msg_ofld_connection_wr_rpl *req) 3788 { 3789 struct sk_buff *rpl_skb; 3790 struct cpl_pass_accept_req *cpl; 3791 int ret; 3792 3793 rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; 3794 if (req->retval) { 3795 pr_err("%s passive open failure %d\n", __func__, req->retval); 3796 mutex_lock(&dev->rdev.stats.lock); 3797 dev->rdev.stats.pas_ofld_conn_fails++; 3798 mutex_unlock(&dev->rdev.stats.lock); 3799 kfree_skb(rpl_skb); 3800 } else { 3801 cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); 3802 OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 3803 (__force u32) htonl( 3804 (__force u32) req->tid))); 3805 ret = pass_accept_req(dev, rpl_skb); 3806 if (!ret) 3807 kfree_skb(rpl_skb); 3808 } 3809 
return; 3810 } 3811 3812 static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word) 3813 { 3814 u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]); 3815 u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]); 3816 u64 t; 3817 u32 shift = 32; 3818 3819 t = (thi << shift) | (tlo >> shift); 3820 3821 return t; 3822 } 3823 3824 static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift) 3825 { 3826 u32 v; 3827 u64 t = be64_to_cpu(tcb[(31 - word) / 2]); 3828 3829 if (word & 0x1) 3830 shift += 32; 3831 v = (t >> shift) & mask; 3832 return v; 3833 } 3834 3835 static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 3836 { 3837 struct cpl_get_tcb_rpl *rpl = cplhdr(skb); 3838 __be64 *tcb = (__be64 *)(rpl + 1); 3839 unsigned int tid = GET_TID(rpl); 3840 struct c4iw_ep *ep; 3841 u64 t_flags_64; 3842 u32 rx_pdu_out; 3843 3844 ep = get_ep_from_tid(dev, tid); 3845 if (!ep) 3846 return 0; 3847 /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to 3848 * determine if there's a rx PDU feedback event pending. 3849 * 3850 * If that bit is set, it means we'll need to re-read the TCB's 3851 * rq_start value. The final value is the one present in a TCB 3852 * with the TF_RX_PDU_OUT bit cleared. 
3853 */ 3854 3855 t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W); 3856 rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S; 3857 3858 c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */ 3859 c4iw_put_ep(&ep->com); /* from read_tcb() */ 3860 3861 /* If TF_RX_PDU_OUT bit is set, re-read the TCB */ 3862 if (rx_pdu_out) { 3863 if (++ep->rx_pdu_out_cnt >= 2) { 3864 WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n"); 3865 goto cleanup; 3866 } 3867 read_tcb(ep); 3868 return 0; 3869 } 3870 3871 ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W, 3872 TCB_RQ_START_S); 3873 cleanup: 3874 pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); 3875 3876 if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) 3877 finish_peer_abort(dev, ep); 3878 else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) 3879 send_abort_req(ep); 3880 else 3881 WARN_ONCE(1, "unexpected state!"); 3882 3883 return 0; 3884 } 3885 3886 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) 3887 { 3888 struct cpl_fw6_msg *rpl = cplhdr(skb); 3889 struct cpl_fw6_msg_ofld_connection_wr_rpl *req; 3890 3891 switch (rpl->type) { 3892 case FW6_TYPE_CQE: 3893 c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); 3894 break; 3895 case FW6_TYPE_OFLD_CONNECTION_WR_RPL: 3896 req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; 3897 switch (req->t_state) { 3898 case TCP_SYN_SENT: 3899 active_ofld_conn_reply(dev, skb, req); 3900 break; 3901 case TCP_SYN_RECV: 3902 passive_ofld_conn_reply(dev, skb, req); 3903 break; 3904 default: 3905 pr_err("%s unexpected ofld conn wr state %d\n", 3906 __func__, req->t_state); 3907 break; 3908 } 3909 break; 3910 } 3911 return 0; 3912 } 3913 3914 static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) 3915 { 3916 __be32 l2info; 3917 __be16 hdr_len, vlantag, len; 3918 u16 eth_hdr_len; 3919 int tcp_hdr_len, ip_hdr_len; 3920 u8 intf; 3921 struct cpl_rx_pkt *cpl = 
cplhdr(skb); 3922 struct cpl_pass_accept_req *req; 3923 struct tcp_options_received tmp_opt; 3924 struct c4iw_dev *dev; 3925 enum chip_type type; 3926 3927 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 3928 /* Store values from cpl_rx_pkt in temporary location. */ 3929 vlantag = cpl->vlan; 3930 len = cpl->len; 3931 l2info = cpl->l2info; 3932 hdr_len = cpl->hdr_len; 3933 intf = cpl->iff; 3934 3935 __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); 3936 3937 /* 3938 * We need to parse the TCP options from SYN packet. 3939 * to generate cpl_pass_accept_req. 3940 */ 3941 memset(&tmp_opt, 0, sizeof(tmp_opt)); 3942 tcp_clear_options(&tmp_opt); 3943 tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); 3944 3945 req = __skb_push(skb, sizeof(*req)); 3946 memset(req, 0, sizeof(*req)); 3947 req->l2info = cpu_to_be16(SYN_INTF_V(intf) | 3948 SYN_MAC_IDX_V(RX_MACIDX_G( 3949 be32_to_cpu(l2info))) | 3950 SYN_XACT_MATCH_F); 3951 type = dev->rdev.lldi.adapter_type; 3952 tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); 3953 ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); 3954 req->hdr_len = 3955 cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); 3956 if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { 3957 eth_hdr_len = is_t4(type) ? 
3958 RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : 3959 RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); 3960 req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | 3961 IP_HDR_LEN_V(ip_hdr_len) | 3962 ETH_HDR_LEN_V(eth_hdr_len)); 3963 } else { /* T6 and later */ 3964 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); 3965 req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | 3966 T6_IP_HDR_LEN_V(ip_hdr_len) | 3967 T6_ETH_HDR_LEN_V(eth_hdr_len)); 3968 } 3969 req->vlan = vlantag; 3970 req->len = len; 3971 req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | 3972 PASS_OPEN_TOS_V(tos)); 3973 req->tcpopt.mss = htons(tmp_opt.mss_clamp); 3974 if (tmp_opt.wscale_ok) 3975 req->tcpopt.wsf = tmp_opt.snd_wscale; 3976 req->tcpopt.tstamp = tmp_opt.saw_tstamp; 3977 if (tmp_opt.sack_ok) 3978 req->tcpopt.sack = 1; 3979 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); 3980 return; 3981 } 3982 3983 static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, 3984 __be32 laddr, __be16 lport, 3985 __be32 raddr, __be16 rport, 3986 u32 rcv_isn, u32 filter, u16 window, 3987 u32 rss_qid, u8 port_id) 3988 { 3989 struct sk_buff *req_skb; 3990 struct fw_ofld_connection_wr *req; 3991 struct cpl_pass_accept_req *cpl = cplhdr(skb); 3992 int ret; 3993 3994 req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); 3995 if (!req_skb) 3996 return; 3997 req = __skb_put_zero(req_skb, sizeof(*req)); 3998 req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F); 3999 req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); 4000 req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F); 4001 req->le.filter = (__force __be32) filter; 4002 req->le.lport = lport; 4003 req->le.pport = rport; 4004 req->le.u.ipv4.lip = laddr; 4005 req->le.u.ipv4.pip = raddr; 4006 req->tcb.rcv_nxt = htonl(rcv_isn + 1); 4007 req->tcb.rcv_adv = htons(window); 4008 req->tcb.t_state_to_astid = 4009 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) | 4010 
FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) | 4011 FW_OFLD_CONNECTION_WR_ASTID_V( 4012 PASS_OPEN_TID_G(ntohl(cpl->tos_stid)))); 4013 4014 /* 4015 * We store the qid in opt2 which will be used by the firmware 4016 * to send us the wr response. 4017 */ 4018 req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid)); 4019 4020 /* 4021 * We initialize the MSS index in TCB to 0xF. 4022 * So that when driver sends cpl_pass_accept_rpl 4023 * TCB picks up the correct value. If this was 0 4024 * TP will ignore any value > 0 for MSS index. 4025 */ 4026 req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); 4027 req->cookie = (uintptr_t)skb; 4028 4029 set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); 4030 ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); 4031 if (ret < 0) { 4032 pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, 4033 ret); 4034 kfree_skb(skb); 4035 kfree_skb(req_skb); 4036 } 4037 } 4038 4039 /* 4040 * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt 4041 * messages when a filter is being used instead of server to 4042 * redirect a syn packet. When packets hit filter they are redirected 4043 * to the offload queue and driver tries to establish the connection 4044 * using firmware work request. 
4045 */ 4046 static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) 4047 { 4048 int stid; 4049 unsigned int filter; 4050 struct ethhdr *eh = NULL; 4051 struct vlan_ethhdr *vlan_eh = NULL; 4052 struct iphdr *iph; 4053 struct tcphdr *tcph; 4054 struct rss_header *rss = (void *)skb->data; 4055 struct cpl_rx_pkt *cpl = (void *)skb->data; 4056 struct cpl_pass_accept_req *req = (void *)(rss + 1); 4057 struct l2t_entry *e; 4058 struct dst_entry *dst; 4059 struct c4iw_ep *lep = NULL; 4060 u16 window; 4061 struct port_info *pi; 4062 struct net_device *pdev; 4063 u16 rss_qid, eth_hdr_len; 4064 int step; 4065 struct neighbour *neigh; 4066 4067 /* Drop all non-SYN packets */ 4068 if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F))) 4069 goto reject; 4070 4071 /* 4072 * Drop all packets which did not hit the filter. 4073 * Unlikely to happen. 4074 */ 4075 if (!(rss->filter_hit && rss->filter_tid)) 4076 goto reject; 4077 4078 /* 4079 * Calculate the server tid from filter hit index from cpl_rx_pkt. 4080 */ 4081 stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); 4082 4083 lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 4084 if (!lep) { 4085 pr_warn("%s connect request on invalid stid %d\n", 4086 __func__, stid); 4087 goto reject; 4088 } 4089 4090 switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { 4091 case CHELSIO_T4: 4092 eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4093 break; 4094 case CHELSIO_T5: 4095 eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4096 break; 4097 case CHELSIO_T6: 4098 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4099 break; 4100 default: 4101 pr_err("T%d Chip is not supported\n", 4102 CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); 4103 goto reject; 4104 } 4105 4106 if (eth_hdr_len == ETH_HLEN) { 4107 eh = (struct ethhdr *)(req + 1); 4108 iph = (struct iphdr *)(eh + 1); 4109 } else { 4110 vlan_eh = (struct vlan_ethhdr *)(req + 1); 4111 iph = (struct iphdr *)(vlan_eh + 1); 4112 
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); 4113 } 4114 4115 if (iph->version != 0x4) 4116 goto reject; 4117 4118 tcph = (struct tcphdr *)(iph + 1); 4119 skb_set_network_header(skb, (void *)iph - (void *)rss); 4120 skb_set_transport_header(skb, (void *)tcph - (void *)rss); 4121 skb_get(skb); 4122 4123 pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n", 4124 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), 4125 ntohs(tcph->source), iph->tos); 4126 4127 dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 4128 iph->daddr, iph->saddr, tcph->dest, 4129 tcph->source, iph->tos); 4130 if (!dst) { 4131 pr_err("%s - failed to find dst entry!\n", __func__); 4132 goto reject; 4133 } 4134 neigh = dst_neigh_lookup_skb(dst, skb); 4135 4136 if (!neigh) { 4137 pr_err("%s - failed to allocate neigh!\n", __func__); 4138 goto free_dst; 4139 } 4140 4141 if (neigh->dev->flags & IFF_LOOPBACK) { 4142 pdev = ip_dev_find(&init_net, iph->daddr); 4143 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, 4144 pdev, 0); 4145 pi = (struct port_info *)netdev_priv(pdev); 4146 dev_put(pdev); 4147 } else { 4148 pdev = get_real_dev(neigh->dev); 4149 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, 4150 pdev, 0); 4151 pi = (struct port_info *)netdev_priv(pdev); 4152 } 4153 neigh_release(neigh); 4154 if (!e) { 4155 pr_err("%s - failed to allocate l2t entry!\n", 4156 __func__); 4157 goto free_dst; 4158 } 4159 4160 step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; 4161 rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; 4162 window = (__force u16) htons((__force u16)tcph->window); 4163 4164 /* Calcuate filter portion for LE region. */ 4165 filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( 4166 dev->rdev.lldi.ports[0], 4167 e)); 4168 4169 /* 4170 * Synthesize the cpl_pass_accept_req. We have everything except the 4171 * TID. Once firmware sends a reply with TID we update the TID field 4172 * in cpl and pass it through the regular cpl_pass_accept_req path. 
4173 */ 4174 build_cpl_pass_accept_req(skb, stid, iph->tos); 4175 send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, 4176 tcph->source, ntohl(tcph->seq), filter, window, 4177 rss_qid, pi->port_id); 4178 cxgb4_l2t_release(e); 4179 free_dst: 4180 dst_release(dst); 4181 reject: 4182 if (lep) 4183 c4iw_put_ep(&lep->com); 4184 return 0; 4185 } 4186 4187 /* 4188 * These are the real handlers that are called from a 4189 * work queue. 4190 */ 4191 static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { 4192 [CPL_ACT_ESTABLISH] = act_establish, 4193 [CPL_ACT_OPEN_RPL] = act_open_rpl, 4194 [CPL_RX_DATA] = rx_data, 4195 [CPL_ABORT_RPL_RSS] = abort_rpl, 4196 [CPL_ABORT_RPL] = abort_rpl, 4197 [CPL_PASS_OPEN_RPL] = pass_open_rpl, 4198 [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, 4199 [CPL_PASS_ACCEPT_REQ] = pass_accept_req, 4200 [CPL_PASS_ESTABLISH] = pass_establish, 4201 [CPL_PEER_CLOSE] = peer_close, 4202 [CPL_ABORT_REQ_RSS] = peer_abort, 4203 [CPL_CLOSE_CON_RPL] = close_con_rpl, 4204 [CPL_RDMA_TERMINATE] = terminate, 4205 [CPL_FW4_ACK] = fw4_ack, 4206 [CPL_GET_TCB_RPL] = read_tcb_rpl, 4207 [CPL_FW6_MSG] = deferred_fw6_msg, 4208 [CPL_RX_PKT] = rx_pkt, 4209 [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, 4210 [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe 4211 }; 4212 4213 static void process_timeout(struct c4iw_ep *ep) 4214 { 4215 struct c4iw_qp_attributes attrs; 4216 int abort = 1; 4217 4218 mutex_lock(&ep->com.mutex); 4219 pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state); 4220 set_bit(TIMEDOUT, &ep->com.history); 4221 switch (ep->com.state) { 4222 case MPA_REQ_SENT: 4223 connect_reply_upcall(ep, -ETIMEDOUT); 4224 break; 4225 case MPA_REQ_WAIT: 4226 case MPA_REQ_RCVD: 4227 case MPA_REP_SENT: 4228 case FPDU_MODE: 4229 break; 4230 case CLOSING: 4231 case MORIBUND: 4232 if (ep->com.cm_id && ep->com.qp) { 4233 attrs.next_state = C4IW_QP_STATE_ERROR; 4234 c4iw_modify_qp(ep->com.qp->rhp, 4235 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 4236 
&attrs, 1); 4237 } 4238 close_complete_upcall(ep, -ETIMEDOUT); 4239 break; 4240 case ABORTING: 4241 case DEAD: 4242 4243 /* 4244 * These states are expected if the ep timed out at the same 4245 * time as another thread was calling stop_ep_timer(). 4246 * So we silently do nothing for these states. 4247 */ 4248 abort = 0; 4249 break; 4250 default: 4251 WARN(1, "%s unexpected state ep %p tid %u state %u\n", 4252 __func__, ep, ep->hwtid, ep->com.state); 4253 abort = 0; 4254 } 4255 mutex_unlock(&ep->com.mutex); 4256 if (abort) 4257 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 4258 c4iw_put_ep(&ep->com); 4259 } 4260 4261 static void process_timedout_eps(void) 4262 { 4263 struct c4iw_ep *ep; 4264 4265 spin_lock_irq(&timeout_lock); 4266 while (!list_empty(&timeout_list)) { 4267 struct list_head *tmp; 4268 4269 tmp = timeout_list.next; 4270 list_del(tmp); 4271 tmp->next = NULL; 4272 tmp->prev = NULL; 4273 spin_unlock_irq(&timeout_lock); 4274 ep = list_entry(tmp, struct c4iw_ep, entry); 4275 process_timeout(ep); 4276 spin_lock_irq(&timeout_lock); 4277 } 4278 spin_unlock_irq(&timeout_lock); 4279 } 4280 4281 static void process_work(struct work_struct *work) 4282 { 4283 struct sk_buff *skb = NULL; 4284 struct c4iw_dev *dev; 4285 struct cpl_act_establish *rpl; 4286 unsigned int opcode; 4287 int ret; 4288 4289 process_timedout_eps(); 4290 while ((skb = skb_dequeue(&rxq))) { 4291 rpl = cplhdr(skb); 4292 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 4293 opcode = rpl->ot.opcode; 4294 4295 if (opcode >= ARRAY_SIZE(work_handlers) || 4296 !work_handlers[opcode]) { 4297 pr_err("No handler for opcode 0x%x.\n", opcode); 4298 kfree_skb(skb); 4299 } else { 4300 ret = work_handlers[opcode](dev, skb); 4301 if (!ret) 4302 kfree_skb(skb); 4303 } 4304 process_timedout_eps(); 4305 } 4306 } 4307 4308 static DECLARE_WORK(skb_work, process_work); 4309 4310 static void ep_timeout(struct timer_list *t) 4311 { 4312 struct c4iw_ep *ep = from_timer(ep, t, timer); 4313 int kickit = 0; 4314 4315 
spin_lock(&timeout_lock); 4316 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 4317 /* 4318 * Only insert if it is not already on the list. 4319 */ 4320 if (!ep->entry.next) { 4321 list_add_tail(&ep->entry, &timeout_list); 4322 kickit = 1; 4323 } 4324 } 4325 spin_unlock(&timeout_lock); 4326 if (kickit) 4327 queue_work(workq, &skb_work); 4328 } 4329 4330 /* 4331 * All the CM events are handled on a work queue to have a safe context. 4332 */ 4333 static int sched(struct c4iw_dev *dev, struct sk_buff *skb) 4334 { 4335 4336 /* 4337 * Save dev in the skb->cb area. 4338 */ 4339 *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev; 4340 4341 /* 4342 * Queue the skb and schedule the worker thread. 4343 */ 4344 skb_queue_tail(&rxq, skb); 4345 queue_work(workq, &skb_work); 4346 return 0; 4347 } 4348 4349 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 4350 { 4351 struct cpl_set_tcb_rpl *rpl = cplhdr(skb); 4352 4353 if (rpl->status != CPL_ERR_NONE) { 4354 pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", 4355 rpl->status, GET_TID(rpl)); 4356 } 4357 kfree_skb(skb); 4358 return 0; 4359 } 4360 4361 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) 4362 { 4363 struct cpl_fw6_msg *rpl = cplhdr(skb); 4364 struct c4iw_wr_wait *wr_waitp; 4365 int ret; 4366 4367 pr_debug("type %u\n", rpl->type); 4368 4369 switch (rpl->type) { 4370 case FW6_TYPE_WR_RPL: 4371 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); 4372 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; 4373 pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret); 4374 if (wr_waitp) 4375 c4iw_wake_up_deref(wr_waitp, ret ? 
-ret : 0); 4376 kfree_skb(skb); 4377 break; 4378 case FW6_TYPE_CQE: 4379 case FW6_TYPE_OFLD_CONNECTION_WR_RPL: 4380 sched(dev, skb); 4381 break; 4382 default: 4383 pr_err("%s unexpected fw6 msg type %u\n", 4384 __func__, rpl->type); 4385 kfree_skb(skb); 4386 break; 4387 } 4388 return 0; 4389 } 4390 4391 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) 4392 { 4393 struct cpl_abort_req_rss *req = cplhdr(skb); 4394 struct c4iw_ep *ep; 4395 unsigned int tid = GET_TID(req); 4396 4397 ep = get_ep_from_tid(dev, tid); 4398 /* This EP will be dereferenced in peer_abort() */ 4399 if (!ep) { 4400 pr_warn("Abort on non-existent endpoint, tid %d\n", tid); 4401 kfree_skb(skb); 4402 return 0; 4403 } 4404 if (cxgb_is_neg_adv(req->status)) { 4405 pr_debug("Negative advice on abort- tid %u status %d (%s)\n", 4406 ep->hwtid, req->status, 4407 neg_adv_str(req->status)); 4408 goto out; 4409 } 4410 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); 4411 4412 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 4413 out: 4414 sched(dev, skb); 4415 return 0; 4416 } 4417 4418 /* 4419 * Most upcalls from the T4 Core go to sched() to 4420 * schedule the processing on a work queue. 
4421 */ 4422 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { 4423 [CPL_ACT_ESTABLISH] = sched, 4424 [CPL_ACT_OPEN_RPL] = sched, 4425 [CPL_RX_DATA] = sched, 4426 [CPL_ABORT_RPL_RSS] = sched, 4427 [CPL_ABORT_RPL] = sched, 4428 [CPL_PASS_OPEN_RPL] = sched, 4429 [CPL_CLOSE_LISTSRV_RPL] = sched, 4430 [CPL_PASS_ACCEPT_REQ] = sched, 4431 [CPL_PASS_ESTABLISH] = sched, 4432 [CPL_PEER_CLOSE] = sched, 4433 [CPL_CLOSE_CON_RPL] = sched, 4434 [CPL_ABORT_REQ_RSS] = peer_abort_intr, 4435 [CPL_RDMA_TERMINATE] = sched, 4436 [CPL_FW4_ACK] = sched, 4437 [CPL_SET_TCB_RPL] = set_tcb_rpl, 4438 [CPL_GET_TCB_RPL] = sched, 4439 [CPL_FW6_MSG] = fw6_msg, 4440 [CPL_RX_PKT] = sched 4441 }; 4442 4443 int __init c4iw_cm_init(void) 4444 { 4445 spin_lock_init(&timeout_lock); 4446 skb_queue_head_init(&rxq); 4447 4448 workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); 4449 if (!workq) 4450 return -ENOMEM; 4451 4452 return 0; 4453 } 4454 4455 void c4iw_cm_term(void) 4456 { 4457 WARN_ON(!list_empty(&timeout_list)); 4458 flush_workqueue(workq); 4459 destroy_workqueue(workq); 4460 } 4461