1 /* 2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <linux/module.h> 33 #include <linux/list.h> 34 #include <linux/workqueue.h> 35 #include <linux/skbuff.h> 36 #include <linux/timer.h> 37 #include <linux/notifier.h> 38 #include <linux/inetdevice.h> 39 #include <linux/ip.h> 40 #include <linux/tcp.h> 41 #include <linux/if_vlan.h> 42 43 #include <net/neighbour.h> 44 #include <net/netevent.h> 45 #include <net/route.h> 46 #include <net/tcp.h> 47 #include <net/ip6_route.h> 48 #include <net/addrconf.h> 49 50 #include <rdma/ib_addr.h> 51 52 #include <libcxgb_cm.h> 53 #include "iw_cxgb4.h" 54 #include "clip_tbl.h" 55 56 static char *states[] = { 57 "idle", 58 "listen", 59 "connecting", 60 "mpa_wait_req", 61 "mpa_req_sent", 62 "mpa_req_rcvd", 63 "mpa_rep_sent", 64 "fpdu_mode", 65 "aborting", 66 "closing", 67 "moribund", 68 "dead", 69 NULL, 70 }; 71 72 static int nocong; 73 module_param(nocong, int, 0644); 74 MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); 75 76 static int enable_ecn; 77 module_param(enable_ecn, int, 0644); 78 MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); 79 80 static int dack_mode; 81 module_param(dack_mode, int, 0644); 82 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); 83 84 uint c4iw_max_read_depth = 32; 85 module_param(c4iw_max_read_depth, int, 0644); 86 MODULE_PARM_DESC(c4iw_max_read_depth, 87 "Per-connection max ORD/IRD (default=32)"); 88 89 static int enable_tcp_timestamps; 90 module_param(enable_tcp_timestamps, int, 0644); 91 MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)"); 92 93 static int enable_tcp_sack; 94 module_param(enable_tcp_sack, int, 0644); 95 MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)"); 96 97 static int enable_tcp_window_scaling = 1; 98 module_param(enable_tcp_window_scaling, int, 0644); 99 MODULE_PARM_DESC(enable_tcp_window_scaling, 100 "Enable tcp window scaling (default=1)"); 101 102 static int peer2peer = 1; 103 module_param(peer2peer, int, 0644); 104 MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); 105 106 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; 107 module_param(p2p_type, int, 0644); 108 MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: " 109 "1=RDMA_READ 0=RDMA_WRITE (default 1)"); 110 111 static int ep_timeout_secs = 60; 112 module_param(ep_timeout_secs, int, 0644); 113 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " 114 "in seconds (default=60)"); 115 116 static int mpa_rev = 2; 117 module_param(mpa_rev, int, 0644); 118 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 119 "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft" 120 " compliant (default=2)"); 121 122 static int markers_enabled; 123 module_param(markers_enabled, int, 0644); 124 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); 125 126 static int crc_enabled = 1; 127 module_param(crc_enabled, int, 0644); 128 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); 129 130 static int rcv_win = 256 * 1024; 131 module_param(rcv_win, int, 0644); 132 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); 133 134 static int snd_win = 128 * 1024; 135 module_param(snd_win, int, 0644); 136 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); 137 138 static struct workqueue_struct *workq; 139 140 static struct sk_buff_head rxq; 141 142 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); 143 static void ep_timeout(struct timer_list *t); 144 static void connect_reply_upcall(struct c4iw_ep *ep, int status); 145 static int sched(struct c4iw_dev *dev, struct sk_buff *skb); 146 147 static LIST_HEAD(timeout_list); 148 static DEFINE_SPINLOCK(timeout_lock); 149 150 static void deref_cm_id(struct c4iw_ep_common *epc) 151 { 152 epc->cm_id->rem_ref(epc->cm_id); 153 epc->cm_id = NULL; 154 set_bit(CM_ID_DEREFED, &epc->history); 155 } 156 157 static void ref_cm_id(struct c4iw_ep_common *epc) 158 { 159 set_bit(CM_ID_REFED, &epc->history); 160 epc->cm_id->add_ref(epc->cm_id); 161 } 162 163 static void deref_qp(struct c4iw_ep *ep) 164 { 165 c4iw_qp_rem_ref(&ep->com.qp->ibqp); 166 clear_bit(QP_REFERENCED, &ep->com.flags); 167 set_bit(QP_DEREFED, &ep->com.history); 168 } 169 170 static void ref_qp(struct c4iw_ep *ep) 171 { 172 set_bit(QP_REFERENCED, &ep->com.flags); 173 set_bit(QP_REFED, &ep->com.history); 174 c4iw_qp_add_ref(&ep->com.qp->ibqp); 175 } 176 177 static void start_ep_timer(struct c4iw_ep *ep) 178 { 179 pr_debug("ep %p\n", ep); 180 if (timer_pending(&ep->timer)) { 181 pr_err("%s timer already started! ep %p\n", 182 __func__, ep); 183 return; 184 } 185 clear_bit(TIMEOUT, &ep->com.flags); 186 c4iw_get_ep(&ep->com); 187 ep->timer.expires = jiffies + ep_timeout_secs * HZ; 188 add_timer(&ep->timer); 189 } 190 191 static int stop_ep_timer(struct c4iw_ep *ep) 192 { 193 pr_debug("ep %p stopping\n", ep); 194 del_timer_sync(&ep->timer); 195 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 196 c4iw_put_ep(&ep->com); 197 return 0; 198 } 199 return 1; 200 } 201 202 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, 203 struct l2t_entry *l2e) 204 { 205 int error = 0; 206 207 if (c4iw_fatal_error(rdev)) { 208 kfree_skb(skb); 209 pr_err("%s - device in error state - dropping\n", __func__); 210 return -EIO; 211 } 212 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); 213 if (error < 0) 214 kfree_skb(skb); 215 else if (error == NET_XMIT_DROP) 216 return -ENOMEM; 217 return error < 0 ? error : 0; 218 } 219 220 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) 221 { 222 int error = 0; 223 224 if (c4iw_fatal_error(rdev)) { 225 kfree_skb(skb); 226 pr_err("%s - device in error state - dropping\n", __func__); 227 return -EIO; 228 } 229 error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); 230 if (error < 0) 231 kfree_skb(skb); 232 return error < 0 ? error : 0; 233 } 234 235 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) 236 { 237 u32 len = roundup(sizeof(struct cpl_tid_release), 16); 238 239 skb = get_skb(skb, len, GFP_KERNEL); 240 if (!skb) 241 return; 242 243 cxgb_mk_tid_release(skb, len, hwtid, 0); 244 c4iw_ofld_send(rdev, skb); 245 return; 246 } 247 248 static void set_emss(struct c4iw_ep *ep, u16 opt) 249 { 250 ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] - 251 ((AF_INET == ep->com.remote_addr.ss_family) ? 252 sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - 253 sizeof(struct tcphdr); 254 ep->mss = ep->emss; 255 if (TCPOPT_TSTAMP_G(opt)) 256 ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4); 257 if (ep->emss < 128) 258 ep->emss = 128; 259 if (ep->emss & 7) 260 pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n", 261 TCPOPT_MSS_G(opt), ep->mss, ep->emss); 262 pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, 263 ep->emss); 264 } 265 266 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) 267 { 268 enum c4iw_ep_state state; 269 270 mutex_lock(&epc->mutex); 271 state = epc->state; 272 mutex_unlock(&epc->mutex); 273 return state; 274 } 275 276 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 277 { 278 epc->state = new; 279 } 280 281 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 282 { 283 mutex_lock(&epc->mutex); 284 pr_debug("%s -> %s\n", states[epc->state], states[new]); 285 __state_set(epc, new); 286 mutex_unlock(&epc->mutex); 287 return; 288 } 289 290 static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) 291 { 292 struct sk_buff *skb; 293 unsigned int i; 294 size_t len; 295 296 len = roundup(sizeof(union cpl_wr_size), 16); 297 for (i = 0; i < size; i++) { 298 skb = alloc_skb(len, GFP_KERNEL); 299 if (!skb) 300 goto fail; 301 skb_queue_tail(ep_skb_list, skb); 302 } 303 return 0; 304 fail: 305 skb_queue_purge(ep_skb_list); 306 return -ENOMEM; 307 } 308 309 static void *alloc_ep(int size, gfp_t gfp) 310 { 311 struct c4iw_ep_common *epc; 312 313 epc = kzalloc(size, gfp); 314 if (epc) { 315 epc->wr_waitp = c4iw_alloc_wr_wait(gfp); 316 if (!epc->wr_waitp) { 317 kfree(epc); 318 epc = NULL; 319 goto out; 320 } 321 kref_init(&epc->kref); 322 mutex_init(&epc->mutex); 323 c4iw_init_wr_wait(epc->wr_waitp); 324 } 325 pr_debug("alloc ep %p\n", epc); 326 out: 327 return epc; 328 } 329 330 static void remove_ep_tid(struct c4iw_ep *ep) 331 { 332 unsigned long flags; 333 334 xa_lock_irqsave(&ep->com.dev->hwtids, flags); 335 __xa_erase(&ep->com.dev->hwtids, ep->hwtid); 336 if (xa_empty(&ep->com.dev->hwtids)) 337 wake_up(&ep->com.dev->wait); 338 xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); 339 } 340 341 static int insert_ep_tid(struct c4iw_ep *ep) 342 { 343 unsigned long flags; 344 int err; 345 346 xa_lock_irqsave(&ep->com.dev->hwtids, flags); 347 err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL); 348 xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); 349 350 return err; 351 } 352 353 /* 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep. 355 */ 356 static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) 357 { 358 struct c4iw_ep *ep; 359 unsigned long flags; 360 361 xa_lock_irqsave(&dev->hwtids, flags); 362 ep = xa_load(&dev->hwtids, tid); 363 if (ep) 364 c4iw_get_ep(&ep->com); 365 xa_unlock_irqrestore(&dev->hwtids, flags); 366 return ep; 367 } 368 369 /* 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep. 371 */ 372 static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, 373 unsigned int stid) 374 { 375 struct c4iw_listen_ep *ep; 376 unsigned long flags; 377 378 xa_lock_irqsave(&dev->stids, flags); 379 ep = xa_load(&dev->stids, stid); 380 if (ep) 381 c4iw_get_ep(&ep->com); 382 xa_unlock_irqrestore(&dev->stids, flags); 383 return ep; 384 } 385 386 void _c4iw_free_ep(struct kref *kref) 387 { 388 struct c4iw_ep *ep; 389 390 ep = container_of(kref, struct c4iw_ep, com.kref); 391 pr_debug("ep %p state %s\n", ep, states[ep->com.state]); 392 if (test_bit(QP_REFERENCED, &ep->com.flags)) 393 deref_qp(ep); 394 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { 395 if (ep->com.remote_addr.ss_family == AF_INET6) { 396 struct sockaddr_in6 *sin6 = 397 (struct sockaddr_in6 *) 398 &ep->com.local_addr; 399 400 cxgb4_clip_release( 401 ep->com.dev->rdev.lldi.ports[0], 402 (const u32 *)&sin6->sin6_addr.s6_addr, 403 1); 404 } 405 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, 406 ep->com.local_addr.ss_family); 407 dst_release(ep->dst); 408 cxgb4_l2t_release(ep->l2t); 409 kfree_skb(ep->mpa_skb); 410 } 411 if (!skb_queue_empty(&ep->com.ep_skb_list)) 412 skb_queue_purge(&ep->com.ep_skb_list); 413 c4iw_put_wr_wait(ep->com.wr_waitp); 414 kfree(ep); 415 } 416 417 static void release_ep_resources(struct c4iw_ep *ep) 418 { 419 set_bit(RELEASE_RESOURCES, &ep->com.flags); 420 421 /* 422 * If we have a hwtid, then remove it from the idr table 423 * so lookups will no longer find this endpoint. Otherwise 424 * we have a race where one thread finds the ep ptr just 425 * before the other thread is freeing the ep memory. 426 */ 427 if (ep->hwtid != -1) 428 remove_ep_tid(ep); 429 c4iw_put_ep(&ep->com); 430 } 431 432 static int status2errno(int status) 433 { 434 switch (status) { 435 case CPL_ERR_NONE: 436 return 0; 437 case CPL_ERR_CONN_RESET: 438 return -ECONNRESET; 439 case CPL_ERR_ARP_MISS: 440 return -EHOSTUNREACH; 441 case CPL_ERR_CONN_TIMEDOUT: 442 return -ETIMEDOUT; 443 case CPL_ERR_TCAM_FULL: 444 return -ENOMEM; 445 case CPL_ERR_CONN_EXIST: 446 return -EADDRINUSE; 447 default: 448 return -EIO; 449 } 450 } 451 452 /* 453 * Try and reuse skbs already allocated... 454 */ 455 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) 456 { 457 if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { 458 skb_trim(skb, 0); 459 skb_get(skb); 460 skb_reset_transport_header(skb); 461 } else { 462 skb = alloc_skb(len, gfp); 463 if (!skb) 464 return NULL; 465 } 466 t4_set_arp_err_handler(skb, NULL, NULL); 467 return skb; 468 } 469 470 static struct net_device *get_real_dev(struct net_device *egress_dev) 471 { 472 return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; 473 } 474 475 static void arp_failure_discard(void *handle, struct sk_buff *skb) 476 { 477 pr_err("ARP failure\n"); 478 kfree_skb(skb); 479 } 480 481 static void mpa_start_arp_failure(void *handle, struct sk_buff *skb) 482 { 483 pr_err("ARP failure during MPA Negotiation - Closing Connection\n"); 484 } 485 486 enum { 487 NUM_FAKE_CPLS = 2, 488 FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, 489 FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1, 490 }; 491 492 static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 493 { 494 struct c4iw_ep *ep; 495 496 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 497 release_ep_resources(ep); 498 return 0; 499 } 500 501 static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 502 { 503 struct c4iw_ep *ep; 504 505 ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 506 c4iw_put_ep(&ep->parent_ep->com); 507 release_ep_resources(ep); 508 return 0; 509 } 510 511 /* 512 * Fake up a special CPL opcode and call sched() so process_work() will call 513 * _put_ep_safe() in a safe context to free the ep resources. This is needed 514 * because ARP error handlers are called in an ATOMIC context, and 515 * _c4iw_free_ep() needs to block. 516 */ 517 static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb, 518 int cpl) 519 { 520 struct cpl_act_establish *rpl = cplhdr(skb); 521 522 /* Set our special ARP_FAILURE opcode */ 523 rpl->ot.opcode = cpl; 524 525 /* 526 * Save ep in the skb->cb area, after where sched() will save the dev 527 * ptr. 528 */ 529 *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep; 530 sched(ep->com.dev, skb); 531 } 532 533 /* Handle an ARP failure for an accept */ 534 static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) 535 { 536 struct c4iw_ep *ep = handle; 537 538 pr_err("ARP failure during accept - tid %u - dropping connection\n", 539 ep->hwtid); 540 541 __state_set(&ep->com, DEAD); 542 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE); 543 } 544 545 /* 546 * Handle an ARP failure for an active open. 547 */ 548 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) 549 { 550 struct c4iw_ep *ep = handle; 551 552 pr_err("ARP failure during connect\n"); 553 connect_reply_upcall(ep, -EHOSTUNREACH); 554 __state_set(&ep->com, DEAD); 555 if (ep->com.remote_addr.ss_family == AF_INET6) { 556 struct sockaddr_in6 *sin6 = 557 (struct sockaddr_in6 *)&ep->com.local_addr; 558 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 559 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 560 } 561 xa_erase_irq(&ep->com.dev->atids, ep->atid); 562 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 563 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 564 } 565 566 /* 567 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 568 * and send it along. 569 */ 570 static void abort_arp_failure(void *handle, struct sk_buff *skb) 571 { 572 int ret; 573 struct c4iw_ep *ep = handle; 574 struct c4iw_rdev *rdev = &ep->com.dev->rdev; 575 struct cpl_abort_req *req = cplhdr(skb); 576 577 pr_debug("rdev %p\n", rdev); 578 req->cmd = CPL_ABORT_NO_RST; 579 skb_get(skb); 580 ret = c4iw_ofld_send(rdev, skb); 581 if (ret) { 582 __state_set(&ep->com, DEAD); 583 queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 584 } else 585 kfree_skb(skb); 586 } 587 588 static int send_flowc(struct c4iw_ep *ep) 589 { 590 struct fw_flowc_wr *flowc; 591 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); 592 u16 vlan = ep->l2t->vlan; 593 int nparams; 594 int flowclen, flowclen16; 595 596 if (WARN_ON(!skb)) 597 return -ENOMEM; 598 599 if (vlan == CPL_L2T_VLAN_NONE) 600 nparams = 9; 601 else 602 nparams = 10; 603 604 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); 605 flowclen16 = DIV_ROUND_UP(flowclen, 16); 606 flowclen = flowclen16 * 16; 607 608 flowc = __skb_put(skb, flowclen); 609 memset(flowc, 0, flowclen); 610 611 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 612 FW_FLOWC_WR_NPARAMS_V(nparams)); 613 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | 614 FW_WR_FLOWID_V(ep->hwtid)); 615 616 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 617 flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V 618 (ep->com.dev->rdev.lldi.pf)); 619 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 620 flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); 621 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 622 flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan); 623 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 624 flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid); 625 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; 626 flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq); 627 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 628 flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); 629 flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; 630 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); 631 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 632 flowc->mnemval[7].val = cpu_to_be32(ep->emss); 633 flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; 634 flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); 635 if (nparams == 10) { 636 u16 pri; 637 pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 638 flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 639 flowc->mnemval[9].val = cpu_to_be32(pri); 640 } 641 642 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 643 return c4iw_ofld_send(&ep->com.dev->rdev, skb); 644 } 645 646 static int send_halfclose(struct c4iw_ep *ep) 647 { 648 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); 649 u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); 650 651 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 652 if (WARN_ON(!skb)) 653 return -ENOMEM; 654 655 cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, 656 NULL, arp_failure_discard); 657 658 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 659 } 660 661 static void read_tcb(struct c4iw_ep *ep) 662 { 663 struct sk_buff *skb; 664 struct cpl_get_tcb *req; 665 int wrlen = roundup(sizeof(*req), 16); 666 667 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); 668 if (WARN_ON(!skb)) 669 return; 670 671 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 672 req = (struct cpl_get_tcb *) skb_put(skb, wrlen); 673 memset(req, 0, wrlen); 674 INIT_TP_WR(req, ep->hwtid); 675 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid)); 676 req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid)); 677 678 /* 679 * keep a ref on the ep so the tcb is not unlocked before this 680 * cpl completes. The ref is released in read_tcb_rpl(). 681 */ 682 c4iw_get_ep(&ep->com); 683 if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb))) 684 c4iw_put_ep(&ep->com); 685 } 686 687 static int send_abort_req(struct c4iw_ep *ep) 688 { 689 u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); 690 struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); 691 692 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 693 if (WARN_ON(!req_skb)) 694 return -ENOMEM; 695 696 cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, 697 ep, abort_arp_failure); 698 699 return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); 700 } 701 702 static int send_abort(struct c4iw_ep *ep) 703 { 704 if (!ep->com.qp || !ep->com.qp->srq) { 705 send_abort_req(ep); 706 return 0; 707 } 708 set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags); 709 read_tcb(ep); 710 return 0; 711 } 712 713 static int send_connect(struct c4iw_ep *ep) 714 { 715 struct cpl_act_open_req *req = NULL; 716 struct cpl_t5_act_open_req *t5req = NULL; 717 struct cpl_t6_act_open_req *t6req = NULL; 718 struct cpl_act_open_req6 *req6 = NULL; 719 struct cpl_t5_act_open_req6 *t5req6 = NULL; 720 struct cpl_t6_act_open_req6 *t6req6 = NULL; 721 struct sk_buff *skb; 722 u64 opt0; 723 u32 opt2; 724 unsigned int mtu_idx; 725 u32 wscale; 726 int win, sizev4, sizev6, wrlen; 727 struct sockaddr_in *la = (struct sockaddr_in *) 728 &ep->com.local_addr; 729 struct sockaddr_in *ra = (struct sockaddr_in *) 730 &ep->com.remote_addr; 731 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) 732 &ep->com.local_addr; 733 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) 734 &ep->com.remote_addr; 735 int ret; 736 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; 737 u32 isn = (get_random_u32() & ~7UL) - 1; 738 struct net_device *netdev; 739 u64 params; 740 741 netdev = ep->com.dev->rdev.lldi.ports[0]; 742 743 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 744 case CHELSIO_T4: 745 sizev4 = sizeof(struct cpl_act_open_req); 746 sizev6 = sizeof(struct cpl_act_open_req6); 747 break; 748 case CHELSIO_T5: 749 sizev4 = sizeof(struct cpl_t5_act_open_req); 750 sizev6 = sizeof(struct cpl_t5_act_open_req6); 751 break; 752 case CHELSIO_T6: 753 sizev4 = sizeof(struct cpl_t6_act_open_req); 754 sizev6 = sizeof(struct cpl_t6_act_open_req6); 755 break; 756 default: 757 pr_err("T%d Chip is not supported\n", 758 CHELSIO_CHIP_VERSION(adapter_type)); 759 return -EINVAL; 760 } 761 762 wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? 763 roundup(sizev4, 16) : 764 roundup(sizev6, 16); 765 766 pr_debug("ep %p atid %u\n", ep, ep->atid); 767 768 skb = get_skb(NULL, wrlen, GFP_KERNEL); 769 if (!skb) { 770 pr_err("%s - failed to alloc skb\n", __func__); 771 return -ENOMEM; 772 } 773 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 774 775 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 776 enable_tcp_timestamps, 777 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 778 wscale = cxgb_compute_wscale(rcv_win); 779 780 /* 781 * Specify the largest window that will fit in opt0. The 782 * remainder will be specified in the rx_data_ack. 783 */ 784 win = ep->rcv_win >> 10; 785 if (win > RCV_BUFSIZ_M) 786 win = RCV_BUFSIZ_M; 787 788 opt0 = (nocong ? NO_CONG_F : 0) | 789 KEEP_ALIVE_F | 790 DELACK_F | 791 WND_SCALE_V(wscale) | 792 MSS_IDX_V(mtu_idx) | 793 L2T_IDX_V(ep->l2t->idx) | 794 TX_CHAN_V(ep->tx_chan) | 795 SMAC_SEL_V(ep->smac_idx) | 796 DSCP_V(ep->tos >> 2) | 797 ULP_MODE_V(ULP_MODE_TCPDDP) | 798 RCV_BUFSIZ_V(win); 799 opt2 = RX_CHANNEL_V(0) | 800 CCTRL_ECN_V(enable_ecn) | 801 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); 802 if (enable_tcp_timestamps) 803 opt2 |= TSTAMPS_EN_F; 804 if (enable_tcp_sack) 805 opt2 |= SACK_EN_F; 806 if (wscale && enable_tcp_window_scaling) 807 opt2 |= WND_SCALE_EN_F; 808 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { 809 if (peer2peer) 810 isn += 4; 811 812 opt2 |= T5_OPT_2_VALID_F; 813 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); 814 opt2 |= T5_ISS_F; 815 } 816 817 params = cxgb4_select_ntuple(netdev, ep->l2t); 818 819 if (ep->com.remote_addr.ss_family == AF_INET6) 820 cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], 821 (const u32 *)&la6->sin6_addr.s6_addr, 1); 822 823 t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); 824 825 if (ep->com.remote_addr.ss_family == AF_INET) { 826 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 827 case CHELSIO_T4: 828 req = skb_put(skb, wrlen); 829 INIT_TP_WR(req, 0); 830 break; 831 case CHELSIO_T5: 832 t5req = skb_put(skb, wrlen); 833 INIT_TP_WR(t5req, 0); 834 req = (struct cpl_act_open_req *)t5req; 835 break; 836 case CHELSIO_T6: 837 t6req = skb_put(skb, wrlen); 838 INIT_TP_WR(t6req, 0); 839 req = (struct cpl_act_open_req *)t6req; 840 t5req = (struct cpl_t5_act_open_req *)t6req; 841 break; 842 default: 843 pr_err("T%d Chip is not supported\n", 844 CHELSIO_CHIP_VERSION(adapter_type)); 845 ret = -EINVAL; 846 goto clip_release; 847 } 848 849 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, 850 ((ep->rss_qid<<14) | ep->atid))); 851 req->local_port = la->sin_port; 852 req->peer_port = ra->sin_port; 853 req->local_ip = la->sin_addr.s_addr; 854 req->peer_ip = ra->sin_addr.s_addr; 855 req->opt0 = cpu_to_be64(opt0); 856 857 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { 858 req->params = cpu_to_be32(params); 859 req->opt2 = cpu_to_be32(opt2); 860 } else { 861 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 862 t5req->params = 863 cpu_to_be64(FILTER_TUPLE_V(params)); 864 t5req->rsvd = cpu_to_be32(isn); 865 pr_debug("snd_isn %u\n", t5req->rsvd); 866 t5req->opt2 = cpu_to_be32(opt2); 867 } else { 868 t6req->params = 869 cpu_to_be64(FILTER_TUPLE_V(params)); 870 t6req->rsvd = cpu_to_be32(isn); 871 pr_debug("snd_isn %u\n", t6req->rsvd); 872 t6req->opt2 = cpu_to_be32(opt2); 873 } 874 } 875 } else { 876 switch (CHELSIO_CHIP_VERSION(adapter_type)) { 877 case CHELSIO_T4: 878 req6 = skb_put(skb, wrlen); 879 INIT_TP_WR(req6, 0); 880 break; 881 case CHELSIO_T5: 882 t5req6 = skb_put(skb, wrlen); 883 INIT_TP_WR(t5req6, 0); 884 req6 = (struct cpl_act_open_req6 *)t5req6; 885 break; 886 case CHELSIO_T6: 887 t6req6 = skb_put(skb, wrlen); 888 INIT_TP_WR(t6req6, 0); 889 req6 = (struct cpl_act_open_req6 *)t6req6; 890 t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; 891 break; 892 default: 893 pr_err("T%d Chip is not supported\n", 894 CHELSIO_CHIP_VERSION(adapter_type)); 895 ret = -EINVAL; 896 goto clip_release; 897 } 898 899 OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, 900 ((ep->rss_qid<<14)|ep->atid))); 901 req6->local_port = la6->sin6_port; 902 req6->peer_port = ra6->sin6_port; 903 req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); 904 req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); 905 req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); 906 req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); 907 req6->opt0 = cpu_to_be64(opt0); 908 909 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { 910 req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev, 911 ep->l2t)); 912 req6->opt2 = cpu_to_be32(opt2); 913 } else { 914 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 915 t5req6->params = 916 cpu_to_be64(FILTER_TUPLE_V(params)); 917 t5req6->rsvd = cpu_to_be32(isn); 918 pr_debug("snd_isn %u\n", t5req6->rsvd); 919 t5req6->opt2 = cpu_to_be32(opt2); 920 } else { 921 t6req6->params = 922 cpu_to_be64(FILTER_TUPLE_V(params)); 923 t6req6->rsvd = cpu_to_be32(isn); 924 pr_debug("snd_isn %u\n", t6req6->rsvd); 925 t6req6->opt2 = cpu_to_be32(opt2); 926 } 927 928 } 929 } 930 931 set_bit(ACT_OPEN_REQ, &ep->com.history); 932 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 933 clip_release: 934 if (ret && ep->com.remote_addr.ss_family == AF_INET6) 935 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 936 (const u32 *)&la6->sin6_addr.s6_addr, 1); 937 return ret; 938 } 939 940 static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 941 u8 mpa_rev_to_use) 942 { 943 int mpalen, wrlen, ret; 944 struct fw_ofld_tx_data_wr *req; 945 struct mpa_message *mpa; 946 struct mpa_v2_conn_params mpa_v2_params; 947 948 pr_debug("ep %p tid %u pd_len %d\n", 949 ep, ep->hwtid, ep->plen); 950 951 mpalen = sizeof(*mpa) + ep->plen; 952 if (mpa_rev_to_use == 2) 953 mpalen += sizeof(struct mpa_v2_conn_params); 954 wrlen = roundup(mpalen + sizeof(*req), 16); 955 skb = get_skb(skb, wrlen, GFP_KERNEL); 956 if (!skb) { 957 connect_reply_upcall(ep, -ENOMEM); 958 return -ENOMEM; 959 } 960 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 961 962 req = skb_put_zero(skb, wrlen); 963 req->op_to_immdlen = cpu_to_be32( 964 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 965 FW_WR_COMPL_F | 966 FW_WR_IMMDLEN_V(mpalen)); 967 req->flowid_len16 = cpu_to_be32( 968 FW_WR_FLOWID_V(ep->hwtid) | 969 FW_WR_LEN16_V(wrlen >> 4)); 970 req->plen = cpu_to_be32(mpalen); 971 req->tunnel_to_proxy = cpu_to_be32( 972 FW_OFLD_TX_DATA_WR_FLUSH_F | 973 FW_OFLD_TX_DATA_WR_SHOVE_F); 974 975 mpa = (struct mpa_message *)(req + 1); 976 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 977 978 mpa->flags = 0; 979 if (crc_enabled) 980 mpa->flags |= MPA_CRC; 981 if (markers_enabled) { 982 mpa->flags |= MPA_MARKERS; 983 ep->mpa_attr.recv_marker_enabled = 1; 984 } else { 985 ep->mpa_attr.recv_marker_enabled = 0; 986 } 987 if (mpa_rev_to_use == 2) 988 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 989 990 mpa->private_data_size = htons(ep->plen); 991 mpa->revision = mpa_rev_to_use; 992 if (mpa_rev_to_use == 1) { 993 ep->tried_with_mpa_v1 = 1; 994 ep->retry_with_mpa_v1 = 0; 995 } 996 997 if (mpa_rev_to_use == 2) { 998 mpa->private_data_size = 999 htons(ntohs(mpa->private_data_size) + 1000 sizeof(struct mpa_v2_conn_params)); 1001 pr_debug("initiator ird %u ord %u\n", ep->ird, 1002 ep->ord); 1003 mpa_v2_params.ird = htons((u16)ep->ird); 1004 mpa_v2_params.ord = htons((u16)ep->ord); 1005 1006 if (peer2peer) { 1007 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1008 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) 1009 mpa_v2_params.ord |= 1010 htons(MPA_V2_RDMA_WRITE_RTR); 1011 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) 1012 mpa_v2_params.ord |= 1013 htons(MPA_V2_RDMA_READ_RTR); 1014 } 1015 memcpy(mpa->private_data, &mpa_v2_params, 1016 sizeof(struct mpa_v2_conn_params)); 1017 1018 if (ep->plen) 1019 memcpy(mpa->private_data + 1020 sizeof(struct mpa_v2_conn_params), 1021 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1022 } else 1023 if (ep->plen) 1024 memcpy(mpa->private_data, 1025 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1026 1027 /* 1028 * Reference the mpa skb. This ensures the data area 1029 * will remain in memory until the hw acks the tx. 1030 * Function fw4_ack() will deref it. 1031 */ 1032 skb_get(skb); 1033 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1034 ep->mpa_skb = skb; 1035 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1036 if (ret) 1037 return ret; 1038 start_ep_timer(ep); 1039 __state_set(&ep->com, MPA_REQ_SENT); 1040 ep->mpa_attr.initiator = 1; 1041 ep->snd_seq += mpalen; 1042 return ret; 1043 } 1044 1045 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1046 { 1047 int mpalen, wrlen; 1048 struct fw_ofld_tx_data_wr *req; 1049 struct mpa_message *mpa; 1050 struct sk_buff *skb; 1051 struct mpa_v2_conn_params mpa_v2_params; 1052 1053 pr_debug("ep %p tid %u pd_len %d\n", 1054 ep, ep->hwtid, ep->plen); 1055 1056 mpalen = sizeof(*mpa) + plen; 1057 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) 1058 mpalen += sizeof(struct mpa_v2_conn_params); 1059 wrlen = roundup(mpalen + sizeof(*req), 16); 1060 1061 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1062 if (!skb) { 1063 pr_err("%s - cannot alloc skb!\n", __func__); 1064 return -ENOMEM; 1065 } 1066 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1067 1068 req = skb_put_zero(skb, wrlen); 1069 req->op_to_immdlen = cpu_to_be32( 1070 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 1071 FW_WR_COMPL_F | 1072 FW_WR_IMMDLEN_V(mpalen)); 1073 req->flowid_len16 = cpu_to_be32( 1074 FW_WR_FLOWID_V(ep->hwtid) | 1075 FW_WR_LEN16_V(wrlen >> 4)); 1076 req->plen = cpu_to_be32(mpalen); 1077 req->tunnel_to_proxy = cpu_to_be32( 1078 FW_OFLD_TX_DATA_WR_FLUSH_F | 1079 FW_OFLD_TX_DATA_WR_SHOVE_F); 1080 1081 mpa = (struct mpa_message *)(req + 1); 1082 memset(mpa, 0, sizeof(*mpa)); 1083 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1084 mpa->flags = MPA_REJECT; 1085 mpa->revision = ep->mpa_attr.version; 1086 mpa->private_data_size = htons(plen); 1087 1088 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1089 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1090 mpa->private_data_size = 1091 htons(ntohs(mpa->private_data_size) + 1092 sizeof(struct mpa_v2_conn_params)); 1093 mpa_v2_params.ird = htons(((u16)ep->ird) | 1094 (peer2peer ? MPA_V2_PEER2PEER_MODEL : 1095 0)); 1096 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? 1097 (p2p_type == 1098 FW_RI_INIT_P2PTYPE_RDMA_WRITE ? 1099 MPA_V2_RDMA_WRITE_RTR : p2p_type == 1100 FW_RI_INIT_P2PTYPE_READ_REQ ? 1101 MPA_V2_RDMA_READ_RTR : 0) : 0)); 1102 memcpy(mpa->private_data, &mpa_v2_params, 1103 sizeof(struct mpa_v2_conn_params)); 1104 1105 if (ep->plen) 1106 memcpy(mpa->private_data + 1107 sizeof(struct mpa_v2_conn_params), pdata, plen); 1108 } else 1109 if (plen) 1110 memcpy(mpa->private_data, pdata, plen); 1111 1112 /* 1113 * Reference the mpa skb again. This ensures the data area 1114 * will remain in memory until the hw acks the tx. 1115 * Function fw4_ack() will deref it. 1116 */ 1117 skb_get(skb); 1118 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1119 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1120 ep->mpa_skb = skb; 1121 ep->snd_seq += mpalen; 1122 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1123 } 1124 1125 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) 1126 { 1127 int mpalen, wrlen; 1128 struct fw_ofld_tx_data_wr *req; 1129 struct mpa_message *mpa; 1130 struct sk_buff *skb; 1131 struct mpa_v2_conn_params mpa_v2_params; 1132 1133 pr_debug("ep %p tid %u pd_len %d\n", 1134 ep, ep->hwtid, ep->plen); 1135 1136 mpalen = sizeof(*mpa) + plen; 1137 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) 1138 mpalen += sizeof(struct mpa_v2_conn_params); 1139 wrlen = roundup(mpalen + sizeof(*req), 16); 1140 1141 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1142 if (!skb) { 1143 pr_err("%s - cannot alloc skb!\n", __func__); 1144 return -ENOMEM; 1145 } 1146 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1147 1148 req = skb_put_zero(skb, wrlen); 1149 req->op_to_immdlen = cpu_to_be32( 1150 FW_WR_OP_V(FW_OFLD_TX_DATA_WR) | 1151 FW_WR_COMPL_F | 1152 FW_WR_IMMDLEN_V(mpalen)); 1153 req->flowid_len16 = cpu_to_be32( 1154 FW_WR_FLOWID_V(ep->hwtid) | 1155 FW_WR_LEN16_V(wrlen >> 4)); 1156 req->plen = cpu_to_be32(mpalen); 1157 req->tunnel_to_proxy = cpu_to_be32( 1158 FW_OFLD_TX_DATA_WR_FLUSH_F | 1159 FW_OFLD_TX_DATA_WR_SHOVE_F); 1160 1161 mpa = (struct mpa_message *)(req + 1); 1162 memset(mpa, 0, sizeof(*mpa)); 1163 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1164 mpa->flags = 0; 1165 if (ep->mpa_attr.crc_enabled) 1166 mpa->flags |= MPA_CRC; 1167 if (ep->mpa_attr.recv_marker_enabled) 1168 mpa->flags |= MPA_MARKERS; 1169 mpa->revision = ep->mpa_attr.version; 1170 mpa->private_data_size = htons(plen); 1171 1172 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1173 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1174 mpa->private_data_size = 1175 htons(ntohs(mpa->private_data_size) + 1176 sizeof(struct mpa_v2_conn_params)); 1177 mpa_v2_params.ird = htons((u16)ep->ird); 1178 mpa_v2_params.ord = htons((u16)ep->ord); 1179 if (peer2peer && (ep->mpa_attr.p2p_type != 1180 FW_RI_INIT_P2PTYPE_DISABLED)) { 1181 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1182 1183 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) 1184 mpa_v2_params.ord |= 1185 htons(MPA_V2_RDMA_WRITE_RTR); 1186 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) 1187 mpa_v2_params.ord |= 1188 htons(MPA_V2_RDMA_READ_RTR); 1189 } 1190 1191 memcpy(mpa->private_data, &mpa_v2_params, 1192 sizeof(struct mpa_v2_conn_params)); 1193 1194 if (ep->plen) 1195 memcpy(mpa->private_data + 1196 sizeof(struct mpa_v2_conn_params), pdata, plen); 1197 } else 1198 if (plen) 1199 memcpy(mpa->private_data, pdata, plen); 1200 1201 /* 1202 * Reference the mpa skb. This ensures the data area 1203 * will remain in memory until the hw acks the tx. 1204 * Function fw4_ack() will deref it. 1205 */ 1206 skb_get(skb); 1207 t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1208 ep->mpa_skb = skb; 1209 __state_set(&ep->com, MPA_REP_SENT); 1210 ep->snd_seq += mpalen; 1211 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1212 } 1213 1214 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) 1215 { 1216 struct c4iw_ep *ep; 1217 struct cpl_act_establish *req = cplhdr(skb); 1218 unsigned short tcp_opt = ntohs(req->tcp_opt); 1219 unsigned int tid = GET_TID(req); 1220 unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); 1221 struct tid_info *t = dev->rdev.lldi.tids; 1222 int ret; 1223 1224 ep = lookup_atid(t, atid); 1225 1226 pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid, 1227 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); 1228 1229 mutex_lock(&ep->com.mutex); 1230 dst_confirm(ep->dst); 1231 1232 /* setup the hwtid for this connection */ 1233 ep->hwtid = tid; 1234 cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family); 1235 insert_ep_tid(ep); 1236 1237 ep->snd_seq = be32_to_cpu(req->snd_isn); 1238 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 1239 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); 1240 1241 set_emss(ep, tcp_opt); 1242 1243 /* dealloc the atid */ 1244 xa_erase_irq(&ep->com.dev->atids, atid); 1245 cxgb4_free_atid(t, atid); 1246 set_bit(ACT_ESTAB, &ep->com.history); 1247 1248 /* start MPA negotiation */ 1249 ret = send_flowc(ep); 1250 if (ret) 1251 goto err; 1252 if (ep->retry_with_mpa_v1) 1253 ret = send_mpa_req(ep, skb, 1); 1254 else 1255 ret = send_mpa_req(ep, skb, mpa_rev); 1256 if (ret) 1257 goto err; 1258 mutex_unlock(&ep->com.mutex); 1259 return 0; 1260 err: 1261 mutex_unlock(&ep->com.mutex); 1262 connect_reply_upcall(ep, -ENOMEM); 1263 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 1264 return 0; 1265 } 1266 1267 static void close_complete_upcall(struct c4iw_ep *ep, int status) 1268 { 1269 struct iw_cm_event event; 1270 1271 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1272 memset(&event, 0, sizeof(event)); 1273 event.event = IW_CM_EVENT_CLOSE; 1274 event.status = status; 1275 if (ep->com.cm_id) { 1276 pr_debug("close complete delivered ep %p cm_id %p tid %u\n", 1277 ep, ep->com.cm_id, ep->hwtid); 1278 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1279 deref_cm_id(&ep->com); 1280 set_bit(CLOSE_UPCALL, &ep->com.history); 1281 } 1282 } 1283 1284 static void peer_close_upcall(struct c4iw_ep *ep) 1285 { 1286 struct iw_cm_event event; 1287 1288 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1289 memset(&event, 0, sizeof(event)); 1290 event.event = IW_CM_EVENT_DISCONNECT; 1291 if (ep->com.cm_id) { 1292 pr_debug("peer close delivered ep %p cm_id %p tid %u\n", 1293 ep, ep->com.cm_id, ep->hwtid); 1294 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1295 set_bit(DISCONN_UPCALL, &ep->com.history); 1296 } 1297 } 1298 1299 static void peer_abort_upcall(struct c4iw_ep *ep) 1300 { 1301 struct iw_cm_event event; 1302 1303 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1304 memset(&event, 0, sizeof(event)); 1305 event.event = IW_CM_EVENT_CLOSE; 1306 event.status = -ECONNRESET; 1307 if (ep->com.cm_id) { 1308 pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep, 1309 ep->com.cm_id, ep->hwtid); 1310 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1311 deref_cm_id(&ep->com); 1312 set_bit(ABORT_UPCALL, &ep->com.history); 1313 } 1314 } 1315 1316 static void connect_reply_upcall(struct c4iw_ep *ep, int status) 1317 { 1318 struct iw_cm_event event; 1319 1320 pr_debug("ep %p tid %u status %d\n", 1321 ep, ep->hwtid, status); 1322 memset(&event, 0, sizeof(event)); 1323 event.event = IW_CM_EVENT_CONNECT_REPLY; 1324 event.status = status; 1325 memcpy(&event.local_addr, &ep->com.local_addr, 1326 sizeof(ep->com.local_addr)); 1327 memcpy(&event.remote_addr, &ep->com.remote_addr, 1328 sizeof(ep->com.remote_addr)); 1329 1330 if ((status == 0) || (status == -ECONNREFUSED)) { 1331 if (!ep->tried_with_mpa_v1) { 1332 /* this means MPA_v2 is used */ 1333 event.ord = ep->ird; 1334 event.ird = ep->ord; 1335 event.private_data_len = ep->plen - 1336 sizeof(struct mpa_v2_conn_params); 1337 event.private_data = ep->mpa_pkt + 1338 sizeof(struct mpa_message) + 1339 sizeof(struct mpa_v2_conn_params); 1340 } else { 1341 /* this means MPA_v1 is used */ 1342 event.ord = cur_max_read_depth(ep->com.dev); 1343 event.ird = cur_max_read_depth(ep->com.dev); 1344 event.private_data_len = ep->plen; 1345 event.private_data = ep->mpa_pkt + 1346 sizeof(struct mpa_message); 1347 } 1348 } 1349 1350 pr_debug("ep %p tid %u status %d\n", ep, 1351 ep->hwtid, status); 1352 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1353 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1354 1355 if (status < 0) 1356 deref_cm_id(&ep->com); 1357 } 1358 1359 static int connect_request_upcall(struct c4iw_ep *ep) 1360 { 1361 struct iw_cm_event event; 1362 int ret; 1363 1364 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1365 memset(&event, 0, sizeof(event)); 1366 event.event = IW_CM_EVENT_CONNECT_REQUEST; 1367 memcpy(&event.local_addr, &ep->com.local_addr, 1368 sizeof(ep->com.local_addr)); 1369 memcpy(&event.remote_addr, &ep->com.remote_addr, 1370 sizeof(ep->com.remote_addr)); 1371 event.provider_data = ep; 1372 if (!ep->tried_with_mpa_v1) { 1373 /* this means MPA_v2 is used */ 1374 event.ord = ep->ord; 1375 event.ird = ep->ird; 1376 event.private_data_len = ep->plen - 1377 sizeof(struct mpa_v2_conn_params); 1378 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + 1379 sizeof(struct mpa_v2_conn_params); 1380 } else { 1381 /* this means MPA_v1 is used. Send max supported */ 1382 event.ord = cur_max_read_depth(ep->com.dev); 1383 event.ird = cur_max_read_depth(ep->com.dev); 1384 event.private_data_len = ep->plen; 1385 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 1386 } 1387 c4iw_get_ep(&ep->com); 1388 ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, 1389 &event); 1390 if (ret) 1391 c4iw_put_ep(&ep->com); 1392 set_bit(CONNREQ_UPCALL, &ep->com.history); 1393 c4iw_put_ep(&ep->parent_ep->com); 1394 return ret; 1395 } 1396 1397 static void established_upcall(struct c4iw_ep *ep) 1398 { 1399 struct iw_cm_event event; 1400 1401 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1402 memset(&event, 0, sizeof(event)); 1403 event.event = IW_CM_EVENT_ESTABLISHED; 1404 event.ird = ep->ord; 1405 event.ord = ep->ird; 1406 if (ep->com.cm_id) { 1407 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1408 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1409 set_bit(ESTAB_UPCALL, &ep->com.history); 1410 } 1411 } 1412 1413 static int update_rx_credits(struct c4iw_ep *ep, u32 credits) 1414 { 1415 struct sk_buff *skb; 1416 u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); 1417 u32 credit_dack; 1418 1419 pr_debug("ep %p tid %u credits %u\n", 1420 ep, ep->hwtid, credits); 1421 skb = get_skb(NULL, wrlen, GFP_KERNEL); 1422 if (!skb) { 1423 pr_err("update_rx_credits - cannot alloc skb!\n"); 1424 return 0; 1425 } 1426 1427 /* 1428 * If we couldn't specify the entire rcv window at connection setup 1429 * due to the limit in the number of bits in the RCV_BUFSIZ field, 1430 * then add the overage in to the credits returned. 1431 */ 1432 if (ep->rcv_win > RCV_BUFSIZ_M * 1024) 1433 credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; 1434 1435 credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | 1436 RX_DACK_MODE_V(dack_mode); 1437 1438 cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, 1439 credit_dack); 1440 1441 c4iw_ofld_send(&ep->com.dev->rdev, skb); 1442 return credits; 1443 } 1444 1445 #define RELAXED_IRD_NEGOTIATION 1 1446 1447 /* 1448 * process_mpa_reply - process streaming mode MPA reply 1449 * 1450 * Returns: 1451 * 1452 * 0 upon success indicating a connect request was delivered to the ULP 1453 * or the mpa request is incomplete but valid so far. 1454 * 1455 * 1 if a failure requires the caller to close the connection. 1456 * 1457 * 2 if a failure requires the caller to abort the connection. 1458 */ 1459 static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) 1460 { 1461 struct mpa_message *mpa; 1462 struct mpa_v2_conn_params *mpa_v2_params; 1463 u16 plen; 1464 u16 resp_ird, resp_ord; 1465 u8 rtr_mismatch = 0, insuff_ird = 0; 1466 struct c4iw_qp_attributes attrs; 1467 enum c4iw_qp_attr_mask mask; 1468 int err; 1469 int disconnect = 0; 1470 1471 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1472 1473 /* 1474 * If we get more than the supported amount of private data 1475 * then we must fail this connection. 1476 */ 1477 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { 1478 err = -EINVAL; 1479 goto err_stop_timer; 1480 } 1481 1482 /* 1483 * copy the new data into our accumulation buffer. 1484 */ 1485 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), 1486 skb->len); 1487 ep->mpa_pkt_len += skb->len; 1488 1489 /* 1490 * if we don't even have the mpa message, then bail. 1491 */ 1492 if (ep->mpa_pkt_len < sizeof(*mpa)) 1493 return 0; 1494 mpa = (struct mpa_message *) ep->mpa_pkt; 1495 1496 /* Validate MPA header. */ 1497 if (mpa->revision > mpa_rev) { 1498 pr_err("%s MPA version mismatch. Local = %d, Received = %d\n", 1499 __func__, mpa_rev, mpa->revision); 1500 err = -EPROTO; 1501 goto err_stop_timer; 1502 } 1503 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1504 err = -EPROTO; 1505 goto err_stop_timer; 1506 } 1507 1508 plen = ntohs(mpa->private_data_size); 1509 1510 /* 1511 * Fail if there's too much private data. 1512 */ 1513 if (plen > MPA_MAX_PRIVATE_DATA) { 1514 err = -EPROTO; 1515 goto err_stop_timer; 1516 } 1517 1518 /* 1519 * If plen does not account for pkt size 1520 */ 1521 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1522 err = -EPROTO; 1523 goto err_stop_timer; 1524 } 1525 1526 ep->plen = (u8) plen; 1527 1528 /* 1529 * If we don't have all the pdata yet, then bail. 1530 * We'll continue process when more data arrives. 1531 */ 1532 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1533 return 0; 1534 1535 if (mpa->flags & MPA_REJECT) { 1536 err = -ECONNREFUSED; 1537 goto err_stop_timer; 1538 } 1539 1540 /* 1541 * Stop mpa timer. If it expired, then 1542 * we ignore the MPA reply. process_timeout() 1543 * will abort the connection. 1544 */ 1545 if (stop_ep_timer(ep)) 1546 return 0; 1547 1548 /* 1549 * If we get here we have accumulated the entire mpa 1550 * start reply message including private data. And 1551 * the MPA header is valid. 1552 */ 1553 __state_set(&ep->com, FPDU_MODE); 1554 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1555 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1556 ep->mpa_attr.version = mpa->revision; 1557 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1558 1559 if (mpa->revision == 2) { 1560 ep->mpa_attr.enhanced_rdma_conn = 1561 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; 1562 if (ep->mpa_attr.enhanced_rdma_conn) { 1563 mpa_v2_params = (struct mpa_v2_conn_params *) 1564 (ep->mpa_pkt + sizeof(*mpa)); 1565 resp_ird = ntohs(mpa_v2_params->ird) & 1566 MPA_V2_IRD_ORD_MASK; 1567 resp_ord = ntohs(mpa_v2_params->ord) & 1568 MPA_V2_IRD_ORD_MASK; 1569 pr_debug("responder ird %u ord %u ep ird %u ord %u\n", 1570 resp_ird, resp_ord, ep->ird, ep->ord); 1571 1572 /* 1573 * This is a double-check. Ideally, below checks are 1574 * not required since ird/ord stuff has been taken 1575 * care of in c4iw_accept_cr 1576 */ 1577 if (ep->ird < resp_ord) { 1578 if (RELAXED_IRD_NEGOTIATION && resp_ord <= 1579 ep->com.dev->rdev.lldi.max_ordird_qp) 1580 ep->ird = resp_ord; 1581 else 1582 insuff_ird = 1; 1583 } else if (ep->ird > resp_ord) { 1584 ep->ird = resp_ord; 1585 } 1586 if (ep->ord > resp_ird) { 1587 if (RELAXED_IRD_NEGOTIATION) 1588 ep->ord = resp_ird; 1589 else 1590 insuff_ird = 1; 1591 } 1592 if (insuff_ird) { 1593 err = -ENOMEM; 1594 ep->ird = resp_ord; 1595 ep->ord = resp_ird; 1596 } 1597 1598 if (ntohs(mpa_v2_params->ird) & 1599 MPA_V2_PEER2PEER_MODEL) { 1600 if (ntohs(mpa_v2_params->ord) & 1601 MPA_V2_RDMA_WRITE_RTR) 1602 ep->mpa_attr.p2p_type = 1603 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1604 else if (ntohs(mpa_v2_params->ord) & 1605 MPA_V2_RDMA_READ_RTR) 1606 ep->mpa_attr.p2p_type = 1607 FW_RI_INIT_P2PTYPE_READ_REQ; 1608 } 1609 } 1610 } else if (mpa->revision == 1) 1611 if (peer2peer) 1612 ep->mpa_attr.p2p_type = p2p_type; 1613 1614 pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n", 1615 ep->mpa_attr.crc_enabled, 1616 ep->mpa_attr.recv_marker_enabled, 1617 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1618 ep->mpa_attr.p2p_type, p2p_type); 1619 1620 /* 1621 * If responder's RTR does not match with that of initiator, assign 1622 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not 1623 * generated when moving QP to RTS state. 1624 * A TERM message will be sent after QP has moved to RTS state 1625 */ 1626 if ((ep->mpa_attr.version == 2) && peer2peer && 1627 (ep->mpa_attr.p2p_type != p2p_type)) { 1628 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1629 rtr_mismatch = 1; 1630 } 1631 1632 attrs.mpa_attr = ep->mpa_attr; 1633 attrs.max_ird = ep->ird; 1634 attrs.max_ord = ep->ord; 1635 attrs.llp_stream_handle = ep; 1636 attrs.next_state = C4IW_QP_STATE_RTS; 1637 1638 mask = C4IW_QP_ATTR_NEXT_STATE | 1639 C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | 1640 C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; 1641 1642 /* bind QP and TID with INIT_WR */ 1643 err = c4iw_modify_qp(ep->com.qp->rhp, 1644 ep->com.qp, mask, &attrs, 1); 1645 if (err) 1646 goto err; 1647 1648 /* 1649 * If responder's RTR requirement did not match with what initiator 1650 * supports, generate TERM message 1651 */ 1652 if (rtr_mismatch) { 1653 pr_err("%s: RTR mismatch, sending TERM\n", __func__); 1654 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1655 attrs.ecode = MPA_NOMATCH_RTR; 1656 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1657 attrs.send_term = 1; 1658 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1659 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 1660 err = -ENOMEM; 1661 disconnect = 1; 1662 goto out; 1663 } 1664 1665 /* 1666 * Generate TERM if initiator IRD is not sufficient for responder 1667 * provided ORD. Currently, we do the same behaviour even when 1668 * responder provided IRD is also not sufficient as regards to 1669 * initiator ORD. 1670 */ 1671 if (insuff_ird) { 1672 pr_err("%s: Insufficient IRD, sending TERM\n", __func__); 1673 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1674 attrs.ecode = MPA_INSUFF_IRD; 1675 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1676 attrs.send_term = 1; 1677 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1678 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 1679 err = -ENOMEM; 1680 disconnect = 1; 1681 goto out; 1682 } 1683 goto out; 1684 err_stop_timer: 1685 stop_ep_timer(ep); 1686 err: 1687 disconnect = 2; 1688 out: 1689 connect_reply_upcall(ep, err); 1690 return disconnect; 1691 } 1692 1693 /* 1694 * process_mpa_request - process streaming mode MPA request 1695 * 1696 * Returns: 1697 * 1698 * 0 upon success indicating a connect request was delivered to the ULP 1699 * or the mpa request is incomplete but valid so far. 1700 * 1701 * 1 if a failure requires the caller to close the connection. 1702 * 1703 * 2 if a failure requires the caller to abort the connection. 1704 */ 1705 static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) 1706 { 1707 struct mpa_message *mpa; 1708 struct mpa_v2_conn_params *mpa_v2_params; 1709 u16 plen; 1710 1711 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1712 1713 /* 1714 * If we get more than the supported amount of private data 1715 * then we must fail this connection. 1716 */ 1717 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) 1718 goto err_stop_timer; 1719 1720 pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); 1721 1722 /* 1723 * Copy the new data into our accumulation buffer. 1724 */ 1725 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), 1726 skb->len); 1727 ep->mpa_pkt_len += skb->len; 1728 1729 /* 1730 * If we don't even have the mpa message, then bail. 1731 * We'll continue process when more data arrives. 1732 */ 1733 if (ep->mpa_pkt_len < sizeof(*mpa)) 1734 return 0; 1735 1736 pr_debug("enter (%s line %u)\n", __FILE__, __LINE__); 1737 mpa = (struct mpa_message *) ep->mpa_pkt; 1738 1739 /* 1740 * Validate MPA Header. 1741 */ 1742 if (mpa->revision > mpa_rev) { 1743 pr_err("%s MPA version mismatch. Local = %d, Received = %d\n", 1744 __func__, mpa_rev, mpa->revision); 1745 goto err_stop_timer; 1746 } 1747 1748 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1749 goto err_stop_timer; 1750 1751 plen = ntohs(mpa->private_data_size); 1752 1753 /* 1754 * Fail if there's too much private data. 1755 */ 1756 if (plen > MPA_MAX_PRIVATE_DATA) 1757 goto err_stop_timer; 1758 1759 /* 1760 * If plen does not account for pkt size 1761 */ 1762 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1763 goto err_stop_timer; 1764 ep->plen = (u8) plen; 1765 1766 /* 1767 * If we don't have all the pdata yet, then bail. 1768 */ 1769 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1770 return 0; 1771 1772 /* 1773 * If we get here we have accumulated the entire mpa 1774 * start reply message including private data. 1775 */ 1776 ep->mpa_attr.initiator = 0; 1777 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1778 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1779 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1780 ep->mpa_attr.version = mpa->revision; 1781 if (mpa->revision == 1) 1782 ep->tried_with_mpa_v1 = 1; 1783 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1784 1785 if (mpa->revision == 2) { 1786 ep->mpa_attr.enhanced_rdma_conn = 1787 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; 1788 if (ep->mpa_attr.enhanced_rdma_conn) { 1789 mpa_v2_params = (struct mpa_v2_conn_params *) 1790 (ep->mpa_pkt + sizeof(*mpa)); 1791 ep->ird = ntohs(mpa_v2_params->ird) & 1792 MPA_V2_IRD_ORD_MASK; 1793 ep->ird = min_t(u32, ep->ird, 1794 cur_max_read_depth(ep->com.dev)); 1795 ep->ord = ntohs(mpa_v2_params->ord) & 1796 MPA_V2_IRD_ORD_MASK; 1797 ep->ord = min_t(u32, ep->ord, 1798 cur_max_read_depth(ep->com.dev)); 1799 pr_debug("initiator ird %u ord %u\n", 1800 ep->ird, ep->ord); 1801 if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) 1802 if (peer2peer) { 1803 if (ntohs(mpa_v2_params->ord) & 1804 MPA_V2_RDMA_WRITE_RTR) 1805 ep->mpa_attr.p2p_type = 1806 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1807 else if (ntohs(mpa_v2_params->ord) & 1808 MPA_V2_RDMA_READ_RTR) 1809 ep->mpa_attr.p2p_type = 1810 FW_RI_INIT_P2PTYPE_READ_REQ; 1811 } 1812 } 1813 } else if (mpa->revision == 1) 1814 if (peer2peer) 1815 ep->mpa_attr.p2p_type = p2p_type; 1816 1817 pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n", 1818 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1819 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1820 ep->mpa_attr.p2p_type); 1821 1822 __state_set(&ep->com, MPA_REQ_RCVD); 1823 1824 /* drive upcall */ 1825 mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING); 1826 if (ep->parent_ep->com.state != DEAD) { 1827 if (connect_request_upcall(ep)) 1828 goto err_unlock_parent; 1829 } else { 1830 goto err_unlock_parent; 1831 } 1832 mutex_unlock(&ep->parent_ep->com.mutex); 1833 return 0; 1834 1835 err_unlock_parent: 1836 mutex_unlock(&ep->parent_ep->com.mutex); 1837 goto err_out; 1838 err_stop_timer: 1839 (void)stop_ep_timer(ep); 1840 err_out: 1841 return 2; 1842 } 1843 1844 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) 1845 { 1846 struct c4iw_ep *ep; 1847 struct cpl_rx_data *hdr = cplhdr(skb); 1848 unsigned int dlen = ntohs(hdr->len); 1849 unsigned int tid = GET_TID(hdr); 1850 __u8 status = hdr->status; 1851 int disconnect = 0; 1852 1853 ep = get_ep_from_tid(dev, tid); 1854 if (!ep) 1855 return 0; 1856 pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen); 1857 skb_pull(skb, sizeof(*hdr)); 1858 skb_trim(skb, dlen); 1859 mutex_lock(&ep->com.mutex); 1860 1861 switch (ep->com.state) { 1862 case MPA_REQ_SENT: 1863 update_rx_credits(ep, dlen); 1864 ep->rcv_seq += dlen; 1865 disconnect = process_mpa_reply(ep, skb); 1866 break; 1867 case MPA_REQ_WAIT: 1868 update_rx_credits(ep, dlen); 1869 ep->rcv_seq += dlen; 1870 disconnect = process_mpa_request(ep, skb); 1871 break; 1872 case FPDU_MODE: { 1873 struct c4iw_qp_attributes attrs; 1874 1875 update_rx_credits(ep, dlen); 1876 if (status) 1877 pr_err("%s Unexpected streaming data." \ 1878 " qpid %u ep %p state %d tid %u status %d\n", 1879 __func__, ep->com.qp->wq.sq.qid, ep, 1880 ep->com.state, ep->hwtid, status); 1881 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1882 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1883 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 1884 disconnect = 1; 1885 break; 1886 } 1887 default: 1888 break; 1889 } 1890 mutex_unlock(&ep->com.mutex); 1891 if (disconnect) 1892 c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); 1893 c4iw_put_ep(&ep->com); 1894 return 0; 1895 } 1896 1897 static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx) 1898 { 1899 enum chip_type adapter_type; 1900 1901 adapter_type = ep->com.dev->rdev.lldi.adapter_type; 1902 1903 /* 1904 * If this TCB had a srq buffer cached, then we must complete 1905 * it. For user mode, that means saving the srqidx in the 1906 * user/kernel status page for this qp. For kernel mode, just 1907 * synthesize the CQE now. 1908 */ 1909 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { 1910 if (ep->com.qp->ibqp.uobject) 1911 t4_set_wq_in_error(&ep->com.qp->wq, srqidx); 1912 else 1913 c4iw_flush_srqidx(ep->com.qp, srqidx); 1914 } 1915 } 1916 1917 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 1918 { 1919 u32 srqidx; 1920 struct c4iw_ep *ep; 1921 struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); 1922 int release = 0; 1923 unsigned int tid = GET_TID(rpl); 1924 1925 ep = get_ep_from_tid(dev, tid); 1926 if (!ep) { 1927 pr_warn("Abort rpl to freed endpoint\n"); 1928 return 0; 1929 } 1930 1931 if (ep->com.qp && ep->com.qp->srq) { 1932 srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status)); 1933 complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx); 1934 } 1935 1936 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1937 mutex_lock(&ep->com.mutex); 1938 switch (ep->com.state) { 1939 case ABORTING: 1940 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 1941 __state_set(&ep->com, DEAD); 1942 release = 1; 1943 break; 1944 default: 1945 pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); 1946 break; 1947 } 1948 mutex_unlock(&ep->com.mutex); 1949 1950 if (release) { 1951 close_complete_upcall(ep, -ECONNRESET); 1952 release_ep_resources(ep); 1953 } 1954 c4iw_put_ep(&ep->com); 1955 return 0; 1956 } 1957 1958 static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) 1959 { 1960 struct sk_buff *skb; 1961 struct fw_ofld_connection_wr *req; 1962 unsigned int mtu_idx; 1963 u32 wscale; 1964 struct sockaddr_in *sin; 1965 int win; 1966 1967 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); 1968 if (!skb) 1969 return -ENOMEM; 1970 1971 req = __skb_put_zero(skb, sizeof(*req)); 1972 req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); 1973 req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); 1974 req->le.filter = cpu_to_be32(cxgb4_select_ntuple( 1975 ep->com.dev->rdev.lldi.ports[0], 1976 ep->l2t)); 1977 sin = (struct sockaddr_in *)&ep->com.local_addr; 1978 req->le.lport = sin->sin_port; 1979 req->le.u.ipv4.lip = sin->sin_addr.s_addr; 1980 sin = (struct sockaddr_in *)&ep->com.remote_addr; 1981 req->le.pport = sin->sin_port; 1982 req->le.u.ipv4.pip = sin->sin_addr.s_addr; 1983 req->tcb.t_state_to_astid = 1984 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) | 1985 FW_OFLD_CONNECTION_WR_ASTID_V(atid)); 1986 req->tcb.cplrxdataack_cplpassacceptrpl = 1987 htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); 1988 req->tcb.tx_max = (__force __be32) jiffies; 1989 req->tcb.rcv_adv = htons(1); 1990 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 1991 enable_tcp_timestamps, 1992 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 1993 wscale = cxgb_compute_wscale(rcv_win); 1994 1995 /* 1996 * Specify the largest window that will fit in opt0. The 1997 * remainder will be specified in the rx_data_ack. 1998 */ 1999 win = ep->rcv_win >> 10; 2000 if (win > RCV_BUFSIZ_M) 2001 win = RCV_BUFSIZ_M; 2002 2003 req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F | 2004 (nocong ? NO_CONG_F : 0) | 2005 KEEP_ALIVE_F | 2006 DELACK_F | 2007 WND_SCALE_V(wscale) | 2008 MSS_IDX_V(mtu_idx) | 2009 L2T_IDX_V(ep->l2t->idx) | 2010 TX_CHAN_V(ep->tx_chan) | 2011 SMAC_SEL_V(ep->smac_idx) | 2012 DSCP_V(ep->tos >> 2) | 2013 ULP_MODE_V(ULP_MODE_TCPDDP) | 2014 RCV_BUFSIZ_V(win)); 2015 req->tcb.opt2 = (__force __be32) (PACE_V(1) | 2016 TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | 2017 RX_CHANNEL_V(0) | 2018 CCTRL_ECN_V(enable_ecn) | 2019 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid)); 2020 if (enable_tcp_timestamps) 2021 req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F; 2022 if (enable_tcp_sack) 2023 req->tcb.opt2 |= (__force __be32)SACK_EN_F; 2024 if (wscale && enable_tcp_window_scaling) 2025 req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F; 2026 req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0); 2027 req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); 2028 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 2029 set_bit(ACT_OFLD_CONN, &ep->com.history); 2030 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2031 } 2032 2033 /* 2034 * Some of the error codes above implicitly indicate that there is no TID 2035 * allocated with the result of an ACT_OPEN. We use this predicate to make 2036 * that explicit. 2037 */ 2038 static inline int act_open_has_tid(int status) 2039 { 2040 return (status != CPL_ERR_TCAM_PARITY && 2041 status != CPL_ERR_TCAM_MISS && 2042 status != CPL_ERR_TCAM_FULL && 2043 status != CPL_ERR_CONN_EXIST_SYNRECV && 2044 status != CPL_ERR_CONN_EXIST); 2045 } 2046 2047 static char *neg_adv_str(unsigned int status) 2048 { 2049 switch (status) { 2050 case CPL_ERR_RTX_NEG_ADVICE: 2051 return "Retransmit timeout"; 2052 case CPL_ERR_PERSIST_NEG_ADVICE: 2053 return "Persist timeout"; 2054 case CPL_ERR_KEEPALV_NEG_ADVICE: 2055 return "Keepalive timeout"; 2056 default: 2057 return "Unknown"; 2058 } 2059 } 2060 2061 static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) 2062 { 2063 ep->snd_win = snd_win; 2064 ep->rcv_win = rcv_win; 2065 pr_debug("snd_win %d rcv_win %d\n", 2066 ep->snd_win, ep->rcv_win); 2067 } 2068 2069 #define ACT_OPEN_RETRY_COUNT 2 2070 2071 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, 2072 struct dst_entry *dst, struct c4iw_dev *cdev, 2073 bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) 2074 { 2075 struct neighbour *n; 2076 int err, step; 2077 struct net_device *pdev; 2078 2079 n = dst_neigh_lookup(dst, peer_ip); 2080 if (!n) 2081 return -ENODEV; 2082 2083 rcu_read_lock(); 2084 err = -ENOMEM; 2085 if (n->dev->flags & IFF_LOOPBACK) { 2086 if (iptype == 4) 2087 pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); 2088 else if (IS_ENABLED(CONFIG_IPV6)) 2089 for_each_netdev(&init_net, pdev) { 2090 if (ipv6_chk_addr(&init_net, 2091 (struct in6_addr *)peer_ip, 2092 pdev, 1)) 2093 break; 2094 } 2095 else 2096 pdev = NULL; 2097 2098 if (!pdev) { 2099 err = -ENODEV; 2100 goto out; 2101 } 2102 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 2103 n, pdev, rt_tos2priority(tos)); 2104 if (!ep->l2t) { 2105 dev_put(pdev); 2106 goto out; 2107 } 2108 ep->mtu = pdev->mtu; 2109 ep->tx_chan = cxgb4_port_chan(pdev); 2110 ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; 2111 step = cdev->rdev.lldi.ntxq / 2112 cdev->rdev.lldi.nchan; 2113 ep->txq_idx = cxgb4_port_idx(pdev) * step; 2114 step = cdev->rdev.lldi.nrxq / 2115 cdev->rdev.lldi.nchan; 2116 ep->ctrlq_idx = cxgb4_port_idx(pdev); 2117 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 2118 cxgb4_port_idx(pdev) * step]; 2119 set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 2120 dev_put(pdev); 2121 } else { 2122 pdev = get_real_dev(n->dev); 2123 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 2124 n, pdev, rt_tos2priority(tos)); 2125 if (!ep->l2t) 2126 goto out; 2127 ep->mtu = dst_mtu(dst); 2128 ep->tx_chan = cxgb4_port_chan(pdev); 2129 ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; 2130 step = cdev->rdev.lldi.ntxq / 2131 cdev->rdev.lldi.nchan; 2132 ep->txq_idx = cxgb4_port_idx(pdev) * step; 2133 ep->ctrlq_idx = cxgb4_port_idx(pdev); 2134 step = cdev->rdev.lldi.nrxq / 2135 cdev->rdev.lldi.nchan; 2136 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 2137 cxgb4_port_idx(pdev) * step]; 2138 set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 2139 2140 if (clear_mpa_v1) { 2141 ep->retry_with_mpa_v1 = 0; 2142 ep->tried_with_mpa_v1 = 0; 2143 } 2144 } 2145 err = 0; 2146 out: 2147 rcu_read_unlock(); 2148 2149 neigh_release(n); 2150 2151 return err; 2152 } 2153 2154 static int c4iw_reconnect(struct c4iw_ep *ep) 2155 { 2156 int err = 0; 2157 int size = 0; 2158 struct sockaddr_in *laddr = (struct sockaddr_in *) 2159 &ep->com.cm_id->m_local_addr; 2160 struct sockaddr_in *raddr = (struct sockaddr_in *) 2161 &ep->com.cm_id->m_remote_addr; 2162 struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) 2163 &ep->com.cm_id->m_local_addr; 2164 struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) 2165 &ep->com.cm_id->m_remote_addr; 2166 int iptype; 2167 __u8 *ra; 2168 2169 pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id); 2170 c4iw_init_wr_wait(ep->com.wr_waitp); 2171 2172 /* When MPA revision is different on nodes, the node with MPA_rev=2 2173 * tries to reconnect with MPA_rev 1 for the same EP through 2174 * c4iw_reconnect(), where the same EP is assigned with new tid for 2175 * further connection establishment. As we are using the same EP pointer 2176 * for reconnect, few skbs are used during the previous c4iw_connect(), 2177 * which leaves the EP with inadequate skbs for further 2178 * c4iw_reconnect(), Further causing a crash due to an empty 2179 * skb_list() during peer_abort(). Allocate skbs which is already used. 2180 */ 2181 size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); 2182 if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { 2183 err = -ENOMEM; 2184 goto fail1; 2185 } 2186 2187 /* 2188 * Allocate an active TID to initiate a TCP connection. 2189 */ 2190 ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); 2191 if (ep->atid == -1) { 2192 pr_err("%s - cannot alloc atid\n", __func__); 2193 err = -ENOMEM; 2194 goto fail2; 2195 } 2196 err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); 2197 if (err) 2198 goto fail2a; 2199 2200 /* find a route */ 2201 if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { 2202 ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, 2203 laddr->sin_addr.s_addr, 2204 raddr->sin_addr.s_addr, 2205 laddr->sin_port, 2206 raddr->sin_port, ep->com.cm_id->tos); 2207 iptype = 4; 2208 ra = (__u8 *)&raddr->sin_addr; 2209 } else { 2210 ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, 2211 get_real_dev, 2212 laddr6->sin6_addr.s6_addr, 2213 raddr6->sin6_addr.s6_addr, 2214 laddr6->sin6_port, 2215 raddr6->sin6_port, 2216 ep->com.cm_id->tos, 2217 raddr6->sin6_scope_id); 2218 iptype = 6; 2219 ra = (__u8 *)&raddr6->sin6_addr; 2220 } 2221 if (!ep->dst) { 2222 pr_err("%s - cannot find route\n", __func__); 2223 err = -EHOSTUNREACH; 2224 goto fail3; 2225 } 2226 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, 2227 ep->com.dev->rdev.lldi.adapter_type, 2228 ep->com.cm_id->tos); 2229 if (err) { 2230 pr_err("%s - cannot alloc l2e\n", __func__); 2231 goto fail4; 2232 } 2233 2234 pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", 2235 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, 2236 ep->l2t->idx); 2237 2238 state_set(&ep->com, CONNECTING); 2239 ep->tos = ep->com.cm_id->tos; 2240 2241 /* send connect request to rnic */ 2242 err = send_connect(ep); 2243 if (!err) 2244 goto out; 2245 2246 cxgb4_l2t_release(ep->l2t); 2247 fail4: 2248 dst_release(ep->dst); 2249 fail3: 2250 xa_erase_irq(&ep->com.dev->atids, ep->atid); 2251 fail2a: 2252 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 2253 fail2: 2254 /* 2255 * remember to send notification to upper layer. 2256 * We are in here so the upper layer is not aware that this is 2257 * re-connect attempt and so, upper layer is still waiting for 2258 * response of 1st connect request. 2259 */ 2260 connect_reply_upcall(ep, -ECONNRESET); 2261 fail1: 2262 c4iw_put_ep(&ep->com); 2263 out: 2264 return err; 2265 } 2266 2267 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2268 { 2269 struct c4iw_ep *ep; 2270 struct cpl_act_open_rpl *rpl = cplhdr(skb); 2271 unsigned int atid = TID_TID_G(AOPEN_ATID_G( 2272 ntohl(rpl->atid_status))); 2273 struct tid_info *t = dev->rdev.lldi.tids; 2274 int status = AOPEN_STATUS_G(ntohl(rpl->atid_status)); 2275 struct sockaddr_in *la; 2276 struct sockaddr_in *ra; 2277 struct sockaddr_in6 *la6; 2278 struct sockaddr_in6 *ra6; 2279 int ret = 0; 2280 2281 ep = lookup_atid(t, atid); 2282 la = (struct sockaddr_in *)&ep->com.local_addr; 2283 ra = (struct sockaddr_in *)&ep->com.remote_addr; 2284 la6 = (struct sockaddr_in6 *)&ep->com.local_addr; 2285 ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; 2286 2287 pr_debug("ep %p atid %u status %u errno %d\n", ep, atid, 2288 status, status2errno(status)); 2289 2290 if (cxgb_is_neg_adv(status)) { 2291 pr_debug("Connection problems for atid %u status %u (%s)\n", 2292 atid, status, neg_adv_str(status)); 2293 ep->stats.connect_neg_adv++; 2294 mutex_lock(&dev->rdev.stats.lock); 2295 dev->rdev.stats.neg_adv++; 2296 mutex_unlock(&dev->rdev.stats.lock); 2297 return 0; 2298 } 2299 2300 set_bit(ACT_OPEN_RPL, &ep->com.history); 2301 2302 /* 2303 * Log interesting failures. 2304 */ 2305 switch (status) { 2306 case CPL_ERR_CONN_RESET: 2307 case CPL_ERR_CONN_TIMEDOUT: 2308 break; 2309 case CPL_ERR_TCAM_FULL: 2310 mutex_lock(&dev->rdev.stats.lock); 2311 dev->rdev.stats.tcam_full++; 2312 mutex_unlock(&dev->rdev.stats.lock); 2313 if (ep->com.local_addr.ss_family == AF_INET && 2314 dev->rdev.lldi.enable_fw_ofld_conn) { 2315 ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G( 2316 ntohl(rpl->atid_status)))); 2317 if (ret) 2318 goto fail; 2319 return 0; 2320 } 2321 break; 2322 case CPL_ERR_CONN_EXIST: 2323 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 2324 set_bit(ACT_RETRY_INUSE, &ep->com.history); 2325 if (ep->com.remote_addr.ss_family == AF_INET6) { 2326 struct sockaddr_in6 *sin6 = 2327 (struct sockaddr_in6 *) 2328 &ep->com.local_addr; 2329 cxgb4_clip_release( 2330 ep->com.dev->rdev.lldi.ports[0], 2331 (const u32 *) 2332 &sin6->sin6_addr.s6_addr, 1); 2333 } 2334 xa_erase_irq(&ep->com.dev->atids, atid); 2335 cxgb4_free_atid(t, atid); 2336 dst_release(ep->dst); 2337 cxgb4_l2t_release(ep->l2t); 2338 c4iw_reconnect(ep); 2339 return 0; 2340 } 2341 break; 2342 default: 2343 if (ep->com.local_addr.ss_family == AF_INET) { 2344 pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n", 2345 atid, status, status2errno(status), 2346 &la->sin_addr.s_addr, ntohs(la->sin_port), 2347 &ra->sin_addr.s_addr, ntohs(ra->sin_port)); 2348 } else { 2349 pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n", 2350 atid, status, status2errno(status), 2351 la6->sin6_addr.s6_addr, ntohs(la6->sin6_port), 2352 ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port)); 2353 } 2354 break; 2355 } 2356 2357 fail: 2358 connect_reply_upcall(ep, status2errno(status)); 2359 state_set(&ep->com, DEAD); 2360 2361 if (ep->com.remote_addr.ss_family == AF_INET6) { 2362 struct sockaddr_in6 *sin6 = 2363 (struct sockaddr_in6 *)&ep->com.local_addr; 2364 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 2365 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2366 } 2367 if (status && act_open_has_tid(status)) 2368 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl), 2369 ep->com.local_addr.ss_family); 2370 2371 xa_erase_irq(&ep->com.dev->atids, atid); 2372 cxgb4_free_atid(t, atid); 2373 dst_release(ep->dst); 2374 cxgb4_l2t_release(ep->l2t); 2375 c4iw_put_ep(&ep->com); 2376 2377 return 0; 2378 } 2379 2380 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2381 { 2382 struct cpl_pass_open_rpl *rpl = cplhdr(skb); 2383 unsigned int stid = GET_TID(rpl); 2384 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2385 2386 if (!ep) { 2387 pr_warn("%s stid %d lookup failure!\n", __func__, stid); 2388 goto out; 2389 } 2390 pr_debug("ep %p status %d error %d\n", ep, 2391 rpl->status, status2errno(rpl->status)); 2392 c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); 2393 c4iw_put_ep(&ep->com); 2394 out: 2395 return 0; 2396 } 2397 2398 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2399 { 2400 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); 2401 unsigned int stid = GET_TID(rpl); 2402 struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2403 2404 if (!ep) { 2405 pr_warn("%s stid %d lookup failure!\n", __func__, stid); 2406 goto out; 2407 } 2408 pr_debug("ep %p\n", ep); 2409 c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status)); 2410 c4iw_put_ep(&ep->com); 2411 out: 2412 return 0; 2413 } 2414 2415 static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, 2416 struct cpl_pass_accept_req *req) 2417 { 2418 struct cpl_pass_accept_rpl *rpl; 2419 unsigned int mtu_idx; 2420 u64 opt0; 2421 u32 opt2; 2422 u32 wscale; 2423 struct cpl_t5_pass_accept_rpl *rpl5 = NULL; 2424 int win; 2425 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; 2426 2427 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2428 cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 2429 enable_tcp_timestamps && req->tcpopt.tstamp, 2430 (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); 2431 wscale = cxgb_compute_wscale(rcv_win); 2432 2433 /* 2434 * Specify the largest window that will fit in opt0. The 2435 * remainder will be specified in the rx_data_ack. 2436 */ 2437 win = ep->rcv_win >> 10; 2438 if (win > RCV_BUFSIZ_M) 2439 win = RCV_BUFSIZ_M; 2440 opt0 = (nocong ? NO_CONG_F : 0) | 2441 KEEP_ALIVE_F | 2442 DELACK_F | 2443 WND_SCALE_V(wscale) | 2444 MSS_IDX_V(mtu_idx) | 2445 L2T_IDX_V(ep->l2t->idx) | 2446 TX_CHAN_V(ep->tx_chan) | 2447 SMAC_SEL_V(ep->smac_idx) | 2448 DSCP_V(ep->tos >> 2) | 2449 ULP_MODE_V(ULP_MODE_TCPDDP) | 2450 RCV_BUFSIZ_V(win); 2451 opt2 = RX_CHANNEL_V(0) | 2452 RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid); 2453 2454 if (enable_tcp_timestamps && req->tcpopt.tstamp) 2455 opt2 |= TSTAMPS_EN_F; 2456 if (enable_tcp_sack && req->tcpopt.sack) 2457 opt2 |= SACK_EN_F; 2458 if (wscale && enable_tcp_window_scaling) 2459 opt2 |= WND_SCALE_EN_F; 2460 if (enable_ecn) { 2461 const struct tcphdr *tcph; 2462 u32 hlen = ntohl(req->hdr_len); 2463 2464 if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) 2465 tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + 2466 IP_HDR_LEN_G(hlen); 2467 else 2468 tcph = (const void *)(req + 1) + 2469 T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); 2470 if (tcph->ece && tcph->cwr) 2471 opt2 |= CCTRL_ECN_V(1); 2472 } 2473 2474 if (!is_t4(adapter_type)) { 2475 u32 isn = (get_random_u32() & ~7UL) - 1; 2476 2477 skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL); 2478 rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16)); 2479 rpl = (void *)rpl5; 2480 INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid); 2481 opt2 |= T5_OPT_2_VALID_F; 2482 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); 2483 opt2 |= T5_ISS_F; 2484 if (peer2peer) 2485 isn += 4; 2486 rpl5->iss = cpu_to_be32(isn); 2487 pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); 2488 } else { 2489 skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); 2490 rpl = __skb_put_zero(skb, sizeof(*rpl)); 2491 INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid); 2492 } 2493 2494 rpl->opt0 = cpu_to_be64(opt0); 2495 rpl->opt2 = cpu_to_be32(opt2); 2496 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 2497 t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure); 2498 2499 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2500 } 2501 2502 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) 2503 { 2504 pr_debug("c4iw_dev %p tid %u\n", dev, hwtid); 2505 skb_trim(skb, sizeof(struct cpl_tid_release)); 2506 release_tid(&dev->rdev, hwtid, skb); 2507 return; 2508 } 2509 2510 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) 2511 { 2512 struct c4iw_ep *child_ep = NULL, *parent_ep; 2513 struct cpl_pass_accept_req *req = cplhdr(skb); 2514 unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); 2515 struct tid_info *t = dev->rdev.lldi.tids; 2516 unsigned int hwtid = GET_TID(req); 2517 struct dst_entry *dst; 2518 __u8 local_ip[16], peer_ip[16]; 2519 __be16 local_port, peer_port; 2520 struct sockaddr_in6 *sin6; 2521 int err; 2522 u16 peer_mss = ntohs(req->tcpopt.mss); 2523 int iptype; 2524 unsigned short hdrs; 2525 u8 tos; 2526 2527 parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 2528 if (!parent_ep) { 2529 pr_err("%s connect request on invalid stid %d\n", 2530 __func__, stid); 2531 goto reject; 2532 } 2533 2534 if (state_read(&parent_ep->com) != LISTEN) { 2535 pr_err("%s - listening ep not in LISTEN\n", __func__); 2536 goto reject; 2537 } 2538 2539 if (parent_ep->com.cm_id->tos_set) 2540 tos = parent_ep->com.cm_id->tos; 2541 else 2542 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); 2543 2544 cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, 2545 &iptype, local_ip, peer_ip, &local_port, &peer_port); 2546 2547 /* Find output route */ 2548 if (iptype == 4) { 2549 pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" 2550 , parent_ep, hwtid, 2551 local_ip, peer_ip, ntohs(local_port), 2552 ntohs(peer_port), peer_mss); 2553 dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 2554 *(__be32 *)local_ip, *(__be32 *)peer_ip, 2555 local_port, peer_port, tos); 2556 } else { 2557 pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" 2558 , parent_ep, hwtid, 2559 local_ip, peer_ip, ntohs(local_port), 2560 ntohs(peer_port), peer_mss); 2561 dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, 2562 local_ip, peer_ip, local_port, peer_port, 2563 tos, 2564 ((struct sockaddr_in6 *) 2565 &parent_ep->com.local_addr)->sin6_scope_id); 2566 } 2567 if (!dst) { 2568 pr_err("%s - failed to find dst entry!\n", __func__); 2569 goto reject; 2570 } 2571 2572 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); 2573 if (!child_ep) { 2574 pr_err("%s - failed to allocate ep entry!\n", __func__); 2575 dst_release(dst); 2576 goto reject; 2577 } 2578 2579 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, 2580 parent_ep->com.dev->rdev.lldi.adapter_type, tos); 2581 if (err) { 2582 pr_err("%s - failed to allocate l2t entry!\n", __func__); 2583 dst_release(dst); 2584 kfree(child_ep); 2585 goto reject; 2586 } 2587 2588 hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + 2589 sizeof(struct tcphdr) + 2590 ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); 2591 if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) 2592 child_ep->mtu = peer_mss + hdrs; 2593 2594 skb_queue_head_init(&child_ep->com.ep_skb_list); 2595 if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) 2596 goto fail; 2597 2598 state_set(&child_ep->com, CONNECTING); 2599 child_ep->com.dev = dev; 2600 child_ep->com.cm_id = NULL; 2601 2602 if (iptype == 4) { 2603 struct sockaddr_in *sin = (struct sockaddr_in *) 2604 &child_ep->com.local_addr; 2605 2606 sin->sin_family = AF_INET; 2607 sin->sin_port = local_port; 2608 sin->sin_addr.s_addr = *(__be32 *)local_ip; 2609 2610 sin = (struct sockaddr_in *)&child_ep->com.local_addr; 2611 sin->sin_family = AF_INET; 2612 sin->sin_port = ((struct sockaddr_in *) 2613 &parent_ep->com.local_addr)->sin_port; 2614 sin->sin_addr.s_addr = *(__be32 *)local_ip; 2615 2616 sin = (struct sockaddr_in *)&child_ep->com.remote_addr; 2617 sin->sin_family = AF_INET; 2618 sin->sin_port = peer_port; 2619 sin->sin_addr.s_addr = *(__be32 *)peer_ip; 2620 } else { 2621 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2622 sin6->sin6_family = PF_INET6; 2623 sin6->sin6_port = local_port; 2624 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2625 2626 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2627 sin6->sin6_family = PF_INET6; 2628 sin6->sin6_port = ((struct sockaddr_in6 *) 2629 &parent_ep->com.local_addr)->sin6_port; 2630 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2631 2632 sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; 2633 sin6->sin6_family = PF_INET6; 2634 sin6->sin6_port = peer_port; 2635 memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); 2636 } 2637 2638 c4iw_get_ep(&parent_ep->com); 2639 child_ep->parent_ep = parent_ep; 2640 child_ep->tos = tos; 2641 child_ep->dst = dst; 2642 child_ep->hwtid = hwtid; 2643 2644 pr_debug("tx_chan %u smac_idx %u rss_qid %u\n", 2645 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); 2646 2647 timer_setup(&child_ep->timer, ep_timeout, 0); 2648 cxgb4_insert_tid(t, child_ep, hwtid, 2649 child_ep->com.local_addr.ss_family); 2650 insert_ep_tid(child_ep); 2651 if (accept_cr(child_ep, skb, req)) { 2652 c4iw_put_ep(&parent_ep->com); 2653 release_ep_resources(child_ep); 2654 } else { 2655 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); 2656 } 2657 if (iptype == 6) { 2658 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2659 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], 2660 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2661 } 2662 goto out; 2663 fail: 2664 c4iw_put_ep(&child_ep->com); 2665 reject: 2666 reject_cr(dev, hwtid, skb); 2667 out: 2668 if (parent_ep) 2669 c4iw_put_ep(&parent_ep->com); 2670 return 0; 2671 } 2672 2673 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) 2674 { 2675 struct c4iw_ep *ep; 2676 struct cpl_pass_establish *req = cplhdr(skb); 2677 unsigned int tid = GET_TID(req); 2678 int ret; 2679 u16 tcp_opt = ntohs(req->tcp_opt); 2680 2681 ep = get_ep_from_tid(dev, tid); 2682 if (!ep) 2683 return 0; 2684 2685 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2686 ep->snd_seq = be32_to_cpu(req->snd_isn); 2687 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 2688 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); 2689 2690 pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); 2691 2692 set_emss(ep, tcp_opt); 2693 2694 dst_confirm(ep->dst); 2695 mutex_lock(&ep->com.mutex); 2696 ep->com.state = MPA_REQ_WAIT; 2697 start_ep_timer(ep); 2698 set_bit(PASS_ESTAB, &ep->com.history); 2699 ret = send_flowc(ep); 2700 mutex_unlock(&ep->com.mutex); 2701 if (ret) 2702 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 2703 c4iw_put_ep(&ep->com); 2704 2705 return 0; 2706 } 2707 2708 static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) 2709 { 2710 struct cpl_peer_close *hdr = cplhdr(skb); 2711 struct c4iw_ep *ep; 2712 struct c4iw_qp_attributes attrs; 2713 int disconnect = 1; 2714 int release = 0; 2715 unsigned int tid = GET_TID(hdr); 2716 int ret; 2717 2718 ep = get_ep_from_tid(dev, tid); 2719 if (!ep) 2720 return 0; 2721 2722 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2723 dst_confirm(ep->dst); 2724 2725 set_bit(PEER_CLOSE, &ep->com.history); 2726 mutex_lock(&ep->com.mutex); 2727 switch (ep->com.state) { 2728 case MPA_REQ_WAIT: 2729 __state_set(&ep->com, CLOSING); 2730 break; 2731 case MPA_REQ_SENT: 2732 __state_set(&ep->com, CLOSING); 2733 connect_reply_upcall(ep, -ECONNRESET); 2734 break; 2735 case MPA_REQ_RCVD: 2736 2737 /* 2738 * We're gonna mark this puppy DEAD, but keep 2739 * the reference on it until the ULP accepts or 2740 * rejects the CR. Also wake up anyone waiting 2741 * in rdma connection migration (see c4iw_accept_cr()). 2742 */ 2743 __state_set(&ep->com, CLOSING); 2744 pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); 2745 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2746 break; 2747 case MPA_REP_SENT: 2748 __state_set(&ep->com, CLOSING); 2749 pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid); 2750 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2751 break; 2752 case FPDU_MODE: 2753 start_ep_timer(ep); 2754 __state_set(&ep->com, CLOSING); 2755 attrs.next_state = C4IW_QP_STATE_CLOSING; 2756 ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2757 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2758 if (ret != -ECONNRESET) { 2759 peer_close_upcall(ep); 2760 disconnect = 1; 2761 } 2762 break; 2763 case ABORTING: 2764 disconnect = 0; 2765 break; 2766 case CLOSING: 2767 __state_set(&ep->com, MORIBUND); 2768 disconnect = 0; 2769 break; 2770 case MORIBUND: 2771 (void)stop_ep_timer(ep); 2772 if (ep->com.cm_id && ep->com.qp) { 2773 attrs.next_state = C4IW_QP_STATE_IDLE; 2774 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2775 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2776 } 2777 close_complete_upcall(ep, 0); 2778 __state_set(&ep->com, DEAD); 2779 release = 1; 2780 disconnect = 0; 2781 break; 2782 case DEAD: 2783 disconnect = 0; 2784 break; 2785 default: 2786 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 2787 } 2788 mutex_unlock(&ep->com.mutex); 2789 if (disconnect) 2790 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 2791 if (release) 2792 release_ep_resources(ep); 2793 c4iw_put_ep(&ep->com); 2794 return 0; 2795 } 2796 2797 static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep) 2798 { 2799 complete_cached_srq_buffers(ep, ep->srqe_idx); 2800 if (ep->com.cm_id && ep->com.qp) { 2801 struct c4iw_qp_attributes attrs; 2802 2803 attrs.next_state = C4IW_QP_STATE_ERROR; 2804 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 2805 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2806 } 2807 peer_abort_upcall(ep); 2808 release_ep_resources(ep); 2809 c4iw_put_ep(&ep->com); 2810 } 2811 2812 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) 2813 { 2814 struct cpl_abort_req_rss6 *req = cplhdr(skb); 2815 struct c4iw_ep *ep; 2816 struct sk_buff *rpl_skb; 2817 struct c4iw_qp_attributes attrs; 2818 int ret; 2819 int release = 0; 2820 unsigned int tid = GET_TID(req); 2821 u8 status; 2822 u32 srqidx; 2823 2824 u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); 2825 2826 ep = get_ep_from_tid(dev, tid); 2827 if (!ep) 2828 return 0; 2829 2830 status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); 2831 2832 if (cxgb_is_neg_adv(status)) { 2833 pr_debug("Negative advice on abort- tid %u status %d (%s)\n", 2834 ep->hwtid, status, neg_adv_str(status)); 2835 ep->stats.abort_neg_adv++; 2836 mutex_lock(&dev->rdev.stats.lock); 2837 dev->rdev.stats.neg_adv++; 2838 mutex_unlock(&dev->rdev.stats.lock); 2839 goto deref_ep; 2840 } 2841 2842 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, 2843 ep->com.state); 2844 set_bit(PEER_ABORT, &ep->com.history); 2845 2846 /* 2847 * Wake up any threads in rdma_init() or rdma_fini(). 2848 * However, this is not needed if com state is just 2849 * MPA_REQ_SENT 2850 */ 2851 if (ep->com.state != MPA_REQ_SENT) 2852 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 2853 2854 mutex_lock(&ep->com.mutex); 2855 switch (ep->com.state) { 2856 case CONNECTING: 2857 c4iw_put_ep(&ep->parent_ep->com); 2858 break; 2859 case MPA_REQ_WAIT: 2860 (void)stop_ep_timer(ep); 2861 break; 2862 case MPA_REQ_SENT: 2863 (void)stop_ep_timer(ep); 2864 if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || 2865 (mpa_rev == 2 && ep->tried_with_mpa_v1)) 2866 connect_reply_upcall(ep, -ECONNRESET); 2867 else { 2868 /* 2869 * we just don't send notification upwards because we 2870 * want to retry with mpa_v1 without upper layers even 2871 * knowing it. 2872 * 2873 * do some housekeeping so as to re-initiate the 2874 * connection 2875 */ 2876 pr_info("%s: mpa_rev=%d. Retrying with mpav1\n", 2877 __func__, mpa_rev); 2878 ep->retry_with_mpa_v1 = 1; 2879 } 2880 break; 2881 case MPA_REP_SENT: 2882 break; 2883 case MPA_REQ_RCVD: 2884 break; 2885 case MORIBUND: 2886 case CLOSING: 2887 stop_ep_timer(ep); 2888 fallthrough; 2889 case FPDU_MODE: 2890 if (ep->com.qp && ep->com.qp->srq) { 2891 srqidx = ABORT_RSS_SRQIDX_G( 2892 be32_to_cpu(req->srqidx_status)); 2893 if (srqidx) { 2894 complete_cached_srq_buffers(ep, srqidx); 2895 } else { 2896 /* Hold ep ref until finish_peer_abort() */ 2897 c4iw_get_ep(&ep->com); 2898 __state_set(&ep->com, ABORTING); 2899 set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags); 2900 read_tcb(ep); 2901 break; 2902 2903 } 2904 } 2905 2906 if (ep->com.cm_id && ep->com.qp) { 2907 attrs.next_state = C4IW_QP_STATE_ERROR; 2908 ret = c4iw_modify_qp(ep->com.qp->rhp, 2909 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 2910 &attrs, 1); 2911 if (ret) 2912 pr_err("%s - qp <- error failed!\n", __func__); 2913 } 2914 peer_abort_upcall(ep); 2915 break; 2916 case ABORTING: 2917 break; 2918 case DEAD: 2919 pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 2920 mutex_unlock(&ep->com.mutex); 2921 goto deref_ep; 2922 default: 2923 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 2924 break; 2925 } 2926 dst_confirm(ep->dst); 2927 if (ep->com.state != ABORTING) { 2928 __state_set(&ep->com, DEAD); 2929 /* we don't release if we want to retry with mpa_v1 */ 2930 if (!ep->retry_with_mpa_v1) 2931 release = 1; 2932 } 2933 mutex_unlock(&ep->com.mutex); 2934 2935 rpl_skb = skb_dequeue(&ep->com.ep_skb_list); 2936 if (WARN_ON(!rpl_skb)) { 2937 release = 1; 2938 goto out; 2939 } 2940 2941 cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); 2942 2943 c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); 2944 out: 2945 if (release) 2946 release_ep_resources(ep); 2947 else if (ep->retry_with_mpa_v1) { 2948 if (ep->com.remote_addr.ss_family == AF_INET6) { 2949 struct sockaddr_in6 *sin6 = 2950 (struct sockaddr_in6 *) 2951 &ep->com.local_addr; 2952 cxgb4_clip_release( 2953 ep->com.dev->rdev.lldi.ports[0], 2954 (const u32 *)&sin6->sin6_addr.s6_addr, 2955 1); 2956 } 2957 xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid); 2958 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, 2959 ep->com.local_addr.ss_family); 2960 dst_release(ep->dst); 2961 cxgb4_l2t_release(ep->l2t); 2962 c4iw_reconnect(ep); 2963 } 2964 2965 deref_ep: 2966 c4iw_put_ep(&ep->com); 2967 /* Dereferencing ep, referenced in peer_abort_intr() */ 2968 c4iw_put_ep(&ep->com); 2969 return 0; 2970 } 2971 2972 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2973 { 2974 struct c4iw_ep *ep; 2975 struct c4iw_qp_attributes attrs; 2976 struct cpl_close_con_rpl *rpl = cplhdr(skb); 2977 int release = 0; 2978 unsigned int tid = GET_TID(rpl); 2979 2980 ep = get_ep_from_tid(dev, tid); 2981 if (!ep) 2982 return 0; 2983 2984 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2985 2986 /* The cm_id may be null if we failed to connect */ 2987 mutex_lock(&ep->com.mutex); 2988 set_bit(CLOSE_CON_RPL, &ep->com.history); 2989 switch (ep->com.state) { 2990 case CLOSING: 2991 __state_set(&ep->com, MORIBUND); 2992 break; 2993 case MORIBUND: 2994 (void)stop_ep_timer(ep); 2995 if ((ep->com.cm_id) && (ep->com.qp)) { 2996 attrs.next_state = C4IW_QP_STATE_IDLE; 2997 c4iw_modify_qp(ep->com.qp->rhp, 2998 ep->com.qp, 2999 C4IW_QP_ATTR_NEXT_STATE, 3000 &attrs, 1); 3001 } 3002 close_complete_upcall(ep, 0); 3003 __state_set(&ep->com, DEAD); 3004 release = 1; 3005 break; 3006 case ABORTING: 3007 case DEAD: 3008 break; 3009 default: 3010 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 3011 break; 3012 } 3013 mutex_unlock(&ep->com.mutex); 3014 if (release) 3015 release_ep_resources(ep); 3016 c4iw_put_ep(&ep->com); 3017 return 0; 3018 } 3019 3020 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) 3021 { 3022 struct cpl_rdma_terminate *rpl = cplhdr(skb); 3023 unsigned int tid = GET_TID(rpl); 3024 struct c4iw_ep *ep; 3025 struct c4iw_qp_attributes attrs; 3026 3027 ep = get_ep_from_tid(dev, tid); 3028 3029 if (ep) { 3030 if (ep->com.qp) { 3031 pr_warn("TERM received tid %u qpid %u\n", tid, 3032 ep->com.qp->wq.sq.qid); 3033 attrs.next_state = C4IW_QP_STATE_TERMINATE; 3034 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 3035 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 3036 } 3037 3038 /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3, 3039 * when entering the TERM state the RNIC MUST initiate a CLOSE. 3040 */ 3041 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 3042 c4iw_put_ep(&ep->com); 3043 } else 3044 pr_warn("TERM received tid %u no ep/qp\n", tid); 3045 3046 return 0; 3047 } 3048 3049 /* 3050 * Upcall from the adapter indicating data has been transmitted. 3051 * For us its just the single MPA request or reply. We can now free 3052 * the skb holding the mpa message. 3053 */ 3054 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) 3055 { 3056 struct c4iw_ep *ep; 3057 struct cpl_fw4_ack *hdr = cplhdr(skb); 3058 u8 credits = hdr->credits; 3059 unsigned int tid = GET_TID(hdr); 3060 3061 3062 ep = get_ep_from_tid(dev, tid); 3063 if (!ep) 3064 return 0; 3065 pr_debug("ep %p tid %u credits %u\n", 3066 ep, ep->hwtid, credits); 3067 if (credits == 0) { 3068 pr_debug("0 credit ack ep %p tid %u state %u\n", 3069 ep, ep->hwtid, state_read(&ep->com)); 3070 goto out; 3071 } 3072 3073 dst_confirm(ep->dst); 3074 if (ep->mpa_skb) { 3075 pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n", 3076 ep, ep->hwtid, state_read(&ep->com), 3077 ep->mpa_attr.initiator ? 1 : 0); 3078 mutex_lock(&ep->com.mutex); 3079 kfree_skb(ep->mpa_skb); 3080 ep->mpa_skb = NULL; 3081 if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) 3082 stop_ep_timer(ep); 3083 mutex_unlock(&ep->com.mutex); 3084 } 3085 out: 3086 c4iw_put_ep(&ep->com); 3087 return 0; 3088 } 3089 3090 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 3091 { 3092 int abort; 3093 struct c4iw_ep *ep = to_ep(cm_id); 3094 3095 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 3096 3097 mutex_lock(&ep->com.mutex); 3098 if (ep->com.state != MPA_REQ_RCVD) { 3099 mutex_unlock(&ep->com.mutex); 3100 c4iw_put_ep(&ep->com); 3101 return -ECONNRESET; 3102 } 3103 set_bit(ULP_REJECT, &ep->com.history); 3104 if (mpa_rev == 0) 3105 abort = 1; 3106 else 3107 abort = send_mpa_reject(ep, pdata, pdata_len); 3108 mutex_unlock(&ep->com.mutex); 3109 3110 stop_ep_timer(ep); 3111 c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); 3112 c4iw_put_ep(&ep->com); 3113 return 0; 3114 } 3115 3116 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 3117 { 3118 int err; 3119 struct c4iw_qp_attributes attrs; 3120 enum c4iw_qp_attr_mask mask; 3121 struct c4iw_ep *ep = to_ep(cm_id); 3122 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 3123 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 3124 int abort = 0; 3125 3126 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 3127 3128 mutex_lock(&ep->com.mutex); 3129 if (ep->com.state != MPA_REQ_RCVD) { 3130 err = -ECONNRESET; 3131 goto err_out; 3132 } 3133 3134 if (!qp) { 3135 err = -EINVAL; 3136 goto err_out; 3137 } 3138 3139 set_bit(ULP_ACCEPT, &ep->com.history); 3140 if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || 3141 (conn_param->ird > cur_max_read_depth(ep->com.dev))) { 3142 err = -EINVAL; 3143 goto err_abort; 3144 } 3145 3146 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 3147 if (conn_param->ord > ep->ird) { 3148 if (RELAXED_IRD_NEGOTIATION) { 3149 conn_param->ord = ep->ird; 3150 } else { 3151 ep->ird = conn_param->ird; 3152 ep->ord = conn_param->ord; 3153 send_mpa_reject(ep, conn_param->private_data, 3154 conn_param->private_data_len); 3155 err = -ENOMEM; 3156 goto err_abort; 3157 } 3158 } 3159 if (conn_param->ird < ep->ord) { 3160 if (RELAXED_IRD_NEGOTIATION && 3161 ep->ord <= h->rdev.lldi.max_ordird_qp) { 3162 conn_param->ird = ep->ord; 3163 } else { 3164 err = -ENOMEM; 3165 goto err_abort; 3166 } 3167 } 3168 } 3169 ep->ird = conn_param->ird; 3170 ep->ord = conn_param->ord; 3171 3172 if (ep->mpa_attr.version == 1) { 3173 if (peer2peer && ep->ird == 0) 3174 ep->ird = 1; 3175 } else { 3176 if (peer2peer && 3177 (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && 3178 (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) 3179 ep->ird = 1; 3180 } 3181 3182 pr_debug("ird %d ord %d\n", ep->ird, ep->ord); 3183 3184 ep->com.cm_id = cm_id; 3185 ref_cm_id(&ep->com); 3186 ep->com.qp = qp; 3187 ref_qp(ep); 3188 3189 /* bind QP to EP and move to RTS */ 3190 attrs.mpa_attr = ep->mpa_attr; 3191 attrs.max_ird = ep->ird; 3192 attrs.max_ord = ep->ord; 3193 attrs.llp_stream_handle = ep; 3194 attrs.next_state = C4IW_QP_STATE_RTS; 3195 3196 /* bind QP and TID with INIT_WR */ 3197 mask = C4IW_QP_ATTR_NEXT_STATE | 3198 C4IW_QP_ATTR_LLP_STREAM_HANDLE | 3199 C4IW_QP_ATTR_MPA_ATTR | 3200 C4IW_QP_ATTR_MAX_IRD | 3201 C4IW_QP_ATTR_MAX_ORD; 3202 3203 err = c4iw_modify_qp(ep->com.qp->rhp, 3204 ep->com.qp, mask, &attrs, 1); 3205 if (err) 3206 goto err_deref_cm_id; 3207 3208 set_bit(STOP_MPA_TIMER, &ep->com.flags); 3209 err = send_mpa_reply(ep, conn_param->private_data, 3210 conn_param->private_data_len); 3211 if (err) 3212 goto err_deref_cm_id; 3213 3214 __state_set(&ep->com, FPDU_MODE); 3215 established_upcall(ep); 3216 mutex_unlock(&ep->com.mutex); 3217 c4iw_put_ep(&ep->com); 3218 return 0; 3219 err_deref_cm_id: 3220 deref_cm_id(&ep->com); 3221 err_abort: 3222 abort = 1; 3223 err_out: 3224 mutex_unlock(&ep->com.mutex); 3225 if (abort) 3226 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 3227 c4iw_put_ep(&ep->com); 3228 return err; 3229 } 3230 3231 static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) 3232 { 3233 struct in_device *ind; 3234 int found = 0; 3235 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; 3236 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; 3237 const struct in_ifaddr *ifa; 3238 3239 ind = in_dev_get(dev->rdev.lldi.ports[0]); 3240 if (!ind) 3241 return -EADDRNOTAVAIL; 3242 rcu_read_lock(); 3243 in_dev_for_each_ifa_rcu(ifa, ind) { 3244 if (ifa->ifa_flags & IFA_F_SECONDARY) 3245 continue; 3246 laddr->sin_addr.s_addr = ifa->ifa_address; 3247 raddr->sin_addr.s_addr = ifa->ifa_address; 3248 found = 1; 3249 break; 3250 } 3251 rcu_read_unlock(); 3252 3253 in_dev_put(ind); 3254 return found ? 0 : -EADDRNOTAVAIL; 3255 } 3256 3257 static int get_lladdr(struct net_device *dev, struct in6_addr *addr, 3258 unsigned char banned_flags) 3259 { 3260 struct inet6_dev *idev; 3261 int err = -EADDRNOTAVAIL; 3262 3263 rcu_read_lock(); 3264 idev = __in6_dev_get(dev); 3265 if (idev != NULL) { 3266 struct inet6_ifaddr *ifp; 3267 3268 read_lock_bh(&idev->lock); 3269 list_for_each_entry(ifp, &idev->addr_list, if_list) { 3270 if (ifp->scope == IFA_LINK && 3271 !(ifp->flags & banned_flags)) { 3272 memcpy(addr, &ifp->addr, 16); 3273 err = 0; 3274 break; 3275 } 3276 } 3277 read_unlock_bh(&idev->lock); 3278 } 3279 rcu_read_unlock(); 3280 return err; 3281 } 3282 3283 static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) 3284 { 3285 struct in6_addr addr; 3286 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; 3287 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; 3288 3289 if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { 3290 memcpy(la6->sin6_addr.s6_addr, &addr, 16); 3291 memcpy(ra6->sin6_addr.s6_addr, &addr, 16); 3292 return 0; 3293 } 3294 return -EADDRNOTAVAIL; 3295 } 3296 3297 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 3298 { 3299 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 3300 struct c4iw_ep *ep; 3301 int err = 0; 3302 struct sockaddr_in *laddr; 3303 struct sockaddr_in *raddr; 3304 struct sockaddr_in6 *laddr6; 3305 struct sockaddr_in6 *raddr6; 3306 __u8 *ra; 3307 int iptype; 3308 3309 if ((conn_param->ord > cur_max_read_depth(dev)) || 3310 (conn_param->ird > cur_max_read_depth(dev))) { 3311 err = -EINVAL; 3312 goto out; 3313 } 3314 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 3315 if (!ep) { 3316 pr_err("%s - cannot alloc ep\n", __func__); 3317 err = -ENOMEM; 3318 goto out; 3319 } 3320 3321 skb_queue_head_init(&ep->com.ep_skb_list); 3322 if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { 3323 err = -ENOMEM; 3324 goto fail1; 3325 } 3326 3327 timer_setup(&ep->timer, ep_timeout, 0); 3328 ep->plen = conn_param->private_data_len; 3329 if (ep->plen) 3330 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 3331 conn_param->private_data, ep->plen); 3332 ep->ird = conn_param->ird; 3333 ep->ord = conn_param->ord; 3334 3335 if (peer2peer && ep->ord == 0) 3336 ep->ord = 1; 3337 3338 ep->com.cm_id = cm_id; 3339 ref_cm_id(&ep->com); 3340 cm_id->provider_data = ep; 3341 ep->com.dev = dev; 3342 ep->com.qp = get_qhp(dev, conn_param->qpn); 3343 if (!ep->com.qp) { 3344 pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); 3345 err = -EINVAL; 3346 goto fail2; 3347 } 3348 ref_qp(ep); 3349 pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn, 3350 ep->com.qp, cm_id); 3351 3352 /* 3353 * Allocate an active TID to initiate a TCP connection. 3354 */ 3355 ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep); 3356 if (ep->atid == -1) { 3357 pr_err("%s - cannot alloc atid\n", __func__); 3358 err = -ENOMEM; 3359 goto fail2; 3360 } 3361 err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL); 3362 if (err) 3363 goto fail5; 3364 3365 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, 3366 sizeof(ep->com.local_addr)); 3367 memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, 3368 sizeof(ep->com.remote_addr)); 3369 3370 laddr = (struct sockaddr_in *)&ep->com.local_addr; 3371 raddr = (struct sockaddr_in *)&ep->com.remote_addr; 3372 laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr; 3373 raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr; 3374 3375 if (cm_id->m_remote_addr.ss_family == AF_INET) { 3376 iptype = 4; 3377 ra = (__u8 *)&raddr->sin_addr; 3378 3379 /* 3380 * Handle loopback requests to INADDR_ANY. 3381 */ 3382 if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { 3383 err = pick_local_ipaddrs(dev, cm_id); 3384 if (err) 3385 goto fail3; 3386 } 3387 3388 /* find a route */ 3389 pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", 3390 &laddr->sin_addr, ntohs(laddr->sin_port), 3391 ra, ntohs(raddr->sin_port)); 3392 ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 3393 laddr->sin_addr.s_addr, 3394 raddr->sin_addr.s_addr, 3395 laddr->sin_port, 3396 raddr->sin_port, cm_id->tos); 3397 } else { 3398 iptype = 6; 3399 ra = (__u8 *)&raddr6->sin6_addr; 3400 3401 /* 3402 * Handle loopback requests to INADDR_ANY. 3403 */ 3404 if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { 3405 err = pick_local_ip6addrs(dev, cm_id); 3406 if (err) 3407 goto fail3; 3408 } 3409 3410 /* find a route */ 3411 pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", 3412 laddr6->sin6_addr.s6_addr, 3413 ntohs(laddr6->sin6_port), 3414 raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); 3415 ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, 3416 laddr6->sin6_addr.s6_addr, 3417 raddr6->sin6_addr.s6_addr, 3418 laddr6->sin6_port, 3419 raddr6->sin6_port, cm_id->tos, 3420 raddr6->sin6_scope_id); 3421 } 3422 if (!ep->dst) { 3423 pr_err("%s - cannot find route\n", __func__); 3424 err = -EHOSTUNREACH; 3425 goto fail3; 3426 } 3427 3428 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, 3429 ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); 3430 if (err) { 3431 pr_err("%s - cannot alloc l2e\n", __func__); 3432 goto fail4; 3433 } 3434 3435 pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", 3436 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, 3437 ep->l2t->idx); 3438 3439 state_set(&ep->com, CONNECTING); 3440 ep->tos = cm_id->tos; 3441 3442 /* send connect request to rnic */ 3443 err = send_connect(ep); 3444 if (!err) 3445 goto out; 3446 3447 cxgb4_l2t_release(ep->l2t); 3448 fail4: 3449 dst_release(ep->dst); 3450 fail3: 3451 xa_erase_irq(&ep->com.dev->atids, ep->atid); 3452 fail5: 3453 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 3454 fail2: 3455 skb_queue_purge(&ep->com.ep_skb_list); 3456 deref_cm_id(&ep->com); 3457 fail1: 3458 c4iw_put_ep(&ep->com); 3459 out: 3460 return err; 3461 } 3462 3463 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) 3464 { 3465 int err; 3466 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) 3467 &ep->com.local_addr; 3468 3469 if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { 3470 err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], 3471 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3472 if (err) 3473 return err; 3474 } 3475 c4iw_init_wr_wait(ep->com.wr_waitp); 3476 err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], 3477 ep->stid, &sin6->sin6_addr, 3478 sin6->sin6_port, 3479 ep->com.dev->rdev.lldi.rxq_ids[0]); 3480 if (!err) 3481 err = c4iw_wait_for_reply(&ep->com.dev->rdev, 3482 ep->com.wr_waitp, 3483 0, 0, __func__); 3484 else if (err > 0) 3485 err = net_xmit_errno(err); 3486 if (err) { 3487 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3488 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3489 pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", 3490 err, ep->stid, 3491 sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); 3492 } 3493 return err; 3494 } 3495 3496 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) 3497 { 3498 int err; 3499 struct sockaddr_in *sin = (struct sockaddr_in *) 3500 &ep->com.local_addr; 3501 3502 if (dev->rdev.lldi.enable_fw_ofld_conn) { 3503 do { 3504 err = cxgb4_create_server_filter( 3505 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3506 sin->sin_addr.s_addr, sin->sin_port, 0, 3507 ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); 3508 if (err == -EBUSY) { 3509 if (c4iw_fatal_error(&ep->com.dev->rdev)) { 3510 err = -EIO; 3511 break; 3512 } 3513 set_current_state(TASK_UNINTERRUPTIBLE); 3514 schedule_timeout(usecs_to_jiffies(100)); 3515 } 3516 } while (err == -EBUSY); 3517 } else { 3518 c4iw_init_wr_wait(ep->com.wr_waitp); 3519 err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], 3520 ep->stid, sin->sin_addr.s_addr, sin->sin_port, 3521 0, ep->com.dev->rdev.lldi.rxq_ids[0]); 3522 if (!err) 3523 err = c4iw_wait_for_reply(&ep->com.dev->rdev, 3524 ep->com.wr_waitp, 3525 0, 0, __func__); 3526 else if (err > 0) 3527 err = net_xmit_errno(err); 3528 } 3529 if (err) 3530 pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" 3531 , err, ep->stid, 3532 &sin->sin_addr, ntohs(sin->sin_port)); 3533 return err; 3534 } 3535 3536 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) 3537 { 3538 int err = 0; 3539 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 3540 struct c4iw_listen_ep *ep; 3541 3542 might_sleep(); 3543 3544 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 3545 if (!ep) { 3546 pr_err("%s - cannot alloc ep\n", __func__); 3547 err = -ENOMEM; 3548 goto fail1; 3549 } 3550 skb_queue_head_init(&ep->com.ep_skb_list); 3551 pr_debug("ep %p\n", ep); 3552 ep->com.cm_id = cm_id; 3553 ref_cm_id(&ep->com); 3554 ep->com.dev = dev; 3555 ep->backlog = backlog; 3556 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, 3557 sizeof(ep->com.local_addr)); 3558 3559 /* 3560 * Allocate a server TID. 3561 */ 3562 if (dev->rdev.lldi.enable_fw_ofld_conn && 3563 ep->com.local_addr.ss_family == AF_INET) 3564 ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, 3565 cm_id->m_local_addr.ss_family, ep); 3566 else 3567 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, 3568 cm_id->m_local_addr.ss_family, ep); 3569 3570 if (ep->stid == -1) { 3571 pr_err("%s - cannot alloc stid\n", __func__); 3572 err = -ENOMEM; 3573 goto fail2; 3574 } 3575 err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL); 3576 if (err) 3577 goto fail3; 3578 3579 state_set(&ep->com, LISTEN); 3580 if (ep->com.local_addr.ss_family == AF_INET) 3581 err = create_server4(dev, ep); 3582 else 3583 err = create_server6(dev, ep); 3584 if (!err) { 3585 cm_id->provider_data = ep; 3586 goto out; 3587 } 3588 xa_erase_irq(&ep->com.dev->stids, ep->stid); 3589 fail3: 3590 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3591 ep->com.local_addr.ss_family); 3592 fail2: 3593 deref_cm_id(&ep->com); 3594 c4iw_put_ep(&ep->com); 3595 fail1: 3596 out: 3597 return err; 3598 } 3599 3600 int c4iw_destroy_listen(struct iw_cm_id *cm_id) 3601 { 3602 int err; 3603 struct c4iw_listen_ep *ep = to_listen_ep(cm_id); 3604 3605 pr_debug("ep %p\n", ep); 3606 3607 might_sleep(); 3608 state_set(&ep->com, DEAD); 3609 if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn && 3610 ep->com.local_addr.ss_family == AF_INET) { 3611 err = cxgb4_remove_server_filter( 3612 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3613 ep->com.dev->rdev.lldi.rxq_ids[0], false); 3614 } else { 3615 struct sockaddr_in6 *sin6; 3616 c4iw_init_wr_wait(ep->com.wr_waitp); 3617 err = cxgb4_remove_server( 3618 ep->com.dev->rdev.lldi.ports[0], ep->stid, 3619 ep->com.dev->rdev.lldi.rxq_ids[0], 3620 ep->com.local_addr.ss_family == AF_INET6); 3621 if (err) 3622 goto done; 3623 err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp, 3624 0, 0, __func__); 3625 sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; 3626 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3627 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3628 } 3629 xa_erase_irq(&ep->com.dev->stids, ep->stid); 3630 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3631 ep->com.local_addr.ss_family); 3632 done: 3633 deref_cm_id(&ep->com); 3634 c4iw_put_ep(&ep->com); 3635 return err; 3636 } 3637 3638 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) 3639 { 3640 int ret = 0; 3641 int close = 0; 3642 int fatal = 0; 3643 struct c4iw_rdev *rdev; 3644 3645 mutex_lock(&ep->com.mutex); 3646 3647 pr_debug("ep %p state %s, abrupt %d\n", ep, 3648 states[ep->com.state], abrupt); 3649 3650 /* 3651 * Ref the ep here in case we have fatal errors causing the 3652 * ep to be released and freed. 3653 */ 3654 c4iw_get_ep(&ep->com); 3655 3656 rdev = &ep->com.dev->rdev; 3657 if (c4iw_fatal_error(rdev)) { 3658 fatal = 1; 3659 close_complete_upcall(ep, -EIO); 3660 ep->com.state = DEAD; 3661 } 3662 switch (ep->com.state) { 3663 case MPA_REQ_WAIT: 3664 case MPA_REQ_SENT: 3665 case MPA_REQ_RCVD: 3666 case MPA_REP_SENT: 3667 case FPDU_MODE: 3668 case CONNECTING: 3669 close = 1; 3670 if (abrupt) 3671 ep->com.state = ABORTING; 3672 else { 3673 ep->com.state = CLOSING; 3674 3675 /* 3676 * if we close before we see the fw4_ack() then we fix 3677 * up the timer state since we're reusing it. 3678 */ 3679 if (ep->mpa_skb && 3680 test_bit(STOP_MPA_TIMER, &ep->com.flags)) { 3681 clear_bit(STOP_MPA_TIMER, &ep->com.flags); 3682 stop_ep_timer(ep); 3683 } 3684 start_ep_timer(ep); 3685 } 3686 set_bit(CLOSE_SENT, &ep->com.flags); 3687 break; 3688 case CLOSING: 3689 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { 3690 close = 1; 3691 if (abrupt) { 3692 (void)stop_ep_timer(ep); 3693 ep->com.state = ABORTING; 3694 } else 3695 ep->com.state = MORIBUND; 3696 } 3697 break; 3698 case MORIBUND: 3699 case ABORTING: 3700 case DEAD: 3701 pr_debug("ignoring disconnect ep %p state %u\n", 3702 ep, ep->com.state); 3703 break; 3704 default: 3705 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 3706 break; 3707 } 3708 3709 if (close) { 3710 if (abrupt) { 3711 set_bit(EP_DISC_ABORT, &ep->com.history); 3712 ret = send_abort(ep); 3713 } else { 3714 set_bit(EP_DISC_CLOSE, &ep->com.history); 3715 ret = send_halfclose(ep); 3716 } 3717 if (ret) { 3718 set_bit(EP_DISC_FAIL, &ep->com.history); 3719 if (!abrupt) { 3720 stop_ep_timer(ep); 3721 close_complete_upcall(ep, -EIO); 3722 } 3723 if (ep->com.qp) { 3724 struct c4iw_qp_attributes attrs; 3725 3726 attrs.next_state = C4IW_QP_STATE_ERROR; 3727 ret = c4iw_modify_qp(ep->com.qp->rhp, 3728 ep->com.qp, 3729 C4IW_QP_ATTR_NEXT_STATE, 3730 &attrs, 1); 3731 if (ret) 3732 pr_err("%s - qp <- error failed!\n", 3733 __func__); 3734 } 3735 fatal = 1; 3736 } 3737 } 3738 mutex_unlock(&ep->com.mutex); 3739 c4iw_put_ep(&ep->com); 3740 if (fatal) 3741 release_ep_resources(ep); 3742 return ret; 3743 } 3744 3745 static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, 3746 struct cpl_fw6_msg_ofld_connection_wr_rpl *req) 3747 { 3748 struct c4iw_ep *ep; 3749 int atid = be32_to_cpu(req->tid); 3750 3751 ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, 3752 (__force u32) req->tid); 3753 if (!ep) 3754 return; 3755 3756 switch (req->retval) { 3757 case FW_ENOMEM: 3758 set_bit(ACT_RETRY_NOMEM, &ep->com.history); 3759 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 3760 send_fw_act_open_req(ep, atid); 3761 return; 3762 } 3763 fallthrough; 3764 case FW_EADDRINUSE: 3765 set_bit(ACT_RETRY_INUSE, &ep->com.history); 3766 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { 3767 send_fw_act_open_req(ep, atid); 3768 return; 3769 } 3770 break; 3771 default: 3772 pr_info("%s unexpected ofld conn wr retval %d\n", 3773 __func__, req->retval); 3774 break; 3775 } 3776 pr_err("active ofld_connect_wr failure %d atid %d\n", 3777 req->retval, atid); 3778 mutex_lock(&dev->rdev.stats.lock); 3779 dev->rdev.stats.act_ofld_conn_fails++; 3780 mutex_unlock(&dev->rdev.stats.lock); 3781 connect_reply_upcall(ep, status2errno(req->retval)); 3782 state_set(&ep->com, DEAD); 3783 if (ep->com.remote_addr.ss_family == AF_INET6) { 3784 struct sockaddr_in6 *sin6 = 3785 (struct sockaddr_in6 *)&ep->com.local_addr; 3786 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3787 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3788 } 3789 xa_erase_irq(&dev->atids, atid); 3790 cxgb4_free_atid(dev->rdev.lldi.tids, atid); 3791 dst_release(ep->dst); 3792 cxgb4_l2t_release(ep->l2t); 3793 c4iw_put_ep(&ep->com); 3794 } 3795 3796 static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, 3797 struct cpl_fw6_msg_ofld_connection_wr_rpl *req) 3798 { 3799 struct sk_buff *rpl_skb; 3800 struct cpl_pass_accept_req *cpl; 3801 int ret; 3802 3803 rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; 3804 if (req->retval) { 3805 pr_err("%s passive open failure %d\n", __func__, req->retval); 3806 mutex_lock(&dev->rdev.stats.lock); 3807 dev->rdev.stats.pas_ofld_conn_fails++; 3808 mutex_unlock(&dev->rdev.stats.lock); 3809 kfree_skb(rpl_skb); 3810 } else { 3811 cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); 3812 OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 3813 (__force u32) htonl( 3814 (__force u32) req->tid))); 3815 ret = pass_accept_req(dev, rpl_skb); 3816 if (!ret) 3817 kfree_skb(rpl_skb); 3818 } 3819 return; 3820 } 3821 3822 static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word) 3823 { 3824 u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]); 3825 u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]); 3826 u64 t; 3827 u32 shift = 32; 3828 3829 t = (thi << shift) | (tlo >> shift); 3830 3831 return t; 3832 } 3833 3834 static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift) 3835 { 3836 u32 v; 3837 u64 t = be64_to_cpu(tcb[(31 - word) / 2]); 3838 3839 if (word & 0x1) 3840 shift += 32; 3841 v = (t >> shift) & mask; 3842 return v; 3843 } 3844 3845 static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 3846 { 3847 struct cpl_get_tcb_rpl *rpl = cplhdr(skb); 3848 __be64 *tcb = (__be64 *)(rpl + 1); 3849 unsigned int tid = GET_TID(rpl); 3850 struct c4iw_ep *ep; 3851 u64 t_flags_64; 3852 u32 rx_pdu_out; 3853 3854 ep = get_ep_from_tid(dev, tid); 3855 if (!ep) 3856 return 0; 3857 /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to 3858 * determine if there's a rx PDU feedback event pending. 3859 * 3860 * If that bit is set, it means we'll need to re-read the TCB's 3861 * rq_start value. The final value is the one present in a TCB 3862 * with the TF_RX_PDU_OUT bit cleared. 3863 */ 3864 3865 t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W); 3866 rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S; 3867 3868 c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */ 3869 c4iw_put_ep(&ep->com); /* from read_tcb() */ 3870 3871 /* If TF_RX_PDU_OUT bit is set, re-read the TCB */ 3872 if (rx_pdu_out) { 3873 if (++ep->rx_pdu_out_cnt >= 2) { 3874 WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n"); 3875 goto cleanup; 3876 } 3877 read_tcb(ep); 3878 return 0; 3879 } 3880 3881 ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M, 3882 TCB_RQ_START_S); 3883 cleanup: 3884 pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); 3885 3886 if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) 3887 finish_peer_abort(dev, ep); 3888 else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) 3889 send_abort_req(ep); 3890 else 3891 WARN_ONCE(1, "unexpected state!"); 3892 3893 return 0; 3894 } 3895 3896 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) 3897 { 3898 struct cpl_fw6_msg *rpl = cplhdr(skb); 3899 struct cpl_fw6_msg_ofld_connection_wr_rpl *req; 3900 3901 switch (rpl->type) { 3902 case FW6_TYPE_CQE: 3903 c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); 3904 break; 3905 case FW6_TYPE_OFLD_CONNECTION_WR_RPL: 3906 req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; 3907 switch (req->t_state) { 3908 case TCP_SYN_SENT: 3909 active_ofld_conn_reply(dev, skb, req); 3910 break; 3911 case TCP_SYN_RECV: 3912 passive_ofld_conn_reply(dev, skb, req); 3913 break; 3914 default: 3915 pr_err("%s unexpected ofld conn wr state %d\n", 3916 __func__, req->t_state); 3917 break; 3918 } 3919 break; 3920 } 3921 return 0; 3922 } 3923 3924 static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) 3925 { 3926 __be32 l2info; 3927 __be16 hdr_len, vlantag, len; 3928 u16 eth_hdr_len; 3929 int tcp_hdr_len, ip_hdr_len; 3930 u8 intf; 3931 struct cpl_rx_pkt *cpl = cplhdr(skb); 3932 struct cpl_pass_accept_req *req; 3933 struct tcp_options_received tmp_opt; 3934 struct c4iw_dev *dev; 3935 enum chip_type type; 3936 3937 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 3938 /* Store values from cpl_rx_pkt in temporary location. */ 3939 vlantag = cpl->vlan; 3940 len = cpl->len; 3941 l2info = cpl->l2info; 3942 hdr_len = cpl->hdr_len; 3943 intf = cpl->iff; 3944 3945 __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); 3946 3947 /* 3948 * We need to parse the TCP options from SYN packet. 3949 * to generate cpl_pass_accept_req. 3950 */ 3951 memset(&tmp_opt, 0, sizeof(tmp_opt)); 3952 tcp_clear_options(&tmp_opt); 3953 tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); 3954 3955 req = __skb_push(skb, sizeof(*req)); 3956 memset(req, 0, sizeof(*req)); 3957 req->l2info = cpu_to_be16(SYN_INTF_V(intf) | 3958 SYN_MAC_IDX_V(RX_MACIDX_G( 3959 be32_to_cpu(l2info))) | 3960 SYN_XACT_MATCH_F); 3961 type = dev->rdev.lldi.adapter_type; 3962 tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); 3963 ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); 3964 req->hdr_len = 3965 cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); 3966 if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { 3967 eth_hdr_len = is_t4(type) ? 3968 RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : 3969 RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); 3970 req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | 3971 IP_HDR_LEN_V(ip_hdr_len) | 3972 ETH_HDR_LEN_V(eth_hdr_len)); 3973 } else { /* T6 and later */ 3974 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); 3975 req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | 3976 T6_IP_HDR_LEN_V(ip_hdr_len) | 3977 T6_ETH_HDR_LEN_V(eth_hdr_len)); 3978 } 3979 req->vlan = vlantag; 3980 req->len = len; 3981 req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | 3982 PASS_OPEN_TOS_V(tos)); 3983 req->tcpopt.mss = htons(tmp_opt.mss_clamp); 3984 if (tmp_opt.wscale_ok) 3985 req->tcpopt.wsf = tmp_opt.snd_wscale; 3986 req->tcpopt.tstamp = tmp_opt.saw_tstamp; 3987 if (tmp_opt.sack_ok) 3988 req->tcpopt.sack = 1; 3989 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); 3990 return; 3991 } 3992 3993 static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, 3994 __be32 laddr, __be16 lport, 3995 __be32 raddr, __be16 rport, 3996 u32 rcv_isn, u32 filter, u16 window, 3997 u32 rss_qid, u8 port_id) 3998 { 3999 struct sk_buff *req_skb; 4000 struct fw_ofld_connection_wr *req; 4001 struct cpl_pass_accept_req *cpl = cplhdr(skb); 4002 int ret; 4003 4004 req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); 4005 if (!req_skb) 4006 return; 4007 req = __skb_put_zero(req_skb, sizeof(*req)); 4008 req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F); 4009 req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); 4010 req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F); 4011 req->le.filter = (__force __be32) filter; 4012 req->le.lport = lport; 4013 req->le.pport = rport; 4014 req->le.u.ipv4.lip = laddr; 4015 req->le.u.ipv4.pip = raddr; 4016 req->tcb.rcv_nxt = htonl(rcv_isn + 1); 4017 req->tcb.rcv_adv = htons(window); 4018 req->tcb.t_state_to_astid = 4019 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) | 4020 FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) | 4021 FW_OFLD_CONNECTION_WR_ASTID_V( 4022 PASS_OPEN_TID_G(ntohl(cpl->tos_stid)))); 4023 4024 /* 4025 * We store the qid in opt2 which will be used by the firmware 4026 * to send us the wr response. 4027 */ 4028 req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid)); 4029 4030 /* 4031 * We initialize the MSS index in TCB to 0xF. 4032 * So that when driver sends cpl_pass_accept_rpl 4033 * TCB picks up the correct value. If this was 0 4034 * TP will ignore any value > 0 for MSS index. 4035 */ 4036 req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); 4037 req->cookie = (uintptr_t)skb; 4038 4039 set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); 4040 ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); 4041 if (ret < 0) { 4042 pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, 4043 ret); 4044 kfree_skb(skb); 4045 kfree_skb(req_skb); 4046 } 4047 } 4048 4049 /* 4050 * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt 4051 * messages when a filter is being used instead of server to 4052 * redirect a syn packet. When packets hit filter they are redirected 4053 * to the offload queue and driver tries to establish the connection 4054 * using firmware work request. 4055 */ 4056 static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) 4057 { 4058 int stid; 4059 unsigned int filter; 4060 struct ethhdr *eh = NULL; 4061 struct vlan_ethhdr *vlan_eh = NULL; 4062 struct iphdr *iph; 4063 struct tcphdr *tcph; 4064 struct rss_header *rss = (void *)skb->data; 4065 struct cpl_rx_pkt *cpl = (void *)skb->data; 4066 struct cpl_pass_accept_req *req = (void *)(rss + 1); 4067 struct l2t_entry *e; 4068 struct dst_entry *dst; 4069 struct c4iw_ep *lep = NULL; 4070 u16 window; 4071 struct port_info *pi; 4072 struct net_device *pdev; 4073 u16 rss_qid, eth_hdr_len; 4074 int step; 4075 struct neighbour *neigh; 4076 4077 /* Drop all non-SYN packets */ 4078 if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F))) 4079 goto reject; 4080 4081 /* 4082 * Drop all packets which did not hit the filter. 4083 * Unlikely to happen. 4084 */ 4085 if (!(rss->filter_hit && rss->filter_tid)) 4086 goto reject; 4087 4088 /* 4089 * Calculate the server tid from filter hit index from cpl_rx_pkt. 4090 */ 4091 stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); 4092 4093 lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 4094 if (!lep) { 4095 pr_warn("%s connect request on invalid stid %d\n", 4096 __func__, stid); 4097 goto reject; 4098 } 4099 4100 switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { 4101 case CHELSIO_T4: 4102 eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4103 break; 4104 case CHELSIO_T5: 4105 eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4106 break; 4107 case CHELSIO_T6: 4108 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); 4109 break; 4110 default: 4111 pr_err("T%d Chip is not supported\n", 4112 CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); 4113 goto reject; 4114 } 4115 4116 if (eth_hdr_len == ETH_HLEN) { 4117 eh = (struct ethhdr *)(req + 1); 4118 iph = (struct iphdr *)(eh + 1); 4119 } else { 4120 vlan_eh = (struct vlan_ethhdr *)(req + 1); 4121 iph = (struct iphdr *)(vlan_eh + 1); 4122 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); 4123 } 4124 4125 if (iph->version != 0x4) 4126 goto reject; 4127 4128 tcph = (struct tcphdr *)(iph + 1); 4129 skb_set_network_header(skb, (void *)iph - (void *)rss); 4130 skb_set_transport_header(skb, (void *)tcph - (void *)rss); 4131 skb_get(skb); 4132 4133 pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n", 4134 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), 4135 ntohs(tcph->source), iph->tos); 4136 4137 dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, 4138 iph->daddr, iph->saddr, tcph->dest, 4139 tcph->source, iph->tos); 4140 if (!dst) { 4141 pr_err("%s - failed to find dst entry!\n", __func__); 4142 goto reject; 4143 } 4144 neigh = dst_neigh_lookup_skb(dst, skb); 4145 4146 if (!neigh) { 4147 pr_err("%s - failed to allocate neigh!\n", __func__); 4148 goto free_dst; 4149 } 4150 4151 if (neigh->dev->flags & IFF_LOOPBACK) { 4152 pdev = ip_dev_find(&init_net, iph->daddr); 4153 if (!pdev) { 4154 pr_err("%s - failed to find device!\n", __func__); 4155 goto free_dst; 4156 } 4157 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, 4158 pdev, 0); 4159 pi = (struct port_info *)netdev_priv(pdev); 4160 dev_put(pdev); 4161 } else { 4162 pdev = get_real_dev(neigh->dev); 4163 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, 4164 pdev, 0); 4165 pi = (struct port_info *)netdev_priv(pdev); 4166 } 4167 neigh_release(neigh); 4168 if (!e) { 4169 pr_err("%s - failed to allocate l2t entry!\n", 4170 __func__); 4171 goto free_dst; 4172 } 4173 4174 step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; 4175 rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; 4176 window = (__force u16) htons((__force u16)tcph->window); 4177 4178 /* Calcuate filter portion for LE region. */ 4179 filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( 4180 dev->rdev.lldi.ports[0], 4181 e)); 4182 4183 /* 4184 * Synthesize the cpl_pass_accept_req. We have everything except the 4185 * TID. Once firmware sends a reply with TID we update the TID field 4186 * in cpl and pass it through the regular cpl_pass_accept_req path. 4187 */ 4188 build_cpl_pass_accept_req(skb, stid, iph->tos); 4189 send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, 4190 tcph->source, ntohl(tcph->seq), filter, window, 4191 rss_qid, pi->port_id); 4192 cxgb4_l2t_release(e); 4193 free_dst: 4194 dst_release(dst); 4195 reject: 4196 if (lep) 4197 c4iw_put_ep(&lep->com); 4198 return 0; 4199 } 4200 4201 /* 4202 * These are the real handlers that are called from a 4203 * work queue. 4204 */ 4205 static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { 4206 [CPL_ACT_ESTABLISH] = act_establish, 4207 [CPL_ACT_OPEN_RPL] = act_open_rpl, 4208 [CPL_RX_DATA] = rx_data, 4209 [CPL_ABORT_RPL_RSS] = abort_rpl, 4210 [CPL_ABORT_RPL] = abort_rpl, 4211 [CPL_PASS_OPEN_RPL] = pass_open_rpl, 4212 [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, 4213 [CPL_PASS_ACCEPT_REQ] = pass_accept_req, 4214 [CPL_PASS_ESTABLISH] = pass_establish, 4215 [CPL_PEER_CLOSE] = peer_close, 4216 [CPL_ABORT_REQ_RSS] = peer_abort, 4217 [CPL_CLOSE_CON_RPL] = close_con_rpl, 4218 [CPL_RDMA_TERMINATE] = terminate, 4219 [CPL_FW4_ACK] = fw4_ack, 4220 [CPL_GET_TCB_RPL] = read_tcb_rpl, 4221 [CPL_FW6_MSG] = deferred_fw6_msg, 4222 [CPL_RX_PKT] = rx_pkt, 4223 [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, 4224 [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe 4225 }; 4226 4227 static void process_timeout(struct c4iw_ep *ep) 4228 { 4229 struct c4iw_qp_attributes attrs; 4230 int abort = 1; 4231 4232 mutex_lock(&ep->com.mutex); 4233 pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state); 4234 set_bit(TIMEDOUT, &ep->com.history); 4235 switch (ep->com.state) { 4236 case MPA_REQ_SENT: 4237 connect_reply_upcall(ep, -ETIMEDOUT); 4238 break; 4239 case MPA_REQ_WAIT: 4240 case MPA_REQ_RCVD: 4241 case MPA_REP_SENT: 4242 case FPDU_MODE: 4243 break; 4244 case CLOSING: 4245 case MORIBUND: 4246 if (ep->com.cm_id && ep->com.qp) { 4247 attrs.next_state = C4IW_QP_STATE_ERROR; 4248 c4iw_modify_qp(ep->com.qp->rhp, 4249 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 4250 &attrs, 1); 4251 } 4252 close_complete_upcall(ep, -ETIMEDOUT); 4253 break; 4254 case ABORTING: 4255 case DEAD: 4256 4257 /* 4258 * These states are expected if the ep timed out at the same 4259 * time as another thread was calling stop_ep_timer(). 4260 * So we silently do nothing for these states. 4261 */ 4262 abort = 0; 4263 break; 4264 default: 4265 WARN(1, "%s unexpected state ep %p tid %u state %u\n", 4266 __func__, ep, ep->hwtid, ep->com.state); 4267 abort = 0; 4268 } 4269 mutex_unlock(&ep->com.mutex); 4270 if (abort) 4271 c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 4272 c4iw_put_ep(&ep->com); 4273 } 4274 4275 static void process_timedout_eps(void) 4276 { 4277 struct c4iw_ep *ep; 4278 4279 spin_lock_irq(&timeout_lock); 4280 while (!list_empty(&timeout_list)) { 4281 struct list_head *tmp; 4282 4283 tmp = timeout_list.next; 4284 list_del(tmp); 4285 tmp->next = NULL; 4286 tmp->prev = NULL; 4287 spin_unlock_irq(&timeout_lock); 4288 ep = list_entry(tmp, struct c4iw_ep, entry); 4289 process_timeout(ep); 4290 spin_lock_irq(&timeout_lock); 4291 } 4292 spin_unlock_irq(&timeout_lock); 4293 } 4294 4295 static void process_work(struct work_struct *work) 4296 { 4297 struct sk_buff *skb = NULL; 4298 struct c4iw_dev *dev; 4299 struct cpl_act_establish *rpl; 4300 unsigned int opcode; 4301 int ret; 4302 4303 process_timedout_eps(); 4304 while ((skb = skb_dequeue(&rxq))) { 4305 rpl = cplhdr(skb); 4306 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 4307 opcode = rpl->ot.opcode; 4308 4309 if (opcode >= ARRAY_SIZE(work_handlers) || 4310 !work_handlers[opcode]) { 4311 pr_err("No handler for opcode 0x%x.\n", opcode); 4312 kfree_skb(skb); 4313 } else { 4314 ret = work_handlers[opcode](dev, skb); 4315 if (!ret) 4316 kfree_skb(skb); 4317 } 4318 process_timedout_eps(); 4319 } 4320 } 4321 4322 static DECLARE_WORK(skb_work, process_work); 4323 4324 static void ep_timeout(struct timer_list *t) 4325 { 4326 struct c4iw_ep *ep = from_timer(ep, t, timer); 4327 int kickit = 0; 4328 4329 spin_lock(&timeout_lock); 4330 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 4331 /* 4332 * Only insert if it is not already on the list. 4333 */ 4334 if (!ep->entry.next) { 4335 list_add_tail(&ep->entry, &timeout_list); 4336 kickit = 1; 4337 } 4338 } 4339 spin_unlock(&timeout_lock); 4340 if (kickit) 4341 queue_work(workq, &skb_work); 4342 } 4343 4344 /* 4345 * All the CM events are handled on a work queue to have a safe context. 4346 */ 4347 static int sched(struct c4iw_dev *dev, struct sk_buff *skb) 4348 { 4349 4350 /* 4351 * Save dev in the skb->cb area. 4352 */ 4353 *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev; 4354 4355 /* 4356 * Queue the skb and schedule the worker thread. 4357 */ 4358 skb_queue_tail(&rxq, skb); 4359 queue_work(workq, &skb_work); 4360 return 0; 4361 } 4362 4363 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 4364 { 4365 struct cpl_set_tcb_rpl *rpl = cplhdr(skb); 4366 4367 if (rpl->status != CPL_ERR_NONE) { 4368 pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", 4369 rpl->status, GET_TID(rpl)); 4370 } 4371 kfree_skb(skb); 4372 return 0; 4373 } 4374 4375 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) 4376 { 4377 struct cpl_fw6_msg *rpl = cplhdr(skb); 4378 struct c4iw_wr_wait *wr_waitp; 4379 int ret; 4380 4381 pr_debug("type %u\n", rpl->type); 4382 4383 switch (rpl->type) { 4384 case FW6_TYPE_WR_RPL: 4385 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); 4386 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; 4387 pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret); 4388 if (wr_waitp) 4389 c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0); 4390 kfree_skb(skb); 4391 break; 4392 case FW6_TYPE_CQE: 4393 case FW6_TYPE_OFLD_CONNECTION_WR_RPL: 4394 sched(dev, skb); 4395 break; 4396 default: 4397 pr_err("%s unexpected fw6 msg type %u\n", 4398 __func__, rpl->type); 4399 kfree_skb(skb); 4400 break; 4401 } 4402 return 0; 4403 } 4404 4405 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) 4406 { 4407 struct cpl_abort_req_rss *req = cplhdr(skb); 4408 struct c4iw_ep *ep; 4409 unsigned int tid = GET_TID(req); 4410 4411 ep = get_ep_from_tid(dev, tid); 4412 /* This EP will be dereferenced in peer_abort() */ 4413 if (!ep) { 4414 pr_warn("Abort on non-existent endpoint, tid %d\n", tid); 4415 kfree_skb(skb); 4416 return 0; 4417 } 4418 if (cxgb_is_neg_adv(req->status)) { 4419 pr_debug("Negative advice on abort- tid %u status %d (%s)\n", 4420 ep->hwtid, req->status, 4421 neg_adv_str(req->status)); 4422 goto out; 4423 } 4424 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); 4425 4426 c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); 4427 out: 4428 sched(dev, skb); 4429 return 0; 4430 } 4431 4432 /* 4433 * Most upcalls from the T4 Core go to sched() to 4434 * schedule the processing on a work queue. 4435 */ 4436 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { 4437 [CPL_ACT_ESTABLISH] = sched, 4438 [CPL_ACT_OPEN_RPL] = sched, 4439 [CPL_RX_DATA] = sched, 4440 [CPL_ABORT_RPL_RSS] = sched, 4441 [CPL_ABORT_RPL] = sched, 4442 [CPL_PASS_OPEN_RPL] = sched, 4443 [CPL_CLOSE_LISTSRV_RPL] = sched, 4444 [CPL_PASS_ACCEPT_REQ] = sched, 4445 [CPL_PASS_ESTABLISH] = sched, 4446 [CPL_PEER_CLOSE] = sched, 4447 [CPL_CLOSE_CON_RPL] = sched, 4448 [CPL_ABORT_REQ_RSS] = peer_abort_intr, 4449 [CPL_RDMA_TERMINATE] = sched, 4450 [CPL_FW4_ACK] = sched, 4451 [CPL_SET_TCB_RPL] = set_tcb_rpl, 4452 [CPL_GET_TCB_RPL] = sched, 4453 [CPL_FW6_MSG] = fw6_msg, 4454 [CPL_RX_PKT] = sched 4455 }; 4456 4457 int __init c4iw_cm_init(void) 4458 { 4459 skb_queue_head_init(&rxq); 4460 4461 workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); 4462 if (!workq) 4463 return -ENOMEM; 4464 4465 return 0; 4466 } 4467 4468 void c4iw_cm_term(void) 4469 { 4470 WARN_ON(!list_empty(&timeout_list)); 4471 destroy_workqueue(workq); 4472 } 4473