1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2018 Chelsio Communications, Inc. 4 * 5 * Written by: Atul Gupta (atul.gupta@chelsio.com) 6 */ 7 8 #include <linux/module.h> 9 #include <linux/list.h> 10 #include <linux/workqueue.h> 11 #include <linux/skbuff.h> 12 #include <linux/timer.h> 13 #include <linux/notifier.h> 14 #include <linux/inetdevice.h> 15 #include <linux/ip.h> 16 #include <linux/tcp.h> 17 #include <linux/sched/signal.h> 18 #include <net/tcp.h> 19 #include <net/busy_poll.h> 20 #include <crypto/aes.h> 21 22 #include "chtls.h" 23 #include "chtls_cm.h" 24 25 static bool is_tls_tx(struct chtls_sock *csk) 26 { 27 return csk->tlshws.txkey >= 0; 28 } 29 30 static bool is_tls_rx(struct chtls_sock *csk) 31 { 32 return csk->tlshws.rxkey >= 0; 33 } 34 35 static int data_sgl_len(const struct sk_buff *skb) 36 { 37 unsigned int cnt; 38 39 cnt = skb_shinfo(skb)->nr_frags; 40 return sgl_len(cnt) * 8; 41 } 42 43 static int nos_ivs(struct sock *sk, unsigned int size) 44 { 45 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 46 47 return DIV_ROUND_UP(size, csk->tlshws.mfs); 48 } 49 50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) 51 { 52 int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; 53 int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); 54 55 if ((hlen + KEY_ON_MEM_SZ + ivs_size) < 56 MAX_IMM_OFLD_TX_DATA_WR_LEN) { 57 ULP_SKB_CB(skb)->ulp.tls.iv = 1; 58 return 1; 59 } 60 ULP_SKB_CB(skb)->ulp.tls.iv = 0; 61 return 0; 62 } 63 64 static int max_ivs_size(struct sock *sk, int size) 65 { 66 return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; 67 } 68 69 static int ivs_size(struct sock *sk, const struct sk_buff *skb) 70 { 71 return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) * 72 CIPHER_BLOCK_SIZE) : 0; 73 } 74 75 static int flowc_wr_credits(int nparams, int *flowclenp) 76 { 77 int flowclen16, flowclen; 78 79 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); 80 flowclen16 = DIV_ROUND_UP(flowclen, 16); 81 flowclen = flowclen16 * 16; 82 83 if (flowclenp) 84 *flowclenp = flowclen; 85 86 return flowclen16; 87 } 88 89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk, 90 struct fw_flowc_wr *flowc, 91 int flowclen) 92 { 93 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 94 struct sk_buff *skb; 95 96 skb = alloc_skb(flowclen, GFP_ATOMIC); 97 if (!skb) 98 return NULL; 99 100 __skb_put_data(skb, flowc, flowclen); 101 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); 102 103 return skb; 104 } 105 106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, 107 int flowclen) 108 { 109 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 110 struct tcp_sock *tp = tcp_sk(sk); 111 struct sk_buff *skb; 112 int flowclen16; 113 int ret; 114 115 flowclen16 = flowclen / 16; 116 117 if (csk_flag(sk, CSK_TX_DATA_SENT)) { 118 skb = create_flowc_wr_skb(sk, flowc, flowclen); 119 if (!skb) 120 return -ENOMEM; 121 122 skb_entail(sk, skb, 123 ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); 124 return 0; 125 } 126 127 ret = cxgb4_immdata_send(csk->egress_dev, 128 csk->txq_idx, 129 flowc, flowclen); 130 if (!ret) 131 return flowclen16; 132 skb = create_flowc_wr_skb(sk, flowc, flowclen); 133 if (!skb) 134 return -ENOMEM; 135 send_or_defer(sk, tp, skb, 0); 136 return flowclen16; 137 } 138 139 static u8 tcp_state_to_flowc_state(u8 state) 140 { 141 switch (state) { 142 case TCP_ESTABLISHED: 143 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 144 case TCP_CLOSE_WAIT: 145 return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; 146 case TCP_FIN_WAIT1: 147 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; 148 case TCP_CLOSING: 149 return FW_FLOWC_MNEM_TCPSTATE_CLOSING; 150 case TCP_LAST_ACK: 151 return FW_FLOWC_MNEM_TCPSTATE_LASTACK; 152 case TCP_FIN_WAIT2: 153 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; 154 } 155 156 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 157 } 158 159 int send_tx_flowc_wr(struct sock *sk, int compl, 160 u32 snd_nxt, u32 rcv_nxt) 161 { 162 struct flowc_packed { 163 struct fw_flowc_wr fc; 164 struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; 165 } __packed sflowc; 166 int nparams, paramidx, flowclen16, flowclen; 167 struct fw_flowc_wr *flowc; 168 struct chtls_sock *csk; 169 struct tcp_sock *tp; 170 171 csk = rcu_dereference_sk_user_data(sk); 172 tp = tcp_sk(sk); 173 memset(&sflowc, 0, sizeof(sflowc)); 174 flowc = &sflowc.fc; 175 176 #define FLOWC_PARAM(__m, __v) \ 177 do { \ 178 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ 179 flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ 180 paramidx++; \ 181 } while (0) 182 183 paramidx = 0; 184 185 FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); 186 FLOWC_PARAM(CH, csk->tx_chan); 187 FLOWC_PARAM(PORT, csk->tx_chan); 188 FLOWC_PARAM(IQID, csk->rss_qid); 189 FLOWC_PARAM(SNDNXT, tp->snd_nxt); 190 FLOWC_PARAM(RCVNXT, tp->rcv_nxt); 191 FLOWC_PARAM(SNDBUF, csk->sndbuf); 192 FLOWC_PARAM(MSS, tp->mss_cache); 193 FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); 194 195 if (SND_WSCALE(tp)) 196 FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); 197 198 if (csk->ulp_mode == ULP_MODE_TLS) 199 FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); 200 201 if (csk->tlshws.fcplenmax) 202 FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); 203 204 nparams = paramidx; 205 #undef FLOWC_PARAM 206 207 flowclen16 = flowc_wr_credits(nparams, &flowclen); 208 flowc->op_to_nparams = 209 cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 210 FW_WR_COMPL_V(compl) | 211 FW_FLOWC_WR_NPARAMS_V(nparams)); 212 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | 213 FW_WR_FLOWID_V(csk->tid)); 214 215 return send_flowc_wr(sk, flowc, flowclen); 216 } 217 218 /* Copy IVs to WR */ 219 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) 220 221 { 222 struct chtls_sock *csk; 223 unsigned char *iv_loc; 224 struct chtls_hws *hws; 225 unsigned char *ivs; 226 u16 number_of_ivs; 227 struct page *page; 228 int err = 0; 229 230 csk = rcu_dereference_sk_user_data(sk); 231 hws = &csk->tlshws; 232 number_of_ivs = nos_ivs(sk, skb->len); 233 234 if (number_of_ivs > MAX_IVS_PAGE) { 235 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); 236 return -ENOMEM; 237 } 238 239 /* generate the IVs */ 240 ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); 241 if (!ivs) 242 return -ENOMEM; 243 get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 244 245 if (skb_ulp_tls_iv_imm(skb)) { 246 /* send the IVs as immediate data in the WR */ 247 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * 248 CIPHER_BLOCK_SIZE); 249 if (iv_loc) 250 memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 251 252 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; 253 } else { 254 /* Send the IVs as sgls */ 255 /* Already accounted IV DSGL for credits */ 256 skb_shinfo(skb)->nr_frags--; 257 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); 258 if (!page) { 259 pr_info("%s : Page allocation for IVs failed\n", 260 __func__); 261 err = -ENOMEM; 262 goto out; 263 } 264 memcpy(page_address(page), ivs, number_of_ivs * 265 CIPHER_BLOCK_SIZE); 266 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, 267 number_of_ivs * CIPHER_BLOCK_SIZE); 268 hws->ivsize = 0; 269 } 270 out: 271 kfree(ivs); 272 return err; 273 } 274 275 /* Copy Key to WR */ 276 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) 277 { 278 struct ulptx_sc_memrd *sc_memrd; 279 struct chtls_sock *csk; 280 struct chtls_dev *cdev; 281 struct ulptx_idata *sc; 282 struct chtls_hws *hws; 283 u32 immdlen; 284 int kaddr; 285 286 csk = rcu_dereference_sk_user_data(sk); 287 hws = &csk->tlshws; 288 cdev = csk->cdev; 289 290 immdlen = sizeof(*sc) + sizeof(*sc_memrd); 291 kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); 292 sc = (struct ulptx_idata *)__skb_push(skb, immdlen); 293 if (sc) { 294 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); 295 sc->len = htonl(0); 296 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); 297 sc_memrd->cmd_to_len = 298 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | 299 ULP_TX_SC_MORE_V(1) | 300 ULPTX_LEN16_V(hws->keylen >> 4)); 301 sc_memrd->addr = htonl(kaddr); 302 } 303 } 304 305 static u64 tlstx_incr_seqnum(struct chtls_hws *hws) 306 { 307 return hws->tx_seq_no++; 308 } 309 310 static bool is_sg_request(const struct sk_buff *skb) 311 { 312 return skb->peeked || 313 (skb->len > MAX_IMM_ULPTX_WR_LEN); 314 } 315 316 /* 317 * Returns true if an sk_buff carries urgent data. 318 */ 319 static bool skb_urgent(struct sk_buff *skb) 320 { 321 return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; 322 } 323 324 /* TLS content type for CPL SFO */ 325 static unsigned char tls_content_type(unsigned char content_type) 326 { 327 switch (content_type) { 328 case TLS_HDR_TYPE_CCS: 329 return CPL_TX_TLS_SFO_TYPE_CCS; 330 case TLS_HDR_TYPE_ALERT: 331 return CPL_TX_TLS_SFO_TYPE_ALERT; 332 case TLS_HDR_TYPE_HANDSHAKE: 333 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; 334 case TLS_HDR_TYPE_HEARTBEAT: 335 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; 336 } 337 return CPL_TX_TLS_SFO_TYPE_DATA; 338 } 339 340 static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, 341 int dlen, int tls_immd, u32 credits, 342 int expn, int pdus) 343 { 344 struct fw_tlstx_data_wr *req_wr; 345 struct cpl_tx_tls_sfo *req_cpl; 346 unsigned int wr_ulp_mode_force; 347 struct tls_scmd *updated_scmd; 348 unsigned char data_type; 349 struct chtls_sock *csk; 350 struct net_device *dev; 351 struct chtls_hws *hws; 352 struct tls_scmd *scmd; 353 struct adapter *adap; 354 unsigned char *req; 355 int immd_len; 356 int iv_imm; 357 int len; 358 359 csk = rcu_dereference_sk_user_data(sk); 360 iv_imm = skb_ulp_tls_iv_imm(skb); 361 dev = csk->egress_dev; 362 adap = netdev2adap(dev); 363 hws = &csk->tlshws; 364 scmd = &hws->scmd; 365 len = dlen + expn; 366 367 dlen = (dlen < hws->mfs) ? dlen : hws->mfs; 368 atomic_inc(&adap->chcr_stats.tls_pdu_tx); 369 370 updated_scmd = scmd; 371 updated_scmd->seqno_numivs &= 0xffffff80; 372 updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); 373 hws->scmd = *updated_scmd; 374 375 req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); 376 req_cpl = (struct cpl_tx_tls_sfo *)req; 377 req = (unsigned char *)__skb_push(skb, (sizeof(struct 378 fw_tlstx_data_wr))); 379 380 req_wr = (struct fw_tlstx_data_wr *)req; 381 immd_len = (tls_immd ? dlen : 0); 382 req_wr->op_to_immdlen = 383 htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | 384 FW_TLSTX_DATA_WR_COMPL_V(1) | 385 FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); 386 req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | 387 FW_TLSTX_DATA_WR_LEN16_V(credits)); 388 wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); 389 390 if (is_sg_request(skb)) 391 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 392 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 393 FW_OFLD_TX_DATA_WR_SHOVE_F); 394 395 req_wr->lsodisable_to_flags = 396 htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | 397 TX_URG_V(skb_urgent(skb)) | 398 T6_TX_FORCE_F | wr_ulp_mode_force | 399 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 400 skb_queue_empty(&csk->txq))); 401 402 req_wr->ctxloc_to_exp = 403 htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | 404 FW_TLSTX_DATA_WR_EXP_V(expn) | 405 FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | 406 FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | 407 FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); 408 409 /* Fill in the length */ 410 req_wr->plen = htonl(len); 411 req_wr->mfs = htons(hws->mfs); 412 req_wr->adjustedplen_pkd = 413 htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); 414 req_wr->expinplenmax_pkd = 415 htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); 416 req_wr->pdusinplenmax_pkd = 417 FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); 418 req_wr->r10 = 0; 419 420 data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); 421 req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | 422 CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | 423 CPL_TX_TLS_SFO_CPL_LEN_V(2) | 424 CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); 425 req_cpl->pld_len = htonl(len - expn); 426 427 req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V 428 ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? 429 TLS_HDR_TYPE_HEARTBEAT : 0) | 430 CPL_TX_TLS_SFO_PROTOVER_V(0)); 431 432 /* create the s-command */ 433 req_cpl->r1_lo = 0; 434 req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); 435 req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); 436 req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); 437 } 438 439 /* 440 * Calculate the TLS data expansion size 441 */ 442 static int chtls_expansion_size(struct sock *sk, int data_len, 443 int fullpdu, 444 unsigned short *pducnt) 445 { 446 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 447 struct chtls_hws *hws = &csk->tlshws; 448 struct tls_scmd *scmd = &hws->scmd; 449 int fragsize = hws->mfs; 450 int expnsize = 0; 451 int fragleft; 452 int fragcnt; 453 int expppdu; 454 455 if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == 456 SCMD_CIPH_MODE_AES_GCM) { 457 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + 458 TLS_HEADER_LENGTH; 459 460 if (fullpdu) { 461 *pducnt = data_len / (expppdu + fragsize); 462 if (*pducnt > 32) 463 *pducnt = 32; 464 else if (!*pducnt) 465 *pducnt = 1; 466 expnsize = (*pducnt) * expppdu; 467 return expnsize; 468 } 469 fragcnt = (data_len / fragsize); 470 expnsize = fragcnt * expppdu; 471 fragleft = data_len % fragsize; 472 if (fragleft > 0) 473 expnsize += expppdu; 474 } 475 return expnsize; 476 } 477 478 /* WR with IV, KEY and CPL SFO added */ 479 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, 480 int tls_tx_imm, int tls_len, u32 credits) 481 { 482 unsigned short pdus_per_ulp = 0; 483 struct chtls_sock *csk; 484 struct chtls_hws *hws; 485 int expn_sz; 486 int pdus; 487 488 csk = rcu_dereference_sk_user_data(sk); 489 hws = &csk->tlshws; 490 pdus = DIV_ROUND_UP(tls_len, hws->mfs); 491 expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); 492 if (!hws->compute) { 493 hws->expansion = chtls_expansion_size(sk, 494 hws->fcplenmax, 495 1, &pdus_per_ulp); 496 hws->pdus = pdus_per_ulp; 497 hws->adjustlen = hws->pdus * 498 ((hws->expansion / hws->pdus) + hws->mfs); 499 hws->compute = 1; 500 } 501 if (tls_copy_ivs(sk, skb)) 502 return; 503 tls_copy_tx_key(sk, skb); 504 tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); 505 hws->tx_seq_no += (pdus - 1); 506 } 507 508 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, 509 unsigned int immdlen, int len, 510 u32 credits, u32 compl) 511 { 512 struct fw_ofld_tx_data_wr *req; 513 unsigned int wr_ulp_mode_force; 514 struct chtls_sock *csk; 515 unsigned int opcode; 516 517 csk = rcu_dereference_sk_user_data(sk); 518 opcode = FW_OFLD_TX_DATA_WR; 519 520 req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); 521 req->op_to_immdlen = htonl(WR_OP_V(opcode) | 522 FW_WR_COMPL_V(compl) | 523 FW_WR_IMMDLEN_V(immdlen)); 524 req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | 525 FW_WR_LEN16_V(credits)); 526 527 wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); 528 if (is_sg_request(skb)) 529 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 530 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 531 FW_OFLD_TX_DATA_WR_SHOVE_F); 532 533 req->tunnel_to_proxy = htonl(wr_ulp_mode_force | 534 TX_URG_V(skb_urgent(skb)) | 535 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 536 skb_queue_empty(&csk->txq))); 537 req->plen = htonl(len); 538 } 539 540 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, 541 bool size) 542 { 543 int wr_size; 544 545 wr_size = TLS_WR_CPL_LEN; 546 wr_size += KEY_ON_MEM_SZ; 547 wr_size += ivs_size(csk->sk, skb); 548 549 if (size) 550 return wr_size; 551 552 /* frags counted for IV dsgl */ 553 if (!skb_ulp_tls_iv_imm(skb)) 554 skb_shinfo(skb)->nr_frags++; 555 556 return wr_size; 557 } 558 559 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) 560 { 561 int length = skb->len; 562 563 if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) 564 return false; 565 566 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 567 /* Check TLS header len for Immediate */ 568 if (csk->ulp_mode == ULP_MODE_TLS && 569 skb_ulp_tls_inline(skb)) 570 length += chtls_wr_size(csk, skb, true); 571 else 572 length += sizeof(struct fw_ofld_tx_data_wr); 573 574 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; 575 } 576 return true; 577 } 578 579 static unsigned int calc_tx_flits(const struct sk_buff *skb, 580 unsigned int immdlen) 581 { 582 unsigned int flits, cnt; 583 584 flits = immdlen / 8; /* headers */ 585 cnt = skb_shinfo(skb)->nr_frags; 586 if (skb_tail_pointer(skb) != skb_transport_header(skb)) 587 cnt++; 588 return flits + sgl_len(cnt); 589 } 590 591 static void arp_failure_discard(void *handle, struct sk_buff *skb) 592 { 593 kfree_skb(skb); 594 } 595 596 int chtls_push_frames(struct chtls_sock *csk, int comp) 597 { 598 struct chtls_hws *hws = &csk->tlshws; 599 struct tcp_sock *tp; 600 struct sk_buff *skb; 601 int total_size = 0; 602 struct sock *sk; 603 int wr_size; 604 605 wr_size = sizeof(struct fw_ofld_tx_data_wr); 606 sk = csk->sk; 607 tp = tcp_sk(sk); 608 609 if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) 610 return 0; 611 612 if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) 613 return 0; 614 615 while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && 616 (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || 617 skb_queue_len(&csk->txq) > 1)) { 618 unsigned int credit_len = skb->len; 619 unsigned int credits_needed; 620 unsigned int completion = 0; 621 int tls_len = skb->len;/* TLS data len before IV/key */ 622 unsigned int immdlen; 623 int len = skb->len; /* length [ulp bytes] inserted by hw */ 624 int flowclen16 = 0; 625 int tls_tx_imm = 0; 626 627 immdlen = skb->len; 628 if (!is_ofld_imm(csk, skb)) { 629 immdlen = skb_transport_offset(skb); 630 if (skb_ulp_tls_inline(skb)) 631 wr_size = chtls_wr_size(csk, skb, false); 632 credit_len = 8 * calc_tx_flits(skb, immdlen); 633 } else { 634 if (skb_ulp_tls_inline(skb)) { 635 wr_size = chtls_wr_size(csk, skb, false); 636 tls_tx_imm = 1; 637 } 638 } 639 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) 640 credit_len += wr_size; 641 credits_needed = DIV_ROUND_UP(credit_len, 16); 642 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { 643 flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, 644 tp->rcv_nxt); 645 if (flowclen16 <= 0) 646 break; 647 csk->wr_credits -= flowclen16; 648 csk->wr_unacked += flowclen16; 649 csk->wr_nondata += flowclen16; 650 csk_set_flag(csk, CSK_TX_DATA_SENT); 651 } 652 653 if (csk->wr_credits < credits_needed) { 654 if (skb_ulp_tls_inline(skb) && 655 !skb_ulp_tls_iv_imm(skb)) 656 skb_shinfo(skb)->nr_frags--; 657 break; 658 } 659 660 __skb_unlink(skb, &csk->txq); 661 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | 662 CPL_PRIORITY_DATA); 663 if (hws->ofld) 664 hws->txqid = (skb->queue_mapping >> 1); 665 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); 666 csk->wr_credits -= credits_needed; 667 csk->wr_unacked += credits_needed; 668 csk->wr_nondata = 0; 669 enqueue_wr(csk, skb); 670 671 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 672 if ((comp && csk->wr_unacked == credits_needed) || 673 (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || 674 csk->wr_unacked >= csk->wr_max_credits / 2) { 675 completion = 1; 676 csk->wr_unacked = 0; 677 } 678 if (skb_ulp_tls_inline(skb)) 679 make_tlstx_data_wr(sk, skb, tls_tx_imm, 680 tls_len, credits_needed); 681 else 682 make_tx_data_wr(sk, skb, immdlen, len, 683 credits_needed, completion); 684 tp->snd_nxt += len; 685 tp->lsndtime = tcp_jiffies32; 686 if (completion) 687 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; 688 } else { 689 struct cpl_close_con_req *req = cplhdr(skb); 690 unsigned int cmd = CPL_OPCODE_G(ntohl 691 (OPCODE_TID(req))); 692 693 if (cmd == CPL_CLOSE_CON_REQ) 694 csk_set_flag(csk, 695 CSK_CLOSE_CON_REQUESTED); 696 697 if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && 698 (csk->wr_unacked >= csk->wr_max_credits / 2)) { 699 req->wr.wr_hi |= htonl(FW_WR_COMPL_F); 700 csk->wr_unacked = 0; 701 } 702 } 703 total_size += skb->truesize; 704 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) 705 csk_set_flag(csk, CSK_TX_WAIT_IDLE); 706 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 707 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); 708 } 709 sk->sk_wmem_queued -= total_size; 710 return total_size; 711 } 712 713 static void mark_urg(struct tcp_sock *tp, int flags, 714 struct sk_buff *skb) 715 { 716 if (unlikely(flags & MSG_OOB)) { 717 tp->snd_up = tp->write_seq; 718 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | 719 ULPCB_FLAG_BARRIER | 720 ULPCB_FLAG_NO_APPEND | 721 ULPCB_FLAG_NEED_HDR; 722 } 723 } 724 725 /* 726 * Returns true if a connection should send more data to TCP engine 727 */ 728 static bool should_push(struct sock *sk) 729 { 730 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 731 struct chtls_dev *cdev = csk->cdev; 732 struct tcp_sock *tp = tcp_sk(sk); 733 734 /* 735 * If we've released our offload resources there's nothing to do ... 736 */ 737 if (!cdev) 738 return false; 739 740 /* 741 * If there aren't any work requests in flight, or there isn't enough 742 * data in flight, or Nagle is off then send the current TX_DATA 743 * otherwise hold it and wait to accumulate more data. 744 */ 745 return csk->wr_credits == csk->wr_max_credits || 746 (tp->nonagle & TCP_NAGLE_OFF); 747 } 748 749 /* 750 * Returns true if a TCP socket is corked. 751 */ 752 static bool corked(const struct tcp_sock *tp, int flags) 753 { 754 return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); 755 } 756 757 /* 758 * Returns true if a send should try to push new data. 759 */ 760 static bool send_should_push(struct sock *sk, int flags) 761 { 762 return should_push(sk) && !corked(tcp_sk(sk), flags); 763 } 764 765 void chtls_tcp_push(struct sock *sk, int flags) 766 { 767 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 768 int qlen = skb_queue_len(&csk->txq); 769 770 if (likely(qlen)) { 771 struct sk_buff *skb = skb_peek_tail(&csk->txq); 772 struct tcp_sock *tp = tcp_sk(sk); 773 774 mark_urg(tp, flags, skb); 775 776 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && 777 corked(tp, flags)) { 778 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; 779 return; 780 } 781 782 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; 783 if (qlen == 1 && 784 ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 785 should_push(sk))) 786 chtls_push_frames(csk, 1); 787 } 788 } 789 790 /* 791 * Calculate the size for a new send sk_buff. It's maximum size so we can 792 * pack lots of data into it, unless we plan to send it immediately, in which 793 * case we size it more tightly. 794 * 795 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't 796 * arise in normal cases and when it does we are just wasting memory. 797 */ 798 static int select_size(struct sock *sk, int io_len, int flags, int len) 799 { 800 const int pgbreak = SKB_MAX_HEAD(len); 801 802 /* 803 * If the data wouldn't fit in the main body anyway, put only the 804 * header in the main body so it can use immediate data and place all 805 * the payload in page fragments. 806 */ 807 if (io_len > pgbreak) 808 return 0; 809 810 /* 811 * If we will be accumulating payload get a large main body. 812 */ 813 if (!send_should_push(sk, flags)) 814 return pgbreak; 815 816 return io_len; 817 } 818 819 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) 820 { 821 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 822 struct tcp_sock *tp = tcp_sk(sk); 823 824 ULP_SKB_CB(skb)->seq = tp->write_seq; 825 ULP_SKB_CB(skb)->flags = flags; 826 __skb_queue_tail(&csk->txq, skb); 827 sk->sk_wmem_queued += skb->truesize; 828 829 if (TCP_PAGE(sk) && TCP_OFF(sk)) { 830 put_page(TCP_PAGE(sk)); 831 TCP_PAGE(sk) = NULL; 832 TCP_OFF(sk) = 0; 833 } 834 } 835 836 static struct sk_buff *get_tx_skb(struct sock *sk, int size) 837 { 838 struct sk_buff *skb; 839 840 skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); 841 if (likely(skb)) { 842 skb_reserve(skb, TX_HEADER_LEN); 843 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 844 skb_reset_transport_header(skb); 845 } 846 return skb; 847 } 848 849 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) 850 { 851 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 852 struct sk_buff *skb; 853 854 skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + 855 KEY_ON_MEM_SZ + max_ivs_size(sk, size)), 856 sk->sk_allocation); 857 if (likely(skb)) { 858 skb_reserve(skb, (TX_TLSHDR_LEN + 859 KEY_ON_MEM_SZ + max_ivs_size(sk, size))); 860 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 861 skb_reset_transport_header(skb); 862 ULP_SKB_CB(skb)->ulp.tls.ofld = 1; 863 ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; 864 } 865 return skb; 866 } 867 868 static void tx_skb_finalize(struct sk_buff *skb) 869 { 870 struct ulp_skb_cb *cb = ULP_SKB_CB(skb); 871 872 if (!(cb->flags & ULPCB_FLAG_NO_HDR)) 873 cb->flags = ULPCB_FLAG_NEED_HDR; 874 cb->flags |= ULPCB_FLAG_NO_APPEND; 875 } 876 877 static void push_frames_if_head(struct sock *sk) 878 { 879 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 880 881 if (skb_queue_len(&csk->txq) == 1) 882 chtls_push_frames(csk, 1); 883 } 884 885 static int chtls_skb_copy_to_page_nocache(struct sock *sk, 886 struct iov_iter *from, 887 struct sk_buff *skb, 888 struct page *page, 889 int off, int copy) 890 { 891 int err; 892 893 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + 894 off, copy, skb->len); 895 if (err) 896 return err; 897 898 skb->len += copy; 899 skb->data_len += copy; 900 skb->truesize += copy; 901 sk->sk_wmem_queued += copy; 902 return 0; 903 } 904 905 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk) 906 { 907 return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0); 908 } 909 910 static int csk_wait_memory(struct chtls_dev *cdev, 911 struct sock *sk, long *timeo_p) 912 { 913 DEFINE_WAIT_FUNC(wait, woken_wake_function); 914 int err = 0; 915 long current_timeo; 916 long vm_wait = 0; 917 bool noblock; 918 919 current_timeo = *timeo_p; 920 noblock = (*timeo_p ? false : true); 921 if (csk_mem_free(cdev, sk)) { 922 current_timeo = (prandom_u32() % (HZ / 5)) + 2; 923 vm_wait = (prandom_u32() % (HZ / 5)) + 2; 924 } 925 926 add_wait_queue(sk_sleep(sk), &wait); 927 while (1) { 928 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 929 930 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 931 goto do_error; 932 if (!*timeo_p) { 933 if (noblock) 934 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 935 goto do_nonblock; 936 } 937 if (signal_pending(current)) 938 goto do_interrupted; 939 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 940 if (csk_mem_free(cdev, sk) && !vm_wait) 941 break; 942 943 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 944 sk->sk_write_pending++; 945 sk_wait_event(sk, ¤t_timeo, sk->sk_err || 946 (sk->sk_shutdown & SEND_SHUTDOWN) || 947 (csk_mem_free(cdev, sk) && !vm_wait), &wait); 948 sk->sk_write_pending--; 949 950 if (vm_wait) { 951 vm_wait -= current_timeo; 952 current_timeo = *timeo_p; 953 if (current_timeo != MAX_SCHEDULE_TIMEOUT) { 954 current_timeo -= vm_wait; 955 if (current_timeo < 0) 956 current_timeo = 0; 957 } 958 vm_wait = 0; 959 } 960 *timeo_p = current_timeo; 961 } 962 do_rm_wq: 963 remove_wait_queue(sk_sleep(sk), &wait); 964 return err; 965 do_error: 966 err = -EPIPE; 967 goto do_rm_wq; 968 do_nonblock: 969 err = -EAGAIN; 970 goto do_rm_wq; 971 do_interrupted: 972 err = sock_intr_errno(*timeo_p); 973 goto do_rm_wq; 974 } 975 976 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, 977 unsigned char *record_type) 978 { 979 struct cmsghdr *cmsg; 980 int rc = -EINVAL; 981 982 for_each_cmsghdr(cmsg, msg) { 983 if (!CMSG_OK(msg, cmsg)) 984 return -EINVAL; 985 if (cmsg->cmsg_level != SOL_TLS) 986 continue; 987 988 switch (cmsg->cmsg_type) { 989 case TLS_SET_RECORD_TYPE: 990 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) 991 return -EINVAL; 992 993 if (msg->msg_flags & MSG_MORE) 994 return -EINVAL; 995 996 *record_type = *(unsigned char *)CMSG_DATA(cmsg); 997 rc = 0; 998 break; 999 default: 1000 return -EINVAL; 1001 } 1002 } 1003 1004 return rc; 1005 } 1006 1007 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) 1008 { 1009 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1010 struct chtls_dev *cdev = csk->cdev; 1011 struct tcp_sock *tp = tcp_sk(sk); 1012 struct sk_buff *skb; 1013 int mss, flags, err; 1014 int recordsz = 0; 1015 int copied = 0; 1016 long timeo; 1017 1018 lock_sock(sk); 1019 flags = msg->msg_flags; 1020 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1021 1022 if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { 1023 err = sk_stream_wait_connect(sk, &timeo); 1024 if (err) 1025 goto out_err; 1026 } 1027 1028 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1029 err = -EPIPE; 1030 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1031 goto out_err; 1032 1033 mss = csk->mss; 1034 csk_set_flag(csk, CSK_TX_MORE_DATA); 1035 1036 while (msg_data_left(msg)) { 1037 int copy = 0; 1038 1039 skb = skb_peek_tail(&csk->txq); 1040 if (skb) { 1041 copy = mss - skb->len; 1042 skb->ip_summed = CHECKSUM_UNNECESSARY; 1043 } 1044 if (!csk_mem_free(cdev, sk)) 1045 goto wait_for_sndbuf; 1046 1047 if (is_tls_tx(csk) && !csk->tlshws.txleft) { 1048 unsigned char record_type = TLS_RECORD_TYPE_DATA; 1049 1050 if (unlikely(msg->msg_controllen)) { 1051 err = chtls_proccess_cmsg(sk, msg, 1052 &record_type); 1053 if (err) 1054 goto out_err; 1055 1056 /* Avoid appending tls handshake, alert to tls data */ 1057 if (skb) 1058 tx_skb_finalize(skb); 1059 } 1060 1061 recordsz = size; 1062 csk->tlshws.txleft = recordsz; 1063 csk->tlshws.type = record_type; 1064 } 1065 1066 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 1067 copy <= 0) { 1068 new_buf: 1069 if (skb) { 1070 tx_skb_finalize(skb); 1071 push_frames_if_head(sk); 1072 } 1073 1074 if (is_tls_tx(csk)) { 1075 skb = get_record_skb(sk, 1076 select_size(sk, 1077 recordsz, 1078 flags, 1079 TX_TLSHDR_LEN), 1080 false); 1081 } else { 1082 skb = get_tx_skb(sk, 1083 select_size(sk, size, flags, 1084 TX_HEADER_LEN)); 1085 } 1086 if (unlikely(!skb)) 1087 goto wait_for_memory; 1088 1089 skb->ip_summed = CHECKSUM_UNNECESSARY; 1090 copy = mss; 1091 } 1092 if (copy > size) 1093 copy = size; 1094 1095 if (skb_tailroom(skb) > 0) { 1096 copy = min(copy, skb_tailroom(skb)); 1097 if (is_tls_tx(csk)) 1098 copy = min_t(int, copy, csk->tlshws.txleft); 1099 err = skb_add_data_nocache(sk, skb, 1100 &msg->msg_iter, copy); 1101 if (err) 1102 goto do_fault; 1103 } else { 1104 int i = skb_shinfo(skb)->nr_frags; 1105 struct page *page = TCP_PAGE(sk); 1106 int pg_size = PAGE_SIZE; 1107 int off = TCP_OFF(sk); 1108 bool merge; 1109 1110 if (page) 1111 pg_size = page_size(page); 1112 if (off < pg_size && 1113 skb_can_coalesce(skb, i, page, off)) { 1114 merge = true; 1115 goto copy; 1116 } 1117 merge = false; 1118 if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : 1119 MAX_SKB_FRAGS)) 1120 goto new_buf; 1121 1122 if (page && off == pg_size) { 1123 put_page(page); 1124 TCP_PAGE(sk) = page = NULL; 1125 pg_size = PAGE_SIZE; 1126 } 1127 1128 if (!page) { 1129 gfp_t gfp = sk->sk_allocation; 1130 int order = cdev->send_page_order; 1131 1132 if (order) { 1133 page = alloc_pages(gfp | __GFP_COMP | 1134 __GFP_NOWARN | 1135 __GFP_NORETRY, 1136 order); 1137 if (page) 1138 pg_size <<= order; 1139 } 1140 if (!page) { 1141 page = alloc_page(gfp); 1142 pg_size = PAGE_SIZE; 1143 } 1144 if (!page) 1145 goto wait_for_memory; 1146 off = 0; 1147 } 1148 copy: 1149 if (copy > pg_size - off) 1150 copy = pg_size - off; 1151 if (is_tls_tx(csk)) 1152 copy = min_t(int, copy, csk->tlshws.txleft); 1153 1154 err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, 1155 skb, page, 1156 off, copy); 1157 if (unlikely(err)) { 1158 if (!TCP_PAGE(sk)) { 1159 TCP_PAGE(sk) = page; 1160 TCP_OFF(sk) = 0; 1161 } 1162 goto do_fault; 1163 } 1164 /* Update the skb. */ 1165 if (merge) { 1166 skb_frag_size_add( 1167 &skb_shinfo(skb)->frags[i - 1], 1168 copy); 1169 } else { 1170 skb_fill_page_desc(skb, i, page, off, copy); 1171 if (off + copy < pg_size) { 1172 /* space left keep page */ 1173 get_page(page); 1174 TCP_PAGE(sk) = page; 1175 } else { 1176 TCP_PAGE(sk) = NULL; 1177 } 1178 } 1179 TCP_OFF(sk) = off + copy; 1180 } 1181 if (unlikely(skb->len == mss)) 1182 tx_skb_finalize(skb); 1183 tp->write_seq += copy; 1184 copied += copy; 1185 size -= copy; 1186 1187 if (is_tls_tx(csk)) 1188 csk->tlshws.txleft -= copy; 1189 1190 if (corked(tp, flags) && 1191 (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) 1192 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; 1193 1194 if (size == 0) 1195 goto out; 1196 1197 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) 1198 push_frames_if_head(sk); 1199 continue; 1200 wait_for_sndbuf: 1201 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1202 wait_for_memory: 1203 err = csk_wait_memory(cdev, sk, &timeo); 1204 if (err) 1205 goto do_error; 1206 } 1207 out: 1208 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1209 if (copied) 1210 chtls_tcp_push(sk, flags); 1211 done: 1212 release_sock(sk); 1213 return copied; 1214 do_fault: 1215 if (!skb->len) { 1216 __skb_unlink(skb, &csk->txq); 1217 sk->sk_wmem_queued -= skb->truesize; 1218 __kfree_skb(skb); 1219 } 1220 do_error: 1221 if (copied) 1222 goto out; 1223 out_err: 1224 if (csk_conn_inline(csk)) 1225 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1226 copied = sk_stream_error(sk, flags, err); 1227 goto done; 1228 } 1229 1230 int chtls_sendpage(struct sock *sk, struct page *page, 1231 int offset, size_t size, int flags) 1232 { 1233 struct chtls_sock *csk; 1234 struct chtls_dev *cdev; 1235 int mss, err, copied; 1236 struct tcp_sock *tp; 1237 long timeo; 1238 1239 tp = tcp_sk(sk); 1240 copied = 0; 1241 csk = rcu_dereference_sk_user_data(sk); 1242 cdev = csk->cdev; 1243 lock_sock(sk); 1244 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1245 1246 err = sk_stream_wait_connect(sk, &timeo); 1247 if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && 1248 err != 0) 1249 goto out_err; 1250 1251 mss = csk->mss; 1252 csk_set_flag(csk, CSK_TX_MORE_DATA); 1253 1254 while (size > 0) { 1255 struct sk_buff *skb = skb_peek_tail(&csk->txq); 1256 int copy, i; 1257 1258 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 1259 (copy = mss - skb->len) <= 0) { 1260 new_buf: 1261 if (!csk_mem_free(cdev, sk)) 1262 goto wait_for_sndbuf; 1263 1264 if (is_tls_tx(csk)) { 1265 skb = get_record_skb(sk, 1266 select_size(sk, size, 1267 flags, 1268 TX_TLSHDR_LEN), 1269 true); 1270 } else { 1271 skb = get_tx_skb(sk, 0); 1272 } 1273 if (!skb) 1274 goto wait_for_memory; 1275 copy = mss; 1276 } 1277 if (copy > size) 1278 copy = size; 1279 1280 i = skb_shinfo(skb)->nr_frags; 1281 if (skb_can_coalesce(skb, i, page, offset)) { 1282 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1283 } else if (i < MAX_SKB_FRAGS) { 1284 get_page(page); 1285 skb_fill_page_desc(skb, i, page, offset, copy); 1286 } else { 1287 tx_skb_finalize(skb); 1288 push_frames_if_head(sk); 1289 goto new_buf; 1290 } 1291 1292 skb->len += copy; 1293 if (skb->len == mss) 1294 tx_skb_finalize(skb); 1295 skb->data_len += copy; 1296 skb->truesize += copy; 1297 sk->sk_wmem_queued += copy; 1298 tp->write_seq += copy; 1299 copied += copy; 1300 offset += copy; 1301 size -= copy; 1302 1303 if (corked(tp, flags) && 1304 (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) 1305 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; 1306 1307 if (!size) 1308 break; 1309 1310 if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) 1311 push_frames_if_head(sk); 1312 continue; 1313 wait_for_sndbuf: 1314 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1315 wait_for_memory: 1316 err = csk_wait_memory(cdev, sk, &timeo); 1317 if (err) 1318 goto do_error; 1319 } 1320 out: 1321 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1322 if (copied) 1323 chtls_tcp_push(sk, flags); 1324 done: 1325 release_sock(sk); 1326 return copied; 1327 1328 do_error: 1329 if (copied) 1330 goto out; 1331 1332 out_err: 1333 if (csk_conn_inline(csk)) 1334 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1335 copied = sk_stream_error(sk, flags, err); 1336 goto done; 1337 } 1338 1339 static void chtls_select_window(struct sock *sk) 1340 { 1341 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1342 struct tcp_sock *tp = tcp_sk(sk); 1343 unsigned int wnd = tp->rcv_wnd; 1344 1345 wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); 1346 wnd = max_t(unsigned int, MIN_RCV_WND, wnd); 1347 1348 if (wnd > MAX_RCV_WND) 1349 wnd = MAX_RCV_WND; 1350 1351 /* 1352 * Check if we need to grow the receive window in response to an increase in 1353 * the socket's receive buffer size. Some applications increase the buffer 1354 * size dynamically and rely on the window to grow accordingly. 1355 */ 1356 1357 if (wnd > tp->rcv_wnd) { 1358 tp->rcv_wup -= wnd - tp->rcv_wnd; 1359 tp->rcv_wnd = wnd; 1360 /* Mark the receive window as updated */ 1361 csk_reset_flag(csk, CSK_UPDATE_RCV_WND); 1362 } 1363 } 1364 1365 /* 1366 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted 1367 * to return without sending the message in case we cannot allocate 1368 * an sk_buff. Returns the number of credits sent. 1369 */ 1370 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) 1371 { 1372 struct cpl_rx_data_ack *req; 1373 struct sk_buff *skb; 1374 1375 skb = alloc_skb(sizeof(*req), GFP_ATOMIC); 1376 if (!skb) 1377 return 0; 1378 __skb_put(skb, sizeof(*req)); 1379 req = (struct cpl_rx_data_ack *)skb->head; 1380 1381 set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); 1382 INIT_TP_WR(req, csk->tid); 1383 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, 1384 csk->tid)); 1385 req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | 1386 RX_FORCE_ACK_F); 1387 cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); 1388 return credits; 1389 } 1390 1391 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ 1392 TCPF_FIN_WAIT1 | \ 1393 TCPF_FIN_WAIT2) 1394 1395 /* 1396 * Called after some received data has been read. It returns RX credits 1397 * to the HW for the amount of data processed. 1398 */ 1399 static void chtls_cleanup_rbuf(struct sock *sk, int copied) 1400 { 1401 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1402 struct tcp_sock *tp; 1403 int must_send; 1404 u32 credits; 1405 u32 thres; 1406 1407 thres = 15 * 1024; 1408 1409 if (!sk_in_state(sk, CREDIT_RETURN_STATE)) 1410 return; 1411 1412 chtls_select_window(sk); 1413 tp = tcp_sk(sk); 1414 credits = tp->copied_seq - tp->rcv_wup; 1415 if (unlikely(!credits)) 1416 return; 1417 1418 /* 1419 * For coalescing to work effectively ensure the receive window has 1420 * at least 16KB left. 1421 */ 1422 must_send = credits + 16384 >= tp->rcv_wnd; 1423 1424 if (must_send || credits >= thres) 1425 tp->rcv_wup += send_rx_credits(csk, credits); 1426 } 1427 1428 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1429 int flags, int *addr_len) 1430 { 1431 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1432 struct chtls_hws *hws = &csk->tlshws; 1433 struct net_device *dev = csk->egress_dev; 1434 struct adapter *adap = netdev2adap(dev); 1435 struct tcp_sock *tp = tcp_sk(sk); 1436 unsigned long avail; 1437 int buffers_freed; 1438 int copied = 0; 1439 int target; 1440 long timeo; 1441 1442 buffers_freed = 0; 1443 1444 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1445 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1446 1447 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1448 chtls_cleanup_rbuf(sk, copied); 1449 1450 do { 1451 struct sk_buff *skb; 1452 u32 offset = 0; 1453 1454 if (unlikely(tp->urg_data && 1455 tp->urg_seq == tp->copied_seq)) { 1456 if (copied) 1457 break; 1458 if (signal_pending(current)) { 1459 copied = timeo ? sock_intr_errno(timeo) : 1460 -EAGAIN; 1461 break; 1462 } 1463 } 1464 skb = skb_peek(&sk->sk_receive_queue); 1465 if (skb) 1466 goto found_ok_skb; 1467 if (csk->wr_credits && 1468 skb_queue_len(&csk->txq) && 1469 chtls_push_frames(csk, csk->wr_credits == 1470 csk->wr_max_credits)) 1471 sk->sk_write_space(sk); 1472 1473 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1474 break; 1475 1476 if (copied) { 1477 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1478 (sk->sk_shutdown & RCV_SHUTDOWN) || 1479 signal_pending(current)) 1480 break; 1481 1482 if (!timeo) 1483 break; 1484 } else { 1485 if (sock_flag(sk, SOCK_DONE)) 1486 break; 1487 if (sk->sk_err) { 1488 copied = sock_error(sk); 1489 break; 1490 } 1491 if (sk->sk_shutdown & RCV_SHUTDOWN) 1492 break; 1493 if (sk->sk_state == TCP_CLOSE) { 1494 copied = -ENOTCONN; 1495 break; 1496 } 1497 if (!timeo) { 1498 copied = -EAGAIN; 1499 break; 1500 } 1501 if (signal_pending(current)) { 1502 copied = sock_intr_errno(timeo); 1503 break; 1504 } 1505 } 1506 if (READ_ONCE(sk->sk_backlog.tail)) { 1507 release_sock(sk); 1508 lock_sock(sk); 1509 chtls_cleanup_rbuf(sk, copied); 1510 continue; 1511 } 1512 1513 if (copied >= target) 1514 break; 1515 chtls_cleanup_rbuf(sk, copied); 1516 sk_wait_data(sk, &timeo, NULL); 1517 continue; 1518 found_ok_skb: 1519 if (!skb->len) { 1520 skb_dst_set(skb, NULL); 1521 __skb_unlink(skb, &sk->sk_receive_queue); 1522 kfree_skb(skb); 1523 1524 if (!copied && !timeo) { 1525 copied = -EAGAIN; 1526 break; 1527 } 1528 1529 if (copied < target) { 1530 release_sock(sk); 1531 lock_sock(sk); 1532 continue; 1533 } 1534 break; 1535 } 1536 offset = hws->copied_seq; 1537 avail = skb->len - offset; 1538 if (len < avail) 1539 avail = len; 1540 1541 if (unlikely(tp->urg_data)) { 1542 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1543 1544 if (urg_offset < avail) { 1545 if (urg_offset) { 1546 avail = urg_offset; 1547 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1548 /* First byte is urgent, skip */ 1549 tp->copied_seq++; 1550 offset++; 1551 avail--; 1552 if (!avail) 1553 goto skip_copy; 1554 } 1555 } 1556 } 1557 /* Set record type if not already done. For a non-data record, 1558 * do not proceed if record type could not be copied. 1559 */ 1560 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1561 struct tls_hdr *thdr = (struct tls_hdr *)skb->data; 1562 int cerr = 0; 1563 1564 cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, 1565 sizeof(thdr->type), &thdr->type); 1566 1567 if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) { 1568 copied = -EIO; 1569 break; 1570 } 1571 /* don't send tls header, skip copy */ 1572 goto skip_copy; 1573 } 1574 1575 if (skb_copy_datagram_msg(skb, offset, msg, avail)) { 1576 if (!copied) { 1577 copied = -EFAULT; 1578 break; 1579 } 1580 } 1581 1582 copied += avail; 1583 len -= avail; 1584 hws->copied_seq += avail; 1585 skip_copy: 1586 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1587 tp->urg_data = 0; 1588 1589 if ((avail + offset) >= skb->len) { 1590 struct sk_buff *next_skb; 1591 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1592 tp->copied_seq += skb->len; 1593 hws->rcvpld = skb->hdr_len; 1594 } else { 1595 atomic_inc(&adap->chcr_stats.tls_pdu_rx); 1596 tp->copied_seq += hws->rcvpld; 1597 } 1598 chtls_free_skb(sk, skb); 1599 buffers_freed++; 1600 hws->copied_seq = 0; 1601 next_skb = skb_peek(&sk->sk_receive_queue); 1602 if (copied >= target && !next_skb) 1603 break; 1604 if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) 1605 break; 1606 } 1607 } while (len > 0); 1608 1609 if (buffers_freed) 1610 chtls_cleanup_rbuf(sk, copied); 1611 release_sock(sk); 1612 return copied; 1613 } 1614 1615 /* 1616 * Peek at data in a socket's receive buffer. 1617 */ 1618 static int peekmsg(struct sock *sk, struct msghdr *msg, 1619 size_t len, int flags) 1620 { 1621 struct tcp_sock *tp = tcp_sk(sk); 1622 u32 peek_seq, offset; 1623 struct sk_buff *skb; 1624 int copied = 0; 1625 size_t avail; /* amount of available data in current skb */ 1626 long timeo; 1627 1628 lock_sock(sk); 1629 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1630 peek_seq = tp->copied_seq; 1631 1632 do { 1633 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { 1634 if (copied) 1635 break; 1636 if (signal_pending(current)) { 1637 copied = timeo ? sock_intr_errno(timeo) : 1638 -EAGAIN; 1639 break; 1640 } 1641 } 1642 1643 skb_queue_walk(&sk->sk_receive_queue, skb) { 1644 offset = peek_seq - ULP_SKB_CB(skb)->seq; 1645 if (offset < skb->len) 1646 goto found_ok_skb; 1647 } 1648 1649 /* empty receive queue */ 1650 if (copied) 1651 break; 1652 if (sock_flag(sk, SOCK_DONE)) 1653 break; 1654 if (sk->sk_err) { 1655 copied = sock_error(sk); 1656 break; 1657 } 1658 if (sk->sk_shutdown & RCV_SHUTDOWN) 1659 break; 1660 if (sk->sk_state == TCP_CLOSE) { 1661 copied = -ENOTCONN; 1662 break; 1663 } 1664 if (!timeo) { 1665 copied = -EAGAIN; 1666 break; 1667 } 1668 if (signal_pending(current)) { 1669 copied = sock_intr_errno(timeo); 1670 break; 1671 } 1672 1673 if (READ_ONCE(sk->sk_backlog.tail)) { 1674 /* Do not sleep, just process backlog. */ 1675 release_sock(sk); 1676 lock_sock(sk); 1677 } else { 1678 sk_wait_data(sk, &timeo, NULL); 1679 } 1680 1681 if (unlikely(peek_seq != tp->copied_seq)) { 1682 if (net_ratelimit()) 1683 pr_info("TCP(%s:%d), race in MSG_PEEK.\n", 1684 current->comm, current->pid); 1685 peek_seq = tp->copied_seq; 1686 } 1687 continue; 1688 1689 found_ok_skb: 1690 avail = skb->len - offset; 1691 if (len < avail) 1692 avail = len; 1693 /* 1694 * Do we have urgent data here? We need to skip over the 1695 * urgent byte. 1696 */ 1697 if (unlikely(tp->urg_data)) { 1698 u32 urg_offset = tp->urg_seq - peek_seq; 1699 1700 if (urg_offset < avail) { 1701 /* 1702 * The amount of data we are preparing to copy 1703 * contains urgent data. 1704 */ 1705 if (!urg_offset) { /* First byte is urgent */ 1706 if (!sock_flag(sk, SOCK_URGINLINE)) { 1707 peek_seq++; 1708 offset++; 1709 avail--; 1710 } 1711 if (!avail) 1712 continue; 1713 } else { 1714 /* stop short of the urgent data */ 1715 avail = urg_offset; 1716 } 1717 } 1718 } 1719 1720 /* 1721 * If MSG_TRUNC is specified the data is discarded. 1722 */ 1723 if (likely(!(flags & MSG_TRUNC))) 1724 if (skb_copy_datagram_msg(skb, offset, msg, len)) { 1725 if (!copied) { 1726 copied = -EFAULT; 1727 break; 1728 } 1729 } 1730 peek_seq += avail; 1731 copied += avail; 1732 len -= avail; 1733 } while (len > 0); 1734 1735 release_sock(sk); 1736 return copied; 1737 } 1738 1739 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1740 int flags, int *addr_len) 1741 { 1742 struct tcp_sock *tp = tcp_sk(sk); 1743 struct chtls_sock *csk; 1744 unsigned long avail; /* amount of available data in current skb */ 1745 int buffers_freed; 1746 int copied = 0; 1747 long timeo; 1748 int target; /* Read at least this many bytes */ 1749 1750 buffers_freed = 0; 1751 1752 if (unlikely(flags & MSG_OOB)) 1753 return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); 1754 1755 if (unlikely(flags & MSG_PEEK)) 1756 return peekmsg(sk, msg, len, flags); 1757 1758 if (sk_can_busy_loop(sk) && 1759 skb_queue_empty_lockless(&sk->sk_receive_queue) && 1760 sk->sk_state == TCP_ESTABLISHED) 1761 sk_busy_loop(sk, flags & MSG_DONTWAIT); 1762 1763 lock_sock(sk); 1764 csk = rcu_dereference_sk_user_data(sk); 1765 1766 if (is_tls_rx(csk)) 1767 return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); 1768 1769 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1770 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1771 1772 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1773 chtls_cleanup_rbuf(sk, copied); 1774 1775 do { 1776 struct sk_buff *skb; 1777 u32 offset; 1778 1779 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { 1780 if (copied) 1781 break; 1782 if (signal_pending(current)) { 1783 copied = timeo ? sock_intr_errno(timeo) : 1784 -EAGAIN; 1785 break; 1786 } 1787 } 1788 1789 skb = skb_peek(&sk->sk_receive_queue); 1790 if (skb) 1791 goto found_ok_skb; 1792 1793 if (csk->wr_credits && 1794 skb_queue_len(&csk->txq) && 1795 chtls_push_frames(csk, csk->wr_credits == 1796 csk->wr_max_credits)) 1797 sk->sk_write_space(sk); 1798 1799 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1800 break; 1801 1802 if (copied) { 1803 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1804 (sk->sk_shutdown & RCV_SHUTDOWN) || 1805 signal_pending(current)) 1806 break; 1807 } else { 1808 if (sock_flag(sk, SOCK_DONE)) 1809 break; 1810 if (sk->sk_err) { 1811 copied = sock_error(sk); 1812 break; 1813 } 1814 if (sk->sk_shutdown & RCV_SHUTDOWN) 1815 break; 1816 if (sk->sk_state == TCP_CLOSE) { 1817 copied = -ENOTCONN; 1818 break; 1819 } 1820 if (!timeo) { 1821 copied = -EAGAIN; 1822 break; 1823 } 1824 if (signal_pending(current)) { 1825 copied = sock_intr_errno(timeo); 1826 break; 1827 } 1828 } 1829 1830 if (READ_ONCE(sk->sk_backlog.tail)) { 1831 release_sock(sk); 1832 lock_sock(sk); 1833 chtls_cleanup_rbuf(sk, copied); 1834 continue; 1835 } 1836 1837 if (copied >= target) 1838 break; 1839 chtls_cleanup_rbuf(sk, copied); 1840 sk_wait_data(sk, &timeo, NULL); 1841 continue; 1842 1843 found_ok_skb: 1844 if (!skb->len) { 1845 chtls_kfree_skb(sk, skb); 1846 if (!copied && !timeo) { 1847 copied = -EAGAIN; 1848 break; 1849 } 1850 1851 if (copied < target) 1852 continue; 1853 1854 break; 1855 } 1856 1857 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; 1858 avail = skb->len - offset; 1859 if (len < avail) 1860 avail = len; 1861 1862 if (unlikely(tp->urg_data)) { 1863 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1864 1865 if (urg_offset < avail) { 1866 if (urg_offset) { 1867 avail = urg_offset; 1868 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1869 tp->copied_seq++; 1870 offset++; 1871 avail--; 1872 if (!avail) 1873 goto skip_copy; 1874 } 1875 } 1876 } 1877 1878 if (likely(!(flags & MSG_TRUNC))) { 1879 if (skb_copy_datagram_msg(skb, offset, 1880 msg, avail)) { 1881 if (!copied) { 1882 copied = -EFAULT; 1883 break; 1884 } 1885 } 1886 } 1887 1888 tp->copied_seq += avail; 1889 copied += avail; 1890 len -= avail; 1891 1892 skip_copy: 1893 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1894 tp->urg_data = 0; 1895 1896 if (avail + offset >= skb->len) { 1897 chtls_free_skb(sk, skb); 1898 buffers_freed++; 1899 1900 if (copied >= target && 1901 !skb_peek(&sk->sk_receive_queue)) 1902 break; 1903 } 1904 } while (len > 0); 1905 1906 if (buffers_freed) 1907 chtls_cleanup_rbuf(sk, copied); 1908 1909 release_sock(sk); 1910 return copied; 1911 } 1912