1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2018 Chelsio Communications, Inc. 4 * 5 * Written by: Atul Gupta (atul.gupta@chelsio.com) 6 */ 7 8 #include <linux/module.h> 9 #include <linux/list.h> 10 #include <linux/workqueue.h> 11 #include <linux/skbuff.h> 12 #include <linux/timer.h> 13 #include <linux/notifier.h> 14 #include <linux/inetdevice.h> 15 #include <linux/ip.h> 16 #include <linux/tcp.h> 17 #include <linux/sched/signal.h> 18 #include <net/tcp.h> 19 #include <net/busy_poll.h> 20 #include <crypto/aes.h> 21 22 #include "chtls.h" 23 #include "chtls_cm.h" 24 25 static bool is_tls_tx(struct chtls_sock *csk) 26 { 27 return csk->tlshws.txkey >= 0; 28 } 29 30 static bool is_tls_rx(struct chtls_sock *csk) 31 { 32 return csk->tlshws.rxkey >= 0; 33 } 34 35 static int data_sgl_len(const struct sk_buff *skb) 36 { 37 unsigned int cnt; 38 39 cnt = skb_shinfo(skb)->nr_frags; 40 return sgl_len(cnt) * 8; 41 } 42 43 static int nos_ivs(struct sock *sk, unsigned int size) 44 { 45 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 46 47 return DIV_ROUND_UP(size, csk->tlshws.mfs); 48 } 49 50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) 51 { 52 int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; 53 int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); 54 55 if ((hlen + KEY_ON_MEM_SZ + ivs_size) < 56 MAX_IMM_OFLD_TX_DATA_WR_LEN) { 57 ULP_SKB_CB(skb)->ulp.tls.iv = 1; 58 return 1; 59 } 60 ULP_SKB_CB(skb)->ulp.tls.iv = 0; 61 return 0; 62 } 63 64 static int max_ivs_size(struct sock *sk, int size) 65 { 66 return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; 67 } 68 69 static int ivs_size(struct sock *sk, const struct sk_buff *skb) 70 { 71 return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) * 72 CIPHER_BLOCK_SIZE) : 0; 73 } 74 75 static int flowc_wr_credits(int nparams, int *flowclenp) 76 { 77 int flowclen16, flowclen; 78 79 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); 80 flowclen16 = DIV_ROUND_UP(flowclen, 16); 81 flowclen = flowclen16 * 16; 82 83 if (flowclenp) 84 *flowclenp = flowclen; 85 86 return flowclen16; 87 } 88 89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk, 90 struct fw_flowc_wr *flowc, 91 int flowclen) 92 { 93 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 94 struct sk_buff *skb; 95 96 skb = alloc_skb(flowclen, GFP_ATOMIC); 97 if (!skb) 98 return NULL; 99 100 __skb_put_data(skb, flowc, flowclen); 101 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); 102 103 return skb; 104 } 105 106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, 107 int flowclen) 108 { 109 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 110 struct tcp_sock *tp = tcp_sk(sk); 111 struct sk_buff *skb; 112 int flowclen16; 113 int ret; 114 115 flowclen16 = flowclen / 16; 116 117 if (csk_flag(sk, CSK_TX_DATA_SENT)) { 118 skb = create_flowc_wr_skb(sk, flowc, flowclen); 119 if (!skb) 120 return -ENOMEM; 121 122 skb_entail(sk, skb, 123 ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); 124 return 0; 125 } 126 127 ret = cxgb4_immdata_send(csk->egress_dev, 128 csk->txq_idx, 129 flowc, flowclen); 130 if (!ret) 131 return flowclen16; 132 skb = create_flowc_wr_skb(sk, flowc, flowclen); 133 if (!skb) 134 return -ENOMEM; 135 send_or_defer(sk, tp, skb, 0); 136 return flowclen16; 137 } 138 139 static u8 tcp_state_to_flowc_state(u8 state) 140 { 141 switch (state) { 142 case TCP_ESTABLISHED: 143 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 144 case TCP_CLOSE_WAIT: 145 return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; 146 case TCP_FIN_WAIT1: 147 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; 148 case TCP_CLOSING: 149 return FW_FLOWC_MNEM_TCPSTATE_CLOSING; 150 case TCP_LAST_ACK: 151 return FW_FLOWC_MNEM_TCPSTATE_LASTACK; 152 case TCP_FIN_WAIT2: 153 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; 154 } 155 156 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 157 } 158 159 int send_tx_flowc_wr(struct sock *sk, int compl, 160 u32 snd_nxt, u32 rcv_nxt) 161 { 162 struct flowc_packed { 163 struct fw_flowc_wr fc; 164 struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; 165 } __packed sflowc; 166 int nparams, paramidx, flowclen16, flowclen; 167 struct fw_flowc_wr *flowc; 168 struct chtls_sock *csk; 169 struct tcp_sock *tp; 170 171 csk = rcu_dereference_sk_user_data(sk); 172 tp = tcp_sk(sk); 173 memset(&sflowc, 0, sizeof(sflowc)); 174 flowc = &sflowc.fc; 175 176 #define FLOWC_PARAM(__m, __v) \ 177 do { \ 178 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ 179 flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ 180 paramidx++; \ 181 } while (0) 182 183 paramidx = 0; 184 185 FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); 186 FLOWC_PARAM(CH, csk->tx_chan); 187 FLOWC_PARAM(PORT, csk->tx_chan); 188 FLOWC_PARAM(IQID, csk->rss_qid); 189 FLOWC_PARAM(SNDNXT, tp->snd_nxt); 190 FLOWC_PARAM(RCVNXT, tp->rcv_nxt); 191 FLOWC_PARAM(SNDBUF, csk->sndbuf); 192 FLOWC_PARAM(MSS, tp->mss_cache); 193 FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); 194 195 if (SND_WSCALE(tp)) 196 FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); 197 198 if (csk->ulp_mode == ULP_MODE_TLS) 199 FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); 200 201 if (csk->tlshws.fcplenmax) 202 FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); 203 204 nparams = paramidx; 205 #undef FLOWC_PARAM 206 207 flowclen16 = flowc_wr_credits(nparams, &flowclen); 208 flowc->op_to_nparams = 209 cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 210 FW_WR_COMPL_V(compl) | 211 FW_FLOWC_WR_NPARAMS_V(nparams)); 212 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | 213 FW_WR_FLOWID_V(csk->tid)); 214 215 return send_flowc_wr(sk, flowc, flowclen); 216 } 217 218 /* Copy IVs to WR */ 219 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) 220 221 { 222 struct chtls_sock *csk; 223 unsigned char *iv_loc; 224 struct chtls_hws *hws; 225 unsigned char *ivs; 226 u16 number_of_ivs; 227 struct page *page; 228 int err = 0; 229 230 csk = rcu_dereference_sk_user_data(sk); 231 hws = &csk->tlshws; 232 number_of_ivs = nos_ivs(sk, skb->len); 233 234 if (number_of_ivs > MAX_IVS_PAGE) { 235 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); 236 return -ENOMEM; 237 } 238 239 /* generate the IVs */ 240 ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); 241 if (!ivs) 242 return -ENOMEM; 243 get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 244 245 if (skb_ulp_tls_iv_imm(skb)) { 246 /* send the IVs as immediate data in the WR */ 247 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * 248 CIPHER_BLOCK_SIZE); 249 if (iv_loc) 250 memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 251 252 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; 253 } else { 254 /* Send the IVs as sgls */ 255 /* Already accounted IV DSGL for credits */ 256 skb_shinfo(skb)->nr_frags--; 257 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); 258 if (!page) { 259 pr_info("%s : Page allocation for IVs failed\n", 260 __func__); 261 err = -ENOMEM; 262 goto out; 263 } 264 memcpy(page_address(page), ivs, number_of_ivs * 265 CIPHER_BLOCK_SIZE); 266 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, 267 number_of_ivs * CIPHER_BLOCK_SIZE); 268 hws->ivsize = 0; 269 } 270 out: 271 kfree(ivs); 272 return err; 273 } 274 275 /* Copy Key to WR */ 276 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) 277 { 278 struct ulptx_sc_memrd *sc_memrd; 279 struct chtls_sock *csk; 280 struct chtls_dev *cdev; 281 struct ulptx_idata *sc; 282 struct chtls_hws *hws; 283 u32 immdlen; 284 int kaddr; 285 286 csk = rcu_dereference_sk_user_data(sk); 287 hws = &csk->tlshws; 288 cdev = csk->cdev; 289 290 immdlen = sizeof(*sc) + sizeof(*sc_memrd); 291 kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); 292 sc = (struct ulptx_idata *)__skb_push(skb, immdlen); 293 if (sc) { 294 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); 295 sc->len = htonl(0); 296 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); 297 sc_memrd->cmd_to_len = 298 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | 299 ULP_TX_SC_MORE_V(1) | 300 ULPTX_LEN16_V(hws->keylen >> 4)); 301 sc_memrd->addr = htonl(kaddr); 302 } 303 } 304 305 static u64 tlstx_incr_seqnum(struct chtls_hws *hws) 306 { 307 return hws->tx_seq_no++; 308 } 309 310 static bool is_sg_request(const struct sk_buff *skb) 311 { 312 return skb->peeked || 313 (skb->len > MAX_IMM_ULPTX_WR_LEN); 314 } 315 316 /* 317 * Returns true if an sk_buff carries urgent data. 318 */ 319 static bool skb_urgent(struct sk_buff *skb) 320 { 321 return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; 322 } 323 324 /* TLS content type for CPL SFO */ 325 static unsigned char tls_content_type(unsigned char content_type) 326 { 327 switch (content_type) { 328 case TLS_HDR_TYPE_CCS: 329 return CPL_TX_TLS_SFO_TYPE_CCS; 330 case TLS_HDR_TYPE_ALERT: 331 return CPL_TX_TLS_SFO_TYPE_ALERT; 332 case TLS_HDR_TYPE_HANDSHAKE: 333 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; 334 case TLS_HDR_TYPE_HEARTBEAT: 335 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; 336 } 337 return CPL_TX_TLS_SFO_TYPE_DATA; 338 } 339 340 static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, 341 int dlen, int tls_immd, u32 credits, 342 int expn, int pdus) 343 { 344 struct fw_tlstx_data_wr *req_wr; 345 struct cpl_tx_tls_sfo *req_cpl; 346 unsigned int wr_ulp_mode_force; 347 struct tls_scmd *updated_scmd; 348 unsigned char data_type; 349 struct chtls_sock *csk; 350 struct net_device *dev; 351 struct chtls_hws *hws; 352 struct tls_scmd *scmd; 353 struct adapter *adap; 354 unsigned char *req; 355 int immd_len; 356 int iv_imm; 357 int len; 358 359 csk = rcu_dereference_sk_user_data(sk); 360 iv_imm = skb_ulp_tls_iv_imm(skb); 361 dev = csk->egress_dev; 362 adap = netdev2adap(dev); 363 hws = &csk->tlshws; 364 scmd = &hws->scmd; 365 len = dlen + expn; 366 367 dlen = (dlen < hws->mfs) ? dlen : hws->mfs; 368 atomic_inc(&adap->chcr_stats.tls_pdu_tx); 369 370 updated_scmd = scmd; 371 updated_scmd->seqno_numivs &= 0xffffff80; 372 updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); 373 hws->scmd = *updated_scmd; 374 375 req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); 376 req_cpl = (struct cpl_tx_tls_sfo *)req; 377 req = (unsigned char *)__skb_push(skb, (sizeof(struct 378 fw_tlstx_data_wr))); 379 380 req_wr = (struct fw_tlstx_data_wr *)req; 381 immd_len = (tls_immd ? dlen : 0); 382 req_wr->op_to_immdlen = 383 htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | 384 FW_TLSTX_DATA_WR_COMPL_V(1) | 385 FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); 386 req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | 387 FW_TLSTX_DATA_WR_LEN16_V(credits)); 388 wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); 389 390 if (is_sg_request(skb)) 391 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 392 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 393 FW_OFLD_TX_DATA_WR_SHOVE_F); 394 395 req_wr->lsodisable_to_flags = 396 htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | 397 TX_URG_V(skb_urgent(skb)) | 398 T6_TX_FORCE_F | wr_ulp_mode_force | 399 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 400 skb_queue_empty(&csk->txq))); 401 402 req_wr->ctxloc_to_exp = 403 htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | 404 FW_TLSTX_DATA_WR_EXP_V(expn) | 405 FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | 406 FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | 407 FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); 408 409 /* Fill in the length */ 410 req_wr->plen = htonl(len); 411 req_wr->mfs = htons(hws->mfs); 412 req_wr->adjustedplen_pkd = 413 htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); 414 req_wr->expinplenmax_pkd = 415 htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); 416 req_wr->pdusinplenmax_pkd = 417 FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); 418 req_wr->r10 = 0; 419 420 data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); 421 req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | 422 CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | 423 CPL_TX_TLS_SFO_CPL_LEN_V(2) | 424 CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); 425 req_cpl->pld_len = htonl(len - expn); 426 427 req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V 428 ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? 429 TLS_HDR_TYPE_HEARTBEAT : 0) | 430 CPL_TX_TLS_SFO_PROTOVER_V(0)); 431 432 /* create the s-command */ 433 req_cpl->r1_lo = 0; 434 req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); 435 req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); 436 req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); 437 } 438 439 /* 440 * Calculate the TLS data expansion size 441 */ 442 static int chtls_expansion_size(struct sock *sk, int data_len, 443 int fullpdu, 444 unsigned short *pducnt) 445 { 446 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 447 struct chtls_hws *hws = &csk->tlshws; 448 struct tls_scmd *scmd = &hws->scmd; 449 int fragsize = hws->mfs; 450 int expnsize = 0; 451 int fragleft; 452 int fragcnt; 453 int expppdu; 454 455 if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == 456 SCMD_CIPH_MODE_AES_GCM) { 457 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + 458 TLS_HEADER_LENGTH; 459 460 if (fullpdu) { 461 *pducnt = data_len / (expppdu + fragsize); 462 if (*pducnt > 32) 463 *pducnt = 32; 464 else if (!*pducnt) 465 *pducnt = 1; 466 expnsize = (*pducnt) * expppdu; 467 return expnsize; 468 } 469 fragcnt = (data_len / fragsize); 470 expnsize = fragcnt * expppdu; 471 fragleft = data_len % fragsize; 472 if (fragleft > 0) 473 expnsize += expppdu; 474 } 475 return expnsize; 476 } 477 478 /* WR with IV, KEY and CPL SFO added */ 479 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, 480 int tls_tx_imm, int tls_len, u32 credits) 481 { 482 unsigned short pdus_per_ulp = 0; 483 struct chtls_sock *csk; 484 struct chtls_hws *hws; 485 int expn_sz; 486 int pdus; 487 488 csk = rcu_dereference_sk_user_data(sk); 489 hws = &csk->tlshws; 490 pdus = DIV_ROUND_UP(tls_len, hws->mfs); 491 expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); 492 if (!hws->compute) { 493 hws->expansion = chtls_expansion_size(sk, 494 hws->fcplenmax, 495 1, &pdus_per_ulp); 496 hws->pdus = pdus_per_ulp; 497 hws->adjustlen = hws->pdus * 498 ((hws->expansion / hws->pdus) + hws->mfs); 499 hws->compute = 1; 500 } 501 if (tls_copy_ivs(sk, skb)) 502 return; 503 tls_copy_tx_key(sk, skb); 504 tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); 505 hws->tx_seq_no += (pdus - 1); 506 } 507 508 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, 509 unsigned int immdlen, int len, 510 u32 credits, u32 compl) 511 { 512 struct fw_ofld_tx_data_wr *req; 513 unsigned int wr_ulp_mode_force; 514 struct chtls_sock *csk; 515 unsigned int opcode; 516 517 csk = rcu_dereference_sk_user_data(sk); 518 opcode = FW_OFLD_TX_DATA_WR; 519 520 req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); 521 req->op_to_immdlen = htonl(WR_OP_V(opcode) | 522 FW_WR_COMPL_V(compl) | 523 FW_WR_IMMDLEN_V(immdlen)); 524 req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | 525 FW_WR_LEN16_V(credits)); 526 527 wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); 528 if (is_sg_request(skb)) 529 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 530 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 531 FW_OFLD_TX_DATA_WR_SHOVE_F); 532 533 req->tunnel_to_proxy = htonl(wr_ulp_mode_force | 534 TX_URG_V(skb_urgent(skb)) | 535 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 536 skb_queue_empty(&csk->txq))); 537 req->plen = htonl(len); 538 } 539 540 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, 541 bool size) 542 { 543 int wr_size; 544 545 wr_size = TLS_WR_CPL_LEN; 546 wr_size += KEY_ON_MEM_SZ; 547 wr_size += ivs_size(csk->sk, skb); 548 549 if (size) 550 return wr_size; 551 552 /* frags counted for IV dsgl */ 553 if (!skb_ulp_tls_iv_imm(skb)) 554 skb_shinfo(skb)->nr_frags++; 555 556 return wr_size; 557 } 558 559 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) 560 { 561 int length = skb->len; 562 563 if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) 564 return false; 565 566 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 567 /* Check TLS header len for Immediate */ 568 if (csk->ulp_mode == ULP_MODE_TLS && 569 skb_ulp_tls_inline(skb)) 570 length += chtls_wr_size(csk, skb, true); 571 else 572 length += sizeof(struct fw_ofld_tx_data_wr); 573 574 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; 575 } 576 return true; 577 } 578 579 static unsigned int calc_tx_flits(const struct sk_buff *skb, 580 unsigned int immdlen) 581 { 582 unsigned int flits, cnt; 583 584 flits = immdlen / 8; /* headers */ 585 cnt = skb_shinfo(skb)->nr_frags; 586 if (skb_tail_pointer(skb) != skb_transport_header(skb)) 587 cnt++; 588 return flits + sgl_len(cnt); 589 } 590 591 static void arp_failure_discard(void *handle, struct sk_buff *skb) 592 { 593 kfree_skb(skb); 594 } 595 596 int chtls_push_frames(struct chtls_sock *csk, int comp) 597 { 598 struct chtls_hws *hws = &csk->tlshws; 599 struct tcp_sock *tp; 600 struct sk_buff *skb; 601 int total_size = 0; 602 struct sock *sk; 603 int wr_size; 604 605 wr_size = sizeof(struct fw_ofld_tx_data_wr); 606 sk = csk->sk; 607 tp = tcp_sk(sk); 608 609 if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) 610 return 0; 611 612 if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) 613 return 0; 614 615 while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && 616 (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || 617 skb_queue_len(&csk->txq) > 1)) { 618 unsigned int credit_len = skb->len; 619 unsigned int credits_needed; 620 unsigned int completion = 0; 621 int tls_len = skb->len;/* TLS data len before IV/key */ 622 unsigned int immdlen; 623 int len = skb->len; /* length [ulp bytes] inserted by hw */ 624 int flowclen16 = 0; 625 int tls_tx_imm = 0; 626 627 immdlen = skb->len; 628 if (!is_ofld_imm(csk, skb)) { 629 immdlen = skb_transport_offset(skb); 630 if (skb_ulp_tls_inline(skb)) 631 wr_size = chtls_wr_size(csk, skb, false); 632 credit_len = 8 * calc_tx_flits(skb, immdlen); 633 } else { 634 if (skb_ulp_tls_inline(skb)) { 635 wr_size = chtls_wr_size(csk, skb, false); 636 tls_tx_imm = 1; 637 } 638 } 639 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) 640 credit_len += wr_size; 641 credits_needed = DIV_ROUND_UP(credit_len, 16); 642 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { 643 flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, 644 tp->rcv_nxt); 645 if (flowclen16 <= 0) 646 break; 647 csk->wr_credits -= flowclen16; 648 csk->wr_unacked += flowclen16; 649 csk->wr_nondata += flowclen16; 650 csk_set_flag(csk, CSK_TX_DATA_SENT); 651 } 652 653 if (csk->wr_credits < credits_needed) { 654 if (skb_ulp_tls_inline(skb) && 655 !skb_ulp_tls_iv_imm(skb)) 656 skb_shinfo(skb)->nr_frags--; 657 break; 658 } 659 660 __skb_unlink(skb, &csk->txq); 661 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | 662 CPL_PRIORITY_DATA); 663 if (hws->ofld) 664 hws->txqid = (skb->queue_mapping >> 1); 665 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); 666 csk->wr_credits -= credits_needed; 667 csk->wr_unacked += credits_needed; 668 csk->wr_nondata = 0; 669 enqueue_wr(csk, skb); 670 671 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 672 if ((comp && csk->wr_unacked == credits_needed) || 673 (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || 674 csk->wr_unacked >= csk->wr_max_credits / 2) { 675 completion = 1; 676 csk->wr_unacked = 0; 677 } 678 if (skb_ulp_tls_inline(skb)) 679 make_tlstx_data_wr(sk, skb, tls_tx_imm, 680 tls_len, credits_needed); 681 else 682 make_tx_data_wr(sk, skb, immdlen, len, 683 credits_needed, completion); 684 tp->snd_nxt += len; 685 tp->lsndtime = tcp_jiffies32; 686 if (completion) 687 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; 688 } else { 689 struct cpl_close_con_req *req = cplhdr(skb); 690 unsigned int cmd = CPL_OPCODE_G(ntohl 691 (OPCODE_TID(req))); 692 693 if (cmd == CPL_CLOSE_CON_REQ) 694 csk_set_flag(csk, 695 CSK_CLOSE_CON_REQUESTED); 696 697 if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && 698 (csk->wr_unacked >= csk->wr_max_credits / 2)) { 699 req->wr.wr_hi |= htonl(FW_WR_COMPL_F); 700 csk->wr_unacked = 0; 701 } 702 } 703 total_size += skb->truesize; 704 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) 705 csk_set_flag(csk, CSK_TX_WAIT_IDLE); 706 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 707 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); 708 } 709 sk->sk_wmem_queued -= total_size; 710 return total_size; 711 } 712 713 static void mark_urg(struct tcp_sock *tp, int flags, 714 struct sk_buff *skb) 715 { 716 if (unlikely(flags & MSG_OOB)) { 717 tp->snd_up = tp->write_seq; 718 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | 719 ULPCB_FLAG_BARRIER | 720 ULPCB_FLAG_NO_APPEND | 721 ULPCB_FLAG_NEED_HDR; 722 } 723 } 724 725 /* 726 * Returns true if a connection should send more data to TCP engine 727 */ 728 static bool should_push(struct sock *sk) 729 { 730 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 731 struct chtls_dev *cdev = csk->cdev; 732 struct tcp_sock *tp = tcp_sk(sk); 733 734 /* 735 * If we've released our offload resources there's nothing to do ... 736 */ 737 if (!cdev) 738 return false; 739 740 /* 741 * If there aren't any work requests in flight, or there isn't enough 742 * data in flight, or Nagle is off then send the current TX_DATA 743 * otherwise hold it and wait to accumulate more data. 744 */ 745 return csk->wr_credits == csk->wr_max_credits || 746 (tp->nonagle & TCP_NAGLE_OFF); 747 } 748 749 /* 750 * Returns true if a TCP socket is corked. 751 */ 752 static bool corked(const struct tcp_sock *tp, int flags) 753 { 754 return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); 755 } 756 757 /* 758 * Returns true if a send should try to push new data. 759 */ 760 static bool send_should_push(struct sock *sk, int flags) 761 { 762 return should_push(sk) && !corked(tcp_sk(sk), flags); 763 } 764 765 void chtls_tcp_push(struct sock *sk, int flags) 766 { 767 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 768 int qlen = skb_queue_len(&csk->txq); 769 770 if (likely(qlen)) { 771 struct sk_buff *skb = skb_peek_tail(&csk->txq); 772 struct tcp_sock *tp = tcp_sk(sk); 773 774 mark_urg(tp, flags, skb); 775 776 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && 777 corked(tp, flags)) { 778 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; 779 return; 780 } 781 782 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; 783 if (qlen == 1 && 784 ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 785 should_push(sk))) 786 chtls_push_frames(csk, 1); 787 } 788 } 789 790 /* 791 * Calculate the size for a new send sk_buff. It's maximum size so we can 792 * pack lots of data into it, unless we plan to send it immediately, in which 793 * case we size it more tightly. 794 * 795 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't 796 * arise in normal cases and when it does we are just wasting memory. 797 */ 798 static int select_size(struct sock *sk, int io_len, int flags, int len) 799 { 800 const int pgbreak = SKB_MAX_HEAD(len); 801 802 /* 803 * If the data wouldn't fit in the main body anyway, put only the 804 * header in the main body so it can use immediate data and place all 805 * the payload in page fragments. 806 */ 807 if (io_len > pgbreak) 808 return 0; 809 810 /* 811 * If we will be accumulating payload get a large main body. 812 */ 813 if (!send_should_push(sk, flags)) 814 return pgbreak; 815 816 return io_len; 817 } 818 819 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) 820 { 821 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 822 struct tcp_sock *tp = tcp_sk(sk); 823 824 ULP_SKB_CB(skb)->seq = tp->write_seq; 825 ULP_SKB_CB(skb)->flags = flags; 826 __skb_queue_tail(&csk->txq, skb); 827 sk->sk_wmem_queued += skb->truesize; 828 829 if (TCP_PAGE(sk) && TCP_OFF(sk)) { 830 put_page(TCP_PAGE(sk)); 831 TCP_PAGE(sk) = NULL; 832 TCP_OFF(sk) = 0; 833 } 834 } 835 836 static struct sk_buff *get_tx_skb(struct sock *sk, int size) 837 { 838 struct sk_buff *skb; 839 840 skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); 841 if (likely(skb)) { 842 skb_reserve(skb, TX_HEADER_LEN); 843 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 844 skb_reset_transport_header(skb); 845 } 846 return skb; 847 } 848 849 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) 850 { 851 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 852 struct sk_buff *skb; 853 854 skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + 855 KEY_ON_MEM_SZ + max_ivs_size(sk, size)), 856 sk->sk_allocation); 857 if (likely(skb)) { 858 skb_reserve(skb, (TX_TLSHDR_LEN + 859 KEY_ON_MEM_SZ + max_ivs_size(sk, size))); 860 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 861 skb_reset_transport_header(skb); 862 ULP_SKB_CB(skb)->ulp.tls.ofld = 1; 863 ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; 864 } 865 return skb; 866 } 867 868 static void tx_skb_finalize(struct sk_buff *skb) 869 { 870 struct ulp_skb_cb *cb = ULP_SKB_CB(skb); 871 872 if (!(cb->flags & ULPCB_FLAG_NO_HDR)) 873 cb->flags = ULPCB_FLAG_NEED_HDR; 874 cb->flags |= ULPCB_FLAG_NO_APPEND; 875 } 876 877 static void push_frames_if_head(struct sock *sk) 878 { 879 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 880 881 if (skb_queue_len(&csk->txq) == 1) 882 chtls_push_frames(csk, 1); 883 } 884 885 static int chtls_skb_copy_to_page_nocache(struct sock *sk, 886 struct iov_iter *from, 887 struct sk_buff *skb, 888 struct page *page, 889 int off, int copy) 890 { 891 int err; 892 893 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + 894 off, copy, skb->len); 895 if (err) 896 return err; 897 898 skb->len += copy; 899 skb->data_len += copy; 900 skb->truesize += copy; 901 sk->sk_wmem_queued += copy; 902 return 0; 903 } 904 905 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk) 906 { 907 return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0); 908 } 909 910 static int csk_wait_memory(struct chtls_dev *cdev, 911 struct sock *sk, long *timeo_p) 912 { 913 DEFINE_WAIT_FUNC(wait, woken_wake_function); 914 int err = 0; 915 long current_timeo; 916 long vm_wait = 0; 917 bool noblock; 918 919 current_timeo = *timeo_p; 920 noblock = (*timeo_p ? false : true); 921 if (csk_mem_free(cdev, sk)) { 922 current_timeo = get_random_u32_below(HZ / 5) + 2; 923 vm_wait = get_random_u32_below(HZ / 5) + 2; 924 } 925 926 add_wait_queue(sk_sleep(sk), &wait); 927 while (1) { 928 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 929 930 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 931 goto do_error; 932 if (!*timeo_p) { 933 if (noblock) 934 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 935 goto do_nonblock; 936 } 937 if (signal_pending(current)) 938 goto do_interrupted; 939 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 940 if (csk_mem_free(cdev, sk) && !vm_wait) 941 break; 942 943 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 944 sk->sk_write_pending++; 945 sk_wait_event(sk, ¤t_timeo, sk->sk_err || 946 (sk->sk_shutdown & SEND_SHUTDOWN) || 947 (csk_mem_free(cdev, sk) && !vm_wait), &wait); 948 sk->sk_write_pending--; 949 950 if (vm_wait) { 951 vm_wait -= current_timeo; 952 current_timeo = *timeo_p; 953 if (current_timeo != MAX_SCHEDULE_TIMEOUT) { 954 current_timeo -= vm_wait; 955 if (current_timeo < 0) 956 current_timeo = 0; 957 } 958 vm_wait = 0; 959 } 960 *timeo_p = current_timeo; 961 } 962 do_rm_wq: 963 remove_wait_queue(sk_sleep(sk), &wait); 964 return err; 965 do_error: 966 err = -EPIPE; 967 goto do_rm_wq; 968 do_nonblock: 969 err = -EAGAIN; 970 goto do_rm_wq; 971 do_interrupted: 972 err = sock_intr_errno(*timeo_p); 973 goto do_rm_wq; 974 } 975 976 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, 977 unsigned char *record_type) 978 { 979 struct cmsghdr *cmsg; 980 int rc = -EINVAL; 981 982 for_each_cmsghdr(cmsg, msg) { 983 if (!CMSG_OK(msg, cmsg)) 984 return -EINVAL; 985 if (cmsg->cmsg_level != SOL_TLS) 986 continue; 987 988 switch (cmsg->cmsg_type) { 989 case TLS_SET_RECORD_TYPE: 990 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) 991 return -EINVAL; 992 993 if (msg->msg_flags & MSG_MORE) 994 return -EINVAL; 995 996 *record_type = *(unsigned char *)CMSG_DATA(cmsg); 997 rc = 0; 998 break; 999 default: 1000 return -EINVAL; 1001 } 1002 } 1003 1004 return rc; 1005 } 1006 1007 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) 1008 { 1009 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1010 struct chtls_dev *cdev = csk->cdev; 1011 struct tcp_sock *tp = tcp_sk(sk); 1012 struct sk_buff *skb; 1013 int mss, flags, err; 1014 int recordsz = 0; 1015 int copied = 0; 1016 long timeo; 1017 1018 lock_sock(sk); 1019 flags = msg->msg_flags; 1020 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1021 1022 if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { 1023 err = sk_stream_wait_connect(sk, &timeo); 1024 if (err) 1025 goto out_err; 1026 } 1027 1028 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1029 err = -EPIPE; 1030 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1031 goto out_err; 1032 1033 mss = csk->mss; 1034 csk_set_flag(csk, CSK_TX_MORE_DATA); 1035 1036 while (msg_data_left(msg)) { 1037 int copy = 0; 1038 1039 skb = skb_peek_tail(&csk->txq); 1040 if (skb) { 1041 copy = mss - skb->len; 1042 skb->ip_summed = CHECKSUM_UNNECESSARY; 1043 } 1044 if (!csk_mem_free(cdev, sk)) 1045 goto wait_for_sndbuf; 1046 1047 if (is_tls_tx(csk) && !csk->tlshws.txleft) { 1048 unsigned char record_type = TLS_RECORD_TYPE_DATA; 1049 1050 if (unlikely(msg->msg_controllen)) { 1051 err = chtls_proccess_cmsg(sk, msg, 1052 &record_type); 1053 if (err) 1054 goto out_err; 1055 1056 /* Avoid appending tls handshake, alert to tls data */ 1057 if (skb) 1058 tx_skb_finalize(skb); 1059 } 1060 1061 recordsz = size; 1062 csk->tlshws.txleft = recordsz; 1063 csk->tlshws.type = record_type; 1064 } 1065 1066 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 1067 copy <= 0) { 1068 new_buf: 1069 if (skb) { 1070 tx_skb_finalize(skb); 1071 push_frames_if_head(sk); 1072 } 1073 1074 if (is_tls_tx(csk)) { 1075 skb = get_record_skb(sk, 1076 select_size(sk, 1077 recordsz, 1078 flags, 1079 TX_TLSHDR_LEN), 1080 false); 1081 } else { 1082 skb = get_tx_skb(sk, 1083 select_size(sk, size, flags, 1084 TX_HEADER_LEN)); 1085 } 1086 if (unlikely(!skb)) 1087 goto wait_for_memory; 1088 1089 skb->ip_summed = CHECKSUM_UNNECESSARY; 1090 copy = mss; 1091 } 1092 if (copy > size) 1093 copy = size; 1094 1095 if (msg->msg_flags & MSG_SPLICE_PAGES) { 1096 err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 1097 sk->sk_allocation); 1098 if (err < 0) { 1099 if (err == -EMSGSIZE) 1100 goto new_buf; 1101 goto do_fault; 1102 } 1103 copy = err; 1104 sk_wmem_queued_add(sk, copy); 1105 } else if (skb_tailroom(skb) > 0) { 1106 copy = min(copy, skb_tailroom(skb)); 1107 if (is_tls_tx(csk)) 1108 copy = min_t(int, copy, csk->tlshws.txleft); 1109 err = skb_add_data_nocache(sk, skb, 1110 &msg->msg_iter, copy); 1111 if (err) 1112 goto do_fault; 1113 } else { 1114 int i = skb_shinfo(skb)->nr_frags; 1115 struct page *page = TCP_PAGE(sk); 1116 int pg_size = PAGE_SIZE; 1117 int off = TCP_OFF(sk); 1118 bool merge; 1119 1120 if (page) 1121 pg_size = page_size(page); 1122 if (off < pg_size && 1123 skb_can_coalesce(skb, i, page, off)) { 1124 merge = true; 1125 goto copy; 1126 } 1127 merge = false; 1128 if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : 1129 MAX_SKB_FRAGS)) 1130 goto new_buf; 1131 1132 if (page && off == pg_size) { 1133 put_page(page); 1134 TCP_PAGE(sk) = page = NULL; 1135 pg_size = PAGE_SIZE; 1136 } 1137 1138 if (!page) { 1139 gfp_t gfp = sk->sk_allocation; 1140 int order = cdev->send_page_order; 1141 1142 if (order) { 1143 page = alloc_pages(gfp | __GFP_COMP | 1144 __GFP_NOWARN | 1145 __GFP_NORETRY, 1146 order); 1147 if (page) 1148 pg_size <<= order; 1149 } 1150 if (!page) { 1151 page = alloc_page(gfp); 1152 pg_size = PAGE_SIZE; 1153 } 1154 if (!page) 1155 goto wait_for_memory; 1156 off = 0; 1157 } 1158 copy: 1159 if (copy > pg_size - off) 1160 copy = pg_size - off; 1161 if (is_tls_tx(csk)) 1162 copy = min_t(int, copy, csk->tlshws.txleft); 1163 1164 err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, 1165 skb, page, 1166 off, copy); 1167 if (unlikely(err)) { 1168 if (!TCP_PAGE(sk)) { 1169 TCP_PAGE(sk) = page; 1170 TCP_OFF(sk) = 0; 1171 } 1172 goto do_fault; 1173 } 1174 /* Update the skb. */ 1175 if (merge) { 1176 skb_frag_size_add( 1177 &skb_shinfo(skb)->frags[i - 1], 1178 copy); 1179 } else { 1180 skb_fill_page_desc(skb, i, page, off, copy); 1181 if (off + copy < pg_size) { 1182 /* space left keep page */ 1183 get_page(page); 1184 TCP_PAGE(sk) = page; 1185 } else { 1186 TCP_PAGE(sk) = NULL; 1187 } 1188 } 1189 TCP_OFF(sk) = off + copy; 1190 } 1191 if (unlikely(skb->len == mss)) 1192 tx_skb_finalize(skb); 1193 tp->write_seq += copy; 1194 copied += copy; 1195 size -= copy; 1196 1197 if (is_tls_tx(csk)) 1198 csk->tlshws.txleft -= copy; 1199 1200 if (corked(tp, flags) && 1201 (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) 1202 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; 1203 1204 if (size == 0) 1205 goto out; 1206 1207 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) 1208 push_frames_if_head(sk); 1209 continue; 1210 wait_for_sndbuf: 1211 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1212 wait_for_memory: 1213 err = csk_wait_memory(cdev, sk, &timeo); 1214 if (err) 1215 goto do_error; 1216 } 1217 out: 1218 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1219 if (copied) 1220 chtls_tcp_push(sk, flags); 1221 done: 1222 release_sock(sk); 1223 return copied; 1224 do_fault: 1225 if (!skb->len) { 1226 __skb_unlink(skb, &csk->txq); 1227 sk->sk_wmem_queued -= skb->truesize; 1228 __kfree_skb(skb); 1229 } 1230 do_error: 1231 if (copied) 1232 goto out; 1233 out_err: 1234 if (csk_conn_inline(csk)) 1235 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1236 copied = sk_stream_error(sk, flags, err); 1237 goto done; 1238 } 1239 1240 void chtls_splice_eof(struct socket *sock) 1241 { 1242 struct sock *sk = sock->sk; 1243 1244 lock_sock(sk); 1245 chtls_tcp_push(sk, 0); 1246 release_sock(sk); 1247 } 1248 1249 int chtls_sendpage(struct sock *sk, struct page *page, 1250 int offset, size_t size, int flags) 1251 { 1252 struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; 1253 struct bio_vec bvec; 1254 1255 if (flags & MSG_SENDPAGE_NOTLAST) 1256 msg.msg_flags |= MSG_MORE; 1257 1258 bvec_set_page(&bvec, page, size, offset); 1259 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 1260 return chtls_sendmsg(sk, &msg, size); 1261 } 1262 1263 static void chtls_select_window(struct sock *sk) 1264 { 1265 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1266 struct tcp_sock *tp = tcp_sk(sk); 1267 unsigned int wnd = tp->rcv_wnd; 1268 1269 wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); 1270 wnd = max_t(unsigned int, MIN_RCV_WND, wnd); 1271 1272 if (wnd > MAX_RCV_WND) 1273 wnd = MAX_RCV_WND; 1274 1275 /* 1276 * Check if we need to grow the receive window in response to an increase in 1277 * the socket's receive buffer size. Some applications increase the buffer 1278 * size dynamically and rely on the window to grow accordingly. 1279 */ 1280 1281 if (wnd > tp->rcv_wnd) { 1282 tp->rcv_wup -= wnd - tp->rcv_wnd; 1283 tp->rcv_wnd = wnd; 1284 /* Mark the receive window as updated */ 1285 csk_reset_flag(csk, CSK_UPDATE_RCV_WND); 1286 } 1287 } 1288 1289 /* 1290 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted 1291 * to return without sending the message in case we cannot allocate 1292 * an sk_buff. Returns the number of credits sent. 1293 */ 1294 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) 1295 { 1296 struct cpl_rx_data_ack *req; 1297 struct sk_buff *skb; 1298 1299 skb = alloc_skb(sizeof(*req), GFP_ATOMIC); 1300 if (!skb) 1301 return 0; 1302 __skb_put(skb, sizeof(*req)); 1303 req = (struct cpl_rx_data_ack *)skb->head; 1304 1305 set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); 1306 INIT_TP_WR(req, csk->tid); 1307 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, 1308 csk->tid)); 1309 req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | 1310 RX_FORCE_ACK_F); 1311 cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); 1312 return credits; 1313 } 1314 1315 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ 1316 TCPF_FIN_WAIT1 | \ 1317 TCPF_FIN_WAIT2) 1318 1319 /* 1320 * Called after some received data has been read. It returns RX credits 1321 * to the HW for the amount of data processed. 1322 */ 1323 static void chtls_cleanup_rbuf(struct sock *sk, int copied) 1324 { 1325 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1326 struct tcp_sock *tp; 1327 int must_send; 1328 u32 credits; 1329 u32 thres; 1330 1331 thres = 15 * 1024; 1332 1333 if (!sk_in_state(sk, CREDIT_RETURN_STATE)) 1334 return; 1335 1336 chtls_select_window(sk); 1337 tp = tcp_sk(sk); 1338 credits = tp->copied_seq - tp->rcv_wup; 1339 if (unlikely(!credits)) 1340 return; 1341 1342 /* 1343 * For coalescing to work effectively ensure the receive window has 1344 * at least 16KB left. 1345 */ 1346 must_send = credits + 16384 >= tp->rcv_wnd; 1347 1348 if (must_send || credits >= thres) 1349 tp->rcv_wup += send_rx_credits(csk, credits); 1350 } 1351 1352 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1353 int flags, int *addr_len) 1354 { 1355 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1356 struct chtls_hws *hws = &csk->tlshws; 1357 struct net_device *dev = csk->egress_dev; 1358 struct adapter *adap = netdev2adap(dev); 1359 struct tcp_sock *tp = tcp_sk(sk); 1360 unsigned long avail; 1361 int buffers_freed; 1362 int copied = 0; 1363 int target; 1364 long timeo; 1365 1366 buffers_freed = 0; 1367 1368 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1369 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1370 1371 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1372 chtls_cleanup_rbuf(sk, copied); 1373 1374 do { 1375 struct sk_buff *skb; 1376 u32 offset = 0; 1377 1378 if (unlikely(tp->urg_data && 1379 tp->urg_seq == tp->copied_seq)) { 1380 if (copied) 1381 break; 1382 if (signal_pending(current)) { 1383 copied = timeo ? sock_intr_errno(timeo) : 1384 -EAGAIN; 1385 break; 1386 } 1387 } 1388 skb = skb_peek(&sk->sk_receive_queue); 1389 if (skb) 1390 goto found_ok_skb; 1391 if (csk->wr_credits && 1392 skb_queue_len(&csk->txq) && 1393 chtls_push_frames(csk, csk->wr_credits == 1394 csk->wr_max_credits)) 1395 sk->sk_write_space(sk); 1396 1397 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1398 break; 1399 1400 if (copied) { 1401 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1402 (sk->sk_shutdown & RCV_SHUTDOWN) || 1403 signal_pending(current)) 1404 break; 1405 1406 if (!timeo) 1407 break; 1408 } else { 1409 if (sock_flag(sk, SOCK_DONE)) 1410 break; 1411 if (sk->sk_err) { 1412 copied = sock_error(sk); 1413 break; 1414 } 1415 if (sk->sk_shutdown & RCV_SHUTDOWN) 1416 break; 1417 if (sk->sk_state == TCP_CLOSE) { 1418 copied = -ENOTCONN; 1419 break; 1420 } 1421 if (!timeo) { 1422 copied = -EAGAIN; 1423 break; 1424 } 1425 if (signal_pending(current)) { 1426 copied = sock_intr_errno(timeo); 1427 break; 1428 } 1429 } 1430 if (READ_ONCE(sk->sk_backlog.tail)) { 1431 release_sock(sk); 1432 lock_sock(sk); 1433 chtls_cleanup_rbuf(sk, copied); 1434 continue; 1435 } 1436 1437 if (copied >= target) 1438 break; 1439 chtls_cleanup_rbuf(sk, copied); 1440 sk_wait_data(sk, &timeo, NULL); 1441 continue; 1442 found_ok_skb: 1443 if (!skb->len) { 1444 skb_dst_set(skb, NULL); 1445 __skb_unlink(skb, &sk->sk_receive_queue); 1446 kfree_skb(skb); 1447 1448 if (!copied && !timeo) { 1449 copied = -EAGAIN; 1450 break; 1451 } 1452 1453 if (copied < target) { 1454 release_sock(sk); 1455 lock_sock(sk); 1456 continue; 1457 } 1458 break; 1459 } 1460 offset = hws->copied_seq; 1461 avail = skb->len - offset; 1462 if (len < avail) 1463 avail = len; 1464 1465 if (unlikely(tp->urg_data)) { 1466 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1467 1468 if (urg_offset < avail) { 1469 if (urg_offset) { 1470 avail = urg_offset; 1471 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1472 /* First byte is urgent, skip */ 1473 tp->copied_seq++; 1474 offset++; 1475 avail--; 1476 if (!avail) 1477 goto skip_copy; 1478 } 1479 } 1480 } 1481 /* Set record type if not already done. For a non-data record, 1482 * do not proceed if record type could not be copied. 1483 */ 1484 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1485 struct tls_hdr *thdr = (struct tls_hdr *)skb->data; 1486 int cerr = 0; 1487 1488 cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, 1489 sizeof(thdr->type), &thdr->type); 1490 1491 if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) { 1492 copied = -EIO; 1493 break; 1494 } 1495 /* don't send tls header, skip copy */ 1496 goto skip_copy; 1497 } 1498 1499 if (skb_copy_datagram_msg(skb, offset, msg, avail)) { 1500 if (!copied) { 1501 copied = -EFAULT; 1502 break; 1503 } 1504 } 1505 1506 copied += avail; 1507 len -= avail; 1508 hws->copied_seq += avail; 1509 skip_copy: 1510 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1511 tp->urg_data = 0; 1512 1513 if ((avail + offset) >= skb->len) { 1514 struct sk_buff *next_skb; 1515 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1516 tp->copied_seq += skb->len; 1517 hws->rcvpld = skb->hdr_len; 1518 } else { 1519 atomic_inc(&adap->chcr_stats.tls_pdu_rx); 1520 tp->copied_seq += hws->rcvpld; 1521 } 1522 chtls_free_skb(sk, skb); 1523 buffers_freed++; 1524 hws->copied_seq = 0; 1525 next_skb = skb_peek(&sk->sk_receive_queue); 1526 if (copied >= target && !next_skb) 1527 break; 1528 if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) 1529 break; 1530 } 1531 } while (len > 0); 1532 1533 if (buffers_freed) 1534 chtls_cleanup_rbuf(sk, copied); 1535 release_sock(sk); 1536 return copied; 1537 } 1538 1539 /* 1540 * Peek at data in a socket's receive buffer. 1541 */ 1542 static int peekmsg(struct sock *sk, struct msghdr *msg, 1543 size_t len, int flags) 1544 { 1545 struct tcp_sock *tp = tcp_sk(sk); 1546 u32 peek_seq, offset; 1547 struct sk_buff *skb; 1548 int copied = 0; 1549 size_t avail; /* amount of available data in current skb */ 1550 long timeo; 1551 1552 lock_sock(sk); 1553 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1554 peek_seq = tp->copied_seq; 1555 1556 do { 1557 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { 1558 if (copied) 1559 break; 1560 if (signal_pending(current)) { 1561 copied = timeo ? sock_intr_errno(timeo) : 1562 -EAGAIN; 1563 break; 1564 } 1565 } 1566 1567 skb_queue_walk(&sk->sk_receive_queue, skb) { 1568 offset = peek_seq - ULP_SKB_CB(skb)->seq; 1569 if (offset < skb->len) 1570 goto found_ok_skb; 1571 } 1572 1573 /* empty receive queue */ 1574 if (copied) 1575 break; 1576 if (sock_flag(sk, SOCK_DONE)) 1577 break; 1578 if (sk->sk_err) { 1579 copied = sock_error(sk); 1580 break; 1581 } 1582 if (sk->sk_shutdown & RCV_SHUTDOWN) 1583 break; 1584 if (sk->sk_state == TCP_CLOSE) { 1585 copied = -ENOTCONN; 1586 break; 1587 } 1588 if (!timeo) { 1589 copied = -EAGAIN; 1590 break; 1591 } 1592 if (signal_pending(current)) { 1593 copied = sock_intr_errno(timeo); 1594 break; 1595 } 1596 1597 if (READ_ONCE(sk->sk_backlog.tail)) { 1598 /* Do not sleep, just process backlog. */ 1599 release_sock(sk); 1600 lock_sock(sk); 1601 } else { 1602 sk_wait_data(sk, &timeo, NULL); 1603 } 1604 1605 if (unlikely(peek_seq != tp->copied_seq)) { 1606 if (net_ratelimit()) 1607 pr_info("TCP(%s:%d), race in MSG_PEEK.\n", 1608 current->comm, current->pid); 1609 peek_seq = tp->copied_seq; 1610 } 1611 continue; 1612 1613 found_ok_skb: 1614 avail = skb->len - offset; 1615 if (len < avail) 1616 avail = len; 1617 /* 1618 * Do we have urgent data here? We need to skip over the 1619 * urgent byte. 1620 */ 1621 if (unlikely(tp->urg_data)) { 1622 u32 urg_offset = tp->urg_seq - peek_seq; 1623 1624 if (urg_offset < avail) { 1625 /* 1626 * The amount of data we are preparing to copy 1627 * contains urgent data. 1628 */ 1629 if (!urg_offset) { /* First byte is urgent */ 1630 if (!sock_flag(sk, SOCK_URGINLINE)) { 1631 peek_seq++; 1632 offset++; 1633 avail--; 1634 } 1635 if (!avail) 1636 continue; 1637 } else { 1638 /* stop short of the urgent data */ 1639 avail = urg_offset; 1640 } 1641 } 1642 } 1643 1644 /* 1645 * If MSG_TRUNC is specified the data is discarded. 1646 */ 1647 if (likely(!(flags & MSG_TRUNC))) 1648 if (skb_copy_datagram_msg(skb, offset, msg, len)) { 1649 if (!copied) { 1650 copied = -EFAULT; 1651 break; 1652 } 1653 } 1654 peek_seq += avail; 1655 copied += avail; 1656 len -= avail; 1657 } while (len > 0); 1658 1659 release_sock(sk); 1660 return copied; 1661 } 1662 1663 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1664 int flags, int *addr_len) 1665 { 1666 struct tcp_sock *tp = tcp_sk(sk); 1667 struct chtls_sock *csk; 1668 unsigned long avail; /* amount of available data in current skb */ 1669 int buffers_freed; 1670 int copied = 0; 1671 long timeo; 1672 int target; /* Read at least this many bytes */ 1673 1674 buffers_freed = 0; 1675 1676 if (unlikely(flags & MSG_OOB)) 1677 return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); 1678 1679 if (unlikely(flags & MSG_PEEK)) 1680 return peekmsg(sk, msg, len, flags); 1681 1682 if (sk_can_busy_loop(sk) && 1683 skb_queue_empty_lockless(&sk->sk_receive_queue) && 1684 sk->sk_state == TCP_ESTABLISHED) 1685 sk_busy_loop(sk, flags & MSG_DONTWAIT); 1686 1687 lock_sock(sk); 1688 csk = rcu_dereference_sk_user_data(sk); 1689 1690 if (is_tls_rx(csk)) 1691 return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); 1692 1693 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1694 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1695 1696 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1697 chtls_cleanup_rbuf(sk, copied); 1698 1699 do { 1700 struct sk_buff *skb; 1701 u32 offset; 1702 1703 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { 1704 if (copied) 1705 break; 1706 if (signal_pending(current)) { 1707 copied = timeo ? sock_intr_errno(timeo) : 1708 -EAGAIN; 1709 break; 1710 } 1711 } 1712 1713 skb = skb_peek(&sk->sk_receive_queue); 1714 if (skb) 1715 goto found_ok_skb; 1716 1717 if (csk->wr_credits && 1718 skb_queue_len(&csk->txq) && 1719 chtls_push_frames(csk, csk->wr_credits == 1720 csk->wr_max_credits)) 1721 sk->sk_write_space(sk); 1722 1723 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1724 break; 1725 1726 if (copied) { 1727 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1728 (sk->sk_shutdown & RCV_SHUTDOWN) || 1729 signal_pending(current)) 1730 break; 1731 } else { 1732 if (sock_flag(sk, SOCK_DONE)) 1733 break; 1734 if (sk->sk_err) { 1735 copied = sock_error(sk); 1736 break; 1737 } 1738 if (sk->sk_shutdown & RCV_SHUTDOWN) 1739 break; 1740 if (sk->sk_state == TCP_CLOSE) { 1741 copied = -ENOTCONN; 1742 break; 1743 } 1744 if (!timeo) { 1745 copied = -EAGAIN; 1746 break; 1747 } 1748 if (signal_pending(current)) { 1749 copied = sock_intr_errno(timeo); 1750 break; 1751 } 1752 } 1753 1754 if (READ_ONCE(sk->sk_backlog.tail)) { 1755 release_sock(sk); 1756 lock_sock(sk); 1757 chtls_cleanup_rbuf(sk, copied); 1758 continue; 1759 } 1760 1761 if (copied >= target) 1762 break; 1763 chtls_cleanup_rbuf(sk, copied); 1764 sk_wait_data(sk, &timeo, NULL); 1765 continue; 1766 1767 found_ok_skb: 1768 if (!skb->len) { 1769 chtls_kfree_skb(sk, skb); 1770 if (!copied && !timeo) { 1771 copied = -EAGAIN; 1772 break; 1773 } 1774 1775 if (copied < target) 1776 continue; 1777 1778 break; 1779 } 1780 1781 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; 1782 avail = skb->len - offset; 1783 if (len < avail) 1784 avail = len; 1785 1786 if (unlikely(tp->urg_data)) { 1787 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1788 1789 if (urg_offset < avail) { 1790 if (urg_offset) { 1791 avail = urg_offset; 1792 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1793 tp->copied_seq++; 1794 offset++; 1795 avail--; 1796 if (!avail) 1797 goto skip_copy; 1798 } 1799 } 1800 } 1801 1802 if (likely(!(flags & MSG_TRUNC))) { 1803 if (skb_copy_datagram_msg(skb, offset, 1804 msg, avail)) { 1805 if (!copied) { 1806 copied = -EFAULT; 1807 break; 1808 } 1809 } 1810 } 1811 1812 tp->copied_seq += avail; 1813 copied += avail; 1814 len -= avail; 1815 1816 skip_copy: 1817 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1818 tp->urg_data = 0; 1819 1820 if (avail + offset >= skb->len) { 1821 chtls_free_skb(sk, skb); 1822 buffers_freed++; 1823 1824 if (copied >= target && 1825 !skb_peek(&sk->sk_receive_queue)) 1826 break; 1827 } 1828 } while (len > 0); 1829 1830 if (buffers_freed) 1831 chtls_cleanup_rbuf(sk, copied); 1832 1833 release_sock(sk); 1834 return copied; 1835 } 1836