1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* RxRPC packet transmission 3 * 4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/net.h> 11 #include <linux/gfp.h> 12 #include <linux/skbuff.h> 13 #include <linux/export.h> 14 #include <net/sock.h> 15 #include <net/af_rxrpc.h> 16 #include "ar-internal.h" 17 18 struct rxrpc_ack_buffer { 19 struct rxrpc_wire_header whdr; 20 struct rxrpc_ackpacket ack; 21 u8 acks[255]; 22 u8 pad[3]; 23 struct rxrpc_ackinfo ackinfo; 24 }; 25 26 struct rxrpc_abort_buffer { 27 struct rxrpc_wire_header whdr; 28 __be32 abort_code; 29 }; 30 31 static const char rxrpc_keepalive_string[] = ""; 32 33 /* 34 * Increase Tx backoff on transmission failure and clear it on success. 35 */ 36 static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) 37 { 38 if (ret < 0) { 39 u16 tx_backoff = READ_ONCE(call->tx_backoff); 40 41 if (tx_backoff < HZ) 42 WRITE_ONCE(call->tx_backoff, tx_backoff + 1); 43 } else { 44 WRITE_ONCE(call->tx_backoff, 0); 45 } 46 } 47 48 /* 49 * Arrange for a keepalive ping a certain time after we last transmitted. This 50 * lets the far side know we're still interested in this call and helps keep 51 * the route through any intervening firewall open. 52 * 53 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 54 * expiring. 55 */ 56 static void rxrpc_set_keepalive(struct rxrpc_call *call) 57 { 58 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; 59 60 keepalive_at += now; 61 WRITE_ONCE(call->keepalive_at, keepalive_at); 62 rxrpc_reduce_call_timer(call, keepalive_at, now, 63 rxrpc_timer_set_for_keepalive); 64 } 65 66 /* 67 * Fill out an ACK packet. 
68 */ 69 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 70 struct rxrpc_call *call, 71 struct rxrpc_ack_buffer *pkt, 72 rxrpc_seq_t *_hard_ack, 73 rxrpc_seq_t *_top, 74 u8 reason) 75 { 76 rxrpc_serial_t serial; 77 rxrpc_seq_t hard_ack, top, seq; 78 int ix; 79 u32 mtu, jmax; 80 u8 *ackp = pkt->acks; 81 82 /* Barrier against rxrpc_input_data(). */ 83 serial = call->ackr_serial; 84 hard_ack = READ_ONCE(call->rx_hard_ack); 85 top = smp_load_acquire(&call->rx_top); 86 *_hard_ack = hard_ack; 87 *_top = top; 88 89 pkt->ack.bufferSpace = htons(8); 90 pkt->ack.maxSkew = htons(0); 91 pkt->ack.firstPacket = htonl(hard_ack + 1); 92 pkt->ack.previousPacket = htonl(call->ackr_prev_seq); 93 pkt->ack.serial = htonl(serial); 94 pkt->ack.reason = reason; 95 pkt->ack.nAcks = top - hard_ack; 96 97 if (reason == RXRPC_ACK_PING) 98 pkt->whdr.flags |= RXRPC_REQUEST_ACK; 99 100 if (after(top, hard_ack)) { 101 seq = hard_ack + 1; 102 do { 103 ix = seq & RXRPC_RXTX_BUFF_MASK; 104 if (call->rxtx_buffer[ix]) 105 *ackp++ = RXRPC_ACK_TYPE_ACK; 106 else 107 *ackp++ = RXRPC_ACK_TYPE_NACK; 108 seq++; 109 } while (before_eq(seq, top)); 110 } 111 112 mtu = conn->params.peer->if_mtu; 113 mtu -= conn->params.peer->hdrsize; 114 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 115 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 116 pkt->ackinfo.maxMTU = htonl(mtu); 117 pkt->ackinfo.rwind = htonl(call->rx_winsize); 118 pkt->ackinfo.jumbo_max = htonl(jmax); 119 120 *ackp++ = 0; 121 *ackp++ = 0; 122 *ackp++ = 0; 123 return top - hard_ack + 3; 124 } 125 126 /* 127 * Send an ACK call packet. 
 *
 * @call:    the call to acknowledge on
 * @ping:    if true, send a PING ACK regardless of call->ackr_reason
 * @_serial: if not NULL, receives the serial number allocated to the packet
 *
 * Returns -ECONNRESET if the call is flagged as disconnected, -ENOMEM if the
 * packet buffer cannot be allocated, 0 if this is not a ping and no ACK
 * reason is pending, and otherwise whatever kernel_sendmsg() returned.
 */
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
			  rxrpc_serial_t *_serial)
{
	struct rxrpc_connection *conn;
	struct rxrpc_ack_buffer *pkt;
	struct msghdr msg;
	struct kvec iov[2];
	rxrpc_serial_t serial;
	rxrpc_seq_t hard_ack, top;
	size_t len, n;
	int ret;
	u8 reason;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return -ECONNRESET;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	conn = call->conn;

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt->whdr.epoch		= htonl(conn->proto.epoch);
	pkt->whdr.cid		= htonl(call->cid);
	pkt->whdr.callNumber	= htonl(call->call_id);
	pkt->whdr.seq		= 0;
	pkt->whdr.type		= RXRPC_PACKET_TYPE_ACK;
	pkt->whdr.flags		= RXRPC_SLOW_START_OK | conn->out_clientflag;
	pkt->whdr.userStatus	= 0;
	pkt->whdr.securityIndex	= call->security_ix;
	pkt->whdr._rsvd		= 0;
	pkt->whdr.serviceId	= htons(call->service_id);

	/* Pick the ACK reason and fill in the ACK body under the call lock.
	 * A non-ping ACK consumes the pending reason; if there is none, there
	 * is nothing to send.
	 */
	spin_lock_bh(&call->lock);
	if (ping) {
		reason = RXRPC_ACK_PING;
	} else {
		reason = call->ackr_reason;
		if (!call->ackr_reason) {
			spin_unlock_bh(&call->lock);
			ret = 0;
			goto out;
		}
		call->ackr_reason = 0;
	}
	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);

	spin_unlock_bh(&call->lock);

	/* The first segment is the wire header, the ACK header and the n
	 * bytes that rxrpc_fill_out_ack() reported; the ackinfo trailer goes
	 * out as a second segment.
	 */
	iov[0].iov_base	= pkt;
	iov[0].iov_len	= sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
	iov[1].iov_base = &pkt->ackinfo;
	iov[1].iov_len	= sizeof(pkt->ackinfo);
	len = iov[0].iov_len + iov[1].iov_len;

	serial = atomic_inc_return(&conn->serial);
	pkt->whdr.serial = htonl(serial);
	trace_rxrpc_tx_ack(call->debug_id, serial,
			   ntohl(pkt->ack.firstPacket),
			   ntohl(pkt->ack.serial),
			   pkt->ack.reason, pkt->ack.nAcks);
	if (_serial)
		*_serial = serial;

	if (ping) {
		call->ping_serial = serial;
		smp_wmb();
		/* We need to stick a time in before we send the packet in case
		 * the reply gets back before kernel_sendmsg() completes - but
		 * asking UDP to send the packet can take a relatively long
		 * time.
		 */
		call->ping_time = ktime_get_real();
		set_bit(RXRPC_CALL_PINGING, &call->flags);
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
	}

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
	/* The last-transmit time is updated whether or not the send worked. */
	conn->params.peer->last_tx_at = ktime_get_seconds();
	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_ack);
	else
		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
				      rxrpc_tx_point_call_ack);
	rxrpc_tx_backoff(call, ret);

	if (call->state < RXRPC_CALL_COMPLETE) {
		if (ret < 0) {
			/* Transmission failed: withdraw the ping marker and
			 * propose the same ACK again.
			 */
			if (ping)
				clear_bit(RXRPC_CALL_PINGING, &call->flags);
			rxrpc_propose_ACK(call, pkt->ack.reason,
					  ntohl(pkt->ack.serial),
					  false, true,
					  rxrpc_propose_ack_retry_tx);
		} else {
			/* Record how far this ACK reported; the markers only
			 * ever move forwards.
			 */
			spin_lock_bh(&call->lock);
			if (after(hard_ack, call->ackr_consumed))
				call->ackr_consumed = hard_ack;
			if (after(top, call->ackr_seen))
				call->ackr_seen = top;
			spin_unlock_bh(&call->lock);
		}

		rxrpc_set_keepalive(call);
	}

out:
	kfree(pkt);
	return ret;
}

/*
 * Send an ABORT call packet.
 *
 * Returns 0 without sending anything for a client call that has
 * RXRPC_CALL_TX_LAST set, -ECONNRESET if the call is flagged as disconnected,
 * and otherwise whatever kernel_sendmsg() returned.
 */
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
	struct rxrpc_connection *conn;
	struct rxrpc_abort_buffer pkt;
	struct msghdr msg;
	struct kvec iov[1];
	rxrpc_serial_t serial;
	int ret;

	/* Don't bother sending aborts for a client call once the server has
	 * hard-ACK'd all of its request data.  After that point, we're not
	 * going to stop the operation proceeding, and whilst we might limit
	 * the reply, it's not worth it if we can send a new call on the same
	 * channel instead, thereby closing off this call.
	 */
	if (rxrpc_is_client_call(call) &&
	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
		return 0;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return -ECONNRESET;

	conn = call->conn;

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	pkt.whdr.epoch		= htonl(conn->proto.epoch);
	pkt.whdr.cid		= htonl(call->cid);
	pkt.whdr.callNumber	= htonl(call->call_id);
	pkt.whdr.seq		= 0;
	pkt.whdr.type		= RXRPC_PACKET_TYPE_ABORT;
	pkt.whdr.flags		= conn->out_clientflag;
	pkt.whdr.userStatus	= 0;
	pkt.whdr.securityIndex	= call->security_ix;
	pkt.whdr._rsvd		= 0;
	pkt.whdr.serviceId	= htons(call->service_id);
	pkt.abort_code		= htonl(call->abort_code);

	iov[0].iov_base	= &pkt;
	iov[0].iov_len	= sizeof(pkt);

	serial = atomic_inc_return(&conn->serial);
	pkt.whdr.serial = htonl(serial);

	ret = kernel_sendmsg(conn->params.local->socket,
			     &msg, iov, 1, sizeof(pkt));
	/* The last-transmit time is updated whether or not the send worked. */
	conn->params.peer->last_tx_at = ktime_get_seconds();
	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_abort);
	else
		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
				      rxrpc_tx_point_call_abort);
	rxrpc_tx_backoff(call, ret);
	return ret;
}

/*
 * Send a DATA packet through the transport endpoint.
 *
 * @call:    the call the packet belongs to
 * @skb:     the packet; the header fields are taken from its rxrpc_skb_priv
 * @retrans: true if this is a retransmission; this is one of the conditions
 *           that request an ACK and is also passed to the tracepoint
 *
 * The packet is first sent while holding the local endpoint's defrag_sem for
 * reading.  If the payload is at least peer->maxdata bytes long, or that send
 * fails with -EMSGSIZE, it is sent with IP_MTU_DISCOVER switched to
 * IP_PMTUDISC_DONT while holding defrag_sem for writing.
 *
 * Returns the result of the last kernel_sendmsg() call, or 0 if the packet
 * was deliberately dropped by the loss-injection code.
 */
int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
			   bool retrans)
{
	struct rxrpc_connection *conn = call->conn;
	struct rxrpc_wire_header whdr;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct msghdr msg;
	struct kvec iov[2];
	rxrpc_serial_t serial;
	size_t len;
	int ret, opt;

	_enter(",{%d}", skb->len);

	/* Each transmission of a Tx packet needs a new serial number */
	serial = atomic_inc_return(&conn->serial);

	whdr.epoch	= htonl(conn->proto.epoch);
	whdr.cid	= htonl(call->cid);
	whdr.callNumber	= htonl(call->call_id);
	whdr.seq	= htonl(sp->hdr.seq);
	whdr.serial	= htonl(serial);
	whdr.type	= RXRPC_PACKET_TYPE_DATA;
	whdr.flags	= sp->hdr.flags;
	whdr.userStatus	= 0;
	whdr.securityIndex = call->security_ix;
	whdr._rsvd	= htons(sp->hdr._rsvd);
	whdr.serviceId	= htons(call->service_id);

	/* Only the first packet carries the service upgrade request. */
	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    sp->hdr.seq == 1)
		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;

	/* The wire header is built on the stack and sent as a separate
	 * segment ahead of the skb's data.
	 */
	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = skb->head;
	iov[1].iov_len = skb->len;
	len = iov[0].iov_len + iov[1].iov_len;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	/* If our RTT cache needs working on, request an ACK.  Also request
	 * ACKs if a DATA packet appears to have been lost.
	 *
	 * However, we mustn't request an ACK on the last reply packet of a
	 * service call, lest OpenAFS incorrectly send us an ACK with some
	 * soft-ACKs in it and then never follow up with a proper hard ACK.
	 */
	if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) ||
	     rxrpc_to_server(sp)
	     ) &&
	    (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
	     retrans ||
	     call->cong_mode == RXRPC_CALL_SLOW_START ||
	     (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
	     ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
			  ktime_get_real())))
		whdr.flags |= RXRPC_REQUEST_ACK;

	/* With loss injection configured, every eighth call to this function
	 * reports success without sending anything.  The counter is a
	 * function-local static shared by all callers.  Note that this path
	 * reaches "done" before sp->hdr.serial and skb->tstamp are assigned
	 * below.
	 */
	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;
		if ((lose++ & 7) == 7) {
			ret = 0;
			trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
					    whdr.flags, retrans, true);
			goto done;
		}
	}

	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans,
			    false);

	/* send the packet with the don't fragment bit set if we currently
	 * think it's small enough */
	if (iov[1].iov_len >= call->peer->maxdata)
		goto send_fragmentable;

	down_read(&conn->params.local->defrag_sem);

	sp->hdr.serial = serial;
	smp_wmb(); /* Set serial before timestamp */
	skb->tstamp = ktime_get_real();

	/* send the packet by UDP
	 * - returns -EMSGSIZE if UDP would have to fragment the packet
	 *   to go out of the interface
	 *   - in which case, we'll have processed the ICMP error
	 *     message and update the peer record
	 */
	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
	conn->params.peer->last_tx_at = ktime_get_seconds();

	up_read(&conn->params.local->defrag_sem);
	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_data_nofrag);
	else
		trace_rxrpc_tx_packet(call->debug_id, &whdr,
				      rxrpc_tx_point_call_data_nofrag);
	rxrpc_tx_backoff(call, ret);
	/* An -EMSGSIZE failure has already been fed to rxrpc_tx_backoff()
	 * above by the time the fragmentable path retries the send.
	 */
	if (ret == -EMSGSIZE)
		goto send_fragmentable;

done:
	if (ret >= 0) {
		if (whdr.flags & RXRPC_REQUEST_ACK) {
			call->peer->rtt_last_req = skb->tstamp;
			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
			/* Once more than one RTT sample is held, arm the
			 * lost-ACK timer for twice the RTT from now, with a
			 * minimum of one jiffy.
			 */
			if (call->peer->rtt_usage > 1) {
				unsigned long nowj = jiffies, ack_lost_at;

				ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
				if (ack_lost_at < 1)
					ack_lost_at = 1;

				ack_lost_at += nowj;
				WRITE_ONCE(call->ack_lost_at, ack_lost_at);
				rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
							rxrpc_timer_set_for_lost_ack);
			}
		}

		/* Start the expect-Rx timer the first time packet 1 is sent
		 * successfully; the flag makes sure this happens only once.
		 */
		if (sp->hdr.seq == 1 &&
		    !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
				      &call->flags)) {
			unsigned long nowj = jiffies, expect_rx_by;

			expect_rx_by = nowj + call->next_rx_timo;
			WRITE_ONCE(call->expect_rx_by, expect_rx_by);
			rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
						rxrpc_timer_set_for_normal);
		}

		rxrpc_set_keepalive(call);
	} else {
		/* Cancel the call if the initial transmission fails,
		 * particularly if that's due to network routing issues that
		 * aren't going away anytime soon.  The layer above can arrange
		 * the retransmission.
		 */
		if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
						  RX_USER_ABORT, ret);
	}

	_leave(" = %d [%u]", ret, call->peer->maxdata);
	return ret;

send_fragmentable:
	/* attempt to send this message with fragmentation enabled */
	_debug("send fragment");

	/* defrag_sem is held for writing while the socket option is changed,
	 * whereas the normal send path above holds it for reading.
	 */
	down_write(&conn->params.local->defrag_sem);

	sp->hdr.serial = serial;
	smp_wmb(); /* Set serial before timestamp */
	skb->tstamp = ktime_get_real();

	switch (conn->params.local->srx.transport.family) {
	case AF_INET6:
	case AF_INET:
		/* Switch IP_MTU_DISCOVER to IP_PMTUDISC_DONT for this one
		 * send and put it back to IP_PMTUDISC_DO afterwards.  The
		 * results of the kernel_setsockopt() calls are not checked.
		 */
		opt = IP_PMTUDISC_DONT;
		kernel_setsockopt(conn->params.local->socket,
				  SOL_IP, IP_MTU_DISCOVER,
				  (char *)&opt, sizeof(opt));
		ret = kernel_sendmsg(conn->params.local->socket, &msg,
				     iov, 2, len);
		conn->params.peer->last_tx_at = ktime_get_seconds();

		opt = IP_PMTUDISC_DO;
		kernel_setsockopt(conn->params.local->socket,
				  SOL_IP, IP_MTU_DISCOVER,
				  (char *)&opt, sizeof(opt));
		break;

	default:
		BUG();
	}

	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_data_frag);
	else
		trace_rxrpc_tx_packet(call->debug_id, &whdr,
				      rxrpc_tx_point_call_data_frag);
	rxrpc_tx_backoff(call, ret);

	up_write(&conn->params.local->defrag_sem);
	goto done;
}

/*
 * Reject packets through the local endpoint.
 *
 * Drains local->reject_queue.  Each queued packet is answered according to
 * its skb->mark: a BUSY packet (wire header only) or an ABORT packet (wire
 * header followed by the abort code taken from skb->priority).  A packet with
 * any other mark is freed without a reply.  Every packet taken off the queue
 * is freed here.
 */
void rxrpc_reject_packets(struct rxrpc_local *local)
{
	struct sockaddr_rxrpc srx;
	struct rxrpc_skb_priv *sp;
	struct rxrpc_wire_header whdr;
	struct sk_buff *skb;
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;
	int ret, ioc;

	_enter("%d", local->debug_id);

	/* Both segments are set up once; ioc and size chosen per packet decide
	 * whether the abort code segment is actually transmitted.
	 */
	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);

	msg.msg_name = &srx.transport;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	/* The header is cleared once here, so the fields that the loop does
	 * not assign stay zero for every reply.
	 */
	memset(&whdr, 0, sizeof(whdr));

	while ((skb = skb_dequeue(&local->reject_queue))) {
		rxrpc_see_skb(skb, rxrpc_skb_seen);
		sp = rxrpc_skb(skb);

		switch (skb->mark) {
		case RXRPC_SKB_MARK_REJECT_BUSY:
			whdr.type = RXRPC_PACKET_TYPE_BUSY;
			size = sizeof(whdr);
			ioc = 1;
			break;
		case RXRPC_SKB_MARK_REJECT_ABORT:
			whdr.type = RXRPC_PACKET_TYPE_ABORT;
			code = htonl(skb->priority);
			size = sizeof(whdr) + sizeof(code);
			ioc = 2;
			break;
		default:
			rxrpc_free_skb(skb, rxrpc_skb_freed);
			continue;
		}

		/* A reply is only sent if the source address can be extracted
		 * from the packet.
		 */
		if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
			msg.msg_namelen = srx.transport_len;

			whdr.epoch	= htonl(sp->hdr.epoch);
			whdr.cid	= htonl(sp->hdr.cid);
			whdr.callNumber	= htonl(sp->hdr.callNumber);
			whdr.serviceId	= htons(sp->hdr.serviceId);
			/* Keep only the client-initiated bit, inverted with
			 * respect to the packet being rejected.
			 */
			whdr.flags	= sp->hdr.flags;
			whdr.flags	^= RXRPC_CLIENT_INITIATED;
			whdr.flags	&= RXRPC_CLIENT_INITIATED;

			ret = kernel_sendmsg(local->socket, &msg,
					     iov, ioc, size);
			if (ret < 0)
				trace_rxrpc_tx_fail(local->debug_id, 0, ret,
						    rxrpc_tx_point_reject);
			else
				trace_rxrpc_tx_packet(local->debug_id, &whdr,
						      rxrpc_tx_point_reject);
		}

		rxrpc_free_skb(skb, rxrpc_skb_freed);
	}

	_leave("");
}

/*
 * Send a VERSION reply to a peer as a keepalive.
 *
 * The packet consists of a wire header, in which only the epoch, type and
 * flags are non-zero, followed by rxrpc_keepalive_string including its
 * terminating NUL.  A failure to send is traced but not reported to the
 * caller, and peer->last_tx_at is updated either way.
 */
void rxrpc_send_keepalive(struct rxrpc_peer *peer)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	size_t len;
	int ret;

	_enter("");

	msg.msg_name	= &peer->srx.transport;
	msg.msg_namelen	= peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	whdr.epoch	= htonl(peer->local->rxnet->epoch);
	whdr.cid	= 0;
	whdr.callNumber	= 0;
	whdr.seq	= 0;
	whdr.serial	= 0;
	whdr.type	= RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
	whdr.flags	= RXRPC_LAST_PACKET;
	whdr.userStatus	= 0;
	whdr.securityIndex = 0;
	whdr._rsvd	= 0;
	whdr.serviceId	= 0;

	iov[0].iov_base	= &whdr;
	iov[0].iov_len	= sizeof(whdr);
	iov[1].iov_base	= (char *)rxrpc_keepalive_string;
	/* sizeof, not strlen: the string's terminating NUL is transmitted. */
	iov[1].iov_len	= sizeof(rxrpc_keepalive_string);

	len = iov[0].iov_len + iov[1].iov_len;

	_proto("Tx VERSION (keepalive)");

	ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
	if (ret < 0)
		trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
				    rxrpc_tx_point_version_keepalive);
	else
		trace_rxrpc_tx_packet(peer->debug_id, &whdr,
				      rxrpc_tx_point_version_keepalive);

	peer->last_tx_at = ktime_get_seconds();
	_leave("");
}