1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* RxRPC packet transmission 3 * 4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/net.h> 11 #include <linux/gfp.h> 12 #include <linux/skbuff.h> 13 #include <linux/export.h> 14 #include <net/sock.h> 15 #include <net/af_rxrpc.h> 16 #include "ar-internal.h" 17 18 struct rxrpc_ack_buffer { 19 struct rxrpc_wire_header whdr; 20 struct rxrpc_ackpacket ack; 21 u8 acks[255]; 22 u8 pad[3]; 23 struct rxrpc_ackinfo ackinfo; 24 }; 25 26 struct rxrpc_abort_buffer { 27 struct rxrpc_wire_header whdr; 28 __be32 abort_code; 29 }; 30 31 static const char rxrpc_keepalive_string[] = ""; 32 33 /* 34 * Increase Tx backoff on transmission failure and clear it on success. 35 */ 36 static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) 37 { 38 if (ret < 0) { 39 u16 tx_backoff = READ_ONCE(call->tx_backoff); 40 41 if (tx_backoff < HZ) 42 WRITE_ONCE(call->tx_backoff, tx_backoff + 1); 43 } else { 44 WRITE_ONCE(call->tx_backoff, 0); 45 } 46 } 47 48 /* 49 * Arrange for a keepalive ping a certain time after we last transmitted. This 50 * lets the far side know we're still interested in this call and helps keep 51 * the route through any intervening firewall open. 52 * 53 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 54 * expiring. 55 */ 56 static void rxrpc_set_keepalive(struct rxrpc_call *call) 57 { 58 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; 59 60 keepalive_at += now; 61 WRITE_ONCE(call->keepalive_at, keepalive_at); 62 rxrpc_reduce_call_timer(call, keepalive_at, now, 63 rxrpc_timer_set_for_keepalive); 64 } 65 66 /* 67 * Fill out an ACK packet. 68 */ 69 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 70 struct rxrpc_call *call, 71 struct rxrpc_ack_buffer *pkt, 72 rxrpc_seq_t *_hard_ack, 73 rxrpc_seq_t *_top, 74 u8 reason) 75 { 76 rxrpc_serial_t serial; 77 rxrpc_seq_t hard_ack, top, seq; 78 int ix; 79 u32 mtu, jmax; 80 u8 *ackp = pkt->acks; 81 82 /* Barrier against rxrpc_input_data(). */ 83 serial = call->ackr_serial; 84 hard_ack = READ_ONCE(call->rx_hard_ack); 85 top = smp_load_acquire(&call->rx_top); 86 *_hard_ack = hard_ack; 87 *_top = top; 88 89 pkt->ack.bufferSpace = htons(8); 90 pkt->ack.maxSkew = htons(0); 91 pkt->ack.firstPacket = htonl(hard_ack + 1); 92 pkt->ack.previousPacket = htonl(call->ackr_prev_seq); 93 pkt->ack.serial = htonl(serial); 94 pkt->ack.reason = reason; 95 pkt->ack.nAcks = top - hard_ack; 96 97 if (reason == RXRPC_ACK_PING) 98 pkt->whdr.flags |= RXRPC_REQUEST_ACK; 99 100 if (after(top, hard_ack)) { 101 seq = hard_ack + 1; 102 do { 103 ix = seq & RXRPC_RXTX_BUFF_MASK; 104 if (call->rxtx_buffer[ix]) 105 *ackp++ = RXRPC_ACK_TYPE_ACK; 106 else 107 *ackp++ = RXRPC_ACK_TYPE_NACK; 108 seq++; 109 } while (before_eq(seq, top)); 110 } 111 112 mtu = conn->params.peer->if_mtu; 113 mtu -= conn->params.peer->hdrsize; 114 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 115 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 116 pkt->ackinfo.maxMTU = htonl(mtu); 117 pkt->ackinfo.rwind = htonl(call->rx_winsize); 118 pkt->ackinfo.jumbo_max = htonl(jmax); 119 120 *ackp++ = 0; 121 *ackp++ = 0; 122 *ackp++ = 0; 123 return top - hard_ack + 3; 124 } 125 126 /* 127 * Send an ACK call packet. 128 */ 129 int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, 130 rxrpc_serial_t *_serial) 131 { 132 struct rxrpc_connection *conn; 133 struct rxrpc_ack_buffer *pkt; 134 struct msghdr msg; 135 struct kvec iov[2]; 136 rxrpc_serial_t serial; 137 rxrpc_seq_t hard_ack, top; 138 size_t len, n; 139 int ret; 140 u8 reason; 141 142 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) 143 return -ECONNRESET; 144 145 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 146 if (!pkt) 147 return -ENOMEM; 148 149 conn = call->conn; 150 151 msg.msg_name = &call->peer->srx.transport; 152 msg.msg_namelen = call->peer->srx.transport_len; 153 msg.msg_control = NULL; 154 msg.msg_controllen = 0; 155 msg.msg_flags = 0; 156 157 pkt->whdr.epoch = htonl(conn->proto.epoch); 158 pkt->whdr.cid = htonl(call->cid); 159 pkt->whdr.callNumber = htonl(call->call_id); 160 pkt->whdr.seq = 0; 161 pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; 162 pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; 163 pkt->whdr.userStatus = 0; 164 pkt->whdr.securityIndex = call->security_ix; 165 pkt->whdr._rsvd = 0; 166 pkt->whdr.serviceId = htons(call->service_id); 167 168 spin_lock_bh(&call->lock); 169 if (ping) { 170 reason = RXRPC_ACK_PING; 171 } else { 172 reason = call->ackr_reason; 173 if (!call->ackr_reason) { 174 spin_unlock_bh(&call->lock); 175 ret = 0; 176 goto out; 177 } 178 call->ackr_reason = 0; 179 } 180 n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); 181 182 spin_unlock_bh(&call->lock); 183 184 iov[0].iov_base = pkt; 185 iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; 186 iov[1].iov_base = &pkt->ackinfo; 187 iov[1].iov_len = sizeof(pkt->ackinfo); 188 len = iov[0].iov_len + iov[1].iov_len; 189 190 serial = atomic_inc_return(&conn->serial); 191 pkt->whdr.serial = htonl(serial); 192 trace_rxrpc_tx_ack(call->debug_id, serial, 193 ntohl(pkt->ack.firstPacket), 194 ntohl(pkt->ack.serial), 195 pkt->ack.reason, pkt->ack.nAcks); 196 if (_serial) 197 *_serial = serial; 198 199 if (ping) { 200 call->ping_serial = serial; 201 smp_wmb(); 202 /* We need to stick a time in before we send the packet in case 203 * the reply gets back before kernel_sendmsg() completes - but 204 * asking UDP to send the packet can take a relatively long 205 * time. 206 */ 207 call->ping_time = ktime_get_real(); 208 set_bit(RXRPC_CALL_PINGING, &call->flags); 209 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); 210 } 211 212 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 213 conn->params.peer->last_tx_at = ktime_get_seconds(); 214 if (ret < 0) 215 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 216 rxrpc_tx_point_call_ack); 217 else 218 trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr, 219 rxrpc_tx_point_call_ack); 220 rxrpc_tx_backoff(call, ret); 221 222 if (call->state < RXRPC_CALL_COMPLETE) { 223 if (ret < 0) { 224 if (ping) 225 clear_bit(RXRPC_CALL_PINGING, &call->flags); 226 rxrpc_propose_ACK(call, pkt->ack.reason, 227 ntohl(pkt->ack.serial), 228 false, true, 229 rxrpc_propose_ack_retry_tx); 230 } else { 231 spin_lock_bh(&call->lock); 232 if (after(hard_ack, call->ackr_consumed)) 233 call->ackr_consumed = hard_ack; 234 if (after(top, call->ackr_seen)) 235 call->ackr_seen = top; 236 spin_unlock_bh(&call->lock); 237 } 238 239 rxrpc_set_keepalive(call); 240 } 241 242 out: 243 kfree(pkt); 244 return ret; 245 } 246 247 /* 248 * Send an ABORT call packet. 249 */ 250 int rxrpc_send_abort_packet(struct rxrpc_call *call) 251 { 252 struct rxrpc_connection *conn; 253 struct rxrpc_abort_buffer pkt; 254 struct msghdr msg; 255 struct kvec iov[1]; 256 rxrpc_serial_t serial; 257 int ret; 258 259 /* Don't bother sending aborts for a client call once the server has 260 * hard-ACK'd all of its request data. After that point, we're not 261 * going to stop the operation proceeding, and whilst we might limit 262 * the reply, it's not worth it if we can send a new call on the same 263 * channel instead, thereby closing off this call. 264 */ 265 if (rxrpc_is_client_call(call) && 266 test_bit(RXRPC_CALL_TX_LAST, &call->flags)) 267 return 0; 268 269 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) 270 return -ECONNRESET; 271 272 conn = call->conn; 273 274 msg.msg_name = &call->peer->srx.transport; 275 msg.msg_namelen = call->peer->srx.transport_len; 276 msg.msg_control = NULL; 277 msg.msg_controllen = 0; 278 msg.msg_flags = 0; 279 280 pkt.whdr.epoch = htonl(conn->proto.epoch); 281 pkt.whdr.cid = htonl(call->cid); 282 pkt.whdr.callNumber = htonl(call->call_id); 283 pkt.whdr.seq = 0; 284 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; 285 pkt.whdr.flags = conn->out_clientflag; 286 pkt.whdr.userStatus = 0; 287 pkt.whdr.securityIndex = call->security_ix; 288 pkt.whdr._rsvd = 0; 289 pkt.whdr.serviceId = htons(call->service_id); 290 pkt.abort_code = htonl(call->abort_code); 291 292 iov[0].iov_base = &pkt; 293 iov[0].iov_len = sizeof(pkt); 294 295 serial = atomic_inc_return(&conn->serial); 296 pkt.whdr.serial = htonl(serial); 297 298 ret = kernel_sendmsg(conn->params.local->socket, 299 &msg, iov, 1, sizeof(pkt)); 300 conn->params.peer->last_tx_at = ktime_get_seconds(); 301 if (ret < 0) 302 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 303 rxrpc_tx_point_call_abort); 304 else 305 trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, 306 rxrpc_tx_point_call_abort); 307 rxrpc_tx_backoff(call, ret); 308 return ret; 309 } 310 311 /* 312 * send a packet through the transport endpoint 313 */ 314 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, 315 bool retrans) 316 { 317 struct rxrpc_connection *conn = call->conn; 318 struct rxrpc_wire_header whdr; 319 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 320 struct msghdr msg; 321 struct kvec iov[2]; 322 rxrpc_serial_t serial; 323 size_t len; 324 int ret; 325 326 _enter(",{%d}", skb->len); 327 328 /* Each transmission of a Tx packet needs a new serial number */ 329 serial = atomic_inc_return(&conn->serial); 330 331 whdr.epoch = htonl(conn->proto.epoch); 332 whdr.cid = htonl(call->cid); 333 whdr.callNumber = htonl(call->call_id); 334 whdr.seq = htonl(sp->hdr.seq); 335 whdr.serial = htonl(serial); 336 whdr.type = RXRPC_PACKET_TYPE_DATA; 337 whdr.flags = sp->hdr.flags; 338 whdr.userStatus = 0; 339 whdr.securityIndex = call->security_ix; 340 whdr._rsvd = htons(sp->hdr._rsvd); 341 whdr.serviceId = htons(call->service_id); 342 343 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && 344 sp->hdr.seq == 1) 345 whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; 346 347 iov[0].iov_base = &whdr; 348 iov[0].iov_len = sizeof(whdr); 349 iov[1].iov_base = skb->head; 350 iov[1].iov_len = skb->len; 351 len = iov[0].iov_len + iov[1].iov_len; 352 353 msg.msg_name = &call->peer->srx.transport; 354 msg.msg_namelen = call->peer->srx.transport_len; 355 msg.msg_control = NULL; 356 msg.msg_controllen = 0; 357 msg.msg_flags = 0; 358 359 /* If our RTT cache needs working on, request an ACK. Also request 360 * ACKs if a DATA packet appears to have been lost. 361 * 362 * However, we mustn't request an ACK on the last reply packet of a 363 * service call, lest OpenAFS incorrectly send us an ACK with some 364 * soft-ACKs in it and then never follow up with a proper hard ACK. 365 */ 366 if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || 367 rxrpc_to_server(sp) 368 ) && 369 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || 370 retrans || 371 call->cong_mode == RXRPC_CALL_SLOW_START || 372 (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || 373 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 374 ktime_get_real()))) 375 whdr.flags |= RXRPC_REQUEST_ACK; 376 377 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 378 static int lose; 379 if ((lose++ & 7) == 7) { 380 ret = 0; 381 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, 382 whdr.flags, retrans, true); 383 goto done; 384 } 385 } 386 387 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, 388 false); 389 390 /* send the packet with the don't fragment bit set if we currently 391 * think it's small enough */ 392 if (iov[1].iov_len >= call->peer->maxdata) 393 goto send_fragmentable; 394 395 down_read(&conn->params.local->defrag_sem); 396 397 sp->hdr.serial = serial; 398 smp_wmb(); /* Set serial before timestamp */ 399 skb->tstamp = ktime_get_real(); 400 401 /* send the packet by UDP 402 * - returns -EMSGSIZE if UDP would have to fragment the packet 403 * to go out of the interface 404 * - in which case, we'll have processed the ICMP error 405 * message and update the peer record 406 */ 407 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 408 conn->params.peer->last_tx_at = ktime_get_seconds(); 409 410 up_read(&conn->params.local->defrag_sem); 411 if (ret < 0) 412 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 413 rxrpc_tx_point_call_data_nofrag); 414 else 415 trace_rxrpc_tx_packet(call->debug_id, &whdr, 416 rxrpc_tx_point_call_data_nofrag); 417 rxrpc_tx_backoff(call, ret); 418 if (ret == -EMSGSIZE) 419 goto send_fragmentable; 420 421 done: 422 if (ret >= 0) { 423 if (whdr.flags & RXRPC_REQUEST_ACK) { 424 call->peer->rtt_last_req = skb->tstamp; 425 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 426 if (call->peer->rtt_count > 1) { 427 unsigned long nowj = jiffies, ack_lost_at; 428 429 ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); 430 ack_lost_at += nowj; 431 WRITE_ONCE(call->ack_lost_at, ack_lost_at); 432 rxrpc_reduce_call_timer(call, ack_lost_at, nowj, 433 rxrpc_timer_set_for_lost_ack); 434 } 435 } 436 437 if (sp->hdr.seq == 1 && 438 !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, 439 &call->flags)) { 440 unsigned long nowj = jiffies, expect_rx_by; 441 442 expect_rx_by = nowj + call->next_rx_timo; 443 WRITE_ONCE(call->expect_rx_by, expect_rx_by); 444 rxrpc_reduce_call_timer(call, expect_rx_by, nowj, 445 rxrpc_timer_set_for_normal); 446 } 447 448 rxrpc_set_keepalive(call); 449 } else { 450 /* Cancel the call if the initial transmission fails, 451 * particularly if that's due to network routing issues that 452 * aren't going away anytime soon. The layer above can arrange 453 * the retransmission. 454 */ 455 if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) 456 rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 457 RX_USER_ABORT, ret); 458 } 459 460 _leave(" = %d [%u]", ret, call->peer->maxdata); 461 return ret; 462 463 send_fragmentable: 464 /* attempt to send this message with fragmentation enabled */ 465 _debug("send fragment"); 466 467 down_write(&conn->params.local->defrag_sem); 468 469 sp->hdr.serial = serial; 470 smp_wmb(); /* Set serial before timestamp */ 471 skb->tstamp = ktime_get_real(); 472 473 switch (conn->params.local->srx.transport.family) { 474 case AF_INET6: 475 case AF_INET: 476 ip_sock_set_mtu_discover(conn->params.local->socket->sk, 477 IP_PMTUDISC_DONT); 478 ret = kernel_sendmsg(conn->params.local->socket, &msg, 479 iov, 2, len); 480 conn->params.peer->last_tx_at = ktime_get_seconds(); 481 482 ip_sock_set_mtu_discover(conn->params.local->socket->sk, 483 IP_PMTUDISC_DO); 484 break; 485 486 default: 487 BUG(); 488 } 489 490 if (ret < 0) 491 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 492 rxrpc_tx_point_call_data_frag); 493 else 494 trace_rxrpc_tx_packet(call->debug_id, &whdr, 495 rxrpc_tx_point_call_data_frag); 496 rxrpc_tx_backoff(call, ret); 497 498 up_write(&conn->params.local->defrag_sem); 499 goto done; 500 } 501 502 /* 503 * reject packets through the local endpoint 504 */ 505 void rxrpc_reject_packets(struct rxrpc_local *local) 506 { 507 struct sockaddr_rxrpc srx; 508 struct rxrpc_skb_priv *sp; 509 struct rxrpc_wire_header whdr; 510 struct sk_buff *skb; 511 struct msghdr msg; 512 struct kvec iov[2]; 513 size_t size; 514 __be32 code; 515 int ret, ioc; 516 517 _enter("%d", local->debug_id); 518 519 iov[0].iov_base = &whdr; 520 iov[0].iov_len = sizeof(whdr); 521 iov[1].iov_base = &code; 522 iov[1].iov_len = sizeof(code); 523 524 msg.msg_name = &srx.transport; 525 msg.msg_control = NULL; 526 msg.msg_controllen = 0; 527 msg.msg_flags = 0; 528 529 memset(&whdr, 0, sizeof(whdr)); 530 531 while ((skb = skb_dequeue(&local->reject_queue))) { 532 rxrpc_see_skb(skb, rxrpc_skb_seen); 533 sp = rxrpc_skb(skb); 534 535 switch (skb->mark) { 536 case RXRPC_SKB_MARK_REJECT_BUSY: 537 whdr.type = RXRPC_PACKET_TYPE_BUSY; 538 size = sizeof(whdr); 539 ioc = 1; 540 break; 541 case RXRPC_SKB_MARK_REJECT_ABORT: 542 whdr.type = RXRPC_PACKET_TYPE_ABORT; 543 code = htonl(skb->priority); 544 size = sizeof(whdr) + sizeof(code); 545 ioc = 2; 546 break; 547 default: 548 rxrpc_free_skb(skb, rxrpc_skb_freed); 549 continue; 550 } 551 552 if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { 553 msg.msg_namelen = srx.transport_len; 554 555 whdr.epoch = htonl(sp->hdr.epoch); 556 whdr.cid = htonl(sp->hdr.cid); 557 whdr.callNumber = htonl(sp->hdr.callNumber); 558 whdr.serviceId = htons(sp->hdr.serviceId); 559 whdr.flags = sp->hdr.flags; 560 whdr.flags ^= RXRPC_CLIENT_INITIATED; 561 whdr.flags &= RXRPC_CLIENT_INITIATED; 562 563 ret = kernel_sendmsg(local->socket, &msg, 564 iov, ioc, size); 565 if (ret < 0) 566 trace_rxrpc_tx_fail(local->debug_id, 0, ret, 567 rxrpc_tx_point_reject); 568 else 569 trace_rxrpc_tx_packet(local->debug_id, &whdr, 570 rxrpc_tx_point_reject); 571 } 572 573 rxrpc_free_skb(skb, rxrpc_skb_freed); 574 } 575 576 _leave(""); 577 } 578 579 /* 580 * Send a VERSION reply to a peer as a keepalive. 581 */ 582 void rxrpc_send_keepalive(struct rxrpc_peer *peer) 583 { 584 struct rxrpc_wire_header whdr; 585 struct msghdr msg; 586 struct kvec iov[2]; 587 size_t len; 588 int ret; 589 590 _enter(""); 591 592 msg.msg_name = &peer->srx.transport; 593 msg.msg_namelen = peer->srx.transport_len; 594 msg.msg_control = NULL; 595 msg.msg_controllen = 0; 596 msg.msg_flags = 0; 597 598 whdr.epoch = htonl(peer->local->rxnet->epoch); 599 whdr.cid = 0; 600 whdr.callNumber = 0; 601 whdr.seq = 0; 602 whdr.serial = 0; 603 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ 604 whdr.flags = RXRPC_LAST_PACKET; 605 whdr.userStatus = 0; 606 whdr.securityIndex = 0; 607 whdr._rsvd = 0; 608 whdr.serviceId = 0; 609 610 iov[0].iov_base = &whdr; 611 iov[0].iov_len = sizeof(whdr); 612 iov[1].iov_base = (char *)rxrpc_keepalive_string; 613 iov[1].iov_len = sizeof(rxrpc_keepalive_string); 614 615 len = iov[0].iov_len + iov[1].iov_len; 616 617 _proto("Tx VERSION (keepalive)"); 618 619 ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); 620 if (ret < 0) 621 trace_rxrpc_tx_fail(peer->debug_id, 0, ret, 622 rxrpc_tx_point_version_keepalive); 623 else 624 trace_rxrpc_tx_packet(peer->debug_id, &whdr, 625 rxrpc_tx_point_version_keepalive); 626 627 peer->last_tx_at = ktime_get_seconds(); 628 _leave(""); 629 } 630