1 /* RxRPC packet transmission 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/net.h> 15 #include <linux/gfp.h> 16 #include <linux/skbuff.h> 17 #include <linux/export.h> 18 #include <net/sock.h> 19 #include <net/af_rxrpc.h> 20 #include "ar-internal.h" 21 22 struct rxrpc_ack_buffer { 23 struct rxrpc_wire_header whdr; 24 struct rxrpc_ackpacket ack; 25 u8 acks[255]; 26 u8 pad[3]; 27 struct rxrpc_ackinfo ackinfo; 28 }; 29 30 struct rxrpc_abort_buffer { 31 struct rxrpc_wire_header whdr; 32 __be32 abort_code; 33 }; 34 35 static const char rxrpc_keepalive_string[] = ""; 36 37 /* 38 * Arrange for a keepalive ping a certain time after we last transmitted. This 39 * lets the far side know we're still interested in this call and helps keep 40 * the route through any intervening firewall open. 41 * 42 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 43 * expiring. 44 */ 45 static void rxrpc_set_keepalive(struct rxrpc_call *call) 46 { 47 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; 48 49 keepalive_at += now; 50 WRITE_ONCE(call->keepalive_at, keepalive_at); 51 rxrpc_reduce_call_timer(call, keepalive_at, now, 52 rxrpc_timer_set_for_keepalive); 53 } 54 55 /* 56 * Fill out an ACK packet. 57 */ 58 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 59 struct rxrpc_call *call, 60 struct rxrpc_ack_buffer *pkt, 61 rxrpc_seq_t *_hard_ack, 62 rxrpc_seq_t *_top, 63 u8 reason) 64 { 65 rxrpc_serial_t serial; 66 rxrpc_seq_t hard_ack, top, seq; 67 int ix; 68 u32 mtu, jmax; 69 u8 *ackp = pkt->acks; 70 71 /* Barrier against rxrpc_input_data(). */ 72 serial = call->ackr_serial; 73 hard_ack = READ_ONCE(call->rx_hard_ack); 74 top = smp_load_acquire(&call->rx_top); 75 *_hard_ack = hard_ack; 76 *_top = top; 77 78 pkt->ack.bufferSpace = htons(8); 79 pkt->ack.maxSkew = htons(call->ackr_skew); 80 pkt->ack.firstPacket = htonl(hard_ack + 1); 81 pkt->ack.previousPacket = htonl(call->ackr_prev_seq); 82 pkt->ack.serial = htonl(serial); 83 pkt->ack.reason = reason; 84 pkt->ack.nAcks = top - hard_ack; 85 86 if (reason == RXRPC_ACK_PING) 87 pkt->whdr.flags |= RXRPC_REQUEST_ACK; 88 89 if (after(top, hard_ack)) { 90 seq = hard_ack + 1; 91 do { 92 ix = seq & RXRPC_RXTX_BUFF_MASK; 93 if (call->rxtx_buffer[ix]) 94 *ackp++ = RXRPC_ACK_TYPE_ACK; 95 else 96 *ackp++ = RXRPC_ACK_TYPE_NACK; 97 seq++; 98 } while (before_eq(seq, top)); 99 } 100 101 mtu = conn->params.peer->if_mtu; 102 mtu -= conn->params.peer->hdrsize; 103 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 104 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 105 pkt->ackinfo.maxMTU = htonl(mtu); 106 pkt->ackinfo.rwind = htonl(call->rx_winsize); 107 pkt->ackinfo.jumbo_max = htonl(jmax); 108 109 *ackp++ = 0; 110 *ackp++ = 0; 111 *ackp++ = 0; 112 return top - hard_ack + 3; 113 } 114 115 /* 116 * Send an ACK call packet. 117 */ 118 int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, 119 rxrpc_serial_t *_serial) 120 { 121 struct rxrpc_connection *conn = NULL; 122 struct rxrpc_ack_buffer *pkt; 123 struct msghdr msg; 124 struct kvec iov[2]; 125 rxrpc_serial_t serial; 126 rxrpc_seq_t hard_ack, top; 127 ktime_t now; 128 size_t len, n; 129 int ret; 130 u8 reason; 131 132 spin_lock_bh(&call->lock); 133 if (call->conn) 134 conn = rxrpc_get_connection_maybe(call->conn); 135 spin_unlock_bh(&call->lock); 136 if (!conn) 137 return -ECONNRESET; 138 139 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 140 if (!pkt) { 141 rxrpc_put_connection(conn); 142 return -ENOMEM; 143 } 144 145 msg.msg_name = &call->peer->srx.transport; 146 msg.msg_namelen = call->peer->srx.transport_len; 147 msg.msg_control = NULL; 148 msg.msg_controllen = 0; 149 msg.msg_flags = 0; 150 151 pkt->whdr.epoch = htonl(conn->proto.epoch); 152 pkt->whdr.cid = htonl(call->cid); 153 pkt->whdr.callNumber = htonl(call->call_id); 154 pkt->whdr.seq = 0; 155 pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; 156 pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; 157 pkt->whdr.userStatus = 0; 158 pkt->whdr.securityIndex = call->security_ix; 159 pkt->whdr._rsvd = 0; 160 pkt->whdr.serviceId = htons(call->service_id); 161 162 spin_lock_bh(&call->lock); 163 if (ping) { 164 reason = RXRPC_ACK_PING; 165 } else { 166 reason = call->ackr_reason; 167 if (!call->ackr_reason) { 168 spin_unlock_bh(&call->lock); 169 ret = 0; 170 goto out; 171 } 172 call->ackr_reason = 0; 173 } 174 n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); 175 176 spin_unlock_bh(&call->lock); 177 178 iov[0].iov_base = pkt; 179 iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; 180 iov[1].iov_base = &pkt->ackinfo; 181 iov[1].iov_len = sizeof(pkt->ackinfo); 182 len = iov[0].iov_len + iov[1].iov_len; 183 184 serial = atomic_inc_return(&conn->serial); 185 pkt->whdr.serial = htonl(serial); 186 trace_rxrpc_tx_ack(call->debug_id, serial, 187 ntohl(pkt->ack.firstPacket), 188 ntohl(pkt->ack.serial), 189 pkt->ack.reason, pkt->ack.nAcks); 190 if (_serial) 191 *_serial = serial; 192 193 if (ping) { 194 call->ping_serial = serial; 195 smp_wmb(); 196 /* We need to stick a time in before we send the packet in case 197 * the reply gets back before kernel_sendmsg() completes - but 198 * asking UDP to send the packet can take a relatively long 199 * time, so we update the time after, on the assumption that 200 * the packet transmission is more likely to happen towards the 201 * end of the kernel_sendmsg() call. 202 */ 203 call->ping_time = ktime_get_real(); 204 set_bit(RXRPC_CALL_PINGING, &call->flags); 205 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); 206 } 207 208 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 209 now = ktime_get_real(); 210 if (ping) 211 call->ping_time = now; 212 conn->params.peer->last_tx_at = ktime_get_seconds(); 213 if (ret < 0) 214 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 215 rxrpc_tx_point_call_ack); 216 else 217 trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr, 218 rxrpc_tx_point_call_ack); 219 220 if (call->state < RXRPC_CALL_COMPLETE) { 221 if (ret < 0) { 222 if (ping) 223 clear_bit(RXRPC_CALL_PINGING, &call->flags); 224 rxrpc_propose_ACK(call, pkt->ack.reason, 225 ntohs(pkt->ack.maxSkew), 226 ntohl(pkt->ack.serial), 227 true, true, 228 rxrpc_propose_ack_retry_tx); 229 } else { 230 spin_lock_bh(&call->lock); 231 if (after(hard_ack, call->ackr_consumed)) 232 call->ackr_consumed = hard_ack; 233 if (after(top, call->ackr_seen)) 234 call->ackr_seen = top; 235 spin_unlock_bh(&call->lock); 236 } 237 238 rxrpc_set_keepalive(call); 239 } 240 241 out: 242 rxrpc_put_connection(conn); 243 kfree(pkt); 244 return ret; 245 } 246 247 /* 248 * Send an ABORT call packet. 249 */ 250 int rxrpc_send_abort_packet(struct rxrpc_call *call) 251 { 252 struct rxrpc_connection *conn = NULL; 253 struct rxrpc_abort_buffer pkt; 254 struct msghdr msg; 255 struct kvec iov[1]; 256 rxrpc_serial_t serial; 257 int ret; 258 259 /* Don't bother sending aborts for a client call once the server has 260 * hard-ACK'd all of its request data. After that point, we're not 261 * going to stop the operation proceeding, and whilst we might limit 262 * the reply, it's not worth it if we can send a new call on the same 263 * channel instead, thereby closing off this call. 264 */ 265 if (rxrpc_is_client_call(call) && 266 test_bit(RXRPC_CALL_TX_LAST, &call->flags)) 267 return 0; 268 269 spin_lock_bh(&call->lock); 270 if (call->conn) 271 conn = rxrpc_get_connection_maybe(call->conn); 272 spin_unlock_bh(&call->lock); 273 if (!conn) 274 return -ECONNRESET; 275 276 msg.msg_name = &call->peer->srx.transport; 277 msg.msg_namelen = call->peer->srx.transport_len; 278 msg.msg_control = NULL; 279 msg.msg_controllen = 0; 280 msg.msg_flags = 0; 281 282 pkt.whdr.epoch = htonl(conn->proto.epoch); 283 pkt.whdr.cid = htonl(call->cid); 284 pkt.whdr.callNumber = htonl(call->call_id); 285 pkt.whdr.seq = 0; 286 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; 287 pkt.whdr.flags = conn->out_clientflag; 288 pkt.whdr.userStatus = 0; 289 pkt.whdr.securityIndex = call->security_ix; 290 pkt.whdr._rsvd = 0; 291 pkt.whdr.serviceId = htons(call->service_id); 292 pkt.abort_code = htonl(call->abort_code); 293 294 iov[0].iov_base = &pkt; 295 iov[0].iov_len = sizeof(pkt); 296 297 serial = atomic_inc_return(&conn->serial); 298 pkt.whdr.serial = htonl(serial); 299 300 ret = kernel_sendmsg(conn->params.local->socket, 301 &msg, iov, 1, sizeof(pkt)); 302 conn->params.peer->last_tx_at = ktime_get_seconds(); 303 if (ret < 0) 304 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 305 rxrpc_tx_point_call_abort); 306 else 307 trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, 308 rxrpc_tx_point_call_abort); 309 310 311 rxrpc_put_connection(conn); 312 return ret; 313 } 314 315 /* 316 * send a packet through the transport endpoint 317 */ 318 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, 319 bool retrans) 320 { 321 struct rxrpc_connection *conn = call->conn; 322 struct rxrpc_wire_header whdr; 323 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 324 struct msghdr msg; 325 struct kvec iov[2]; 326 rxrpc_serial_t serial; 327 size_t len; 328 bool lost = false; 329 int ret, opt; 330 331 _enter(",{%d}", skb->len); 332 333 /* Each transmission of a Tx packet needs a new serial number */ 334 serial = atomic_inc_return(&conn->serial); 335 336 whdr.epoch = htonl(conn->proto.epoch); 337 whdr.cid = htonl(call->cid); 338 whdr.callNumber = htonl(call->call_id); 339 whdr.seq = htonl(sp->hdr.seq); 340 whdr.serial = htonl(serial); 341 whdr.type = RXRPC_PACKET_TYPE_DATA; 342 whdr.flags = sp->hdr.flags; 343 whdr.userStatus = 0; 344 whdr.securityIndex = call->security_ix; 345 whdr._rsvd = htons(sp->hdr._rsvd); 346 whdr.serviceId = htons(call->service_id); 347 348 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && 349 sp->hdr.seq == 1) 350 whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; 351 352 iov[0].iov_base = &whdr; 353 iov[0].iov_len = sizeof(whdr); 354 iov[1].iov_base = skb->head; 355 iov[1].iov_len = skb->len; 356 len = iov[0].iov_len + iov[1].iov_len; 357 358 msg.msg_name = &call->peer->srx.transport; 359 msg.msg_namelen = call->peer->srx.transport_len; 360 msg.msg_control = NULL; 361 msg.msg_controllen = 0; 362 msg.msg_flags = 0; 363 364 /* If our RTT cache needs working on, request an ACK. Also request 365 * ACKs if a DATA packet appears to have been lost. 366 */ 367 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && 368 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || 369 retrans || 370 call->cong_mode == RXRPC_CALL_SLOW_START || 371 (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || 372 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 373 ktime_get_real()))) 374 whdr.flags |= RXRPC_REQUEST_ACK; 375 376 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 377 static int lose; 378 if ((lose++ & 7) == 7) { 379 ret = 0; 380 lost = true; 381 goto done; 382 } 383 } 384 385 _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); 386 387 /* send the packet with the don't fragment bit set if we currently 388 * think it's small enough */ 389 if (iov[1].iov_len >= call->peer->maxdata) 390 goto send_fragmentable; 391 392 down_read(&conn->params.local->defrag_sem); 393 /* send the packet by UDP 394 * - returns -EMSGSIZE if UDP would have to fragment the packet 395 * to go out of the interface 396 * - in which case, we'll have processed the ICMP error 397 * message and update the peer record 398 */ 399 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 400 conn->params.peer->last_tx_at = ktime_get_seconds(); 401 402 up_read(&conn->params.local->defrag_sem); 403 if (ret < 0) 404 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 405 rxrpc_tx_point_call_data_nofrag); 406 else 407 trace_rxrpc_tx_packet(call->debug_id, &whdr, 408 rxrpc_tx_point_call_data_nofrag); 409 if (ret == -EMSGSIZE) 410 goto send_fragmentable; 411 412 done: 413 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, 414 retrans, lost); 415 if (ret >= 0) { 416 ktime_t now = ktime_get_real(); 417 skb->tstamp = now; 418 smp_wmb(); 419 sp->hdr.serial = serial; 420 if (whdr.flags & RXRPC_REQUEST_ACK) { 421 call->peer->rtt_last_req = now; 422 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 423 if (call->peer->rtt_usage > 1) { 424 unsigned long nowj = jiffies, ack_lost_at; 425 426 ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); 427 if (ack_lost_at < 1) 428 ack_lost_at = 1; 429 430 ack_lost_at += nowj; 431 WRITE_ONCE(call->ack_lost_at, ack_lost_at); 432 rxrpc_reduce_call_timer(call, ack_lost_at, nowj, 433 rxrpc_timer_set_for_lost_ack); 434 } 435 } 436 437 if (sp->hdr.seq == 1 && 438 !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, 439 &call->flags)) { 440 unsigned long nowj = jiffies, expect_rx_by; 441 442 expect_rx_by = nowj + call->next_rx_timo; 443 WRITE_ONCE(call->expect_rx_by, expect_rx_by); 444 rxrpc_reduce_call_timer(call, expect_rx_by, nowj, 445 rxrpc_timer_set_for_normal); 446 } 447 } 448 449 rxrpc_set_keepalive(call); 450 451 _leave(" = %d [%u]", ret, call->peer->maxdata); 452 return ret; 453 454 send_fragmentable: 455 /* attempt to send this message with fragmentation enabled */ 456 _debug("send fragment"); 457 458 down_write(&conn->params.local->defrag_sem); 459 460 switch (conn->params.local->srx.transport.family) { 461 case AF_INET: 462 opt = IP_PMTUDISC_DONT; 463 ret = kernel_setsockopt(conn->params.local->socket, 464 SOL_IP, IP_MTU_DISCOVER, 465 (char *)&opt, sizeof(opt)); 466 if (ret == 0) { 467 ret = kernel_sendmsg(conn->params.local->socket, &msg, 468 iov, 2, len); 469 conn->params.peer->last_tx_at = ktime_get_seconds(); 470 471 opt = IP_PMTUDISC_DO; 472 kernel_setsockopt(conn->params.local->socket, SOL_IP, 473 IP_MTU_DISCOVER, 474 (char *)&opt, sizeof(opt)); 475 } 476 break; 477 478 #ifdef CONFIG_AF_RXRPC_IPV6 479 case AF_INET6: 480 opt = IPV6_PMTUDISC_DONT; 481 ret = kernel_setsockopt(conn->params.local->socket, 482 SOL_IPV6, IPV6_MTU_DISCOVER, 483 (char *)&opt, sizeof(opt)); 484 if (ret == 0) { 485 ret = kernel_sendmsg(conn->params.local->socket, &msg, 486 iov, 2, len); 487 conn->params.peer->last_tx_at = ktime_get_seconds(); 488 489 opt = IPV6_PMTUDISC_DO; 490 kernel_setsockopt(conn->params.local->socket, 491 SOL_IPV6, IPV6_MTU_DISCOVER, 492 (char *)&opt, sizeof(opt)); 493 } 494 break; 495 #endif 496 } 497 498 if (ret < 0) 499 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 500 rxrpc_tx_point_call_data_frag); 501 else 502 trace_rxrpc_tx_packet(call->debug_id, &whdr, 503 rxrpc_tx_point_call_data_frag); 504 505 up_write(&conn->params.local->defrag_sem); 506 goto done; 507 } 508 509 /* 510 * reject packets through the local endpoint 511 */ 512 void rxrpc_reject_packets(struct rxrpc_local *local) 513 { 514 struct sockaddr_rxrpc srx; 515 struct rxrpc_skb_priv *sp; 516 struct rxrpc_wire_header whdr; 517 struct sk_buff *skb; 518 struct msghdr msg; 519 struct kvec iov[2]; 520 size_t size; 521 __be32 code; 522 int ret; 523 524 _enter("%d", local->debug_id); 525 526 iov[0].iov_base = &whdr; 527 iov[0].iov_len = sizeof(whdr); 528 iov[1].iov_base = &code; 529 iov[1].iov_len = sizeof(code); 530 size = sizeof(whdr) + sizeof(code); 531 532 msg.msg_name = &srx.transport; 533 msg.msg_control = NULL; 534 msg.msg_controllen = 0; 535 msg.msg_flags = 0; 536 537 memset(&whdr, 0, sizeof(whdr)); 538 whdr.type = RXRPC_PACKET_TYPE_ABORT; 539 540 while ((skb = skb_dequeue(&local->reject_queue))) { 541 rxrpc_see_skb(skb, rxrpc_skb_rx_seen); 542 sp = rxrpc_skb(skb); 543 544 if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { 545 msg.msg_namelen = srx.transport_len; 546 547 code = htonl(skb->priority); 548 549 whdr.epoch = htonl(sp->hdr.epoch); 550 whdr.cid = htonl(sp->hdr.cid); 551 whdr.callNumber = htonl(sp->hdr.callNumber); 552 whdr.serviceId = htons(sp->hdr.serviceId); 553 whdr.flags = sp->hdr.flags; 554 whdr.flags ^= RXRPC_CLIENT_INITIATED; 555 whdr.flags &= RXRPC_CLIENT_INITIATED; 556 557 ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); 558 if (ret < 0) 559 trace_rxrpc_tx_fail(local->debug_id, 0, ret, 560 rxrpc_tx_point_reject); 561 else 562 trace_rxrpc_tx_packet(local->debug_id, &whdr, 563 rxrpc_tx_point_reject); 564 } 565 566 rxrpc_free_skb(skb, rxrpc_skb_rx_freed); 567 } 568 569 _leave(""); 570 } 571 572 /* 573 * Send a VERSION reply to a peer as a keepalive. 574 */ 575 void rxrpc_send_keepalive(struct rxrpc_peer *peer) 576 { 577 struct rxrpc_wire_header whdr; 578 struct msghdr msg; 579 struct kvec iov[2]; 580 size_t len; 581 int ret; 582 583 _enter(""); 584 585 msg.msg_name = &peer->srx.transport; 586 msg.msg_namelen = peer->srx.transport_len; 587 msg.msg_control = NULL; 588 msg.msg_controllen = 0; 589 msg.msg_flags = 0; 590 591 whdr.epoch = htonl(peer->local->rxnet->epoch); 592 whdr.cid = 0; 593 whdr.callNumber = 0; 594 whdr.seq = 0; 595 whdr.serial = 0; 596 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ 597 whdr.flags = RXRPC_LAST_PACKET; 598 whdr.userStatus = 0; 599 whdr.securityIndex = 0; 600 whdr._rsvd = 0; 601 whdr.serviceId = 0; 602 603 iov[0].iov_base = &whdr; 604 iov[0].iov_len = sizeof(whdr); 605 iov[1].iov_base = (char *)rxrpc_keepalive_string; 606 iov[1].iov_len = sizeof(rxrpc_keepalive_string); 607 608 len = iov[0].iov_len + iov[1].iov_len; 609 610 _proto("Tx VERSION (keepalive)"); 611 612 ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); 613 if (ret < 0) 614 trace_rxrpc_tx_fail(peer->debug_id, 0, ret, 615 rxrpc_tx_point_version_keepalive); 616 else 617 trace_rxrpc_tx_packet(peer->debug_id, &whdr, 618 rxrpc_tx_point_version_keepalive); 619 620 peer->last_tx_at = ktime_get_seconds(); 621 _leave(""); 622 } 623