1 /* RxRPC packet transmission 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/net.h> 15 #include <linux/gfp.h> 16 #include <linux/skbuff.h> 17 #include <linux/export.h> 18 #include <net/sock.h> 19 #include <net/af_rxrpc.h> 20 #include "ar-internal.h" 21 22 struct rxrpc_ack_buffer { 23 struct rxrpc_wire_header whdr; 24 struct rxrpc_ackpacket ack; 25 u8 acks[255]; 26 u8 pad[3]; 27 struct rxrpc_ackinfo ackinfo; 28 }; 29 30 struct rxrpc_abort_buffer { 31 struct rxrpc_wire_header whdr; 32 __be32 abort_code; 33 }; 34 35 static const char rxrpc_keepalive_string[] = ""; 36 37 /* 38 * Arrange for a keepalive ping a certain time after we last transmitted. This 39 * lets the far side know we're still interested in this call and helps keep 40 * the route through any intervening firewall open. 41 * 42 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 43 * expiring. 44 */ 45 static void rxrpc_set_keepalive(struct rxrpc_call *call) 46 { 47 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; 48 49 keepalive_at += now; 50 WRITE_ONCE(call->keepalive_at, keepalive_at); 51 rxrpc_reduce_call_timer(call, keepalive_at, now, 52 rxrpc_timer_set_for_keepalive); 53 } 54 55 /* 56 * Fill out an ACK packet. 57 */ 58 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 59 struct rxrpc_call *call, 60 struct rxrpc_ack_buffer *pkt, 61 rxrpc_seq_t *_hard_ack, 62 rxrpc_seq_t *_top, 63 u8 reason) 64 { 65 rxrpc_serial_t serial; 66 rxrpc_seq_t hard_ack, top, seq; 67 int ix; 68 u32 mtu, jmax; 69 u8 *ackp = pkt->acks; 70 71 /* Barrier against rxrpc_input_data(). */ 72 serial = call->ackr_serial; 73 hard_ack = READ_ONCE(call->rx_hard_ack); 74 top = smp_load_acquire(&call->rx_top); 75 *_hard_ack = hard_ack; 76 *_top = top; 77 78 pkt->ack.bufferSpace = htons(8); 79 pkt->ack.maxSkew = htons(call->ackr_skew); 80 pkt->ack.firstPacket = htonl(hard_ack + 1); 81 pkt->ack.previousPacket = htonl(call->ackr_prev_seq); 82 pkt->ack.serial = htonl(serial); 83 pkt->ack.reason = reason; 84 pkt->ack.nAcks = top - hard_ack; 85 86 if (reason == RXRPC_ACK_PING) 87 pkt->whdr.flags |= RXRPC_REQUEST_ACK; 88 89 if (after(top, hard_ack)) { 90 seq = hard_ack + 1; 91 do { 92 ix = seq & RXRPC_RXTX_BUFF_MASK; 93 if (call->rxtx_buffer[ix]) 94 *ackp++ = RXRPC_ACK_TYPE_ACK; 95 else 96 *ackp++ = RXRPC_ACK_TYPE_NACK; 97 seq++; 98 } while (before_eq(seq, top)); 99 } 100 101 mtu = conn->params.peer->if_mtu; 102 mtu -= conn->params.peer->hdrsize; 103 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 104 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 105 pkt->ackinfo.maxMTU = htonl(mtu); 106 pkt->ackinfo.rwind = htonl(call->rx_winsize); 107 pkt->ackinfo.jumbo_max = htonl(jmax); 108 109 *ackp++ = 0; 110 *ackp++ = 0; 111 *ackp++ = 0; 112 return top - hard_ack + 3; 113 } 114 115 /* 116 * Send an ACK call packet. 117 */ 118 int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, 119 rxrpc_serial_t *_serial) 120 { 121 struct rxrpc_connection *conn = NULL; 122 struct rxrpc_ack_buffer *pkt; 123 struct msghdr msg; 124 struct kvec iov[2]; 125 rxrpc_serial_t serial; 126 rxrpc_seq_t hard_ack, top; 127 ktime_t now; 128 size_t len, n; 129 int ret; 130 u8 reason; 131 132 spin_lock_bh(&call->lock); 133 if (call->conn) 134 conn = rxrpc_get_connection_maybe(call->conn); 135 spin_unlock_bh(&call->lock); 136 if (!conn) 137 return -ECONNRESET; 138 139 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 140 if (!pkt) { 141 rxrpc_put_connection(conn); 142 return -ENOMEM; 143 } 144 145 msg.msg_name = &call->peer->srx.transport; 146 msg.msg_namelen = call->peer->srx.transport_len; 147 msg.msg_control = NULL; 148 msg.msg_controllen = 0; 149 msg.msg_flags = 0; 150 151 pkt->whdr.epoch = htonl(conn->proto.epoch); 152 pkt->whdr.cid = htonl(call->cid); 153 pkt->whdr.callNumber = htonl(call->call_id); 154 pkt->whdr.seq = 0; 155 pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; 156 pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; 157 pkt->whdr.userStatus = 0; 158 pkt->whdr.securityIndex = call->security_ix; 159 pkt->whdr._rsvd = 0; 160 pkt->whdr.serviceId = htons(call->service_id); 161 162 spin_lock_bh(&call->lock); 163 if (ping) { 164 reason = RXRPC_ACK_PING; 165 } else { 166 reason = call->ackr_reason; 167 if (!call->ackr_reason) { 168 spin_unlock_bh(&call->lock); 169 ret = 0; 170 goto out; 171 } 172 call->ackr_reason = 0; 173 } 174 n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); 175 176 spin_unlock_bh(&call->lock); 177 178 iov[0].iov_base = pkt; 179 iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; 180 iov[1].iov_base = &pkt->ackinfo; 181 iov[1].iov_len = sizeof(pkt->ackinfo); 182 len = iov[0].iov_len + iov[1].iov_len; 183 184 serial = atomic_inc_return(&conn->serial); 185 pkt->whdr.serial = htonl(serial); 186 trace_rxrpc_tx_ack(call, serial, 187 ntohl(pkt->ack.firstPacket), 188 ntohl(pkt->ack.serial), 189 pkt->ack.reason, pkt->ack.nAcks); 190 if (_serial) 191 *_serial = serial; 192 193 if (ping) { 194 call->ping_serial = serial; 195 smp_wmb(); 196 /* We need to stick a time in before we send the packet in case 197 * the reply gets back before kernel_sendmsg() completes - but 198 * asking UDP to send the packet can take a relatively long 199 * time, so we update the time after, on the assumption that 200 * the packet transmission is more likely to happen towards the 201 * end of the kernel_sendmsg() call. 202 */ 203 call->ping_time = ktime_get_real(); 204 set_bit(RXRPC_CALL_PINGING, &call->flags); 205 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); 206 } 207 208 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 209 now = ktime_get_real(); 210 if (ping) 211 call->ping_time = now; 212 conn->params.peer->last_tx_at = ktime_get_real(); 213 if (ret < 0) 214 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 215 rxrpc_tx_fail_call_ack); 216 217 if (call->state < RXRPC_CALL_COMPLETE) { 218 if (ret < 0) { 219 if (ping) 220 clear_bit(RXRPC_CALL_PINGING, &call->flags); 221 rxrpc_propose_ACK(call, pkt->ack.reason, 222 ntohs(pkt->ack.maxSkew), 223 ntohl(pkt->ack.serial), 224 true, true, 225 rxrpc_propose_ack_retry_tx); 226 } else { 227 spin_lock_bh(&call->lock); 228 if (after(hard_ack, call->ackr_consumed)) 229 call->ackr_consumed = hard_ack; 230 if (after(top, call->ackr_seen)) 231 call->ackr_seen = top; 232 spin_unlock_bh(&call->lock); 233 } 234 235 rxrpc_set_keepalive(call); 236 } 237 238 out: 239 rxrpc_put_connection(conn); 240 kfree(pkt); 241 return ret; 242 } 243 244 /* 245 * Send an ABORT call packet. 246 */ 247 int rxrpc_send_abort_packet(struct rxrpc_call *call) 248 { 249 struct rxrpc_connection *conn = NULL; 250 struct rxrpc_abort_buffer pkt; 251 struct msghdr msg; 252 struct kvec iov[1]; 253 rxrpc_serial_t serial; 254 int ret; 255 256 /* Don't bother sending aborts for a client call once the server has 257 * hard-ACK'd all of its request data. After that point, we're not 258 * going to stop the operation proceeding, and whilst we might limit 259 * the reply, it's not worth it if we can send a new call on the same 260 * channel instead, thereby closing off this call. 261 */ 262 if (rxrpc_is_client_call(call) && 263 test_bit(RXRPC_CALL_TX_LAST, &call->flags)) 264 return 0; 265 266 spin_lock_bh(&call->lock); 267 if (call->conn) 268 conn = rxrpc_get_connection_maybe(call->conn); 269 spin_unlock_bh(&call->lock); 270 if (!conn) 271 return -ECONNRESET; 272 273 msg.msg_name = &call->peer->srx.transport; 274 msg.msg_namelen = call->peer->srx.transport_len; 275 msg.msg_control = NULL; 276 msg.msg_controllen = 0; 277 msg.msg_flags = 0; 278 279 pkt.whdr.epoch = htonl(conn->proto.epoch); 280 pkt.whdr.cid = htonl(call->cid); 281 pkt.whdr.callNumber = htonl(call->call_id); 282 pkt.whdr.seq = 0; 283 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; 284 pkt.whdr.flags = conn->out_clientflag; 285 pkt.whdr.userStatus = 0; 286 pkt.whdr.securityIndex = call->security_ix; 287 pkt.whdr._rsvd = 0; 288 pkt.whdr.serviceId = htons(call->service_id); 289 pkt.abort_code = htonl(call->abort_code); 290 291 iov[0].iov_base = &pkt; 292 iov[0].iov_len = sizeof(pkt); 293 294 serial = atomic_inc_return(&conn->serial); 295 pkt.whdr.serial = htonl(serial); 296 297 ret = kernel_sendmsg(conn->params.local->socket, 298 &msg, iov, 1, sizeof(pkt)); 299 conn->params.peer->last_tx_at = ktime_get_real(); 300 if (ret < 0) 301 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 302 rxrpc_tx_fail_call_abort); 303 304 305 rxrpc_put_connection(conn); 306 return ret; 307 } 308 309 /* 310 * send a packet through the transport endpoint 311 */ 312 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, 313 bool retrans) 314 { 315 struct rxrpc_connection *conn = call->conn; 316 struct rxrpc_wire_header whdr; 317 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 318 struct msghdr msg; 319 struct kvec iov[2]; 320 rxrpc_serial_t serial; 321 size_t len; 322 bool lost = false; 323 int ret, opt; 324 325 _enter(",{%d}", skb->len); 326 327 /* Each transmission of a Tx packet needs a new serial number */ 328 serial = atomic_inc_return(&conn->serial); 329 330 whdr.epoch = htonl(conn->proto.epoch); 331 whdr.cid = htonl(call->cid); 332 whdr.callNumber = htonl(call->call_id); 333 whdr.seq = htonl(sp->hdr.seq); 334 whdr.serial = htonl(serial); 335 whdr.type = RXRPC_PACKET_TYPE_DATA; 336 whdr.flags = sp->hdr.flags; 337 whdr.userStatus = 0; 338 whdr.securityIndex = call->security_ix; 339 whdr._rsvd = htons(sp->hdr._rsvd); 340 whdr.serviceId = htons(call->service_id); 341 342 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && 343 sp->hdr.seq == 1) 344 whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; 345 346 iov[0].iov_base = &whdr; 347 iov[0].iov_len = sizeof(whdr); 348 iov[1].iov_base = skb->head; 349 iov[1].iov_len = skb->len; 350 len = iov[0].iov_len + iov[1].iov_len; 351 352 msg.msg_name = &call->peer->srx.transport; 353 msg.msg_namelen = call->peer->srx.transport_len; 354 msg.msg_control = NULL; 355 msg.msg_controllen = 0; 356 msg.msg_flags = 0; 357 358 /* If our RTT cache needs working on, request an ACK. Also request 359 * ACKs if a DATA packet appears to have been lost. 360 */ 361 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && 362 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || 363 retrans || 364 call->cong_mode == RXRPC_CALL_SLOW_START || 365 (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || 366 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 367 ktime_get_real()))) 368 whdr.flags |= RXRPC_REQUEST_ACK; 369 370 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 371 static int lose; 372 if ((lose++ & 7) == 7) { 373 ret = 0; 374 lost = true; 375 goto done; 376 } 377 } 378 379 _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); 380 381 /* send the packet with the don't fragment bit set if we currently 382 * think it's small enough */ 383 if (iov[1].iov_len >= call->peer->maxdata) 384 goto send_fragmentable; 385 386 down_read(&conn->params.local->defrag_sem); 387 /* send the packet by UDP 388 * - returns -EMSGSIZE if UDP would have to fragment the packet 389 * to go out of the interface 390 * - in which case, we'll have processed the ICMP error 391 * message and update the peer record 392 */ 393 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 394 conn->params.peer->last_tx_at = ktime_get_real(); 395 396 up_read(&conn->params.local->defrag_sem); 397 if (ret < 0) 398 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 399 rxrpc_tx_fail_call_data_nofrag); 400 if (ret == -EMSGSIZE) 401 goto send_fragmentable; 402 403 done: 404 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, 405 retrans, lost); 406 if (ret >= 0) { 407 ktime_t now = ktime_get_real(); 408 skb->tstamp = now; 409 smp_wmb(); 410 sp->hdr.serial = serial; 411 if (whdr.flags & RXRPC_REQUEST_ACK) { 412 call->peer->rtt_last_req = now; 413 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 414 if (call->peer->rtt_usage > 1) { 415 unsigned long nowj = jiffies, ack_lost_at; 416 417 ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); 418 if (ack_lost_at < 1) 419 ack_lost_at = 1; 420 421 ack_lost_at += nowj; 422 WRITE_ONCE(call->ack_lost_at, ack_lost_at); 423 rxrpc_reduce_call_timer(call, ack_lost_at, nowj, 424 rxrpc_timer_set_for_lost_ack); 425 } 426 } 427 428 if (sp->hdr.seq == 1 && 429 !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, 430 &call->flags)) { 431 unsigned long nowj = jiffies, expect_rx_by; 432 433 expect_rx_by = nowj + call->next_rx_timo; 434 WRITE_ONCE(call->expect_rx_by, expect_rx_by); 435 rxrpc_reduce_call_timer(call, expect_rx_by, nowj, 436 rxrpc_timer_set_for_normal); 437 } 438 } 439 440 rxrpc_set_keepalive(call); 441 442 _leave(" = %d [%u]", ret, call->peer->maxdata); 443 return ret; 444 445 send_fragmentable: 446 /* attempt to send this message with fragmentation enabled */ 447 _debug("send fragment"); 448 449 down_write(&conn->params.local->defrag_sem); 450 451 switch (conn->params.local->srx.transport.family) { 452 case AF_INET: 453 opt = IP_PMTUDISC_DONT; 454 ret = kernel_setsockopt(conn->params.local->socket, 455 SOL_IP, IP_MTU_DISCOVER, 456 (char *)&opt, sizeof(opt)); 457 if (ret == 0) { 458 ret = kernel_sendmsg(conn->params.local->socket, &msg, 459 iov, 2, len); 460 conn->params.peer->last_tx_at = ktime_get_real(); 461 462 opt = IP_PMTUDISC_DO; 463 kernel_setsockopt(conn->params.local->socket, SOL_IP, 464 IP_MTU_DISCOVER, 465 (char *)&opt, sizeof(opt)); 466 } 467 break; 468 469 #ifdef CONFIG_AF_RXRPC_IPV6 470 case AF_INET6: 471 opt = IPV6_PMTUDISC_DONT; 472 ret = kernel_setsockopt(conn->params.local->socket, 473 SOL_IPV6, IPV6_MTU_DISCOVER, 474 (char *)&opt, sizeof(opt)); 475 if (ret == 0) { 476 ret = kernel_sendmsg(conn->params.local->socket, &msg, 477 iov, 2, len); 478 conn->params.peer->last_tx_at = ktime_get_real(); 479 480 opt = IPV6_PMTUDISC_DO; 481 kernel_setsockopt(conn->params.local->socket, 482 SOL_IPV6, IPV6_MTU_DISCOVER, 483 (char *)&opt, sizeof(opt)); 484 } 485 break; 486 #endif 487 } 488 489 if (ret < 0) 490 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 491 rxrpc_tx_fail_call_data_frag); 492 493 up_write(&conn->params.local->defrag_sem); 494 goto done; 495 } 496 497 /* 498 * reject packets through the local endpoint 499 */ 500 void rxrpc_reject_packets(struct rxrpc_local *local) 501 { 502 struct sockaddr_rxrpc srx; 503 struct rxrpc_skb_priv *sp; 504 struct rxrpc_wire_header whdr; 505 struct sk_buff *skb; 506 struct msghdr msg; 507 struct kvec iov[2]; 508 size_t size; 509 __be32 code; 510 int ret; 511 512 _enter("%d", local->debug_id); 513 514 iov[0].iov_base = &whdr; 515 iov[0].iov_len = sizeof(whdr); 516 iov[1].iov_base = &code; 517 iov[1].iov_len = sizeof(code); 518 size = sizeof(whdr) + sizeof(code); 519 520 msg.msg_name = &srx.transport; 521 msg.msg_control = NULL; 522 msg.msg_controllen = 0; 523 msg.msg_flags = 0; 524 525 memset(&whdr, 0, sizeof(whdr)); 526 whdr.type = RXRPC_PACKET_TYPE_ABORT; 527 528 while ((skb = skb_dequeue(&local->reject_queue))) { 529 rxrpc_see_skb(skb, rxrpc_skb_rx_seen); 530 sp = rxrpc_skb(skb); 531 532 if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { 533 msg.msg_namelen = srx.transport_len; 534 535 code = htonl(skb->priority); 536 537 whdr.epoch = htonl(sp->hdr.epoch); 538 whdr.cid = htonl(sp->hdr.cid); 539 whdr.callNumber = htonl(sp->hdr.callNumber); 540 whdr.serviceId = htons(sp->hdr.serviceId); 541 whdr.flags = sp->hdr.flags; 542 whdr.flags ^= RXRPC_CLIENT_INITIATED; 543 whdr.flags &= RXRPC_CLIENT_INITIATED; 544 545 ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); 546 if (ret < 0) 547 trace_rxrpc_tx_fail(local->debug_id, 0, ret, 548 rxrpc_tx_fail_reject); 549 } 550 551 rxrpc_free_skb(skb, rxrpc_skb_rx_freed); 552 } 553 554 _leave(""); 555 } 556 557 /* 558 * Send a VERSION reply to a peer as a keepalive. 559 */ 560 void rxrpc_send_keepalive(struct rxrpc_peer *peer) 561 { 562 struct rxrpc_wire_header whdr; 563 struct msghdr msg; 564 struct kvec iov[2]; 565 size_t len; 566 int ret; 567 568 _enter(""); 569 570 msg.msg_name = &peer->srx.transport; 571 msg.msg_namelen = peer->srx.transport_len; 572 msg.msg_control = NULL; 573 msg.msg_controllen = 0; 574 msg.msg_flags = 0; 575 576 whdr.epoch = htonl(peer->local->rxnet->epoch); 577 whdr.cid = 0; 578 whdr.callNumber = 0; 579 whdr.seq = 0; 580 whdr.serial = 0; 581 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ 582 whdr.flags = RXRPC_LAST_PACKET; 583 whdr.userStatus = 0; 584 whdr.securityIndex = 0; 585 whdr._rsvd = 0; 586 whdr.serviceId = 0; 587 588 iov[0].iov_base = &whdr; 589 iov[0].iov_len = sizeof(whdr); 590 iov[1].iov_base = (char *)rxrpc_keepalive_string; 591 iov[1].iov_len = sizeof(rxrpc_keepalive_string); 592 593 len = iov[0].iov_len + iov[1].iov_len; 594 595 _proto("Tx VERSION (keepalive)"); 596 597 ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); 598 if (ret < 0) 599 trace_rxrpc_tx_fail(peer->debug_id, 0, ret, 600 rxrpc_tx_fail_version_keepalive); 601 602 peer->last_tx_at = ktime_get_real(); 603 _leave(""); 604 } 605