1 /* RxRPC packet transmission 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/net.h> 15 #include <linux/gfp.h> 16 #include <linux/skbuff.h> 17 #include <linux/export.h> 18 #include <net/sock.h> 19 #include <net/af_rxrpc.h> 20 #include "ar-internal.h" 21 22 struct rxrpc_ack_buffer { 23 struct rxrpc_wire_header whdr; 24 struct rxrpc_ackpacket ack; 25 u8 acks[255]; 26 u8 pad[3]; 27 struct rxrpc_ackinfo ackinfo; 28 }; 29 30 struct rxrpc_abort_buffer { 31 struct rxrpc_wire_header whdr; 32 __be32 abort_code; 33 }; 34 35 static const char rxrpc_keepalive_string[] = ""; 36 37 /* 38 * Arrange for a keepalive ping a certain time after we last transmitted. This 39 * lets the far side know we're still interested in this call and helps keep 40 * the route through any intervening firewall open. 41 * 42 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 43 * expiring. 44 */ 45 static void rxrpc_set_keepalive(struct rxrpc_call *call) 46 { 47 unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; 48 49 keepalive_at += now; 50 WRITE_ONCE(call->keepalive_at, keepalive_at); 51 rxrpc_reduce_call_timer(call, keepalive_at, now, 52 rxrpc_timer_set_for_keepalive); 53 } 54 55 /* 56 * Fill out an ACK packet. 57 */ 58 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, 59 struct rxrpc_call *call, 60 struct rxrpc_ack_buffer *pkt, 61 rxrpc_seq_t *_hard_ack, 62 rxrpc_seq_t *_top, 63 u8 reason) 64 { 65 rxrpc_serial_t serial; 66 rxrpc_seq_t hard_ack, top, seq; 67 int ix; 68 u32 mtu, jmax; 69 u8 *ackp = pkt->acks; 70 71 /* Barrier against rxrpc_input_data(). */ 72 serial = call->ackr_serial; 73 hard_ack = READ_ONCE(call->rx_hard_ack); 74 top = smp_load_acquire(&call->rx_top); 75 *_hard_ack = hard_ack; 76 *_top = top; 77 78 pkt->ack.bufferSpace = htons(8); 79 pkt->ack.maxSkew = htons(call->ackr_skew); 80 pkt->ack.firstPacket = htonl(hard_ack + 1); 81 pkt->ack.previousPacket = htonl(call->ackr_prev_seq); 82 pkt->ack.serial = htonl(serial); 83 pkt->ack.reason = reason; 84 pkt->ack.nAcks = top - hard_ack; 85 86 if (reason == RXRPC_ACK_PING) 87 pkt->whdr.flags |= RXRPC_REQUEST_ACK; 88 89 if (after(top, hard_ack)) { 90 seq = hard_ack + 1; 91 do { 92 ix = seq & RXRPC_RXTX_BUFF_MASK; 93 if (call->rxtx_buffer[ix]) 94 *ackp++ = RXRPC_ACK_TYPE_ACK; 95 else 96 *ackp++ = RXRPC_ACK_TYPE_NACK; 97 seq++; 98 } while (before_eq(seq, top)); 99 } 100 101 mtu = conn->params.peer->if_mtu; 102 mtu -= conn->params.peer->hdrsize; 103 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 104 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 105 pkt->ackinfo.maxMTU = htonl(mtu); 106 pkt->ackinfo.rwind = htonl(call->rx_winsize); 107 pkt->ackinfo.jumbo_max = htonl(jmax); 108 109 *ackp++ = 0; 110 *ackp++ = 0; 111 *ackp++ = 0; 112 return top - hard_ack + 3; 113 } 114 115 /* 116 * Send an ACK call packet. 117 */ 118 int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, 119 rxrpc_serial_t *_serial) 120 { 121 struct rxrpc_connection *conn = NULL; 122 struct rxrpc_ack_buffer *pkt; 123 struct msghdr msg; 124 struct kvec iov[2]; 125 rxrpc_serial_t serial; 126 rxrpc_seq_t hard_ack, top; 127 ktime_t now; 128 size_t len, n; 129 int ret; 130 u8 reason; 131 132 spin_lock_bh(&call->lock); 133 if (call->conn) 134 conn = rxrpc_get_connection_maybe(call->conn); 135 spin_unlock_bh(&call->lock); 136 if (!conn) 137 return -ECONNRESET; 138 139 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 140 if (!pkt) { 141 rxrpc_put_connection(conn); 142 return -ENOMEM; 143 } 144 145 msg.msg_name = &call->peer->srx.transport; 146 msg.msg_namelen = call->peer->srx.transport_len; 147 msg.msg_control = NULL; 148 msg.msg_controllen = 0; 149 msg.msg_flags = 0; 150 151 pkt->whdr.epoch = htonl(conn->proto.epoch); 152 pkt->whdr.cid = htonl(call->cid); 153 pkt->whdr.callNumber = htonl(call->call_id); 154 pkt->whdr.seq = 0; 155 pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; 156 pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; 157 pkt->whdr.userStatus = 0; 158 pkt->whdr.securityIndex = call->security_ix; 159 pkt->whdr._rsvd = 0; 160 pkt->whdr.serviceId = htons(call->service_id); 161 162 spin_lock_bh(&call->lock); 163 if (ping) { 164 reason = RXRPC_ACK_PING; 165 } else { 166 reason = call->ackr_reason; 167 if (!call->ackr_reason) { 168 spin_unlock_bh(&call->lock); 169 ret = 0; 170 goto out; 171 } 172 call->ackr_reason = 0; 173 } 174 n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); 175 176 spin_unlock_bh(&call->lock); 177 178 iov[0].iov_base = pkt; 179 iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; 180 iov[1].iov_base = &pkt->ackinfo; 181 iov[1].iov_len = sizeof(pkt->ackinfo); 182 len = iov[0].iov_len + iov[1].iov_len; 183 184 serial = atomic_inc_return(&conn->serial); 185 pkt->whdr.serial = htonl(serial); 186 trace_rxrpc_tx_ack(call, serial, 187 ntohl(pkt->ack.firstPacket), 188 ntohl(pkt->ack.serial), 189 pkt->ack.reason, pkt->ack.nAcks); 190 if (_serial) 191 *_serial = serial; 192 193 if (ping) { 194 call->ping_serial = serial; 195 smp_wmb(); 196 /* We need to stick a time in before we send the packet in case 197 * the reply gets back before kernel_sendmsg() completes - but 198 * asking UDP to send the packet can take a relatively long 199 * time, so we update the time after, on the assumption that 200 * the packet transmission is more likely to happen towards the 201 * end of the kernel_sendmsg() call. 202 */ 203 call->ping_time = ktime_get_real(); 204 set_bit(RXRPC_CALL_PINGING, &call->flags); 205 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); 206 } 207 208 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 209 now = ktime_get_real(); 210 if (ping) 211 call->ping_time = now; 212 conn->params.peer->last_tx_at = ktime_get_real(); 213 214 if (call->state < RXRPC_CALL_COMPLETE) { 215 if (ret < 0) { 216 if (ping) 217 clear_bit(RXRPC_CALL_PINGING, &call->flags); 218 rxrpc_propose_ACK(call, pkt->ack.reason, 219 ntohs(pkt->ack.maxSkew), 220 ntohl(pkt->ack.serial), 221 true, true, 222 rxrpc_propose_ack_retry_tx); 223 } else { 224 spin_lock_bh(&call->lock); 225 if (after(hard_ack, call->ackr_consumed)) 226 call->ackr_consumed = hard_ack; 227 if (after(top, call->ackr_seen)) 228 call->ackr_seen = top; 229 spin_unlock_bh(&call->lock); 230 } 231 232 rxrpc_set_keepalive(call); 233 } 234 235 out: 236 rxrpc_put_connection(conn); 237 kfree(pkt); 238 return ret; 239 } 240 241 /* 242 * Send an ABORT call packet. 243 */ 244 int rxrpc_send_abort_packet(struct rxrpc_call *call) 245 { 246 struct rxrpc_connection *conn = NULL; 247 struct rxrpc_abort_buffer pkt; 248 struct msghdr msg; 249 struct kvec iov[1]; 250 rxrpc_serial_t serial; 251 int ret; 252 253 /* Don't bother sending aborts for a client call once the server has 254 * hard-ACK'd all of its request data. After that point, we're not 255 * going to stop the operation proceeding, and whilst we might limit 256 * the reply, it's not worth it if we can send a new call on the same 257 * channel instead, thereby closing off this call. 258 */ 259 if (rxrpc_is_client_call(call) && 260 test_bit(RXRPC_CALL_TX_LAST, &call->flags)) 261 return 0; 262 263 spin_lock_bh(&call->lock); 264 if (call->conn) 265 conn = rxrpc_get_connection_maybe(call->conn); 266 spin_unlock_bh(&call->lock); 267 if (!conn) 268 return -ECONNRESET; 269 270 msg.msg_name = &call->peer->srx.transport; 271 msg.msg_namelen = call->peer->srx.transport_len; 272 msg.msg_control = NULL; 273 msg.msg_controllen = 0; 274 msg.msg_flags = 0; 275 276 pkt.whdr.epoch = htonl(conn->proto.epoch); 277 pkt.whdr.cid = htonl(call->cid); 278 pkt.whdr.callNumber = htonl(call->call_id); 279 pkt.whdr.seq = 0; 280 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; 281 pkt.whdr.flags = conn->out_clientflag; 282 pkt.whdr.userStatus = 0; 283 pkt.whdr.securityIndex = call->security_ix; 284 pkt.whdr._rsvd = 0; 285 pkt.whdr.serviceId = htons(call->service_id); 286 pkt.abort_code = htonl(call->abort_code); 287 288 iov[0].iov_base = &pkt; 289 iov[0].iov_len = sizeof(pkt); 290 291 serial = atomic_inc_return(&conn->serial); 292 pkt.whdr.serial = htonl(serial); 293 294 ret = kernel_sendmsg(conn->params.local->socket, 295 &msg, iov, 1, sizeof(pkt)); 296 conn->params.peer->last_tx_at = ktime_get_real(); 297 298 rxrpc_put_connection(conn); 299 return ret; 300 } 301 302 /* 303 * send a packet through the transport endpoint 304 */ 305 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, 306 bool retrans) 307 { 308 struct rxrpc_connection *conn = call->conn; 309 struct rxrpc_wire_header whdr; 310 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 311 struct msghdr msg; 312 struct kvec iov[2]; 313 rxrpc_serial_t serial; 314 size_t len; 315 bool lost = false; 316 int ret, opt; 317 318 _enter(",{%d}", skb->len); 319 320 /* Each transmission of a Tx packet needs a new serial number */ 321 serial = atomic_inc_return(&conn->serial); 322 323 whdr.epoch = htonl(conn->proto.epoch); 324 whdr.cid = htonl(call->cid); 325 whdr.callNumber = htonl(call->call_id); 326 whdr.seq = htonl(sp->hdr.seq); 327 whdr.serial = htonl(serial); 328 whdr.type = RXRPC_PACKET_TYPE_DATA; 329 whdr.flags = sp->hdr.flags; 330 whdr.userStatus = 0; 331 whdr.securityIndex = call->security_ix; 332 whdr._rsvd = htons(sp->hdr._rsvd); 333 whdr.serviceId = htons(call->service_id); 334 335 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && 336 sp->hdr.seq == 1) 337 whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; 338 339 iov[0].iov_base = &whdr; 340 iov[0].iov_len = sizeof(whdr); 341 iov[1].iov_base = skb->head; 342 iov[1].iov_len = skb->len; 343 len = iov[0].iov_len + iov[1].iov_len; 344 345 msg.msg_name = &call->peer->srx.transport; 346 msg.msg_namelen = call->peer->srx.transport_len; 347 msg.msg_control = NULL; 348 msg.msg_controllen = 0; 349 msg.msg_flags = 0; 350 351 /* If our RTT cache needs working on, request an ACK. Also request 352 * ACKs if a DATA packet appears to have been lost. 353 */ 354 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && 355 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || 356 retrans || 357 call->cong_mode == RXRPC_CALL_SLOW_START || 358 (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || 359 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 360 ktime_get_real()))) 361 whdr.flags |= RXRPC_REQUEST_ACK; 362 363 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 364 static int lose; 365 if ((lose++ & 7) == 7) { 366 ret = 0; 367 lost = true; 368 goto done; 369 } 370 } 371 372 _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); 373 374 /* send the packet with the don't fragment bit set if we currently 375 * think it's small enough */ 376 if (iov[1].iov_len >= call->peer->maxdata) 377 goto send_fragmentable; 378 379 down_read(&conn->params.local->defrag_sem); 380 /* send the packet by UDP 381 * - returns -EMSGSIZE if UDP would have to fragment the packet 382 * to go out of the interface 383 * - in which case, we'll have processed the ICMP error 384 * message and update the peer record 385 */ 386 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 387 conn->params.peer->last_tx_at = ktime_get_real(); 388 389 up_read(&conn->params.local->defrag_sem); 390 if (ret == -EMSGSIZE) 391 goto send_fragmentable; 392 393 done: 394 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, 395 retrans, lost); 396 if (ret >= 0) { 397 ktime_t now = ktime_get_real(); 398 skb->tstamp = now; 399 smp_wmb(); 400 sp->hdr.serial = serial; 401 if (whdr.flags & RXRPC_REQUEST_ACK) { 402 call->peer->rtt_last_req = now; 403 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 404 if (call->peer->rtt_usage > 1) { 405 unsigned long nowj = jiffies, ack_lost_at; 406 407 ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); 408 if (ack_lost_at < 1) 409 ack_lost_at = 1; 410 411 ack_lost_at += nowj; 412 WRITE_ONCE(call->ack_lost_at, ack_lost_at); 413 rxrpc_reduce_call_timer(call, ack_lost_at, nowj, 414 rxrpc_timer_set_for_lost_ack); 415 } 416 } 417 } 418 419 rxrpc_set_keepalive(call); 420 421 _leave(" = %d [%u]", ret, call->peer->maxdata); 422 return ret; 423 424 send_fragmentable: 425 /* attempt to send this message with fragmentation enabled */ 426 _debug("send fragment"); 427 428 down_write(&conn->params.local->defrag_sem); 429 430 switch (conn->params.local->srx.transport.family) { 431 case AF_INET: 432 opt = IP_PMTUDISC_DONT; 433 ret = kernel_setsockopt(conn->params.local->socket, 434 SOL_IP, IP_MTU_DISCOVER, 435 (char *)&opt, sizeof(opt)); 436 if (ret == 0) { 437 ret = kernel_sendmsg(conn->params.local->socket, &msg, 438 iov, 2, len); 439 conn->params.peer->last_tx_at = ktime_get_real(); 440 441 opt = IP_PMTUDISC_DO; 442 kernel_setsockopt(conn->params.local->socket, SOL_IP, 443 IP_MTU_DISCOVER, 444 (char *)&opt, sizeof(opt)); 445 } 446 break; 447 448 #ifdef CONFIG_AF_RXRPC_IPV6 449 case AF_INET6: 450 opt = IPV6_PMTUDISC_DONT; 451 ret = kernel_setsockopt(conn->params.local->socket, 452 SOL_IPV6, IPV6_MTU_DISCOVER, 453 (char *)&opt, sizeof(opt)); 454 if (ret == 0) { 455 ret = kernel_sendmsg(conn->params.local->socket, &msg, 456 iov, 2, len); 457 conn->params.peer->last_tx_at = ktime_get_real(); 458 459 opt = IPV6_PMTUDISC_DO; 460 kernel_setsockopt(conn->params.local->socket, 461 SOL_IPV6, IPV6_MTU_DISCOVER, 462 (char *)&opt, sizeof(opt)); 463 } 464 break; 465 #endif 466 } 467 468 up_write(&conn->params.local->defrag_sem); 469 goto done; 470 } 471 472 /* 473 * reject packets through the local endpoint 474 */ 475 void rxrpc_reject_packets(struct rxrpc_local *local) 476 { 477 struct sockaddr_rxrpc srx; 478 struct rxrpc_skb_priv *sp; 479 struct rxrpc_wire_header whdr; 480 struct sk_buff *skb; 481 struct msghdr msg; 482 struct kvec iov[2]; 483 size_t size; 484 __be32 code; 485 486 _enter("%d", local->debug_id); 487 488 iov[0].iov_base = &whdr; 489 iov[0].iov_len = sizeof(whdr); 490 iov[1].iov_base = &code; 491 iov[1].iov_len = sizeof(code); 492 size = sizeof(whdr) + sizeof(code); 493 494 msg.msg_name = &srx.transport; 495 msg.msg_control = NULL; 496 msg.msg_controllen = 0; 497 msg.msg_flags = 0; 498 499 memset(&whdr, 0, sizeof(whdr)); 500 whdr.type = RXRPC_PACKET_TYPE_ABORT; 501 502 while ((skb = skb_dequeue(&local->reject_queue))) { 503 rxrpc_see_skb(skb, rxrpc_skb_rx_seen); 504 sp = rxrpc_skb(skb); 505 506 if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { 507 msg.msg_namelen = srx.transport_len; 508 509 code = htonl(skb->priority); 510 511 whdr.epoch = htonl(sp->hdr.epoch); 512 whdr.cid = htonl(sp->hdr.cid); 513 whdr.callNumber = htonl(sp->hdr.callNumber); 514 whdr.serviceId = htons(sp->hdr.serviceId); 515 whdr.flags = sp->hdr.flags; 516 whdr.flags ^= RXRPC_CLIENT_INITIATED; 517 whdr.flags &= RXRPC_CLIENT_INITIATED; 518 519 kernel_sendmsg(local->socket, &msg, iov, 2, size); 520 } 521 522 rxrpc_free_skb(skb, rxrpc_skb_rx_freed); 523 } 524 525 _leave(""); 526 } 527 528 /* 529 * Send a VERSION reply to a peer as a keepalive. 530 */ 531 void rxrpc_send_keepalive(struct rxrpc_peer *peer) 532 { 533 struct rxrpc_wire_header whdr; 534 struct msghdr msg; 535 struct kvec iov[2]; 536 size_t len; 537 int ret; 538 539 _enter(""); 540 541 msg.msg_name = &peer->srx.transport; 542 msg.msg_namelen = peer->srx.transport_len; 543 msg.msg_control = NULL; 544 msg.msg_controllen = 0; 545 msg.msg_flags = 0; 546 547 whdr.epoch = htonl(peer->local->rxnet->epoch); 548 whdr.cid = 0; 549 whdr.callNumber = 0; 550 whdr.seq = 0; 551 whdr.serial = 0; 552 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ 553 whdr.flags = RXRPC_LAST_PACKET; 554 whdr.userStatus = 0; 555 whdr.securityIndex = 0; 556 whdr._rsvd = 0; 557 whdr.serviceId = 0; 558 559 iov[0].iov_base = &whdr; 560 iov[0].iov_len = sizeof(whdr); 561 iov[1].iov_base = (char *)rxrpc_keepalive_string; 562 iov[1].iov_len = sizeof(rxrpc_keepalive_string); 563 564 len = iov[0].iov_len + iov[1].iov_len; 565 566 _proto("Tx VERSION (keepalive)"); 567 568 ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); 569 if (ret < 0) 570 _debug("sendmsg failed: %d", ret); 571 572 peer->last_tx_at = ktime_get_real(); 573 _leave(""); 574 } 575