// SPDX-License-Identifier: GPL-2.0-or-later
/* Peer event handling, typically ICMP messages.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/icmp.h>
#include "ar-internal.h"

static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
				   enum rxrpc_call_completion);

/*
 * Find the peer associated with an ICMPv4 packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
						     struct sk_buff *skb,
						     unsigned int udp_offset,
						     unsigned int *info,
						     struct sockaddr_rxrpc *srx)
{
	struct iphdr *ip, *ip0 = ip_hdr(skb);
	struct icmphdr *icmp = icmp_hdr(skb);
	struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);

	_enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);

	switch (icmp->type) {
	case ICMP_DEST_UNREACH:
		*info = ntohs(icmp->un.frag.mtu);
		fallthrough;
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
		/* The offending packet's IP header is quoted 8 bytes into
		 * the ICMP message, just past the fixed-size ICMP header.
		 */
		ip = (struct iphdr *)((void *)icmp + 8);
		break;
	default:
		return NULL;
	}

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* An ICMPv4 error may arrive on an IPv6 (dual-stack) listening
	 * socket, so the peer address is rebuilt in IPv4 form either way.
	 */
	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;
#endif

	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

	_net("ICMP {%pISp}", &srx->transport);
	return rxrpc_lookup_peer_rcu(local, srx);
}
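
/* For reference, a sketch of the datagram the lookup above walks (per
 * RFC 792; illustrative, not code from this file).  The outer IP header
 * and the 8-byte ICMP header precede a quote of the original packet,
 * which is why the inner IP header sits at (void *)icmp + 8 and the inner
 * UDP header at skb->data + udp_offset as supplied by the caller:
 *
 *	+----------------+  <- skb network header
 *	| outer IP hdr   |
 *	+----------------+  <- icmp_hdr(skb)
 *	| ICMP hdr (8B)  |     type/code/checksum + unused-or-MTU word
 *	+----------------+  <- (void *)icmp + 8
 *	| inner IP hdr   |     header of the packet that provoked the error
 *	+----------------+  <- skb->data + udp_offset
 *	| inner UDP hdr  |     source/dest ports of our original packet
 *	+----------------+
 */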

#ifdef CONFIG_AF_RXRPC_IPV6
/*
 * Find the peer associated with an ICMPv6 packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
						      struct sk_buff *skb,
						      unsigned int udp_offset,
						      unsigned int *info,
						      struct sockaddr_rxrpc *srx)
{
	struct icmp6hdr *icmp = icmp6_hdr(skb);
	struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
	struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);

	_enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);

	switch (icmp->icmp6_type) {
	case ICMPV6_PKT_TOOBIG:
		/* Only Packet Too Big carries an MTU value (RFC 4443); in
		 * the other messages that word is unused, so read it here
		 * rather than under ICMPV6_DEST_UNREACH.
		 */
		*info = ntohl(icmp->icmp6_mtu);
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		/* The offending packet's IPv6 header is quoted just past
		 * the 8-byte ICMPv6 header.
		 */
		ip = (struct ipv6hdr *)((void *)icmp + 8);
		break;
	default:
		return NULL;
	}

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* An ICMPv6 error may pertain to a v4-mapped peer on a dual-stack
	 * socket, in which case the address is rebuilt in IPv4 form.
	 */
	switch (srx->transport.family) {
	case AF_INET:
		_net("Rx ICMP6 on v4 sock");
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		/* Take the low 32 bits of what should be a v4-mapped
		 * ::ffff:a.b.c.d destination address.
		 */
		memcpy(&srx->transport.sin.sin_addr,
		       &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
		break;
	case AF_INET6:
		_net("Rx ICMP6");
		srx->transport.sin6.sin6_port = udp->dest;
		memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
		       sizeof(struct in6_addr));
		break;
	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

	_net("ICMP {%pISp}", &srx->transport);
	return rxrpc_lookup_peer_rcu(local, srx);
}
#endif /* CONFIG_AF_RXRPC_IPV6 */
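
/* Layout assumed by the s6_addr32[3] extraction above (a sketch, not code
 * from this file): a v4-mapped IPv6 address puts the IPv4 address in the
 * final 32-bit word,
 *
 *	s6_addr32:  [0] zero  [1] zero  [2] ::ffff: marker  [3] a.b.c.d
 *
 * so copying word 3 recovers the IPv4 peer address in network byte order.
 */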

/*
 * Handle an error received on the local endpoint as a tunnel.
 */
void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
			 unsigned int udp_offset)
{
	struct sock_extended_err ee;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer;
	unsigned int info = 0;
	int err;
	u8 version = ip_hdr(skb)->version;
	/* ICMPv4 and ICMPv6 both put type and code in the first two bytes
	 * of the transport header, so these reads work for either version.
	 */
	u8 type = icmp_hdr(skb)->type;
	u8 code = icmp_hdr(skb)->code;

	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}

	rxrpc_new_skb(skb, rxrpc_skb_received);

	switch (version) {
	case IPVERSION:
		peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
						  &info, &srx);
		break;
#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
						   &info, &srx);
		break;
#endif
	default:
		rcu_read_unlock();
		return;
	}

	if (peer && !rxrpc_get_peer_maybe(peer))
		peer = NULL;
	if (!peer) {
		rcu_read_unlock();
		return;
	}

	memset(&ee, 0, sizeof(ee));

	switch (version) {
	case IPVERSION:
		switch (type) {
		case ICMP_DEST_UNREACH:
			switch (code) {
			case ICMP_FRAG_NEEDED:
				rxrpc_adjust_mtu(peer, info);
				rcu_read_unlock();
				rxrpc_put_peer(peer);
				return;
			default:
				break;
			}

			err = EHOSTUNREACH;
			if (code <= NR_ICMP_UNREACH) {
				/* Might want to do something different with
				 * non-fatal errors
				 */
				//harderr = icmp_err_convert[code].fatal;
				err = icmp_err_convert[code].errno;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			err = EHOSTUNREACH;
			break;
		default:
			err = EPROTO;
			break;
		}

		ee.ee_origin = SO_EE_ORIGIN_ICMP;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		switch (type) {
		case ICMPV6_PKT_TOOBIG:
			rxrpc_adjust_mtu(peer, info);
			rcu_read_unlock();
			rxrpc_put_peer(peer);
			return;
		}

		icmpv6_err_convert(type, code, &err);

		if (err == EACCES)
			err = EHOSTUNREACH;

		ee.ee_origin = SO_EE_ORIGIN_ICMP6;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;
#endif
	}

	trace_rxrpc_rx_icmp(peer, &ee, &srx);

	rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
	rcu_read_unlock();
	rxrpc_put_peer(peer);
}
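
/* A sketch of how this handler is expected to be wired up, based on the
 * udp_tunnel configuration done in rxrpc's local endpoint code.  The exact
 * field names are an assumption here, not part of this file:
 *
 *	struct udp_tunnel_sock_cfg tuncfg = {
 *		.encap_type	= UDP_ENCAP_RXRPC,
 *		.encap_err_rcv	= rxrpc_encap_err_rcv,
 *		.sk_user_data	= local,
 *	};
 *	setup_udp_tunnel_sock(net, local->socket, &tuncfg);
 *
 * The sk_user_data assignment is what makes the
 * rcu_dereference_sk_user_data(sk) above yield the rxrpc_local, and the
 * UDP core supplies udp_offset pointing at the quoted UDP header.
 */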

/*
 * Find the peer associated with a local error.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
						      const struct sk_buff *skb,
						      struct sockaddr_rxrpc *srx)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

	_enter("");

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = serr->port;
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6 on v4 sock");
			/* Skip to the final word of a v4-mapped address. */
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset + 12,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
			break;
		}
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6");
			srx->transport.sin6.sin6_port = serr->port;
			memcpy(&srx->transport.sin6.sin6_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in6_addr));
			break;
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP on v6 sock");
			srx->transport_len = sizeof(srx->transport.sin);
			srx->transport.family = AF_INET;
			srx->transport.sin.sin_port = serr->port;
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin6.sin6_addr,
			       &ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
			break;
		}
		break;
#endif

	default:
		BUG();
	}

	return rxrpc_lookup_peer_rcu(local, srx);
}

/*
 * Handle an MTU/fragmentation problem.
 */
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
	_net("Rx ICMP Fragmentation Needed (%u)", mtu);

	/* Wind down the local interface MTU if it's still at the default. */
	if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
		peer->if_mtu = mtu;
		_net("I/F MTU %u", mtu);
	}

	if (mtu == 0) {
		/* They didn't give us a size; estimate one. */
		mtu = peer->if_mtu;
		if (mtu > 1500) {
			mtu >>= 1;
			if (mtu < 1500)
				mtu = 1500;
		} else {
			mtu -= 100;
			if (mtu < peer->hdrsize)
				mtu = peer->hdrsize + 4;
		}
	}

	if (mtu < peer->mtu) {
		spin_lock_bh(&peer->lock);
		peer->mtu = mtu;
		peer->maxdata = peer->mtu - peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)",
		     peer->mtu, peer->maxdata);
	}
}
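
/* Worked example of the estimate above (illustrative numbers only): with
 * if_mtu still at the 65535 default and an MTU-less report, mtu becomes
 * 65535 >> 1 = 32767, which is above 1500 and so stands; with if_mtu
 * already wound down to, say, 1400, the estimate is 1400 - 100 = 1300,
 * floored at hdrsize + 4 so at least a few bytes of payload still fit
 * under the rxrpc header overhead.
 */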

/*
 * Handle an error received on the local endpoint.
 */
void rxrpc_error_report(struct sock *sk)
{
	struct sock_exterr_skb *serr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer = NULL;
	struct sk_buff *skb;

	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}
	_enter("%p{%d}", sk, local->debug_id);

	/* Clear the outstanding error value on the socket so that it doesn't
	 * cause kernel_sendmsg() to return it later.
	 */
	sock_error(sk);

	skb = sock_dequeue_err_skb(sk);
	if (!skb) {
		rcu_read_unlock();
		_leave("UDP socket errqueue empty");
		return;
	}
	rxrpc_new_skb(skb, rxrpc_skb_received);
	serr = SKB_EXT_ERR(skb);

	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
		peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
		if (peer && !rxrpc_get_peer_maybe(peer))
			peer = NULL;
		if (peer) {
			trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
			rxrpc_store_error(peer, serr);
		}
	}

	rcu_read_unlock();
	rxrpc_free_skb(skb, rxrpc_skb_freed);
	rxrpc_put_peer(peer);
	_leave("");
}

/*
 * Map an error report to error codes on the peer record.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
			      struct sock_exterr_skb *serr)
{
	enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
	struct sock_extended_err *ee;
	int err;

	_enter("");

	ee = &serr->ee;

	err = ee->ee_errno;

	switch (ee->ee_origin) {
	case SO_EE_ORIGIN_ICMP:
		switch (ee->ee_type) {
		case ICMP_DEST_UNREACH:
			switch (ee->ee_code) {
			case ICMP_NET_UNREACH:
				_net("Rx Received ICMP Network Unreachable");
				break;
			case ICMP_HOST_UNREACH:
				_net("Rx Received ICMP Host Unreachable");
				break;
			case ICMP_PORT_UNREACH:
				_net("Rx Received ICMP Port Unreachable");
				break;
			case ICMP_NET_UNKNOWN:
				_net("Rx Received ICMP Unknown Network");
				break;
			case ICMP_HOST_UNKNOWN:
				_net("Rx Received ICMP Unknown Host");
				break;
			default:
				_net("Rx Received ICMP DestUnreach code=%u",
				     ee->ee_code);
				break;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			_net("Rx Received ICMP TTL Exceeded");
			break;

		default:
			_proto("Rx Received ICMP error { type=%u code=%u }",
			       ee->ee_type, ee->ee_code);
			break;
		}
		break;

	case SO_EE_ORIGIN_NONE:
	case SO_EE_ORIGIN_LOCAL:
		_proto("Rx Received local error { error=%d }", err);
		compl = RXRPC_CALL_LOCAL_ERROR;
		break;

	case SO_EE_ORIGIN_ICMP6:
		if (err == EACCES)
			err = EHOSTUNREACH;
		fallthrough;
	default:
		_proto("Rx Received error report { orig=%u }", ee->ee_origin);
		break;
	}

	rxrpc_distribute_error(peer, err, compl);
}

/*
 * Distribute an error that occurred on a peer.
 */
static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
				   enum rxrpc_call_completion compl)
{
	struct rxrpc_call *call;

	hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
		rxrpc_see_call(call);
		rxrpc_set_call_completion(call, compl, 0, -error);
	}
}
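
/* The keepalive machinery below is a simple time wheel: peer_keepalive is
 * an array of buckets, one per second, whose size must be a power of two
 * for the "& mask" arithmetic to be valid.  The cursor names the bucket
 * being drained this second, and each peer is parked RXRPC_KEEPALIVE_TIME
 * seconds ahead of its last transmission.
 */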

/*
 * Perform keep-alive pings.
 */
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
					  struct list_head *collector,
					  time64_t base,
					  u8 cursor)
{
	struct rxrpc_peer *peer;
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t keepalive_at;
	int slot;

	spin_lock_bh(&rxnet->peer_hash_lock);

	while (!list_empty(collector)) {
		peer = list_entry(collector->next,
				  struct rxrpc_peer, keepalive_link);

		list_del_init(&peer->keepalive_link);
		if (!rxrpc_get_peer_maybe(peer))
			continue;

		if (__rxrpc_use_local(peer->local)) {
			spin_unlock_bh(&rxnet->peer_hash_lock);

			keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
			slot = keepalive_at - base;
			_debug("%02x peer %u t=%d {%pISp}",
			       cursor, peer->debug_id, slot, &peer->srx.transport);

			if (keepalive_at <= base ||
			    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
				rxrpc_send_keepalive(peer);
				slot = RXRPC_KEEPALIVE_TIME;
			}

			/* A transmission to this peer occurred since last we
			 * examined it so put it into the appropriate future
			 * bucket.
			 */
			slot += cursor;
			slot &= mask;
			spin_lock_bh(&rxnet->peer_hash_lock);
			list_add_tail(&peer->keepalive_link,
				      &rxnet->peer_keepalive[slot & mask]);
			rxrpc_unuse_local(peer->local);
		}
		rxrpc_put_peer_locked(peer);
	}

	spin_unlock_bh(&rxnet->peer_hash_lock);
}

/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
	struct rxrpc_net *rxnet =
		container_of(work, struct rxrpc_net, peer_keepalive_work);
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t base, now, delay;
	u8 cursor, stop;
	LIST_HEAD(collector);

	now = ktime_get_seconds();
	base = rxnet->peer_keepalive_base;
	cursor = rxnet->peer_keepalive_cursor;
	_enter("%lld,%u", base - now, cursor);

	if (!rxnet->live)
		return;

	/* Remove to a temporary list all the peers that are currently lodged
	 * in expired buckets plus all new peers.
	 *
	 * Everything in the bucket at the cursor is processed this
	 * second; the bucket at cursor + 1 goes at now + 1s and so
	 * on...
	 */
	spin_lock_bh(&rxnet->peer_hash_lock);
	list_splice_init(&rxnet->peer_keepalive_new, &collector);

	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
	while (base <= now && (s8)(cursor - stop) < 0) {
		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
				      &collector);
		base++;
		cursor++;
	}

	base = now;
	spin_unlock_bh(&rxnet->peer_hash_lock);

	rxnet->peer_keepalive_base = base;
	rxnet->peer_keepalive_cursor = cursor;
	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
	ASSERT(list_empty(&collector));

	/* Schedule the timer for the next occupied timeslot. */
	cursor = rxnet->peer_keepalive_cursor;
	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
	for (; (s8)(cursor - stop) < 0; cursor++) {
		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
			break;
		base++;
	}

	now = ktime_get_seconds();
	delay = base - now;
	if (delay < 1)
		delay = 1;
	delay *= HZ;
	if (rxnet->live)
		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);

	_leave("");
}
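
/* Worked example of the wheel arithmetic above (illustrative values only,
 * assuming a 32-bucket array so mask is 0x1f): if the cursor is at 30 and
 * a peer last transmitted 2 seconds ago with RXRPC_KEEPALIVE_TIME of 20,
 * then slot = 20 - 2 = 18 and (30 + 18) & 0x1f = 16, so the peer is parked
 * in bucket 16 to be revisited 18 seconds from now.  The (s8)(cursor - stop)
 * comparisons rely on the same modular arithmetic to remain correct when
 * the u8 cursor wraps past 255.
 */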