1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPv6 fragment reassembly 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: net/ipv4/ip_fragment.c 10 */ 11 12 /* 13 * Fixes: 14 * Andi Kleen Make it work with multiple hosts. 15 * More RFC compliance. 16 * 17 * Horst von Brand Add missing #include <linux/string.h> 18 * Alexey Kuznetsov SMP races, threading, cleanup. 19 * Patrick McHardy LRU queue of frag heads for evictor. 20 * Mitsuru KANDA @USAGI Register inet6_protocol{}. 21 * David Stevens and 22 * YOSHIFUJI,H. @USAGI Always remove fragment header to 23 * calculate ICV correctly. 24 */ 25 26 #define pr_fmt(fmt) "IPv6: " fmt 27 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/string.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/jiffies.h> 34 #include <linux/net.h> 35 #include <linux/list.h> 36 #include <linux/netdevice.h> 37 #include <linux/in6.h> 38 #include <linux/ipv6.h> 39 #include <linux/icmpv6.h> 40 #include <linux/random.h> 41 #include <linux/jhash.h> 42 #include <linux/skbuff.h> 43 #include <linux/slab.h> 44 #include <linux/export.h> 45 46 #include <net/sock.h> 47 #include <net/snmp.h> 48 49 #include <net/ipv6.h> 50 #include <net/ip6_route.h> 51 #include <net/protocol.h> 52 #include <net/transp_v6.h> 53 #include <net/rawv6.h> 54 #include <net/ndisc.h> 55 #include <net/addrconf.h> 56 #include <net/ipv6_frag.h> 57 #include <net/inet_ecn.h> 58 59 static const char ip6_frag_cache_name[] = "ip6-frags"; 60 61 static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) 62 { 63 return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); 64 } 65 66 static struct inet_frags ip6_frags; 67 68 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, 69 struct sk_buff *prev_tail, struct net_device *dev); 70 71 static void ip6_frag_expire(struct timer_list *t) 72 { 73 struct inet_frag_queue *frag = from_timer(frag, t, timer); 74 struct frag_queue *fq; 75 76 fq = container_of(frag, struct frag_queue, q); 77 78 ip6frag_expire_frag_queue(fq->q.fqdir->net, fq); 79 } 80 81 static struct frag_queue * 82 fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) 83 { 84 struct frag_v6_compare_key key = { 85 .id = id, 86 .saddr = hdr->saddr, 87 .daddr = hdr->daddr, 88 .user = IP6_DEFRAG_LOCAL_DELIVER, 89 .iif = iif, 90 }; 91 struct inet_frag_queue *q; 92 93 if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | 94 IPV6_ADDR_LINKLOCAL))) 95 key.iif = 0; 96 97 q = inet_frag_find(net->ipv6.fqdir, &key); 98 if (!q) 99 return NULL; 100 101 return container_of(q, struct frag_queue, q); 102 } 103 104 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, 105 struct frag_hdr *fhdr, int nhoff, 106 u32 *prob_offset) 107 { 108 struct net *net = dev_net(skb_dst(skb)->dev); 109 int offset, end, fragsize; 110 struct sk_buff *prev_tail; 111 struct net_device *dev; 112 int err = -ENOENT; 113 u8 ecn; 114 115 if (fq->q.flags & INET_FRAG_COMPLETE) 116 goto err; 117 118 err = -EINVAL; 119 offset = ntohs(fhdr->frag_off) & ~0x7; 120 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - 121 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 122 123 if ((unsigned int)end > IPV6_MAXPLEN) { 124 *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); 125 /* note that if prob_offset is set, the skb is freed elsewhere, 126 * we do not free it here. 127 */ 128 return -1; 129 } 130 131 ecn = ip6_frag_ecn(ipv6_hdr(skb)); 132 133 if (skb->ip_summed == CHECKSUM_COMPLETE) { 134 const unsigned char *nh = skb_network_header(skb); 135 skb->csum = csum_sub(skb->csum, 136 csum_partial(nh, (u8 *)(fhdr + 1) - nh, 137 0)); 138 } 139 140 /* Is this the final fragment? */ 141 if (!(fhdr->frag_off & htons(IP6_MF))) { 142 /* If we already have some bits beyond end 143 * or have different end, the segment is corrupted. 144 */ 145 if (end < fq->q.len || 146 ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) 147 goto discard_fq; 148 fq->q.flags |= INET_FRAG_LAST_IN; 149 fq->q.len = end; 150 } else { 151 /* Check if the fragment is rounded to 8 bytes. 152 * Required by the RFC. 153 */ 154 if (end & 0x7) { 155 /* RFC2460 says always send parameter problem in 156 * this case. -DaveM 157 */ 158 *prob_offset = offsetof(struct ipv6hdr, payload_len); 159 return -1; 160 } 161 if (end > fq->q.len) { 162 /* Some bits beyond end -> corruption. */ 163 if (fq->q.flags & INET_FRAG_LAST_IN) 164 goto discard_fq; 165 fq->q.len = end; 166 } 167 } 168 169 if (end == offset) 170 goto discard_fq; 171 172 err = -ENOMEM; 173 /* Point into the IP datagram 'data' part. */ 174 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) 175 goto discard_fq; 176 177 err = pskb_trim_rcsum(skb, end - offset); 178 if (err) 179 goto discard_fq; 180 181 /* Note : skb->rbnode and skb->dev share the same location. */ 182 dev = skb->dev; 183 /* Makes sure compiler wont do silly aliasing games */ 184 barrier(); 185 186 prev_tail = fq->q.fragments_tail; 187 err = inet_frag_queue_insert(&fq->q, skb, offset, end); 188 if (err) 189 goto insert_error; 190 191 if (dev) 192 fq->iif = dev->ifindex; 193 194 fq->q.stamp = skb->tstamp; 195 fq->q.meat += skb->len; 196 fq->ecn |= ecn; 197 add_frag_mem_limit(fq->q.fqdir, skb->truesize); 198 199 fragsize = -skb_network_offset(skb) + skb->len; 200 if (fragsize > fq->q.max_size) 201 fq->q.max_size = fragsize; 202 203 /* The first fragment. 204 * nhoffset is obtained from the first fragment, of course. 205 */ 206 if (offset == 0) { 207 fq->nhoffset = nhoff; 208 fq->q.flags |= INET_FRAG_FIRST_IN; 209 } 210 211 if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 212 fq->q.meat == fq->q.len) { 213 unsigned long orefdst = skb->_skb_refdst; 214 215 skb->_skb_refdst = 0UL; 216 err = ip6_frag_reasm(fq, skb, prev_tail, dev); 217 skb->_skb_refdst = orefdst; 218 return err; 219 } 220 221 skb_dst_drop(skb); 222 return -EINPROGRESS; 223 224 insert_error: 225 if (err == IPFRAG_DUP) { 226 kfree_skb(skb); 227 return -EINVAL; 228 } 229 err = -EINVAL; 230 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 231 IPSTATS_MIB_REASM_OVERLAPS); 232 discard_fq: 233 inet_frag_kill(&fq->q); 234 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 235 IPSTATS_MIB_REASMFAILS); 236 err: 237 kfree_skb(skb); 238 return err; 239 } 240 241 /* 242 * Check if this packet is complete. 243 * 244 * It is called with locked fq, and caller must check that 245 * queue is eligible for reassembly i.e. it is not COMPLETE, 246 * the last and the first frames arrived and all the bits are here. 247 */ 248 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, 249 struct sk_buff *prev_tail, struct net_device *dev) 250 { 251 struct net *net = fq->q.fqdir->net; 252 unsigned int nhoff; 253 void *reasm_data; 254 int payload_len; 255 u8 ecn; 256 257 inet_frag_kill(&fq->q); 258 259 ecn = ip_frag_ecn_table[fq->ecn]; 260 if (unlikely(ecn == 0xff)) 261 goto out_fail; 262 263 reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); 264 if (!reasm_data) 265 goto out_oom; 266 267 payload_len = ((skb->data - skb_network_header(skb)) - 268 sizeof(struct ipv6hdr) + fq->q.len - 269 sizeof(struct frag_hdr)); 270 if (payload_len > IPV6_MAXPLEN) 271 goto out_oversize; 272 273 /* We have to remove fragment header from datagram and to relocate 274 * header in order to calculate ICV correctly. */ 275 nhoff = fq->nhoffset; 276 skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; 277 memmove(skb->head + sizeof(struct frag_hdr), skb->head, 278 (skb->data - skb->head) - sizeof(struct frag_hdr)); 279 if (skb_mac_header_was_set(skb)) 280 skb->mac_header += sizeof(struct frag_hdr); 281 skb->network_header += sizeof(struct frag_hdr); 282 283 skb_reset_transport_header(skb); 284 285 inet_frag_reasm_finish(&fq->q, skb, reasm_data, true); 286 287 skb->dev = dev; 288 ipv6_hdr(skb)->payload_len = htons(payload_len); 289 ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); 290 IP6CB(skb)->nhoff = nhoff; 291 IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; 292 IP6CB(skb)->frag_max_size = fq->q.max_size; 293 294 /* Yes, and fold redundant checksum back. 8) */ 295 skb_postpush_rcsum(skb, skb_network_header(skb), 296 skb_network_header_len(skb)); 297 298 rcu_read_lock(); 299 __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); 300 rcu_read_unlock(); 301 fq->q.rb_fragments = RB_ROOT; 302 fq->q.fragments_tail = NULL; 303 fq->q.last_run_head = NULL; 304 return 1; 305 306 out_oversize: 307 net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); 308 goto out_fail; 309 out_oom: 310 net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); 311 out_fail: 312 rcu_read_lock(); 313 __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); 314 rcu_read_unlock(); 315 inet_frag_kill(&fq->q); 316 return -1; 317 } 318 319 static int ipv6_frag_rcv(struct sk_buff *skb) 320 { 321 struct frag_hdr *fhdr; 322 struct frag_queue *fq; 323 const struct ipv6hdr *hdr = ipv6_hdr(skb); 324 struct net *net = dev_net(skb_dst(skb)->dev); 325 int iif; 326 327 if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) 328 goto fail_hdr; 329 330 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 331 332 /* Jumbo payload inhibits frag. header */ 333 if (hdr->payload_len == 0) 334 goto fail_hdr; 335 336 if (!pskb_may_pull(skb, (skb_transport_offset(skb) + 337 sizeof(struct frag_hdr)))) 338 goto fail_hdr; 339 340 hdr = ipv6_hdr(skb); 341 fhdr = (struct frag_hdr *)skb_transport_header(skb); 342 343 if (!(fhdr->frag_off & htons(0xFFF9))) { 344 /* It is not a fragmented frame */ 345 skb->transport_header += sizeof(struct frag_hdr); 346 __IP6_INC_STATS(net, 347 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); 348 349 IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); 350 IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; 351 return 1; 352 } 353 354 iif = skb->dev ? skb->dev->ifindex : 0; 355 fq = fq_find(net, fhdr->identification, hdr, iif); 356 if (fq) { 357 u32 prob_offset = 0; 358 int ret; 359 360 spin_lock(&fq->q.lock); 361 362 fq->iif = iif; 363 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, 364 &prob_offset); 365 366 spin_unlock(&fq->q.lock); 367 inet_frag_put(&fq->q); 368 if (prob_offset) { 369 __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), 370 IPSTATS_MIB_INHDRERRORS); 371 /* icmpv6_param_prob() calls kfree_skb(skb) */ 372 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); 373 } 374 return ret; 375 } 376 377 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); 378 kfree_skb(skb); 379 return -1; 380 381 fail_hdr: 382 __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), 383 IPSTATS_MIB_INHDRERRORS); 384 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); 385 return -1; 386 } 387 388 static const struct inet6_protocol frag_protocol = { 389 .handler = ipv6_frag_rcv, 390 .flags = INET6_PROTO_NOPOLICY, 391 }; 392 393 #ifdef CONFIG_SYSCTL 394 395 static struct ctl_table ip6_frags_ns_ctl_table[] = { 396 { 397 .procname = "ip6frag_high_thresh", 398 .maxlen = sizeof(unsigned long), 399 .mode = 0644, 400 .proc_handler = proc_doulongvec_minmax, 401 }, 402 { 403 .procname = "ip6frag_low_thresh", 404 .maxlen = sizeof(unsigned long), 405 .mode = 0644, 406 .proc_handler = proc_doulongvec_minmax, 407 }, 408 { 409 .procname = "ip6frag_time", 410 .maxlen = sizeof(int), 411 .mode = 0644, 412 .proc_handler = proc_dointvec_jiffies, 413 }, 414 { } 415 }; 416 417 /* secret interval has been deprecated */ 418 static int ip6_frags_secret_interval_unused; 419 static struct ctl_table ip6_frags_ctl_table[] = { 420 { 421 .procname = "ip6frag_secret_interval", 422 .data = &ip6_frags_secret_interval_unused, 423 .maxlen = sizeof(int), 424 .mode = 0644, 425 .proc_handler = proc_dointvec_jiffies, 426 }, 427 { } 428 }; 429 430 static int __net_init ip6_frags_ns_sysctl_register(struct net *net) 431 { 432 struct ctl_table *table; 433 struct ctl_table_header *hdr; 434 435 table = ip6_frags_ns_ctl_table; 436 if (!net_eq(net, &init_net)) { 437 table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); 438 if (!table) 439 goto err_alloc; 440 441 } 442 table[0].data = &net->ipv6.fqdir->high_thresh; 443 table[0].extra1 = &net->ipv6.fqdir->low_thresh; 444 table[1].data = &net->ipv6.fqdir->low_thresh; 445 table[1].extra2 = &net->ipv6.fqdir->high_thresh; 446 table[2].data = &net->ipv6.fqdir->timeout; 447 448 hdr = register_net_sysctl(net, "net/ipv6", table); 449 if (!hdr) 450 goto err_reg; 451 452 net->ipv6.sysctl.frags_hdr = hdr; 453 return 0; 454 455 err_reg: 456 if (!net_eq(net, &init_net)) 457 kfree(table); 458 err_alloc: 459 return -ENOMEM; 460 } 461 462 static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net) 463 { 464 struct ctl_table *table; 465 466 table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; 467 unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); 468 if (!net_eq(net, &init_net)) 469 kfree(table); 470 } 471 472 static struct ctl_table_header *ip6_ctl_header; 473 474 static int ip6_frags_sysctl_register(void) 475 { 476 ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", 477 ip6_frags_ctl_table); 478 return ip6_ctl_header == NULL ? -ENOMEM : 0; 479 } 480 481 static void ip6_frags_sysctl_unregister(void) 482 { 483 unregister_net_sysctl_table(ip6_ctl_header); 484 } 485 #else 486 static int ip6_frags_ns_sysctl_register(struct net *net) 487 { 488 return 0; 489 } 490 491 static void ip6_frags_ns_sysctl_unregister(struct net *net) 492 { 493 } 494 495 static int ip6_frags_sysctl_register(void) 496 { 497 return 0; 498 } 499 500 static void ip6_frags_sysctl_unregister(void) 501 { 502 } 503 #endif 504 505 static int __net_init ipv6_frags_init_net(struct net *net) 506 { 507 int res; 508 509 res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net); 510 if (res < 0) 511 return res; 512 513 net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; 514 net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; 515 net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT; 516 517 res = ip6_frags_ns_sysctl_register(net); 518 if (res < 0) 519 fqdir_exit(net->ipv6.fqdir); 520 return res; 521 } 522 523 static void __net_exit ipv6_frags_pre_exit_net(struct net *net) 524 { 525 fqdir_pre_exit(net->ipv6.fqdir); 526 } 527 528 static void __net_exit ipv6_frags_exit_net(struct net *net) 529 { 530 ip6_frags_ns_sysctl_unregister(net); 531 fqdir_exit(net->ipv6.fqdir); 532 } 533 534 static struct pernet_operations ip6_frags_ops = { 535 .init = ipv6_frags_init_net, 536 .pre_exit = ipv6_frags_pre_exit_net, 537 .exit = ipv6_frags_exit_net, 538 }; 539 540 static const struct rhashtable_params ip6_rhash_params = { 541 .head_offset = offsetof(struct inet_frag_queue, node), 542 .hashfn = ip6frag_key_hashfn, 543 .obj_hashfn = ip6frag_obj_hashfn, 544 .obj_cmpfn = ip6frag_obj_cmpfn, 545 .automatic_shrinking = true, 546 }; 547 548 int __init ipv6_frag_init(void) 549 { 550 int ret; 551 552 ip6_frags.constructor = ip6frag_init; 553 ip6_frags.destructor = NULL; 554 ip6_frags.qsize = sizeof(struct frag_queue); 555 ip6_frags.frag_expire = ip6_frag_expire; 556 ip6_frags.frags_cache_name = ip6_frag_cache_name; 557 ip6_frags.rhash_params = ip6_rhash_params; 558 ret = inet_frags_init(&ip6_frags); 559 if (ret) 560 goto out; 561 562 ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); 563 if (ret) 564 goto err_protocol; 565 566 ret = ip6_frags_sysctl_register(); 567 if (ret) 568 goto err_sysctl; 569 570 ret = register_pernet_subsys(&ip6_frags_ops); 571 if (ret) 572 goto err_pernet; 573 574 out: 575 return ret; 576 577 err_pernet: 578 ip6_frags_sysctl_unregister(); 579 err_sysctl: 580 inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); 581 err_protocol: 582 inet_frags_fini(&ip6_frags); 583 goto out; 584 } 585 586 void ipv6_frag_exit(void) 587 { 588 ip6_frags_sysctl_unregister(); 589 unregister_pernet_subsys(&ip6_frags_ops); 590 inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); 591 inet_frags_fini(&ip6_frags); 592 } 593