1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * RAW sockets for IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Adapted from linux/net/ipv4/raw.c 10 * 11 * Fixes: 12 * Hideaki YOSHIFUJI : sin6_scope_id support 13 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) 14 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data 15 */ 16 17 #include <linux/errno.h> 18 #include <linux/types.h> 19 #include <linux/socket.h> 20 #include <linux/slab.h> 21 #include <linux/sockios.h> 22 #include <linux/net.h> 23 #include <linux/in6.h> 24 #include <linux/netdevice.h> 25 #include <linux/if_arp.h> 26 #include <linux/icmpv6.h> 27 #include <linux/netfilter.h> 28 #include <linux/netfilter_ipv6.h> 29 #include <linux/skbuff.h> 30 #include <linux/compat.h> 31 #include <linux/uaccess.h> 32 #include <asm/ioctls.h> 33 34 #include <net/net_namespace.h> 35 #include <net/ip.h> 36 #include <net/sock.h> 37 #include <net/snmp.h> 38 39 #include <net/ipv6.h> 40 #include <net/ndisc.h> 41 #include <net/protocol.h> 42 #include <net/ip6_route.h> 43 #include <net/ip6_checksum.h> 44 #include <net/addrconf.h> 45 #include <net/transp_v6.h> 46 #include <net/udp.h> 47 #include <net/inet_common.h> 48 #include <net/tcp_states.h> 49 #if IS_ENABLED(CONFIG_IPV6_MIP6) 50 #include <net/mip6.h> 51 #endif 52 #include <linux/mroute6.h> 53 54 #include <net/raw.h> 55 #include <net/rawv6.h> 56 #include <net/xfrm.h> 57 58 #include <linux/proc_fs.h> 59 #include <linux/seq_file.h> 60 #include <linux/export.h> 61 62 #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ 63 64 struct raw_hashinfo raw_v6_hashinfo; 65 EXPORT_SYMBOL_GPL(raw_v6_hashinfo); 66 67 bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, 68 const struct in6_addr *loc_addr, 69 const struct in6_addr *rmt_addr, int dif, int sdif) 70 { 71 if (inet_sk(sk)->inet_num != num || 72 !net_eq(sock_net(sk), net) || 73 (!ipv6_addr_any(&sk->sk_v6_daddr) && 74 !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || 75 !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, 76 dif, sdif)) 77 return false; 78 79 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || 80 ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || 81 (ipv6_addr_is_multicast(loc_addr) && 82 inet6_mc_check(sk, loc_addr, rmt_addr))) 83 return true; 84 85 return false; 86 } 87 EXPORT_SYMBOL_GPL(raw_v6_match); 88 89 /* 90 * 0 - deliver 91 * 1 - block 92 */ 93 static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) 94 { 95 struct icmp6hdr _hdr; 96 const struct icmp6hdr *hdr; 97 98 /* We require only the four bytes of the ICMPv6 header, not any 99 * additional bytes of message body in "struct icmp6hdr". 100 */ 101 hdr = skb_header_pointer(skb, skb_transport_offset(skb), 102 ICMPV6_HDRLEN, &_hdr); 103 if (hdr) { 104 const __u32 *data = &raw6_sk(sk)->filter.data[0]; 105 unsigned int type = hdr->icmp6_type; 106 107 return (data[type >> 5] & (1U << (type & 31))) != 0; 108 } 109 return 1; 110 } 111 112 #if IS_ENABLED(CONFIG_IPV6_MIP6) 113 typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); 114 115 static mh_filter_t __rcu *mh_filter __read_mostly; 116 117 int rawv6_mh_filter_register(mh_filter_t filter) 118 { 119 rcu_assign_pointer(mh_filter, filter); 120 return 0; 121 } 122 EXPORT_SYMBOL(rawv6_mh_filter_register); 123 124 int rawv6_mh_filter_unregister(mh_filter_t filter) 125 { 126 RCU_INIT_POINTER(mh_filter, NULL); 127 synchronize_rcu(); 128 return 0; 129 } 130 EXPORT_SYMBOL(rawv6_mh_filter_unregister); 131 132 #endif 133 134 /* 135 * demultiplex raw sockets. 136 * (should consider queueing the skb in the sock receive_queue 137 * without calling rawv6.c) 138 * 139 * Caller owns SKB so we must make clones. 140 */ 141 static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 142 { 143 struct net *net = dev_net(skb->dev); 144 struct hlist_nulls_head *hlist; 145 struct hlist_nulls_node *hnode; 146 const struct in6_addr *saddr; 147 const struct in6_addr *daddr; 148 struct sock *sk; 149 bool delivered = false; 150 __u8 hash; 151 152 saddr = &ipv6_hdr(skb)->saddr; 153 daddr = saddr + 1; 154 155 hash = raw_hashfunc(net, nexthdr); 156 hlist = &raw_v6_hashinfo.ht[hash]; 157 rcu_read_lock(); 158 sk_nulls_for_each(sk, hnode, hlist) { 159 int filtered; 160 161 if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, 162 inet6_iif(skb), inet6_sdif(skb))) 163 continue; 164 delivered = true; 165 switch (nexthdr) { 166 case IPPROTO_ICMPV6: 167 filtered = icmpv6_filter(sk, skb); 168 break; 169 170 #if IS_ENABLED(CONFIG_IPV6_MIP6) 171 case IPPROTO_MH: 172 { 173 /* XXX: To validate MH only once for each packet, 174 * this is placed here. It should be after checking 175 * xfrm policy, however it doesn't. The checking xfrm 176 * policy is placed in rawv6_rcv() because it is 177 * required for each socket. 178 */ 179 mh_filter_t *filter; 180 181 filter = rcu_dereference(mh_filter); 182 filtered = filter ? (*filter)(sk, skb) : 0; 183 break; 184 } 185 #endif 186 default: 187 filtered = 0; 188 break; 189 } 190 191 if (filtered < 0) 192 break; 193 if (filtered == 0) { 194 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 195 196 /* Not releasing hash table! */ 197 if (clone) 198 rawv6_rcv(sk, clone); 199 } 200 } 201 rcu_read_unlock(); 202 return delivered; 203 } 204 205 bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) 206 { 207 return ipv6_raw_deliver(skb, nexthdr); 208 } 209 210 /* This cleans up af_inet6 a bit. -DaveM */ 211 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 212 { 213 struct inet_sock *inet = inet_sk(sk); 214 struct ipv6_pinfo *np = inet6_sk(sk); 215 struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; 216 __be32 v4addr = 0; 217 int addr_type; 218 int err; 219 220 if (addr_len < SIN6_LEN_RFC2133) 221 return -EINVAL; 222 223 if (addr->sin6_family != AF_INET6) 224 return -EINVAL; 225 226 addr_type = ipv6_addr_type(&addr->sin6_addr); 227 228 /* Raw sockets are IPv6 only */ 229 if (addr_type == IPV6_ADDR_MAPPED) 230 return -EADDRNOTAVAIL; 231 232 lock_sock(sk); 233 234 err = -EINVAL; 235 if (sk->sk_state != TCP_CLOSE) 236 goto out; 237 238 rcu_read_lock(); 239 /* Check if the address belongs to the host. */ 240 if (addr_type != IPV6_ADDR_ANY) { 241 struct net_device *dev = NULL; 242 243 if (__ipv6_addr_needs_scope_id(addr_type)) { 244 if (addr_len >= sizeof(struct sockaddr_in6) && 245 addr->sin6_scope_id) { 246 /* Override any existing binding, if another 247 * one is supplied by user. 248 */ 249 sk->sk_bound_dev_if = addr->sin6_scope_id; 250 } 251 252 /* Binding to link-local address requires an interface */ 253 if (!sk->sk_bound_dev_if) 254 goto out_unlock; 255 } 256 257 if (sk->sk_bound_dev_if) { 258 err = -ENODEV; 259 dev = dev_get_by_index_rcu(sock_net(sk), 260 sk->sk_bound_dev_if); 261 if (!dev) 262 goto out_unlock; 263 } 264 265 /* ipv4 addr of the socket is invalid. Only the 266 * unspecified and mapped address have a v4 equivalent. 267 */ 268 v4addr = LOOPBACK4_IPV6; 269 if (!(addr_type & IPV6_ADDR_MULTICAST) && 270 !ipv6_can_nonlocal_bind(sock_net(sk), inet)) { 271 err = -EADDRNOTAVAIL; 272 if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, 273 dev, 0)) { 274 goto out_unlock; 275 } 276 } 277 } 278 279 inet->inet_rcv_saddr = inet->inet_saddr = v4addr; 280 sk->sk_v6_rcv_saddr = addr->sin6_addr; 281 if (!(addr_type & IPV6_ADDR_MULTICAST)) 282 np->saddr = addr->sin6_addr; 283 err = 0; 284 out_unlock: 285 rcu_read_unlock(); 286 out: 287 release_sock(sk); 288 return err; 289 } 290 291 static void rawv6_err(struct sock *sk, struct sk_buff *skb, 292 struct inet6_skb_parm *opt, 293 u8 type, u8 code, int offset, __be32 info) 294 { 295 struct inet_sock *inet = inet_sk(sk); 296 struct ipv6_pinfo *np = inet6_sk(sk); 297 int err; 298 int harderr; 299 300 /* Report error on raw socket, if: 301 1. User requested recverr. 302 2. Socket is connected (otherwise the error indication 303 is useless without recverr and error is hard. 304 */ 305 if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) 306 return; 307 308 harderr = icmpv6_err_convert(type, code, &err); 309 if (type == ICMPV6_PKT_TOOBIG) { 310 ip6_sk_update_pmtu(skb, sk, info); 311 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); 312 } 313 if (type == NDISC_REDIRECT) { 314 ip6_sk_redirect(skb, sk); 315 return; 316 } 317 if (np->recverr) { 318 u8 *payload = skb->data; 319 if (!inet->hdrincl) 320 payload += offset; 321 ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); 322 } 323 324 if (np->recverr || harderr) { 325 sk->sk_err = err; 326 sk_error_report(sk); 327 } 328 } 329 330 void raw6_icmp_error(struct sk_buff *skb, int nexthdr, 331 u8 type, u8 code, int inner_offset, __be32 info) 332 { 333 struct net *net = dev_net(skb->dev); 334 struct hlist_nulls_head *hlist; 335 struct hlist_nulls_node *hnode; 336 struct sock *sk; 337 int hash; 338 339 hash = raw_hashfunc(net, nexthdr); 340 hlist = &raw_v6_hashinfo.ht[hash]; 341 rcu_read_lock(); 342 sk_nulls_for_each(sk, hnode, hlist) { 343 /* Note: ipv6_hdr(skb) != skb->data */ 344 const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; 345 346 if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, 347 inet6_iif(skb), inet6_iif(skb))) 348 continue; 349 rawv6_err(sk, skb, NULL, type, code, inner_offset, info); 350 } 351 rcu_read_unlock(); 352 } 353 354 static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) 355 { 356 enum skb_drop_reason reason; 357 358 if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && 359 skb_checksum_complete(skb)) { 360 atomic_inc(&sk->sk_drops); 361 kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM); 362 return NET_RX_DROP; 363 } 364 365 /* Charge it to the socket. */ 366 skb_dst_drop(skb); 367 if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { 368 kfree_skb_reason(skb, reason); 369 return NET_RX_DROP; 370 } 371 372 return 0; 373 } 374 375 /* 376 * This is next to useless... 377 * if we demultiplex in network layer we don't need the extra call 378 * just to queue the skb... 379 * maybe we could have the network decide upon a hint if it 380 * should call raw_rcv for demultiplexing 381 */ 382 int rawv6_rcv(struct sock *sk, struct sk_buff *skb) 383 { 384 struct inet_sock *inet = inet_sk(sk); 385 struct raw6_sock *rp = raw6_sk(sk); 386 387 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 388 atomic_inc(&sk->sk_drops); 389 kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); 390 return NET_RX_DROP; 391 } 392 nf_reset_ct(skb); 393 394 if (!rp->checksum) 395 skb->ip_summed = CHECKSUM_UNNECESSARY; 396 397 if (skb->ip_summed == CHECKSUM_COMPLETE) { 398 skb_postpull_rcsum(skb, skb_network_header(skb), 399 skb_network_header_len(skb)); 400 if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 401 &ipv6_hdr(skb)->daddr, 402 skb->len, inet->inet_num, skb->csum)) 403 skb->ip_summed = CHECKSUM_UNNECESSARY; 404 } 405 if (!skb_csum_unnecessary(skb)) 406 skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 407 &ipv6_hdr(skb)->daddr, 408 skb->len, 409 inet->inet_num, 0)); 410 411 if (inet->hdrincl) { 412 if (skb_checksum_complete(skb)) { 413 atomic_inc(&sk->sk_drops); 414 kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM); 415 return NET_RX_DROP; 416 } 417 } 418 419 rawv6_rcv_skb(sk, skb); 420 return 0; 421 } 422 423 424 /* 425 * This should be easy, if there is something there 426 * we return it, otherwise we block. 427 */ 428 429 static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 430 int flags, int *addr_len) 431 { 432 struct ipv6_pinfo *np = inet6_sk(sk); 433 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); 434 struct sk_buff *skb; 435 size_t copied; 436 int err; 437 438 if (flags & MSG_OOB) 439 return -EOPNOTSUPP; 440 441 if (flags & MSG_ERRQUEUE) 442 return ipv6_recv_error(sk, msg, len, addr_len); 443 444 if (np->rxpmtu && np->rxopt.bits.rxpmtu) 445 return ipv6_recv_rxpmtu(sk, msg, len, addr_len); 446 447 skb = skb_recv_datagram(sk, flags, &err); 448 if (!skb) 449 goto out; 450 451 copied = skb->len; 452 if (copied > len) { 453 copied = len; 454 msg->msg_flags |= MSG_TRUNC; 455 } 456 457 if (skb_csum_unnecessary(skb)) { 458 err = skb_copy_datagram_msg(skb, 0, msg, copied); 459 } else if (msg->msg_flags&MSG_TRUNC) { 460 if (__skb_checksum_complete(skb)) 461 goto csum_copy_err; 462 err = skb_copy_datagram_msg(skb, 0, msg, copied); 463 } else { 464 err = skb_copy_and_csum_datagram_msg(skb, 0, msg); 465 if (err == -EINVAL) 466 goto csum_copy_err; 467 } 468 if (err) 469 goto out_free; 470 471 /* Copy the address. */ 472 if (sin6) { 473 sin6->sin6_family = AF_INET6; 474 sin6->sin6_port = 0; 475 sin6->sin6_addr = ipv6_hdr(skb)->saddr; 476 sin6->sin6_flowinfo = 0; 477 sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, 478 inet6_iif(skb)); 479 *addr_len = sizeof(*sin6); 480 } 481 482 sock_recv_cmsgs(msg, sk, skb); 483 484 if (np->rxopt.all) 485 ip6_datagram_recv_ctl(sk, msg, skb); 486 487 err = copied; 488 if (flags & MSG_TRUNC) 489 err = skb->len; 490 491 out_free: 492 skb_free_datagram(sk, skb); 493 out: 494 return err; 495 496 csum_copy_err: 497 skb_kill_datagram(sk, skb, flags); 498 499 /* Error for blocking case is chosen to masquerade 500 as some normal condition. 501 */ 502 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 503 goto out; 504 } 505 506 static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, 507 struct raw6_sock *rp) 508 { 509 struct ipv6_txoptions *opt; 510 struct sk_buff *skb; 511 int err = 0; 512 int offset; 513 int len; 514 int total_len; 515 __wsum tmp_csum; 516 __sum16 csum; 517 518 if (!rp->checksum) 519 goto send; 520 521 skb = skb_peek(&sk->sk_write_queue); 522 if (!skb) 523 goto out; 524 525 offset = rp->offset; 526 total_len = inet_sk(sk)->cork.base.length; 527 opt = inet6_sk(sk)->cork.opt; 528 total_len -= opt ? opt->opt_flen : 0; 529 530 if (offset >= total_len - 1) { 531 err = -EINVAL; 532 ip6_flush_pending_frames(sk); 533 goto out; 534 } 535 536 /* should be check HW csum miyazawa */ 537 if (skb_queue_len(&sk->sk_write_queue) == 1) { 538 /* 539 * Only one fragment on the socket. 540 */ 541 tmp_csum = skb->csum; 542 } else { 543 struct sk_buff *csum_skb = NULL; 544 tmp_csum = 0; 545 546 skb_queue_walk(&sk->sk_write_queue, skb) { 547 tmp_csum = csum_add(tmp_csum, skb->csum); 548 549 if (csum_skb) 550 continue; 551 552 len = skb->len - skb_transport_offset(skb); 553 if (offset >= len) { 554 offset -= len; 555 continue; 556 } 557 558 csum_skb = skb; 559 } 560 561 skb = csum_skb; 562 } 563 564 offset += skb_transport_offset(skb); 565 err = skb_copy_bits(skb, offset, &csum, 2); 566 if (err < 0) { 567 ip6_flush_pending_frames(sk); 568 goto out; 569 } 570 571 /* in case cksum was not initialized */ 572 if (unlikely(csum)) 573 tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); 574 575 csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, 576 total_len, fl6->flowi6_proto, tmp_csum); 577 578 if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) 579 csum = CSUM_MANGLED_0; 580 581 BUG_ON(skb_store_bits(skb, offset, &csum, 2)); 582 583 send: 584 err = ip6_push_pending_frames(sk); 585 out: 586 return err; 587 } 588 589 static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, 590 struct flowi6 *fl6, struct dst_entry **dstp, 591 unsigned int flags, const struct sockcm_cookie *sockc) 592 { 593 struct ipv6_pinfo *np = inet6_sk(sk); 594 struct net *net = sock_net(sk); 595 struct ipv6hdr *iph; 596 struct sk_buff *skb; 597 int err; 598 struct rt6_info *rt = (struct rt6_info *)*dstp; 599 int hlen = LL_RESERVED_SPACE(rt->dst.dev); 600 int tlen = rt->dst.dev->needed_tailroom; 601 602 if (length > rt->dst.dev->mtu) { 603 ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); 604 return -EMSGSIZE; 605 } 606 if (length < sizeof(struct ipv6hdr)) 607 return -EINVAL; 608 if (flags&MSG_PROBE) 609 goto out; 610 611 skb = sock_alloc_send_skb(sk, 612 length + hlen + tlen + 15, 613 flags & MSG_DONTWAIT, &err); 614 if (!skb) 615 goto error; 616 skb_reserve(skb, hlen); 617 618 skb->protocol = htons(ETH_P_IPV6); 619 skb->priority = sk->sk_priority; 620 skb->mark = sockc->mark; 621 skb->tstamp = sockc->transmit_time; 622 623 skb_put(skb, length); 624 skb_reset_network_header(skb); 625 iph = ipv6_hdr(skb); 626 627 skb->ip_summed = CHECKSUM_NONE; 628 629 skb_setup_tx_timestamp(skb, sockc->tsflags); 630 631 if (flags & MSG_CONFIRM) 632 skb_set_dst_pending_confirm(skb, 1); 633 634 skb->transport_header = skb->network_header; 635 err = memcpy_from_msg(iph, msg, length); 636 if (err) { 637 err = -EFAULT; 638 kfree_skb(skb); 639 goto error; 640 } 641 642 skb_dst_set(skb, &rt->dst); 643 *dstp = NULL; 644 645 /* if egress device is enslaved to an L3 master device pass the 646 * skb to its handler for processing 647 */ 648 skb = l3mdev_ip6_out(sk, skb); 649 if (unlikely(!skb)) 650 return 0; 651 652 /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev 653 * in the error path. Since skb has been freed, the dst could 654 * have been queued for deletion. 655 */ 656 rcu_read_lock(); 657 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 658 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 659 NULL, rt->dst.dev, dst_output); 660 if (err > 0) 661 err = net_xmit_errno(err); 662 if (err) { 663 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 664 rcu_read_unlock(); 665 goto error_check; 666 } 667 rcu_read_unlock(); 668 out: 669 return 0; 670 671 error: 672 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 673 error_check: 674 if (err == -ENOBUFS && !np->recverr) 675 err = 0; 676 return err; 677 } 678 679 struct raw6_frag_vec { 680 struct msghdr *msg; 681 int hlen; 682 char c[4]; 683 }; 684 685 static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) 686 { 687 int err = 0; 688 switch (fl6->flowi6_proto) { 689 case IPPROTO_ICMPV6: 690 rfv->hlen = 2; 691 err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); 692 if (!err) { 693 fl6->fl6_icmp_type = rfv->c[0]; 694 fl6->fl6_icmp_code = rfv->c[1]; 695 } 696 break; 697 case IPPROTO_MH: 698 rfv->hlen = 4; 699 err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); 700 if (!err) 701 fl6->fl6_mh_type = rfv->c[2]; 702 } 703 return err; 704 } 705 706 static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, 707 struct sk_buff *skb) 708 { 709 struct raw6_frag_vec *rfv = from; 710 711 if (offset < rfv->hlen) { 712 int copy = min(rfv->hlen - offset, len); 713 714 if (skb->ip_summed == CHECKSUM_PARTIAL) 715 memcpy(to, rfv->c + offset, copy); 716 else 717 skb->csum = csum_block_add( 718 skb->csum, 719 csum_partial_copy_nocheck(rfv->c + offset, 720 to, copy), 721 odd); 722 723 odd = 0; 724 offset += copy; 725 to += copy; 726 len -= copy; 727 728 if (!len) 729 return 0; 730 } 731 732 offset -= rfv->hlen; 733 734 return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); 735 } 736 737 static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 738 { 739 struct ipv6_txoptions *opt_to_free = NULL; 740 struct ipv6_txoptions opt_space; 741 DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); 742 struct in6_addr *daddr, *final_p, final; 743 struct inet_sock *inet = inet_sk(sk); 744 struct ipv6_pinfo *np = inet6_sk(sk); 745 struct raw6_sock *rp = raw6_sk(sk); 746 struct ipv6_txoptions *opt = NULL; 747 struct ip6_flowlabel *flowlabel = NULL; 748 struct dst_entry *dst = NULL; 749 struct raw6_frag_vec rfv; 750 struct flowi6 fl6; 751 struct ipcm6_cookie ipc6; 752 int addr_len = msg->msg_namelen; 753 int hdrincl; 754 u16 proto; 755 int err; 756 757 /* Rough check on arithmetic overflow, 758 better check is made in ip6_append_data(). 759 */ 760 if (len > INT_MAX) 761 return -EMSGSIZE; 762 763 /* Mirror BSD error message compatibility */ 764 if (msg->msg_flags & MSG_OOB) 765 return -EOPNOTSUPP; 766 767 /* hdrincl should be READ_ONCE(inet->hdrincl) 768 * but READ_ONCE() doesn't work with bit fields. 769 * Doing this indirectly yields the same result. 770 */ 771 hdrincl = inet->hdrincl; 772 hdrincl = READ_ONCE(hdrincl); 773 774 /* 775 * Get and verify the address. 776 */ 777 memset(&fl6, 0, sizeof(fl6)); 778 779 fl6.flowi6_mark = sk->sk_mark; 780 fl6.flowi6_uid = sk->sk_uid; 781 782 ipcm6_init(&ipc6); 783 ipc6.sockc.tsflags = sk->sk_tsflags; 784 ipc6.sockc.mark = sk->sk_mark; 785 786 if (sin6) { 787 if (addr_len < SIN6_LEN_RFC2133) 788 return -EINVAL; 789 790 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 791 return -EAFNOSUPPORT; 792 793 /* port is the proto value [0..255] carried in nexthdr */ 794 proto = ntohs(sin6->sin6_port); 795 796 if (!proto) 797 proto = inet->inet_num; 798 else if (proto != inet->inet_num) 799 return -EINVAL; 800 801 if (proto > 255) 802 return -EINVAL; 803 804 daddr = &sin6->sin6_addr; 805 if (np->sndflow) { 806 fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; 807 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 808 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 809 if (IS_ERR(flowlabel)) 810 return -EINVAL; 811 } 812 } 813 814 /* 815 * Otherwise it will be difficult to maintain 816 * sk->sk_dst_cache. 817 */ 818 if (sk->sk_state == TCP_ESTABLISHED && 819 ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) 820 daddr = &sk->sk_v6_daddr; 821 822 if (addr_len >= sizeof(struct sockaddr_in6) && 823 sin6->sin6_scope_id && 824 __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) 825 fl6.flowi6_oif = sin6->sin6_scope_id; 826 } else { 827 if (sk->sk_state != TCP_ESTABLISHED) 828 return -EDESTADDRREQ; 829 830 proto = inet->inet_num; 831 daddr = &sk->sk_v6_daddr; 832 fl6.flowlabel = np->flow_label; 833 } 834 835 if (fl6.flowi6_oif == 0) 836 fl6.flowi6_oif = sk->sk_bound_dev_if; 837 838 if (msg->msg_controllen) { 839 opt = &opt_space; 840 memset(opt, 0, sizeof(struct ipv6_txoptions)); 841 opt->tot_len = sizeof(struct ipv6_txoptions); 842 ipc6.opt = opt; 843 844 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); 845 if (err < 0) { 846 fl6_sock_release(flowlabel); 847 return err; 848 } 849 if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { 850 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 851 if (IS_ERR(flowlabel)) 852 return -EINVAL; 853 } 854 if (!(opt->opt_nflen|opt->opt_flen)) 855 opt = NULL; 856 } 857 if (!opt) { 858 opt = txopt_get(np); 859 opt_to_free = opt; 860 } 861 if (flowlabel) 862 opt = fl6_merge_options(&opt_space, flowlabel, opt); 863 opt = ipv6_fixup_options(&opt_space, opt); 864 865 fl6.flowi6_proto = proto; 866 fl6.flowi6_mark = ipc6.sockc.mark; 867 868 if (!hdrincl) { 869 rfv.msg = msg; 870 rfv.hlen = 0; 871 err = rawv6_probe_proto_opt(&rfv, &fl6); 872 if (err) 873 goto out; 874 } 875 876 if (!ipv6_addr_any(daddr)) 877 fl6.daddr = *daddr; 878 else 879 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 880 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) 881 fl6.saddr = np->saddr; 882 883 final_p = fl6_update_dst(&fl6, opt, &final); 884 885 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 886 fl6.flowi6_oif = np->mcast_oif; 887 else if (!fl6.flowi6_oif) 888 fl6.flowi6_oif = np->ucast_oif; 889 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 890 891 if (hdrincl) 892 fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; 893 894 if (ipc6.tclass < 0) 895 ipc6.tclass = np->tclass; 896 897 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); 898 899 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 900 if (IS_ERR(dst)) { 901 err = PTR_ERR(dst); 902 goto out; 903 } 904 if (ipc6.hlimit < 0) 905 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 906 907 if (ipc6.dontfrag < 0) 908 ipc6.dontfrag = np->dontfrag; 909 910 if (msg->msg_flags&MSG_CONFIRM) 911 goto do_confirm; 912 913 back_from_confirm: 914 if (hdrincl) 915 err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, 916 msg->msg_flags, &ipc6.sockc); 917 else { 918 ipc6.opt = opt; 919 lock_sock(sk); 920 err = ip6_append_data(sk, raw6_getfrag, &rfv, 921 len, 0, &ipc6, &fl6, (struct rt6_info *)dst, 922 msg->msg_flags); 923 924 if (err) 925 ip6_flush_pending_frames(sk); 926 else if (!(msg->msg_flags & MSG_MORE)) 927 err = rawv6_push_pending_frames(sk, &fl6, rp); 928 release_sock(sk); 929 } 930 done: 931 dst_release(dst); 932 out: 933 fl6_sock_release(flowlabel); 934 txopt_put(opt_to_free); 935 return err < 0 ? err : len; 936 do_confirm: 937 if (msg->msg_flags & MSG_PROBE) 938 dst_confirm_neigh(dst, &fl6.daddr); 939 if (!(msg->msg_flags & MSG_PROBE) || len) 940 goto back_from_confirm; 941 err = 0; 942 goto done; 943 } 944 945 static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 946 sockptr_t optval, int optlen) 947 { 948 switch (optname) { 949 case ICMPV6_FILTER: 950 if (optlen > sizeof(struct icmp6_filter)) 951 optlen = sizeof(struct icmp6_filter); 952 if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen)) 953 return -EFAULT; 954 return 0; 955 default: 956 return -ENOPROTOOPT; 957 } 958 959 return 0; 960 } 961 962 static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, 963 char __user *optval, int __user *optlen) 964 { 965 int len; 966 967 switch (optname) { 968 case ICMPV6_FILTER: 969 if (get_user(len, optlen)) 970 return -EFAULT; 971 if (len < 0) 972 return -EINVAL; 973 if (len > sizeof(struct icmp6_filter)) 974 len = sizeof(struct icmp6_filter); 975 if (put_user(len, optlen)) 976 return -EFAULT; 977 if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) 978 return -EFAULT; 979 return 0; 980 default: 981 return -ENOPROTOOPT; 982 } 983 984 return 0; 985 } 986 987 988 static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, 989 sockptr_t optval, unsigned int optlen) 990 { 991 struct raw6_sock *rp = raw6_sk(sk); 992 int val; 993 994 if (optlen < sizeof(val)) 995 return -EINVAL; 996 997 if (copy_from_sockptr(&val, optval, sizeof(val))) 998 return -EFAULT; 999 1000 switch (optname) { 1001 case IPV6_HDRINCL: 1002 if (sk->sk_type != SOCK_RAW) 1003 return -EINVAL; 1004 inet_sk(sk)->hdrincl = !!val; 1005 return 0; 1006 case IPV6_CHECKSUM: 1007 if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && 1008 level == IPPROTO_IPV6) { 1009 /* 1010 * RFC3542 tells that IPV6_CHECKSUM socket 1011 * option in the IPPROTO_IPV6 level is not 1012 * allowed on ICMPv6 sockets. 1013 * If you want to set it, use IPPROTO_RAW 1014 * level IPV6_CHECKSUM socket option 1015 * (Linux extension). 1016 */ 1017 return -EINVAL; 1018 } 1019 1020 /* You may get strange result with a positive odd offset; 1021 RFC2292bis agrees with me. */ 1022 if (val > 0 && (val&1)) 1023 return -EINVAL; 1024 if (val < 0) { 1025 rp->checksum = 0; 1026 } else { 1027 rp->checksum = 1; 1028 rp->offset = val; 1029 } 1030 1031 return 0; 1032 1033 default: 1034 return -ENOPROTOOPT; 1035 } 1036 } 1037 1038 static int rawv6_setsockopt(struct sock *sk, int level, int optname, 1039 sockptr_t optval, unsigned int optlen) 1040 { 1041 switch (level) { 1042 case SOL_RAW: 1043 break; 1044 1045 case SOL_ICMPV6: 1046 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1047 return -EOPNOTSUPP; 1048 return rawv6_seticmpfilter(sk, level, optname, optval, optlen); 1049 case SOL_IPV6: 1050 if (optname == IPV6_CHECKSUM || 1051 optname == IPV6_HDRINCL) 1052 break; 1053 fallthrough; 1054 default: 1055 return ipv6_setsockopt(sk, level, optname, optval, optlen); 1056 } 1057 1058 return do_rawv6_setsockopt(sk, level, optname, optval, optlen); 1059 } 1060 1061 static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, 1062 char __user *optval, int __user *optlen) 1063 { 1064 struct raw6_sock *rp = raw6_sk(sk); 1065 int val, len; 1066 1067 if (get_user(len, optlen)) 1068 return -EFAULT; 1069 1070 switch (optname) { 1071 case IPV6_HDRINCL: 1072 val = inet_sk(sk)->hdrincl; 1073 break; 1074 case IPV6_CHECKSUM: 1075 /* 1076 * We allow getsockopt() for IPPROTO_IPV6-level 1077 * IPV6_CHECKSUM socket option on ICMPv6 sockets 1078 * since RFC3542 is silent about it. 1079 */ 1080 if (rp->checksum == 0) 1081 val = -1; 1082 else 1083 val = rp->offset; 1084 break; 1085 1086 default: 1087 return -ENOPROTOOPT; 1088 } 1089 1090 len = min_t(unsigned int, sizeof(int), len); 1091 1092 if (put_user(len, optlen)) 1093 return -EFAULT; 1094 if (copy_to_user(optval, &val, len)) 1095 return -EFAULT; 1096 return 0; 1097 } 1098 1099 static int rawv6_getsockopt(struct sock *sk, int level, int optname, 1100 char __user *optval, int __user *optlen) 1101 { 1102 switch (level) { 1103 case SOL_RAW: 1104 break; 1105 1106 case SOL_ICMPV6: 1107 if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) 1108 return -EOPNOTSUPP; 1109 return rawv6_geticmpfilter(sk, level, optname, optval, optlen); 1110 case SOL_IPV6: 1111 if (optname == IPV6_CHECKSUM || 1112 optname == IPV6_HDRINCL) 1113 break; 1114 fallthrough; 1115 default: 1116 return ipv6_getsockopt(sk, level, optname, optval, optlen); 1117 } 1118 1119 return do_rawv6_getsockopt(sk, level, optname, optval, optlen); 1120 } 1121 1122 static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) 1123 { 1124 switch (cmd) { 1125 case SIOCOUTQ: { 1126 int amount = sk_wmem_alloc_get(sk); 1127 1128 return put_user(amount, (int __user *)arg); 1129 } 1130 case SIOCINQ: { 1131 struct sk_buff *skb; 1132 int amount = 0; 1133 1134 spin_lock_bh(&sk->sk_receive_queue.lock); 1135 skb = skb_peek(&sk->sk_receive_queue); 1136 if (skb) 1137 amount = skb->len; 1138 spin_unlock_bh(&sk->sk_receive_queue.lock); 1139 return put_user(amount, (int __user *)arg); 1140 } 1141 1142 default: 1143 #ifdef CONFIG_IPV6_MROUTE 1144 return ip6mr_ioctl(sk, cmd, (void __user *)arg); 1145 #else 1146 return -ENOIOCTLCMD; 1147 #endif 1148 } 1149 } 1150 1151 #ifdef CONFIG_COMPAT 1152 static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) 1153 { 1154 switch (cmd) { 1155 case SIOCOUTQ: 1156 case SIOCINQ: 1157 return -ENOIOCTLCMD; 1158 default: 1159 #ifdef CONFIG_IPV6_MROUTE 1160 return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); 1161 #else 1162 return -ENOIOCTLCMD; 1163 #endif 1164 } 1165 } 1166 #endif 1167 1168 static void rawv6_close(struct sock *sk, long timeout) 1169 { 1170 if (inet_sk(sk)->inet_num == IPPROTO_RAW) 1171 ip6_ra_control(sk, -1); 1172 ip6mr_sk_done(sk); 1173 sk_common_release(sk); 1174 } 1175 1176 static void raw6_destroy(struct sock *sk) 1177 { 1178 lock_sock(sk); 1179 ip6_flush_pending_frames(sk); 1180 release_sock(sk); 1181 } 1182 1183 static int rawv6_init_sk(struct sock *sk) 1184 { 1185 struct raw6_sock *rp = raw6_sk(sk); 1186 1187 switch (inet_sk(sk)->inet_num) { 1188 case IPPROTO_ICMPV6: 1189 rp->checksum = 1; 1190 rp->offset = 2; 1191 break; 1192 case IPPROTO_MH: 1193 rp->checksum = 1; 1194 rp->offset = 4; 1195 break; 1196 default: 1197 break; 1198 } 1199 return 0; 1200 } 1201 1202 struct proto rawv6_prot = { 1203 .name = "RAWv6", 1204 .owner = THIS_MODULE, 1205 .close = rawv6_close, 1206 .destroy = raw6_destroy, 1207 .connect = ip6_datagram_connect_v6_only, 1208 .disconnect = __udp_disconnect, 1209 .ioctl = rawv6_ioctl, 1210 .init = rawv6_init_sk, 1211 .setsockopt = rawv6_setsockopt, 1212 .getsockopt = rawv6_getsockopt, 1213 .sendmsg = rawv6_sendmsg, 1214 .recvmsg = rawv6_recvmsg, 1215 .bind = rawv6_bind, 1216 .backlog_rcv = rawv6_rcv_skb, 1217 .hash = raw_hash_sk, 1218 .unhash = raw_unhash_sk, 1219 .obj_size = sizeof(struct raw6_sock), 1220 .useroffset = offsetof(struct raw6_sock, filter), 1221 .usersize = sizeof_field(struct raw6_sock, filter), 1222 .h.raw_hash = &raw_v6_hashinfo, 1223 #ifdef CONFIG_COMPAT 1224 .compat_ioctl = compat_rawv6_ioctl, 1225 #endif 1226 .diag_destroy = raw_abort, 1227 }; 1228 1229 #ifdef CONFIG_PROC_FS 1230 static int raw6_seq_show(struct seq_file *seq, void *v) 1231 { 1232 if (v == SEQ_START_TOKEN) { 1233 seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); 1234 } else { 1235 struct sock *sp = v; 1236 __u16 srcp = inet_sk(sp)->inet_num; 1237 ip6_dgram_sock_seq_show(seq, v, srcp, 0, 1238 raw_seq_private(seq)->bucket); 1239 } 1240 return 0; 1241 } 1242 1243 static const struct seq_operations raw6_seq_ops = { 1244 .start = raw_seq_start, 1245 .next = raw_seq_next, 1246 .stop = raw_seq_stop, 1247 .show = raw6_seq_show, 1248 }; 1249 1250 static int __net_init raw6_init_net(struct net *net) 1251 { 1252 if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops, 1253 sizeof(struct raw_iter_state), &raw_v6_hashinfo)) 1254 return -ENOMEM; 1255 1256 return 0; 1257 } 1258 1259 static void __net_exit raw6_exit_net(struct net *net) 1260 { 1261 remove_proc_entry("raw6", net->proc_net); 1262 } 1263 1264 static struct pernet_operations raw6_net_ops = { 1265 .init = raw6_init_net, 1266 .exit = raw6_exit_net, 1267 }; 1268 1269 int __init raw6_proc_init(void) 1270 { 1271 return register_pernet_subsys(&raw6_net_ops); 1272 } 1273 1274 void raw6_proc_exit(void) 1275 { 1276 unregister_pernet_subsys(&raw6_net_ops); 1277 } 1278 #endif /* CONFIG_PROC_FS */ 1279 1280 /* Same as inet6_dgram_ops, sans udp_poll. */ 1281 const struct proto_ops inet6_sockraw_ops = { 1282 .family = PF_INET6, 1283 .owner = THIS_MODULE, 1284 .release = inet6_release, 1285 .bind = inet6_bind, 1286 .connect = inet_dgram_connect, /* ok */ 1287 .socketpair = sock_no_socketpair, /* a do nothing */ 1288 .accept = sock_no_accept, /* a do nothing */ 1289 .getname = inet6_getname, 1290 .poll = datagram_poll, /* ok */ 1291 .ioctl = inet6_ioctl, /* must change */ 1292 .gettstamp = sock_gettstamp, 1293 .listen = sock_no_listen, /* ok */ 1294 .shutdown = inet_shutdown, /* ok */ 1295 .setsockopt = sock_common_setsockopt, /* ok */ 1296 .getsockopt = sock_common_getsockopt, /* ok */ 1297 .sendmsg = inet_sendmsg, /* ok */ 1298 .recvmsg = sock_common_recvmsg, /* ok */ 1299 .mmap = sock_no_mmap, 1300 .sendpage = sock_no_sendpage, 1301 #ifdef CONFIG_COMPAT 1302 .compat_ioctl = inet6_compat_ioctl, 1303 #endif 1304 }; 1305 1306 static struct inet_protosw rawv6_protosw = { 1307 .type = SOCK_RAW, 1308 .protocol = IPPROTO_IP, /* wild card */ 1309 .prot = &rawv6_prot, 1310 .ops = &inet6_sockraw_ops, 1311 .flags = INET_PROTOSW_REUSE, 1312 }; 1313 1314 int __init rawv6_init(void) 1315 { 1316 return inet6_register_protosw(&rawv6_protosw); 1317 } 1318 1319 void rawv6_exit(void) 1320 { 1321 inet6_unregister_protosw(&rawv6_protosw); 1322 } 1323