// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP to API glue.
 *
 * Authors:	see ip.c
 *
 * Fixes:
 *		Many		:	Split from ip.c , see ip.c for history.
 *		Martin Mares	:	TOS setting fixed.
 *		Alan Cox	:	Fixed a couple of oopses in Martin's
 *					TOS tweaks.
 *		Mike McLagan	:	Routing by source
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/tcp_states.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
#include <net/checksum.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
#include <net/ip_fib.h>

#include <linux/errqueue.h>
#include <linux/uaccess.h>

#include <linux/bpfilter.h>

/*
 *	SOL_IP control messages.
 */

static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);

	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;

	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}

static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
	int ttl = ip_hdr(skb)->ttl;
	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}

static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}

static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
{
	if (IPCB(skb)->opt.optlen == 0)
		return;

	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
		 ip_hdr(skb) + 1);
}


static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
				 struct sk_buff *skb)
{
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	if (IPCB(skb)->opt.optlen == 0)
		return;

	if (ip_options_echo(net, opt, skb)) {
		msg->msg_flags |= MSG_CTRUNC;
		return;
	}
	ip_options_undo(opt);

	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
	int val;

	if (IPCB(skb)->frag_max_size == 0)
		return;

	val = IPCB(skb)->frag_max_size;
	put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
}

static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
				  int tlen, int offset)
{
	__wsum csum = skb->csum;

	if (skb->ip_summed != CHECKSUM_COMPLETE)
		return;

	if (offset != 0) {
		int tend_off = skb_transport_offset(skb) + tlen;
		csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
	}

	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}

static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
	char *secdata;
	u32 seclen, secid;
	int err;

	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
	if (err)
		return;

	err = security_secid_to_secctx(secid, &secdata, &seclen);
	if (err)
		return;

	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
	security_release_secctx(secdata, seclen);
}

static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
	__be16 _ports[2], *ports;
	struct sockaddr_in sin;

	/* All current transport protocols have the port numbers in the
	 * first four bytes of the transport header and this function is
	 * written with this assumption in mind.
	 */
	ports = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_ports), &_ports);
	if (!ports)
		return;

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
	sin.sin_port = ports[1];
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
}

void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
			 struct sk_buff *skb, int tlen, int offset)
{
	struct inet_sock *inet = inet_sk(sk);
	unsigned int flags = inet->cmsg_flags;

	/* Ordered by supposed usage frequency */
	if (flags & IP_CMSG_PKTINFO) {
		ip_cmsg_recv_pktinfo(msg, skb);

		flags &= ~IP_CMSG_PKTINFO;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TTL) {
		ip_cmsg_recv_ttl(msg, skb);

		flags &= ~IP_CMSG_TTL;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TOS) {
		ip_cmsg_recv_tos(msg, skb);

		flags &= ~IP_CMSG_TOS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RECVOPTS) {
		ip_cmsg_recv_opts(msg, skb);

		flags &= ~IP_CMSG_RECVOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RETOPTS) {
		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);

		flags &= ~IP_CMSG_RETOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_PASSSEC) {
		ip_cmsg_recv_security(msg, skb);

		flags &= ~IP_CMSG_PASSSEC;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_ORIGDSTADDR) {
		ip_cmsg_recv_dstaddr(msg, skb);

		flags &= ~IP_CMSG_ORIGDSTADDR;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_CHECKSUM)
		ip_cmsg_recv_checksum(msg, skb, tlen, offset);

	if (flags & IP_CMSG_RECVFRAGSIZE)
		ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);
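
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): the helpers above surface per-packet metadata to recvmsg()
 * callers once the matching IP_CMSG_* flag has been enabled via setsockopt().
 * A minimal IP_PKTINFO consumer on a bound UDP socket "fd" might look like:
 *
 *	int on = 1;
 *	char data[1500];
 *	char cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
 *	struct iovec iov = { data, sizeof(data) };
 *	struct msghdr msg = { NULL, 0, &iov, 1, cbuf, sizeof(cbuf), 0 };
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on));
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == IPPROTO_IP &&
 *		    cmsg->cmsg_type == IP_PKTINFO) {
 *			struct in_pktinfo *pi =
 *				(struct in_pktinfo *)CMSG_DATA(cmsg);
 *			... pi->ipi_ifindex and pi->ipi_addr were filled in
 *			    by ip_cmsg_recv_pktinfo() above ...
 *		}
 */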

int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
		 bool allow_ipv6)
{
	int err, val;
	struct cmsghdr *cmsg;
	struct net *net = sock_net(sk);

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
#if IS_ENABLED(CONFIG_IPV6)
		if (allow_ipv6 &&
		    cmsg->cmsg_level == SOL_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			struct in6_pktinfo *src_info;

			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
				return -EINVAL;
			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
				return -EINVAL;
			if (src_info->ipi6_ifindex)
				ipc->oif = src_info->ipi6_ifindex;
			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
			continue;
		}
#endif
		if (cmsg->cmsg_level == SOL_SOCKET) {
			err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
			if (err)
				return err;
			continue;
		}

		if (cmsg->cmsg_level != SOL_IP)
			continue;
		switch (cmsg->cmsg_type) {
		case IP_RETOPTS:
			err = cmsg->cmsg_len - sizeof(struct cmsghdr);

			/* Our caller is responsible for freeing ipc->opt */
			err = ip_options_get(net, &ipc->opt,
					     KERNEL_SOCKPTR(CMSG_DATA(cmsg)),
					     err < 40 ? err : 40);
			if (err)
				return err;
			break;
		case IP_PKTINFO:
		{
			struct in_pktinfo *info;
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
				return -EINVAL;
			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
			if (info->ipi_ifindex)
				ipc->oif = info->ipi_ifindex;
			ipc->addr = info->ipi_spec_dst.s_addr;
			break;
		}
		case IP_TTL:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
				return -EINVAL;
			val = *(int *)CMSG_DATA(cmsg);
			if (val < 1 || val > 255)
				return -EINVAL;
			ipc->ttl = val;
			break;
		case IP_TOS:
			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
				val = *(int *)CMSG_DATA(cmsg);
			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
				val = *(u8 *)CMSG_DATA(cmsg);
			else
				return -EINVAL;
			if (val < 0 || val > 255)
				return -EINVAL;
			ipc->tos = val;
			ipc->priority = rt_tos2priority(ipc->tos);
			break;

		default:
			return -EINVAL;
		}
	}
	return 0;
}
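
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): ip_cmsg_send() above parses ancillary data supplied to
 * sendmsg().  A caller overriding TTL and TOS for a single datagram on
 * socket "fd" might build the control buffer like this:
 *
 *	char cbuf[CMSG_SPACE(sizeof(int)) * 2];
 *	struct msghdr msg = { ... destination, iov, ... };
 *	struct cmsghdr *cmsg;
 *	int ttl = 8, tos = 0x10;
 *
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = IPPROTO_IP;
 *	cmsg->cmsg_type = IP_TTL;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(ttl));
 *	memcpy(CMSG_DATA(cmsg), &ttl, sizeof(ttl));
 *	cmsg = CMSG_NXTHDR(&msg, cmsg);
 *	cmsg->cmsg_level = IPPROTO_IP;
 *	cmsg->cmsg_type = IP_TOS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(tos));
 *	memcpy(CMSG_DATA(cmsg), &tos, sizeof(tos));
 *	sendmsg(fd, &msg, 0);
 *
 * The IP_TTL value must be in 1..255 and IP_TOS in 0..255, matching the
 * checks in the switch above.
 */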

static void ip_ra_destroy_rcu(struct rcu_head *head)
{
	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);

	sock_put(ra->saved_sk);
	kfree(ra);
}

int ip_ra_control(struct sock *sk, unsigned char on,
		  void (*destructor)(struct sock *))
{
	struct ip_ra_chain *ra, *new_ra;
	struct ip_ra_chain __rcu **rap;
	struct net *net = sock_net(sk);

	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
		return -EINVAL;

	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
	if (on && !new_ra)
		return -ENOMEM;

	mutex_lock(&net->ipv4.ra_mutex);
	for (rap = &net->ipv4.ra_chain;
	     (ra = rcu_dereference_protected(*rap,
			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
	     rap = &ra->next) {
		if (ra->sk == sk) {
			if (on) {
				mutex_unlock(&net->ipv4.ra_mutex);
				kfree(new_ra);
				return -EADDRINUSE;
			}
			/* don't let ip_call_ra_chain() use sk again */
			ra->sk = NULL;
			RCU_INIT_POINTER(*rap, ra->next);
			mutex_unlock(&net->ipv4.ra_mutex);

			if (ra->destructor)
				ra->destructor(sk);
			/*
			 * Delay sock_put(sk) and kfree(ra) until after one rcu
			 * grace period. This guarantees ip_call_ra_chain()
			 * doesn't need to mess with socket refcounts.
			 */
			ra->saved_sk = sk;
			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
			return 0;
		}
	}
	if (!new_ra) {
		mutex_unlock(&net->ipv4.ra_mutex);
		return -ENOBUFS;
	}
	new_ra->sk = sk;
	new_ra->destructor = destructor;

	RCU_INIT_POINTER(new_ra->next, ra);
	rcu_assign_pointer(*rap, new_ra);
	sock_hold(sk);
	mutex_unlock(&net->ipv4.ra_mutex);

	return 0;
}

void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
		   __be16 port, u32 info, u8 *payload)
{
	struct sock_exterr_skb *serr;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
	serr->ee.ee_type = icmp_hdr(skb)->type;
	serr->ee.ee_code = icmp_hdr(skb)->code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
			    skb_network_header(skb);
	serr->port = port;

	if (skb_pull(skb, payload - skb->data)) {
		if (inet_sk(sk)->recverr_rfc4884)
			ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);

		skb_reset_transport_header(skb);
		if (sock_queue_err_skb(sk, skb) == 0)
			return;
	}
	kfree_skb(skb);
}

void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sock_exterr_skb *serr;
	struct iphdr *iph;
	struct sk_buff *skb;

	if (!inet->recverr)
		return;

	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
	if (!skb)
		return;

	skb_put(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->daddr = daddr;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = 0;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
	serr->port = port;

	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
	skb_reset_transport_header(skb);

	if (sock_queue_err_skb(sk, skb))
		kfree_skb(skb);
}

/* For some errors we have valid addr_offset even with zero payload and
 * zero port. Also, addr_offset should be supported if port is set.
 */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
{
	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
}

/* IPv4 supports cmsg on all icmp errors and some timestamps
 *
 * Timestamp code paths do not initialize the fields expected by cmsg:
 * the PKTINFO fields in skb->cb[]. Fill those in here.
 */
static bool ipv4_datagram_support_cmsg(const struct sock *sk,
				       struct sk_buff *skb,
				       int ee_origin)
{
	struct in_pktinfo *info;

	if (ee_origin == SO_EE_ORIGIN_ICMP)
		return true;

	if (ee_origin == SO_EE_ORIGIN_LOCAL)
		return false;

	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
	 * timestamp with egress dev. Not possible for packets without iif
	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
	 */
	info = PKTINFO_SKB_CB(skb);
	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
	    !info->ipi_ifindex)
		return false;

	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
	return true;
}

/*
 *	Handle MSG_ERRQUEUE
 */
int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
	struct {
		struct sock_extended_err ee;
		struct sockaddr_in	 offender;
	} errhdr;
	int err;
	int copied;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (!skb)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}
	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);

	if (sin && ipv4_datagram_support_addr(serr)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
						   serr->addr_offset);
		sin->sin_port = serr->port;
		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
		*addr_len = sizeof(*sin);
	}

	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
	sin = &errhdr.offender;
	memset(sin, 0, sizeof(*sin));

	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		if (inet_sk(sk)->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	}

	put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);

	/* Now we could try to dump offended packet options */

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	consume_skb(skb);
out:
	return err;
}
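
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): errors queued by ip_icmp_error()/ip_local_error() are
 * drained through ip_recv_error() once the application enables IP_RECVERR
 * and reads the error queue.  For a connected UDP socket "fd":
 *
 *	int on = 1;
 *	char data[256], cbuf[512];
 *	struct sockaddr_in from;
 *	struct iovec iov = { data, sizeof(data) };
 *	struct msghdr msg = { &from, sizeof(from), &iov, 1,
 *			      cbuf, sizeof(cbuf), 0 };
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on));
 *	...
 *	if (recvmsg(fd, &msg, MSG_ERRQUEUE) >= 0)
 *		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *		     cmsg = CMSG_NXTHDR(&msg, cmsg))
 *			if (cmsg->cmsg_level == IPPROTO_IP &&
 *			    cmsg->cmsg_type == IP_RECVERR) {
 *				struct sock_extended_err *ee =
 *					(struct sock_extended_err *)CMSG_DATA(cmsg);
 *				... inspect ee->ee_errno, ee->ee_origin and
 *				    SO_EE_OFFENDER(ee) ...
 *			}
 */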

static void __ip_sock_set_tos(struct sock *sk, int val)
{
	if (sk->sk_type == SOCK_STREAM) {
		val &= ~INET_ECN_MASK;
		val |= inet_sk(sk)->tos & INET_ECN_MASK;
	}
	if (inet_sk(sk)->tos != val) {
		inet_sk(sk)->tos = val;
		sk->sk_priority = rt_tos2priority(val);
		sk_dst_reset(sk);
	}
}

void ip_sock_set_tos(struct sock *sk, int val)
{
	lock_sock(sk);
	__ip_sock_set_tos(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);

void ip_sock_set_freebind(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->freebind = true;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_freebind);

void ip_sock_set_recverr(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->recverr = true;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_recverr);

int ip_sock_set_mtu_discover(struct sock *sk, int val)
{
	if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
		return -EINVAL;
	lock_sock(sk);
	inet_sk(sk)->pmtudisc = val;
	release_sock(sk);
	return 0;
}
EXPORT_SYMBOL(ip_sock_set_mtu_discover);

void ip_sock_set_pktinfo(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);

/*
 *	Socket option code for IP. This is the end of the line after any
 *	TCP,UDP etc options on an IP socket.
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_MSFILTER:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
		return true;
	}
	return false;
}

static int set_mcast_msfilter(struct sock *sk, int ifindex,
			      int numsrc, int fmode,
			      struct sockaddr_storage *group,
			      struct sockaddr_storage *list)
{
	int msize = IP_MSFILTER_SIZE(numsrc);
	struct ip_msfilter *msf;
	struct sockaddr_in *psin;
	int err, i;

	msf = kmalloc(msize, GFP_KERNEL);
	if (!msf)
		return -ENOBUFS;

	psin = (struct sockaddr_in *)group;
	if (psin->sin_family != AF_INET)
		goto Eaddrnotavail;
	msf->imsf_multiaddr = psin->sin_addr.s_addr;
	msf->imsf_interface = 0;
	msf->imsf_fmode = fmode;
	msf->imsf_numsrc = numsrc;
	for (i = 0; i < numsrc; ++i) {
		psin = (struct sockaddr_in *)&list[i];

		if (psin->sin_family != AF_INET)
			goto Eaddrnotavail;
		msf->imsf_slist[i] = psin->sin_addr.s_addr;
	}
	err = ip_mc_msfilter(sk, msf, ifindex);
	kfree(msf);
	return err;

Eaddrnotavail:
	kfree(msf);
	return -EADDRNOTAVAIL;
}

static int copy_group_source_from_sockptr(struct group_source_req *greqs,
		sockptr_t optval, int optlen)
{
	if (in_compat_syscall()) {
		struct compat_group_source_req gr32;

		if (optlen != sizeof(gr32))
			return -EINVAL;
		if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
			return -EFAULT;
		greqs->gsr_interface = gr32.gsr_interface;
		greqs->gsr_group = gr32.gsr_group;
		greqs->gsr_source = gr32.gsr_source;
	} else {
		if (optlen != sizeof(*greqs))
			return -EINVAL;
		if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
			return -EFAULT;
	}

	return 0;
}

static int do_mcast_group_source(struct sock *sk, int optname,
		sockptr_t optval, int optlen)
{
	struct group_source_req greqs;
	struct ip_mreq_source mreqs;
	struct sockaddr_in *psin;
	int omode, add, err;

	err = copy_group_source_from_sockptr(&greqs, optval, optlen);
	if (err)
		return err;

	if (greqs.gsr_group.ss_family != AF_INET ||
	    greqs.gsr_source.ss_family != AF_INET)
		return -EADDRNOTAVAIL;

	psin = (struct sockaddr_in *)&greqs.gsr_group;
	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
	psin = (struct sockaddr_in *)&greqs.gsr_source;
	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
	mreqs.imr_interface = 0; /* use index for mc_source */

	if (optname == MCAST_BLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 1;
	} else if (optname == MCAST_UNBLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 0;
	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
		struct ip_mreqn mreq;

		psin = (struct sockaddr_in *)&greqs.gsr_group;
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_address.s_addr = 0;
		mreq.imr_ifindex = greqs.gsr_interface;
		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
		if (err && err != -EADDRINUSE)
			return err;
		greqs.gsr_interface = mreq.imr_ifindex;
		omode = MCAST_INCLUDE;
		add = 1;
	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
		omode = MCAST_INCLUDE;
		add = 0;
	}
	return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
}
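
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): the source-specific multicast handlers above are driven by
 * setsockopt() with a struct group_source_req.  An SSM join on socket "fd":
 *
 *	struct group_source_req gsr = {
 *		.gsr_interface = if_nametoindex("eth0"),
 *	};
 *	struct sockaddr_in *grp = (struct sockaddr_in *)&gsr.gsr_group;
 *	struct sockaddr_in *src = (struct sockaddr_in *)&gsr.gsr_source;
 *
 *	grp->sin_family = AF_INET;
 *	inet_pton(AF_INET, "232.1.1.1", &grp->sin_addr);
 *	src->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &src->sin_addr);
 *	setsockopt(fd, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP, &gsr, sizeof(gsr));
 *
 * Both addresses must be AF_INET and optlen must equal sizeof(gsr), matching
 * copy_group_source_from_sockptr() and do_mcast_group_source() above.
 */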

static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
{
	struct group_filter *gsf = NULL;
	int err;

	if (optlen < GROUP_FILTER_SIZE(0))
		return -EINVAL;
	if (optlen > sysctl_optmem_max)
		return -ENOBUFS;

	gsf = memdup_sockptr(optval, optlen);
	if (IS_ERR(gsf))
		return PTR_ERR(gsf);

	/* numsrc >= (4G-140)/128 overflow in 32 bits */
	err = -ENOBUFS;
	if (gsf->gf_numsrc >= 0x1ffffff ||
	    gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
		goto out_free_gsf;

	err = -EINVAL;
	if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
		goto out_free_gsf;

	err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
				 gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
out_free_gsf:
	kfree(gsf);
	return err;
}

static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
		int optlen)
{
	const int size0 = offsetof(struct compat_group_filter, gf_slist);
	struct compat_group_filter *gf32;
	unsigned int n;
	void *p;
	int err;

	if (optlen < size0)
		return -EINVAL;
	if (optlen > sysctl_optmem_max - 4)
		return -ENOBUFS;

	p = kmalloc(optlen + 4, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */

	err = -EFAULT;
	if (copy_from_sockptr(gf32, optval, optlen))
		goto out_free_gsf;

	/* numsrc >= (4G-140)/128 overflow in 32 bits */
	n = gf32->gf_numsrc;
	err = -ENOBUFS;
	if (n >= 0x1ffffff)
		goto out_free_gsf;

	err = -EINVAL;
	if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
		goto out_free_gsf;

	/* numsrc >= (4G-140)/128 overflow in 32 bits */
	err = -ENOBUFS;
	if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
		goto out_free_gsf;
	err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
				 &gf32->gf_group, gf32->gf_slist);
out_free_gsf:
	kfree(p);
	return err;
}

static int ip_mcast_join_leave(struct sock *sk, int optname,
		sockptr_t optval, int optlen)
{
	struct ip_mreqn mreq = { };
	struct sockaddr_in *psin;
	struct group_req greq;

	if (optlen < sizeof(struct group_req))
		return -EINVAL;
	if (copy_from_sockptr(&greq, optval, sizeof(greq)))
		return -EFAULT;

	psin = (struct sockaddr_in *)&greq.gr_group;
	if (psin->sin_family != AF_INET)
		return -EINVAL;
	mreq.imr_multiaddr = psin->sin_addr;
	mreq.imr_ifindex = greq.gr_interface;
	if (optname == MCAST_JOIN_GROUP)
		return ip_mc_join_group(sk, &mreq);
	return ip_mc_leave_group(sk, &mreq);
}

static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
		sockptr_t optval, int optlen)
{
	struct compat_group_req greq;
	struct ip_mreqn mreq = { };
	struct sockaddr_in *psin;

	if (optlen < sizeof(struct compat_group_req))
		return -EINVAL;
	if (copy_from_sockptr(&greq, optval, sizeof(greq)))
		return -EFAULT;

	psin = (struct sockaddr_in *)&greq.gr_group;
	if (psin->sin_family != AF_INET)
		return -EINVAL;
	mreq.imr_multiaddr = psin->sin_addr;
	mreq.imr_ifindex = greq.gr_interface;

	if (optname == MCAST_JOIN_GROUP)
		return ip_mc_join_group(sk, &mreq);
	return ip_mc_leave_group(sk, &mreq);
}
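
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): the classic any-source join handled by the IP_ADD_MEMBERSHIP
 * case in do_ip_setsockopt() below.  On a UDP socket "fd":
 *
 *	struct ip_mreqn mreq = {
 *		.imr_ifindex = if_nametoindex("eth0"),
 *	};
 *
 *	inet_pton(AF_INET, "239.1.2.3", &mreq.imr_multiaddr);
 *	setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
 *
 * Passing the shorter struct ip_mreq (no ifindex) also works; the copy logic
 * below zero-fills the ip_mreqn and the interface is then resolved from
 * imr_address/imr_interface instead.
 */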

static int do_ip_setsockopt(struct sock *sk, int level, int optname,
		sockptr_t optval, unsigned int optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	int val = 0, err;
	bool needs_rtnl = setsockopt_needs_rtnl(optname);

	switch (optname) {
	case IP_PKTINFO:
	case IP_RECVTTL:
	case IP_RECVOPTS:
	case IP_RECVTOS:
	case IP_RETOPTS:
	case IP_TOS:
	case IP_TTL:
	case IP_HDRINCL:
	case IP_MTU_DISCOVER:
	case IP_RECVERR:
	case IP_ROUTER_ALERT:
	case IP_FREEBIND:
	case IP_PASSSEC:
	case IP_TRANSPARENT:
	case IP_MINTTL:
	case IP_NODEFRAG:
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_UNICAST_IF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_ALL:
	case IP_MULTICAST_LOOP:
	case IP_RECVORIGDSTADDR:
	case IP_CHECKSUM:
	case IP_RECVFRAGSIZE:
	case IP_RECVERR_RFC4884:
		if (optlen >= sizeof(int)) {
			if (copy_from_sockptr(&val, optval, sizeof(val)))
				return -EFAULT;
		} else if (optlen >= sizeof(char)) {
			unsigned char ucval;

			if (copy_from_sockptr(&ucval, optval, sizeof(ucval)))
				return -EFAULT;
			val = (int) ucval;
		}
	}

	/* If optlen==0, it is equivalent to val == 0 */

	if (optname == IP_ROUTER_ALERT)
		return ip_ra_control(sk, val ? 1 : 0, NULL);
	if (ip_mroute_opt(optname))
		return ip_mroute_setsockopt(sk, optname, optval, optlen);

	err = 0;
	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		struct ip_options_rcu *old, *opt = NULL;

		if (optlen > 40)
			goto e_inval;
		err = ip_options_get(sock_net(sk), &opt, optval, optlen);
		if (err)
			break;
		old = rcu_dereference_protected(inet->inet_opt,
						lockdep_sock_is_held(sk));
		if (inet->is_icsk) {
			struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == PF_INET ||
			    (!((1 << sk->sk_state) &
			       (TCPF_LISTEN | TCPF_CLOSE)) &&
			     inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
				if (old)
					icsk->icsk_ext_hdr_len -= old->opt.optlen;
				if (opt)
					icsk->icsk_ext_hdr_len += opt->opt.optlen;
				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if IS_ENABLED(CONFIG_IPV6)
			}
#endif
		}
		rcu_assign_pointer(inet->inet_opt, opt);
		if (old)
			kfree_rcu(old, rcu);
		break;
	}
	case IP_PKTINFO:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PKTINFO;
		else
			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
		break;
	case IP_RECVTTL:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TTL;
		else
			inet->cmsg_flags &= ~IP_CMSG_TTL;
		break;
	case IP_RECVTOS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TOS;
		else
			inet->cmsg_flags &= ~IP_CMSG_TOS;
		break;
	case IP_RECVOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
		break;
	case IP_RETOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RETOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
		break;
	case IP_PASSSEC:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PASSSEC;
		else
			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
		break;
	case IP_RECVORIGDSTADDR:
		if (val)
			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
		else
			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
		break;
	case IP_CHECKSUM:
		if (val) {
			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
				inet_inc_convert_csum(sk);
				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
			}
		} else {
			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
				inet_dec_convert_csum(sk);
				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
			}
		}
		break;
	case IP_RECVFRAGSIZE:
		if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
			goto e_inval;
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
		break;
	case IP_TOS:	/* This sets both TOS and Precedence */
		__ip_sock_set_tos(sk, val);
		break;
	case IP_TTL:
		if (optlen < 1)
			goto e_inval;
		if (val != -1 && (val < 1 || val > 255))
			goto e_inval;
		inet->uc_ttl = val;
		break;
	case IP_HDRINCL:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->hdrincl = val ? 1 : 0;
		break;
	case IP_NODEFRAG:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->nodefrag = val ? 1 : 0;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet->bind_address_no_port = val ? 1 : 0;
		break;
	case IP_MTU_DISCOVER:
		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
			goto e_inval;
		inet->pmtudisc = val;
		break;
	case IP_RECVERR:
		inet->recverr = !!val;
		if (!val)
			skb_queue_purge(&sk->sk_error_queue);
		break;
	case IP_RECVERR_RFC4884:
		if (val < 0 || val > 1)
			goto e_inval;
		inet->recverr_rfc4884 = !!val;
		break;
	case IP_MULTICAST_TTL:
		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		if (optlen < 1)
			goto e_inval;
		if (val == -1)
			val = 1;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->mc_ttl = val;
		break;
	case IP_MULTICAST_LOOP:
		if (optlen < 1)
			goto e_inval;
		inet->mc_loop = !!val;
		break;
	case IP_UNICAST_IF:
	{
		struct net_device *dev = NULL;
		int ifindex;
		int midx;

		if (optlen != sizeof(int))
			goto e_inval;

		ifindex = (__force int)ntohl((__force __be32)val);
		if (ifindex == 0) {
			inet->uc_index = 0;
			err = 0;
			break;
		}

		dev = dev_get_by_index(sock_net(sk), ifindex);
		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);
		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->uc_index = ifindex;
		err = 0;
		break;
	}
	case IP_MULTICAST_IF:
	{
		struct ip_mreqn mreq;
		struct net_device *dev = NULL;
		int midx;

		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		/*
		 *	Check the arguments are allowable
		 */

		if (optlen < sizeof(struct in_addr))
			goto e_inval;

		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (optlen >= sizeof(struct ip_mreq)) {
				if (copy_from_sockptr(&mreq, optval,
						      sizeof(struct ip_mreq)))
					break;
			} else if (optlen >= sizeof(struct in_addr)) {
				if (copy_from_sockptr(&mreq.imr_address, optval,
						      sizeof(struct in_addr)))
					break;
			}
		}

		if (!mreq.imr_ifindex) {
			if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
				inet->mc_index = 0;
				inet->mc_addr = 0;
				err = 0;
				break;
			}
			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
			if (dev)
				mreq.imr_ifindex = dev->ifindex;
		} else
			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);


		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);

		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->mc_index = mreq.imr_ifindex;
		inet->mc_addr = mreq.imr_address.s_addr;
		err = 0;
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	{
		struct ip_mreqn mreq;

		err = -EPROTO;
		if (inet_sk(sk)->is_icsk)
			break;

		if (optlen < sizeof(struct ip_mreq))
			goto e_inval;
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (copy_from_sockptr(&mreq, optval,
					      sizeof(struct ip_mreq)))
				break;
		}

		if (optname == IP_ADD_MEMBERSHIP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter *msf;

		if (optlen < IP_MSFILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		msf = memdup_sockptr(optval, optlen);
		if (IS_ERR(msf)) {
			err = PTR_ERR(msf);
			break;
		}
		/* numsrc >= (1G-4) overflow in 32 bits */
		if (msf->imsf_numsrc >= 0x3ffffffcU ||
		    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			kfree(msf);
			err = -ENOBUFS;
			break;
		}
		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
			kfree(msf);
			err = -EINVAL;
			break;
		}
		err = ip_mc_msfilter(sk, msf, 0);
		kfree(msf);
		break;
	}
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	{
		struct ip_mreq_source mreqs;
		int omode, add;

		if (optlen != sizeof(struct ip_mreq_source))
			goto e_inval;
		if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) {
			err = -EFAULT;
			break;
		}
		if (optname == IP_BLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 1;
		} else if (optname == IP_UNBLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 0;
		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
			struct ip_mreqn mreq;

			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
			mreq.imr_address.s_addr = mreqs.imr_interface;
			mreq.imr_ifindex = 0;
			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
			if (err && err != -EADDRINUSE)
				break;
			omode = MCAST_INCLUDE;
			add = 1;
		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
			omode = MCAST_INCLUDE;
			add = 0;
		}
		err = ip_mc_source(add, omode, sk, &mreqs, 0);
		break;
	}
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
		if (in_compat_syscall())
			err = compat_ip_mcast_join_leave(sk, optname, optval,
							 optlen);
		else
			err = ip_mcast_join_leave(sk, optname, optval, optlen);
		break;
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		err = do_mcast_group_source(sk, optname, optval, optlen);
		break;
	case MCAST_MSFILTER:
		if (in_compat_syscall())
			err = compat_ip_set_mcast_msfilter(sk, optval, optlen);
		else
			err = ip_set_mcast_msfilter(sk, optval, optlen);
		break;
	case IP_MULTICAST_ALL:
		if (optlen < 1)
			goto e_inval;
		if (val != 0 && val != 1)
			goto e_inval;
		inet->mc_all = val;
		break;

	case IP_FREEBIND:
		if (optlen < 1)
			goto e_inval;
		inet->freebind = !!val;
		break;

	case IP_IPSEC_POLICY:
	case IP_XFRM_POLICY:
		err = -EPERM;
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			break;
		err = xfrm_user_policy(sk, optname, optval, optlen);
		break;

	case IP_TRANSPARENT:
		if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			err = -EPERM;
			break;
		}
		if (optlen < 1)
			goto e_inval;
		inet->transparent = !!val;
		break;

	case IP_MINTTL:
		if (optlen < 1)
			goto e_inval;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->min_ttl = val;
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;

e_inval:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return -EINVAL;
}

/**
 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
 * @sk: socket
 * @skb: buffer
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
 * This way, receiver doesn't make cache line misses to read rtable.
 */
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
		       ipv6_sk_rxinfo(sk);

	if (prepare && skb_rtable(skb)) {
		/* skb->cb is overloaded: prior to this point it is IP{6}CB
		 * which has interface index (iif) as the first member of the
		 * underlying inet{6}_skb_parm struct. This code then overlays
		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
		 * element so the iif is picked up from the prior IPCB. If iif
		 * is the loopback interface, then return the sending interface
		 * (e.g., process binds socket to eth0 for Tx which is
		 * redirected to loopback in the rtable/dst).
		 */
		struct rtable *rt = skb_rtable(skb);
		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);

		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
			pktinfo->ipi_ifindex = inet_iif(skb);
		else if (l3slave && rt && rt->rt_iif)
			pktinfo->ipi_ifindex = rt->rt_iif;

		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
	} else {
		pktinfo->ipi_ifindex = 0;
		pktinfo->ipi_spec_dst.s_addr = 0;
	}
	skb_dst_drop(skb);
}

int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		unsigned int optlen)
{
	int err;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
	    optname < BPFILTER_IPT_SET_MAX)
		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
	    optname != IP_IPSEC_POLICY &&
	    optname != IP_XFRM_POLICY &&
	    !ip_mroute_opt(optname))
		err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
#endif
	return err;
}
EXPORT_SYMBOL(ip_setsockopt);
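
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): pairing the IP_MTU_DISCOVER setsockopt handled above with
 * the IP_MTU getsockopt handled below, on a connected socket "fd":
 *
 *	int pmtudisc = IP_PMTUDISC_DO;   (set DF and rely on PMTU discovery)
 *	int mtu;
 *	socklen_t len = sizeof(mtu);
 *
 *	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &pmtudisc, sizeof(pmtudisc));
 *	if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) == 0)
 *		... mtu now holds the path MTU cached in the socket's dst ...
 *
 * IP_MTU only succeeds once the socket has a destination route;
 * do_ip_getsockopt() returns -ENOTCONN otherwise.
 */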

/*
 *	Get the options. Note for future reference. The GET of IP options gets
 *	the _received_ ones. The set sets the _sent_ ones.
 */

static bool getsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_MSFILTER:
	case MCAST_MSFILTER:
		return true;
	}
	return false;
}

static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
		int __user *optlen, int len)
{
	const int size0 = offsetof(struct group_filter, gf_slist);
	struct group_filter __user *p = optval;
	struct group_filter gsf;
	int num;
	int err;

	if (len < size0)
		return -EINVAL;
	if (copy_from_user(&gsf, p, size0))
		return -EFAULT;

	num = gsf.gf_numsrc;
	err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
	if (err)
		return err;
	if (gsf.gf_numsrc < num)
		num = gsf.gf_numsrc;
	if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
	    copy_to_user(p, &gsf, size0))
		return -EFAULT;
	return 0;
}

static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
		int __user *optlen, int len)
{
	const int size0 = offsetof(struct compat_group_filter, gf_slist);
	struct compat_group_filter __user *p = optval;
	struct compat_group_filter gf32;
	struct group_filter gf;
	int num;
	int err;

	if (len < size0)
		return -EINVAL;
	if (copy_from_user(&gf32, p, size0))
		return -EFAULT;

	gf.gf_interface = gf32.gf_interface;
	gf.gf_fmode = gf32.gf_fmode;
	num = gf.gf_numsrc = gf32.gf_numsrc;
	gf.gf_group = gf32.gf_group;

	err = ip_mc_gsfget(sk, &gf, p->gf_slist);
	if (err)
		return err;
	if (gf.gf_numsrc < num)
		num = gf.gf_numsrc;
	len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
	if (put_user(len, optlen) ||
	    put_user(gf.gf_fmode, &p->gf_fmode) ||
	    put_user(gf.gf_numsrc, &p->gf_numsrc))
		return -EFAULT;
	return 0;
}

static int do_ip_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	bool needs_rtnl = getsockopt_needs_rtnl(optname);
	int val, err = 0;
	int len;

	if (level != SOL_IP)
		return -EOPNOTSUPP;

	if (ip_mroute_opt(optname))
		return ip_mroute_getsockopt(sk, optname, optval, optlen);

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		unsigned char optbuf[sizeof(struct ip_options) + 40];
		struct ip_options *opt = (struct ip_options *)optbuf;
		struct ip_options_rcu *inet_opt;

		inet_opt = rcu_dereference_protected(inet->inet_opt,
						     lockdep_sock_is_held(sk));
		opt->optlen = 0;
		if (inet_opt)
			memcpy(optbuf, &inet_opt->opt,
			       sizeof(struct ip_options) +
			       inet_opt->opt.optlen);
		release_sock(sk);

		if (opt->optlen == 0)
			return put_user(0, optlen);

		ip_options_undo(opt);

		len = min_t(unsigned int, len, opt->optlen);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, opt->__data, len))
			return -EFAULT;
		return 0;
	}
	case IP_PKTINFO:
		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
		break;
	case IP_RECVTTL:
		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
		break;
	case IP_RECVTOS:
		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
		break;
	case IP_RECVOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
		break;
	case IP_RETOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
		break;
	case IP_PASSSEC:
		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
		break;
	case IP_RECVORIGDSTADDR:
		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
		break;
	case IP_CHECKSUM:
		val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
		break;
	case IP_RECVFRAGSIZE:
		val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
		break;
	case IP_TOS:
		val = inet->tos;
		break;
	case IP_TTL:
	{
		struct net *net = sock_net(sk);
		val = (inet->uc_ttl == -1 ?
		       net->ipv4.sysctl_ip_default_ttl :
		       inet->uc_ttl);
		break;
	}
	case IP_HDRINCL:
		val = inet->hdrincl;
		break;
	case IP_NODEFRAG:
		val = inet->nodefrag;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		val = inet->bind_address_no_port;
		break;
	case IP_MTU_DISCOVER:
		val = inet->pmtudisc;
		break;
	case IP_MTU:
	{
		struct dst_entry *dst;
		val = 0;
		dst = sk_dst_get(sk);
		if (dst) {
			val = dst_mtu(dst);
			dst_release(dst);
		}
		if (!val) {
			release_sock(sk);
			return -ENOTCONN;
		}
		break;
	}
	case IP_RECVERR:
		val = inet->recverr;
		break;
	case IP_RECVERR_RFC4884:
		val = inet->recverr_rfc4884;
		break;
	case IP_MULTICAST_TTL:
		val = inet->mc_ttl;
		break;
	case IP_MULTICAST_LOOP:
		val = inet->mc_loop;
		break;
	case IP_UNICAST_IF:
		val = (__force int)htonl((__u32) inet->uc_index);
		break;
	case IP_MULTICAST_IF:
	{
		struct in_addr addr;
		len = min_t(unsigned int, len, sizeof(struct in_addr));
		addr.s_addr = inet->mc_addr;
		release_sock(sk);

		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &addr, len))
			return -EFAULT;
		return 0;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter msf;

		if (len < IP_MSFILTER_SIZE(0)) {
			err = -EINVAL;
			goto out;
		}
		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
			err = -EFAULT;
			goto out;
		}
		err = ip_mc_msfget(sk, &msf,
				   (struct ip_msfilter __user *)optval, optlen);
		goto out;
	}
	case MCAST_MSFILTER:
		if (in_compat_syscall())
			err = compat_ip_get_mcast_msfilter(sk, optval, optlen,
							   len);
		else
			err = ip_get_mcast_msfilter(sk, optval, optlen, len);
		goto out;
	case IP_MULTICAST_ALL:
		val = inet->mc_all;
		break;
	case IP_PKTOPTIONS:
	{
		struct msghdr msg;

		release_sock(sk);

		if (sk->sk_type != SOCK_STREAM)
			return -ENOPROTOOPT;

		msg.msg_control_is_user = true;
		msg.msg_control_user = optval;
		msg.msg_controllen = len;
		msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;

		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
			struct in_pktinfo info;

			info.ipi_addr.s_addr = inet->inet_rcv_saddr;
			info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
			info.ipi_ifindex = inet->mc_index;
			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
		}
		if (inet->cmsg_flags & IP_CMSG_TTL) {
			int hlim = inet->mc_ttl;
			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
		}
		if (inet->cmsg_flags & IP_CMSG_TOS) {
			int tos = inet->rcv_tos;
			put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
		}
		len -= msg.msg_controllen;
		return put_user(len, optlen);
	}
	case IP_FREEBIND:
		val = inet->freebind;
		break;
	case IP_TRANSPARENT:
		val = inet->transparent;
		break;
	case IP_MINTTL:
		val = inet->min_ttl;
		break;
	default:
		release_sock(sk);
		return -ENOPROTOOPT;
	}
	release_sock(sk);

	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
		unsigned char ucval = (unsigned char)val;
		len = 1;
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &ucval, 1))
			return -EFAULT;
	} else {
		len = min_t(unsigned int, sizeof(int), len);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &val, len))
			return -EFAULT;
	}
	return 0;

out:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;
}

int ip_getsockopt(struct sock *sk, int level,
		  int optname, char __user *optval, int __user *optlen)
{
	int err;

	err = do_ip_getsockopt(sk, level, optname, optval, optlen);

#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
	    optname < BPFILTER_IPT_GET_MAX)
		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
	    !ip_mroute_opt(optname)) {
		int len;

		if (get_user(len, optlen))
			return -EFAULT;

		err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
		if (err >= 0)
			err = put_user(len, optlen);
		return err;
	}
#endif
	return err;
}
EXPORT_SYMBOL(ip_getsockopt);
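
/*
 * Illustrative userspace sketch (not part of the kernel build, names are
 * hypothetical): most scalar options above can be read back with either an
 * int-sized or a single-byte buffer; do_ip_getsockopt() shrinks the copy to
 * one byte when the caller's buffer is smaller than an int and the value
 * fits in 0..255:
 *
 *	unsigned char ttl;
 *	socklen_t len = sizeof(ttl);
 *
 *	if (getsockopt(fd, IPPROTO_IP, IP_TTL, &ttl, &len) == 0)
 *		... len is now 1 and ttl holds the per-socket or default TTL ...
 */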