// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP to API glue.
 *
 * Authors:	see ip.c
 *
 * Fixes:
 *		Many		:	Split from ip.c , see ip.c for history.
 *		Martin Mares	:	TOS setting fixed.
 *		Alan Cox	:	Fixed a couple of oopses in Martin's
 *					TOS tweaks.
 *		Mike McLagan	:	Routing by source
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/tcp_states.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/netfilter.h>
#include <linux/route.h>
#include <linux/mroute.h>
#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
#include <net/checksum.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
#include <net/ip_fib.h>

#include <linux/errqueue.h>
#include <linux/uaccess.h>

#include <linux/bpfilter.h>

/*
 *	SOL_IP control messages.
 */

static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);

	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;

	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}

static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
	int ttl = ip_hdr(skb)->ttl;
	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}

static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}

static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
{
	if (IPCB(skb)->opt.optlen == 0)
		return;

	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
		 ip_hdr(skb) + 1);
}


static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
				 struct sk_buff *skb)
{
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	if (IPCB(skb)->opt.optlen == 0)
		return;

	if (ip_options_echo(net, opt, skb)) {
		msg->msg_flags |= MSG_CTRUNC;
		return;
	}
	ip_options_undo(opt);

	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
	int val;

	if (IPCB(skb)->frag_max_size == 0)
		return;

	val = IPCB(skb)->frag_max_size;
	put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
}

static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
				  int tlen, int offset)
{
	__wsum csum = skb->csum;

	if (skb->ip_summed != CHECKSUM_COMPLETE)
		return;

	if (offset != 0) {
		int tend_off = skb_transport_offset(skb) + tlen;
		csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
	}

	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}

static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
	char *secdata;
	u32 seclen, secid;
	int err;

	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
	if (err)
		return;

	err = security_secid_to_secctx(secid, &secdata, &seclen);
	if (err)
		return;

	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
	security_release_secctx(secdata, seclen);
}

static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
	__be16 _ports[2], *ports;
	struct sockaddr_in sin;

	/* All current transport protocols have the port numbers in the
	 * first four bytes of the transport header and this function is
	 * written with this assumption in mind.
	 */
	ports = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_ports), &_ports);
	if (!ports)
		return;

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
	sin.sin_port = ports[1];
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
}

void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
			 struct sk_buff *skb, int tlen, int offset)
{
	struct inet_sock *inet = inet_sk(sk);
	unsigned int flags = inet->cmsg_flags;

	/* Ordered by supposed usage frequency */
	if (flags & IP_CMSG_PKTINFO) {
		ip_cmsg_recv_pktinfo(msg, skb);

		flags &= ~IP_CMSG_PKTINFO;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TTL) {
		ip_cmsg_recv_ttl(msg, skb);

		flags &= ~IP_CMSG_TTL;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TOS) {
		ip_cmsg_recv_tos(msg, skb);

		flags &= ~IP_CMSG_TOS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RECVOPTS) {
		ip_cmsg_recv_opts(msg, skb);

		flags &= ~IP_CMSG_RECVOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RETOPTS) {
		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);

		flags &= ~IP_CMSG_RETOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_PASSSEC) {
		ip_cmsg_recv_security(msg, skb);

		flags &= ~IP_CMSG_PASSSEC;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_ORIGDSTADDR) {
		ip_cmsg_recv_dstaddr(msg, skb);

		flags &= ~IP_CMSG_ORIGDSTADDR;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_CHECKSUM)
		ip_cmsg_recv_checksum(msg, skb, tlen, offset);

	if (flags & IP_CMSG_RECVFRAGSIZE)
		ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);

int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
		 bool allow_ipv6)
{
	int err, val;
	struct cmsghdr *cmsg;
	struct net *net = sock_net(sk);

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
#if IS_ENABLED(CONFIG_IPV6)
		if (allow_ipv6 &&
		    cmsg->cmsg_level == SOL_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			struct in6_pktinfo *src_info;

			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
				return -EINVAL;
			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
				return -EINVAL;
			if (src_info->ipi6_ifindex)
				ipc->oif = src_info->ipi6_ifindex;
			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
			continue;
		}
#endif
		if (cmsg->cmsg_level == SOL_SOCKET) {
			err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
			if (err)
				return err;
			continue;
		}

		if (cmsg->cmsg_level != SOL_IP)
			continue;
		switch (cmsg->cmsg_type) {
		case IP_RETOPTS:
			err = cmsg->cmsg_len - sizeof(struct cmsghdr);

			/* Our caller is responsible for freeing ipc->opt */
			err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
					     err < 40 ? err : 40);
			if (err)
				return err;
			break;
		case IP_PKTINFO:
		{
			struct in_pktinfo *info;
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
				return -EINVAL;
			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
			if (info->ipi_ifindex)
				ipc->oif = info->ipi_ifindex;
			ipc->addr = info->ipi_spec_dst.s_addr;
			break;
		}
		case IP_TTL:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
				return -EINVAL;
			val = *(int *)CMSG_DATA(cmsg);
			if (val < 1 || val > 255)
				return -EINVAL;
			ipc->ttl = val;
			break;
		case IP_TOS:
			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
				val = *(int *)CMSG_DATA(cmsg);
			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
				val = *(u8 *)CMSG_DATA(cmsg);
			else
				return -EINVAL;
			if (val < 0 || val > 255)
				return -EINVAL;
			ipc->tos = val;
			ipc->priority = rt_tos2priority(ipc->tos);
			break;

		default:
			return -EINVAL;
		}
	}
	return 0;
}

static void ip_ra_destroy_rcu(struct rcu_head *head)
{
	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);

	sock_put(ra->saved_sk);
	kfree(ra);
}

int ip_ra_control(struct sock *sk, unsigned char on,
		  void (*destructor)(struct sock *))
{
	struct ip_ra_chain *ra, *new_ra;
	struct ip_ra_chain __rcu **rap;
	struct net *net = sock_net(sk);

	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
		return -EINVAL;

	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
	if (on && !new_ra)
		return -ENOMEM;

	mutex_lock(&net->ipv4.ra_mutex);
	for (rap = &net->ipv4.ra_chain;
	     (ra = rcu_dereference_protected(*rap,
			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
	     rap = &ra->next) {
		if (ra->sk == sk) {
			if (on) {
				mutex_unlock(&net->ipv4.ra_mutex);
				kfree(new_ra);
				return -EADDRINUSE;
			}
			/* don't let ip_call_ra_chain() use sk again */
			ra->sk = NULL;
			RCU_INIT_POINTER(*rap, ra->next);
			mutex_unlock(&net->ipv4.ra_mutex);

			if (ra->destructor)
				ra->destructor(sk);
			/*
			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
			 * period. This guarantees ip_call_ra_chain() doesn't need
			 * to mess with socket refcounts.
			 */
			ra->saved_sk = sk;
			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
			return 0;
		}
	}
	if (!new_ra) {
		mutex_unlock(&net->ipv4.ra_mutex);
		return -ENOBUFS;
	}
	new_ra->sk = sk;
	new_ra->destructor = destructor;

	RCU_INIT_POINTER(new_ra->next, ra);
	rcu_assign_pointer(*rap, new_ra);
	sock_hold(sk);
	mutex_unlock(&net->ipv4.ra_mutex);

	return 0;
}

void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
		   __be16 port, u32 info, u8 *payload)
{
	struct sock_exterr_skb *serr;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
	serr->ee.ee_type = icmp_hdr(skb)->type;
	serr->ee.ee_code = icmp_hdr(skb)->code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
			    skb_network_header(skb);
	serr->port = port;

	if (skb_pull(skb, payload - skb->data)) {
		skb_reset_transport_header(skb);
		if (sock_queue_err_skb(sk, skb) == 0)
			return;
	}
	kfree_skb(skb);
}

void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sock_exterr_skb *serr;
	struct iphdr *iph;
	struct sk_buff *skb;

	if (!inet->recverr)
		return;

	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
	if (!skb)
		return;

	skb_put(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->daddr = daddr;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = 0;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
	serr->port = port;

	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
	skb_reset_transport_header(skb);

	if (sock_queue_err_skb(sk, skb))
		kfree_skb(skb);
}

/* For some errors we have valid addr_offset even with zero payload and
 * zero port. Also, addr_offset should be supported if port is set.
 */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
{
	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
}

/* IPv4 supports cmsg on all icmp errors and some timestamps
 *
 * Timestamp code paths do not initialize the fields expected by cmsg:
 * the PKTINFO fields in skb->cb[]. Fill those in here.
 */
static bool ipv4_datagram_support_cmsg(const struct sock *sk,
				       struct sk_buff *skb,
				       int ee_origin)
{
	struct in_pktinfo *info;

	if (ee_origin == SO_EE_ORIGIN_ICMP)
		return true;

	if (ee_origin == SO_EE_ORIGIN_LOCAL)
		return false;

	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
	 * timestamp with egress dev. Not possible for packets without iif
	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
	 */
	info = PKTINFO_SKB_CB(skb);
	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
	    !info->ipi_ifindex)
		return false;

	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
	return true;
}

/*
 *	Handle MSG_ERRQUEUE
 */
int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
	struct {
		struct sock_extended_err ee;
		struct sockaddr_in offender;
	} errhdr;
	int err;
	int copied;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (!skb)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}
	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);

	if (sin && ipv4_datagram_support_addr(serr)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
						   serr->addr_offset);
		sin->sin_port = serr->port;
		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
		*addr_len = sizeof(*sin);
	}

	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
	sin = &errhdr.offender;
	memset(sin, 0, sizeof(*sin));

	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		if (inet_sk(sk)->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	}

	put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);

	/* Now we could try to dump offended packet options */

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	consume_skb(skb);
out:
	return err;
}

static void __ip_sock_set_tos(struct sock *sk, int val)
{
	if (sk->sk_type == SOCK_STREAM) {
		val &= ~INET_ECN_MASK;
		val |= inet_sk(sk)->tos & INET_ECN_MASK;
	}
	if (inet_sk(sk)->tos != val) {
		inet_sk(sk)->tos = val;
		sk->sk_priority = rt_tos2priority(val);
		sk_dst_reset(sk);
	}
}

void ip_sock_set_tos(struct sock *sk, int val)
{
	lock_sock(sk);
	__ip_sock_set_tos(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);

void ip_sock_set_freebind(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->freebind = true;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_freebind);

void ip_sock_set_recverr(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->recverr = true;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_recverr);

int ip_sock_set_mtu_discover(struct sock *sk, int val)
{
	if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
		return -EINVAL;
	lock_sock(sk);
	inet_sk(sk)->pmtudisc = val;
	release_sock(sk);
	return 0;
}
EXPORT_SYMBOL(ip_sock_set_mtu_discover);

void ip_sock_set_pktinfo(struct sock *sk)
{
	lock_sock(sk);
	inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);

/*
 *	Socket option code for IP. This is the end of the line after any
 *	TCP,UDP etc options on an IP socket.
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_MSFILTER:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
		return true;
	}
	return false;
}

static int set_mcast_msfilter(struct sock *sk, int ifindex,
			      int numsrc, int fmode,
			      struct sockaddr_storage *group,
			      struct sockaddr_storage *list)
{
	int msize = IP_MSFILTER_SIZE(numsrc);
	struct ip_msfilter *msf;
	struct sockaddr_in *psin;
	int err, i;

	msf = kmalloc(msize, GFP_KERNEL);
	if (!msf)
		return -ENOBUFS;

	psin = (struct sockaddr_in *)group;
	if (psin->sin_family != AF_INET)
		goto Eaddrnotavail;
	msf->imsf_multiaddr = psin->sin_addr.s_addr;
	msf->imsf_interface = 0;
	msf->imsf_fmode = fmode;
	msf->imsf_numsrc = numsrc;
	for (i = 0; i < numsrc; ++i) {
		psin = (struct sockaddr_in *)&list[i];

		if (psin->sin_family != AF_INET)
			goto Eaddrnotavail;
		msf->imsf_slist[i] = psin->sin_addr.s_addr;
	}
	err = ip_mc_msfilter(sk, msf, ifindex);
	kfree(msf);
	return err;

Eaddrnotavail:
	kfree(msf);
	return -EADDRNOTAVAIL;
}

static int do_mcast_group_source(struct sock *sk, int optname,
				 struct group_source_req *greqs)
{
	struct ip_mreq_source mreqs;
	struct sockaddr_in *psin;
	int omode, add, err;

	if (greqs->gsr_group.ss_family != AF_INET ||
	    greqs->gsr_source.ss_family != AF_INET)
		return -EADDRNOTAVAIL;

	psin = (struct sockaddr_in *)&greqs->gsr_group;
	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
	psin = (struct sockaddr_in *)&greqs->gsr_source;
	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
	mreqs.imr_interface = 0; /* use index for mc_source */

	if (optname == MCAST_BLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 1;
	} else if (optname == MCAST_UNBLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 0;
	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
		struct ip_mreqn mreq;

		psin = (struct sockaddr_in *)&greqs->gsr_group;
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_address.s_addr = 0;
		mreq.imr_ifindex = greqs->gsr_interface;
		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
		if (err && err != -EADDRINUSE)
			return err;
		greqs->gsr_interface = mreq.imr_ifindex;
		omode = MCAST_INCLUDE;
		add = 1;
	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
		omode = MCAST_INCLUDE;
		add = 0;
	}
	return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
}

static int do_ip_setsockopt(struct sock *sk, int level,
			    int optname, char __user *optval, unsigned int optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	int val = 0, err;
	bool needs_rtnl = setsockopt_needs_rtnl(optname);

	switch (optname) {
	case IP_PKTINFO:
	case IP_RECVTTL:
	case IP_RECVOPTS:
	case IP_RECVTOS:
	case IP_RETOPTS:
	case IP_TOS:
	case IP_TTL:
	case IP_HDRINCL:
	case IP_MTU_DISCOVER:
	case IP_RECVERR:
	case IP_ROUTER_ALERT:
	case IP_FREEBIND:
	case IP_PASSSEC:
	case IP_TRANSPARENT:
	case IP_MINTTL:
	case IP_NODEFRAG:
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_UNICAST_IF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_ALL:
	case IP_MULTICAST_LOOP:
	case IP_RECVORIGDSTADDR:
	case IP_CHECKSUM:
	case IP_RECVFRAGSIZE:
		if (optlen >= sizeof(int)) {
			if (get_user(val, (int __user *) optval))
				return -EFAULT;
		} else if (optlen >= sizeof(char)) {
			unsigned char ucval;

			if (get_user(ucval, (unsigned char __user *) optval))
				return -EFAULT;
			val = (int) ucval;
		}
	}

	/* If optlen==0, it is equivalent to val == 0 */

	if (optname == IP_ROUTER_ALERT)
		return ip_ra_control(sk, val ? 1 : 0, NULL);
	if (ip_mroute_opt(optname))
		return ip_mroute_setsockopt(sk, optname, optval, optlen);

	err = 0;
	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		struct ip_options_rcu *old, *opt = NULL;

		if (optlen > 40)
			goto e_inval;
		err = ip_options_get_from_user(sock_net(sk), &opt,
					       optval, optlen);
		if (err)
			break;
		old = rcu_dereference_protected(inet->inet_opt,
						lockdep_sock_is_held(sk));
		if (inet->is_icsk) {
			struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == PF_INET ||
			    (!((1 << sk->sk_state) &
			       (TCPF_LISTEN | TCPF_CLOSE)) &&
			     inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
				if (old)
					icsk->icsk_ext_hdr_len -= old->opt.optlen;
				if (opt)
					icsk->icsk_ext_hdr_len += opt->opt.optlen;
				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if IS_ENABLED(CONFIG_IPV6)
			}
#endif
		}
		rcu_assign_pointer(inet->inet_opt, opt);
		if (old)
			kfree_rcu(old, rcu);
		break;
	}
	case IP_PKTINFO:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PKTINFO;
		else
			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
		break;
	case IP_RECVTTL:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TTL;
		else
			inet->cmsg_flags &= ~IP_CMSG_TTL;
		break;
	case IP_RECVTOS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_TOS;
		else
			inet->cmsg_flags &= ~IP_CMSG_TOS;
		break;
	case IP_RECVOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
		break;
	case IP_RETOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RETOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
		break;
	case IP_PASSSEC:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PASSSEC;
		else
			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
		break;
	case IP_RECVORIGDSTADDR:
		if (val)
			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
		else
			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
		break;
	case IP_CHECKSUM:
		if (val) {
			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
				inet_inc_convert_csum(sk);
				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
			}
		} else {
			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
				inet_dec_convert_csum(sk);
				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
			}
		}
		break;
	case IP_RECVFRAGSIZE:
		if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
			goto e_inval;
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
		break;
	case IP_TOS:	/* This sets both TOS and Precedence */
		__ip_sock_set_tos(sk, val);
		break;
	case IP_TTL:
		if (optlen < 1)
			goto e_inval;
		if (val != -1 && (val < 1 || val > 255))
			goto e_inval;
		inet->uc_ttl = val;
		break;
	case IP_HDRINCL:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->hdrincl = val ? 1 : 0;
		break;
	case IP_NODEFRAG:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->nodefrag = val ? 1 : 0;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet->bind_address_no_port = val ? 1 : 0;
		break;
	case IP_MTU_DISCOVER:
		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
			goto e_inval;
		inet->pmtudisc = val;
		break;
	case IP_RECVERR:
		inet->recverr = !!val;
		if (!val)
			skb_queue_purge(&sk->sk_error_queue);
		break;
	case IP_MULTICAST_TTL:
		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		if (optlen < 1)
			goto e_inval;
		if (val == -1)
			val = 1;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->mc_ttl = val;
		break;
	case IP_MULTICAST_LOOP:
		if (optlen < 1)
			goto e_inval;
		inet->mc_loop = !!val;
		break;
	case IP_UNICAST_IF:
	{
		struct net_device *dev = NULL;
		int ifindex;
		int midx;

		if (optlen != sizeof(int))
			goto e_inval;

		ifindex = (__force int)ntohl((__force __be32)val);
		if (ifindex == 0) {
			inet->uc_index = 0;
			err = 0;
			break;
		}

		dev = dev_get_by_index(sock_net(sk), ifindex);
		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);
		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->uc_index = ifindex;
		err = 0;
		break;
	}
	case IP_MULTICAST_IF:
	{
		struct ip_mreqn mreq;
		struct net_device *dev = NULL;
		int midx;

		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		/*
		 *	Check the arguments are allowable
		 */

		if (optlen < sizeof(struct in_addr))
			goto e_inval;

		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (optlen >= sizeof(struct ip_mreq)) {
				if (copy_from_user(&mreq, optval,
						   sizeof(struct ip_mreq)))
					break;
			} else if (optlen >= sizeof(struct in_addr)) {
				if (copy_from_user(&mreq.imr_address, optval,
						   sizeof(struct in_addr)))
					break;
			}
		}

		if (!mreq.imr_ifindex) {
			if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
				inet->mc_index = 0;
				inet->mc_addr = 0;
				err = 0;
				break;
			}
			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
			if (dev)
				mreq.imr_ifindex = dev->ifindex;
		} else
			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);


		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);

		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->mc_index = mreq.imr_ifindex;
		inet->mc_addr = mreq.imr_address.s_addr;
		err = 0;
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	{
		struct ip_mreqn mreq;

		err = -EPROTO;
		if (inet_sk(sk)->is_icsk)
			break;

		if (optlen < sizeof(struct ip_mreq))
			goto e_inval;
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
				break;
		}

		if (optname == IP_ADD_MEMBERSHIP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter *msf;

		if (optlen < IP_MSFILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		msf = memdup_user(optval, optlen);
		if (IS_ERR(msf)) {
			err = PTR_ERR(msf);
			break;
		}
		/* numsrc >= (1G-4) overflow in 32 bits */
		if (msf->imsf_numsrc >= 0x3ffffffcU ||
		    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			kfree(msf);
			err = -ENOBUFS;
			break;
		}
		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
			kfree(msf);
			err = -EINVAL;
			break;
		}
		err = ip_mc_msfilter(sk, msf, 0);
		kfree(msf);
		break;
	}
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	{
		struct ip_mreq_source mreqs;
		int omode, add;

		if (optlen != sizeof(struct ip_mreq_source))
			goto e_inval;
		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
			err = -EFAULT;
			break;
		}
		if (optname == IP_BLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 1;
		} else if (optname == IP_UNBLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 0;
		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
			struct ip_mreqn mreq;

			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
			mreq.imr_address.s_addr = mreqs.imr_interface;
			mreq.imr_ifindex = 0;
			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
			if (err && err != -EADDRINUSE)
				break;
			omode = MCAST_INCLUDE;
			add = 1;
		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
			omode = MCAST_INCLUDE;
			add = 0;
		}
		err = ip_mc_source(add, omode, sk, &mreqs, 0);
		break;
	}
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	{
		struct group_req greq;
		struct sockaddr_in *psin;
		struct ip_mreqn mreq;

		if (optlen < sizeof(struct group_req))
			goto e_inval;
		err = -EFAULT;
		if (copy_from_user(&greq, optval, sizeof(greq)))
			break;
		psin = (struct sockaddr_in *)&greq.gr_group;
		if (psin->sin_family != AF_INET)
			goto e_inval;
		memset(&mreq, 0, sizeof(mreq));
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_ifindex = greq.gr_interface;

		if (optname == MCAST_JOIN_GROUP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	{
		struct group_source_req greqs;

		if (optlen != sizeof(struct group_source_req))
			goto e_inval;
		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
			err = -EFAULT;
			break;
		}
		err = do_mcast_group_source(sk, optname, &greqs);
		break;
	}
	case MCAST_MSFILTER:
	{
		struct group_filter *gsf = NULL;

		if (optlen < GROUP_FILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		gsf = memdup_user(optval, optlen);
		if (IS_ERR(gsf)) {
			err = PTR_ERR(gsf);
			break;
		}
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (gsf->gf_numsrc >= 0x1ffffff ||
		    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			err = -ENOBUFS;
			goto mc_msf_out;
		}
		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
			err = -EINVAL;
			goto mc_msf_out;
		}
		err = set_mcast_msfilter(sk, gsf->gf_interface,
					 gsf->gf_numsrc, gsf->gf_fmode,
					 &gsf->gf_group, gsf->gf_slist);
mc_msf_out:
		kfree(gsf);
		break;
	}
	case IP_MULTICAST_ALL:
		if (optlen < 1)
			goto e_inval;
		if (val != 0 && val != 1)
			goto e_inval;
		inet->mc_all = val;
		break;

	case IP_FREEBIND:
		if (optlen < 1)
			goto e_inval;
		inet->freebind = !!val;
		break;

	case IP_IPSEC_POLICY:
	case IP_XFRM_POLICY:
		err = -EPERM;
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			break;
		err = xfrm_user_policy(sk, optname, optval, optlen);
		break;

	case IP_TRANSPARENT:
		if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			err = -EPERM;
			break;
		}
		if (optlen < 1)
			goto e_inval;
		inet->transparent = !!val;
		break;

	case IP_MINTTL:
		if (optlen < 1)
			goto e_inval;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->min_ttl = val;
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;

e_inval:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return -EINVAL;
}

/**
 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
 * @sk: socket
 * @skb: buffer
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
 * This way, receiver doesn't make cache line misses to read rtable.
 */
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
		       ipv6_sk_rxinfo(sk);

	if (prepare && skb_rtable(skb)) {
		/* skb->cb is overloaded: prior to this point it is IP{6}CB
		 * which has interface index (iif) as the first member of the
		 * underlying inet{6}_skb_parm struct. This code then overlays
		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
		 * element so the iif is picked up from the prior IPCB. If iif
		 * is the loopback interface, then return the sending interface
		 * (e.g., process binds socket to eth0 for Tx which is
		 * redirected to loopback in the rtable/dst).
		 */
		struct rtable *rt = skb_rtable(skb);
		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);

		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
			pktinfo->ipi_ifindex = inet_iif(skb);
		else if (l3slave && rt && rt->rt_iif)
			pktinfo->ipi_ifindex = rt->rt_iif;

		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
	} else {
		pktinfo->ipi_ifindex = 0;
		pktinfo->ipi_spec_dst.s_addr = 0;
	}
	skb_dst_drop(skb);
}

int ip_setsockopt(struct sock *sk, int level,
		  int optname, char __user *optval, unsigned int optlen)
{
	int err;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
	    optname < BPFILTER_IPT_SET_MAX)
		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
	    optname != IP_IPSEC_POLICY &&
	    optname != IP_XFRM_POLICY &&
	    !ip_mroute_opt(optname))
		err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
#endif
	return err;
}
EXPORT_SYMBOL(ip_setsockopt);

#ifdef CONFIG_COMPAT
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, unsigned int optlen)
{
	int err;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	switch (optname) {
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	{
		struct compat_group_req __user *gr32 = (void __user *)optval;
		struct group_req greq;
		struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
		struct ip_mreqn mreq;

		if (optlen < sizeof(struct compat_group_req))
			return -EINVAL;

		if (get_user(greq.gr_interface, &gr32->gr_interface) ||
		    copy_from_user(&greq.gr_group, &gr32->gr_group,
				   sizeof(greq.gr_group)))
			return -EFAULT;

		if (psin->sin_family != AF_INET)
			return -EINVAL;

		memset(&mreq, 0, sizeof(mreq));
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_ifindex = greq.gr_interface;

		rtnl_lock();
		lock_sock(sk);
		if (optname == MCAST_JOIN_GROUP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		release_sock(sk);
		rtnl_unlock();
		return err;
	}
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	{
		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
		struct group_source_req greqs;

		if (optlen != sizeof(struct compat_group_source_req))
			return -EINVAL;

		if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
		    copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
				   sizeof(greqs.gsr_group)) ||
		    copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
				   sizeof(greqs.gsr_source)))
			return -EFAULT;

		rtnl_lock();
		lock_sock(sk);
		err = do_mcast_group_source(sk, optname, &greqs);
		release_sock(sk);
		rtnl_unlock();
		return err;
	}
	case MCAST_MSFILTER:
	{
		const int size0 = offsetof(struct compat_group_filter, gf_slist);
		struct compat_group_filter *gf32;
		unsigned int n;
		void *p;

		if (optlen < size0)
			return -EINVAL;
		if (optlen > sysctl_optmem_max - 4)
			return -ENOBUFS;

		p = kmalloc(optlen + 4, GFP_KERNEL);
		if (!p)
			return -ENOMEM;
		gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
		if (copy_from_user(gf32, optval, optlen)) {
			err = -EFAULT;
			goto mc_msf_out;
		}

		n = gf32->gf_numsrc;
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (n >= 0x1ffffff) {
			err = -ENOBUFS;
			goto mc_msf_out;
		}
		if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
			err = -EINVAL;
			goto mc_msf_out;
		}

		rtnl_lock();
		lock_sock(sk);
		/* numsrc >= (4G-140)/128 overflow in 32 bits */
		if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
			err = -ENOBUFS;
		else
			err = set_mcast_msfilter(sk, gf32->gf_interface,
						 n, gf32->gf_fmode,
						 &gf32->gf_group, gf32->gf_slist);
		release_sock(sk);
		rtnl_unlock();
mc_msf_out:
		kfree(p);
		return err;
	}
	}

	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
	    optname != IP_IPSEC_POLICY &&
	    optname != IP_XFRM_POLICY &&
	    !ip_mroute_opt(optname))
		err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
					   optlen);
#endif
	return err;
}
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif

/*
 *	Get the options. Note for future reference. The GET of IP options gets
 *	the _received_ ones. The set sets the _sent_ ones.
 */

static bool getsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_MSFILTER:
	case MCAST_MSFILTER:
		return true;
	}
	return false;
}

static int do_ip_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *optlen, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	bool needs_rtnl = getsockopt_needs_rtnl(optname);
	int val, err = 0;
	int len;

	if (level != SOL_IP)
		return -EOPNOTSUPP;

	if (ip_mroute_opt(optname))
		return ip_mroute_getsockopt(sk, optname, optval, optlen);

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		unsigned char optbuf[sizeof(struct ip_options)+40];
		struct ip_options *opt = (struct ip_options *)optbuf;
		struct ip_options_rcu *inet_opt;

		inet_opt = rcu_dereference_protected(inet->inet_opt,
						     lockdep_sock_is_held(sk));
		opt->optlen = 0;
		if (inet_opt)
			memcpy(optbuf, &inet_opt->opt,
			       sizeof(struct ip_options) +
			       inet_opt->opt.optlen);
		release_sock(sk);

		if (opt->optlen == 0)
			return put_user(0, optlen);

		ip_options_undo(opt);

		len = min_t(unsigned int, len, opt->optlen);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, opt->__data, len))
			return -EFAULT;
		return 0;
	}
	case IP_PKTINFO:
		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
		break;
	case IP_RECVTTL:
		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
		break;
	case IP_RECVTOS:
		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
		break;
	case IP_RECVOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
		break;
	case IP_RETOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
		break;
	case IP_PASSSEC:
		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
		break;
	case IP_RECVORIGDSTADDR:
		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
		break;
	case IP_CHECKSUM:
		val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
		break;
	case IP_RECVFRAGSIZE:
		val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
		break;
	case IP_TOS:
		val = inet->tos;
		break;
	case IP_TTL:
	{
		struct net *net = sock_net(sk);
		val = (inet->uc_ttl == -1 ?
		       net->ipv4.sysctl_ip_default_ttl :
		       inet->uc_ttl);
		break;
	}
	case IP_HDRINCL:
		val = inet->hdrincl;
		break;
	case IP_NODEFRAG:
		val = inet->nodefrag;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		val = inet->bind_address_no_port;
		break;
	case IP_MTU_DISCOVER:
		val = inet->pmtudisc;
		break;
	case IP_MTU:
	{
		struct dst_entry *dst;
		val = 0;
		dst = sk_dst_get(sk);
		if (dst) {
			val = dst_mtu(dst);
			dst_release(dst);
		}
		if (!val) {
			release_sock(sk);
			return -ENOTCONN;
		}
		break;
	}
	case IP_RECVERR:
		val = inet->recverr;
		break;
	case IP_MULTICAST_TTL:
		val = inet->mc_ttl;
		break;
	case IP_MULTICAST_LOOP:
		val = inet->mc_loop;
		break;
	case IP_UNICAST_IF:
		val = (__force int)htonl((__u32) inet->uc_index);
		break;
	case IP_MULTICAST_IF:
	{
		struct in_addr addr;
		len = min_t(unsigned int, len, sizeof(struct in_addr));
		addr.s_addr = inet->mc_addr;
		release_sock(sk);

		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &addr, len))
			return -EFAULT;
		return 0;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter msf;

		if (len < IP_MSFILTER_SIZE(0)) {
			err = -EINVAL;
			goto out;
		}
		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
			err = -EFAULT;
			goto out;
		}
		err = ip_mc_msfget(sk, &msf,
				   (struct ip_msfilter __user *)optval, optlen);
		goto out;
	}
	case MCAST_MSFILTER:
	{
		struct group_filter __user *p = (void __user *)optval;
		struct group_filter gsf;
		const int size0 = offsetof(struct group_filter, gf_slist);
		int num;

		if (len < size0) {
			err = -EINVAL;
			goto out;
		}
		if (copy_from_user(&gsf, p, size0)) {
			err = -EFAULT;
			goto out;
		}
		num = gsf.gf_numsrc;
		err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
		if (err)
			goto out;
		if (gsf.gf_numsrc < num)
			num = gsf.gf_numsrc;
		if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
		    copy_to_user(p, &gsf, size0))
			err = -EFAULT;
		goto out;
	}
	case IP_MULTICAST_ALL:
		val = inet->mc_all;
		break;
	case IP_PKTOPTIONS:
	{
		struct msghdr msg;

		release_sock(sk);

		if (sk->sk_type != SOCK_STREAM)
			return -ENOPROTOOPT;

		msg.msg_control_is_user = true;
		msg.msg_control_user = optval;
		msg.msg_controllen = len;
		msg.msg_flags = flags;

		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
			struct in_pktinfo info;

			info.ipi_addr.s_addr = inet->inet_rcv_saddr;
			info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
			info.ipi_ifindex = inet->mc_index;
			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
		}
		if (inet->cmsg_flags & IP_CMSG_TTL) {
			int hlim = inet->mc_ttl;
			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
		}
		if (inet->cmsg_flags & IP_CMSG_TOS) {
			int tos = inet->rcv_tos;
			put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
		}
		len -= msg.msg_controllen;
		return put_user(len, optlen);
	}
	case IP_FREEBIND:
		val = inet->freebind;
		break;
	case IP_TRANSPARENT:
		val = inet->transparent;
		break;
	case IP_MINTTL:
		val = inet->min_ttl;
		break;
	default:
		release_sock(sk);
		return -ENOPROTOOPT;
	}
	release_sock(sk);

	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
		unsigned char ucval = (unsigned char)val;
		len = 1;
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &ucval, 1))
			return -EFAULT;
	} else {
		len = min_t(unsigned int, sizeof(int), len);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &val, len))
			return -EFAULT;
	}
	return 0;

out:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;
}

int ip_getsockopt(struct sock *sk, int level,
		  int optname, char __user *optval, int __user *optlen)
{
	int err;

	err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
	    optname < BPFILTER_IPT_GET_MAX)
		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
	    !ip_mroute_opt(optname)) {
		int len;

		if (get_user(len, optlen))
			return -EFAULT;

		err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
		if (err >= 0)
			err = put_user(len, optlen);
		return err;
	}
#endif
	return err;
}
EXPORT_SYMBOL(ip_getsockopt);

#ifdef CONFIG_COMPAT
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, int __user *optlen)
{
	int err;

	if (optname == MCAST_MSFILTER) {
		const int size0 = offsetof(struct compat_group_filter, gf_slist);
		struct compat_group_filter __user *p = (void __user *)optval;
		struct compat_group_filter gf32;
		struct group_filter gf;
		int ulen, err;
		int num;

		if (level != SOL_IP)
			return -EOPNOTSUPP;

		if (get_user(ulen, optlen))
			return -EFAULT;

		if (ulen < size0)
			return -EINVAL;

		if (copy_from_user(&gf32, p, size0))
			return -EFAULT;

		gf.gf_interface = gf32.gf_interface;
		gf.gf_fmode = gf32.gf_fmode;
		num = gf.gf_numsrc = gf32.gf_numsrc;
		gf.gf_group = gf32.gf_group;

		rtnl_lock();
		lock_sock(sk);
		err = ip_mc_gsfget(sk, &gf, p->gf_slist);
		release_sock(sk);
		rtnl_unlock();
		if (err)
			return err;
		if (gf.gf_numsrc < num)
			num = gf.gf_numsrc;
		ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
		if (put_user(ulen, optlen) ||
		    put_user(gf.gf_fmode, &p->gf_fmode) ||
		    put_user(gf.gf_numsrc, &p->gf_numsrc))
			return -EFAULT;
		return 0;
	}

	err = do_ip_getsockopt(sk, level, optname, optval, optlen,
			       MSG_CMSG_COMPAT);

#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
	    optname < BPFILTER_IPT_GET_MAX)
		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* we need to exclude all possible ENOPROTOOPTs except default case */
	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
	    !ip_mroute_opt(optname)) {
		int len;

		if (get_user(len, optlen))
			return -EFAULT;

		err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
		if (err >= 0)
			err = put_user(len, optlen);
		return err;
	}
#endif
	return err;
}
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif