1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * The IP to API glue. 8 * 9 * Authors: see ip.c 10 * 11 * Fixes: 12 * Many : Split from ip.c , see ip.c for history. 13 * Martin Mares : TOS setting fixed. 14 * Alan Cox : Fixed a couple of oopses in Martin's 15 * TOS tweaks. 16 * Mike McLagan : Routing by source 17 */ 18 19 #include <linux/module.h> 20 #include <linux/types.h> 21 #include <linux/mm.h> 22 #include <linux/skbuff.h> 23 #include <linux/ip.h> 24 #include <linux/icmp.h> 25 #include <linux/inetdevice.h> 26 #include <linux/netdevice.h> 27 #include <linux/slab.h> 28 #include <net/sock.h> 29 #include <net/ip.h> 30 #include <net/icmp.h> 31 #include <net/tcp_states.h> 32 #include <linux/udp.h> 33 #include <linux/igmp.h> 34 #include <linux/netfilter.h> 35 #include <linux/route.h> 36 #include <linux/mroute.h> 37 #include <net/inet_ecn.h> 38 #include <net/route.h> 39 #include <net/xfrm.h> 40 #include <net/compat.h> 41 #include <net/checksum.h> 42 #if IS_ENABLED(CONFIG_IPV6) 43 #include <net/transp_v6.h> 44 #endif 45 #include <net/ip_fib.h> 46 47 #include <linux/errqueue.h> 48 #include <linux/uaccess.h> 49 50 #include <linux/bpfilter.h> 51 52 /* 53 * SOL_IP control messages. 
 */

/* Emit an IP_PKTINFO control message: a copy of the in_pktinfo kept in
 * PKTINFO_SKB_CB(skb) whose ipi_addr is replaced by the daddr of the
 * IP header.
 */
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);

	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;

	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}

/* Emit the TTL of the IP header as an int-sized IP_TTL control message. */
static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
	int ttl = ip_hdr(skb)->ttl;
	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}

/* Emit the TOS field of the IP header as a one-byte IP_TOS control message. */
static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}

/* Emit the IPCB(skb)->opt.optlen bytes that follow the fixed IP header as an
 * IP_RECVOPTS control message. Nothing is emitted when optlen is zero.
 */
static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
{
	if (IPCB(skb)->opt.optlen == 0)
		return;

	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
		 ip_hdr(skb) + 1);
}


/* Emit an IP_RETOPTS control message built from the options of @skb.
 *
 * The options are produced by ip_options_echo() into an on-stack buffer and
 * then passed through ip_options_undo(). If ip_options_echo() fails,
 * MSG_CTRUNC is set on @msg and no control message is emitted.
 *
 * NOTE(review): the 40 extra bytes in optbuf presumably correspond to the
 * maximum IPv4 option length (the same 40 is used as a cap by the
 * IP_RETOPTS case of ip_cmsg_send()) - confirm.
 */
static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
				 struct sk_buff *skb)
{
	unsigned char optbuf[sizeof(struct ip_options) + 40];
	struct ip_options *opt = (struct ip_options *)optbuf;

	if (IPCB(skb)->opt.optlen == 0)
		return;

	if (ip_options_echo(net, opt, skb)) {
		msg->msg_flags |= MSG_CTRUNC;
		return;
	}
	ip_options_undo(opt);

	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
}

/* Emit IPCB(skb)->frag_max_size as an int-sized IP_RECVFRAGSIZE control
 * message. Nothing is emitted when the value is zero.
 */
static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
{
	int val;

	if (IPCB(skb)->frag_max_size == 0)
		return;

	val = IPCB(skb)->frag_max_size;
	put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
}

/* Emit skb->csum as an IP_CHECKSUM control message.
 *
 * Only done when skb->ip_summed is CHECKSUM_COMPLETE. For a non-zero
 * @offset, the checksum of the @offset bytes that start at
 * (transport offset + @tlen) is subtracted from the reported value first.
 */
static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
				  int tlen, int offset)
{
	__wsum csum = skb->csum;

	if (skb->ip_summed != CHECKSUM_COMPLETE)
		return;

	if (offset != 0) {
		int tend_off = skb_transport_offset(skb) + tlen;
		csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
	}

	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}

/* Emit the security context of @skb as an SCM_SECURITY control message.
 *
 * Best effort: if either the secid lookup or its conversion to a context
 * fails, the function returns without emitting anything. The context is
 * released once put_cmsg() has been called.
 */
static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
	char *secdata;
	u32 seclen, secid;
	int err;

	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
	if (err)
		return;

	err = security_secid_to_secctx(secid, &secdata, &seclen);
	if (err)
		return;

	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
	security_release_secctx(secdata, seclen);
}

/* Emit an IP_ORIGDSTADDR control message: a sockaddr_in made of the daddr of
 * the IP header and the second 16-bit word of the transport header.
 * Nothing is emitted when the first four transport bytes cannot be read.
 */
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
	__be16 _ports[2], *ports;
	struct sockaddr_in sin;

	/* All current transport protocols have the port numbers in the
	 * first four bytes of the transport header and this function is
	 * written with this assumption in mind.
	 */
	ports = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_ports), &_ports);
	if (!ports)
		return;

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
	sin.sin_port = ports[1];
	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
}

/* Emit every control message enabled in inet_sk(sk)->cmsg_flags for @skb.
 *
 * A local copy of the flags is consumed bit by bit so the function can
 * return as soon as no requested message is left. @tlen and @offset are
 * only forwarded to ip_cmsg_recv_checksum().
 */
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
			 struct sk_buff *skb, int tlen, int offset)
{
	struct inet_sock *inet = inet_sk(sk);
	unsigned int flags = inet->cmsg_flags;

	/* Ordered by supposed usage frequency */
	if (flags & IP_CMSG_PKTINFO) {
		ip_cmsg_recv_pktinfo(msg, skb);

		flags &= ~IP_CMSG_PKTINFO;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TTL) {
		ip_cmsg_recv_ttl(msg, skb);

		flags &= ~IP_CMSG_TTL;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_TOS) {
		ip_cmsg_recv_tos(msg, skb);

		flags &= ~IP_CMSG_TOS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RECVOPTS) {
		ip_cmsg_recv_opts(msg, skb);

		flags &= ~IP_CMSG_RECVOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_RETOPTS) {
		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);

		flags &= ~IP_CMSG_RETOPTS;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_PASSSEC) {
		ip_cmsg_recv_security(msg, skb);

		flags &= ~IP_CMSG_PASSSEC;
		if (!flags)
			return;
	}

	if (flags & IP_CMSG_ORIGDSTADDR) {
		ip_cmsg_recv_dstaddr(msg, skb);

		flags &= ~IP_CMSG_ORIGDSTADDR;
		if (!flags)
			return;
	}

	/* The last two are not followed by an early return: nothing else
	 * is checked after them.
	 */
	if (flags & IP_CMSG_CHECKSUM)
		ip_cmsg_recv_checksum(msg, skb, tlen, offset);

	if (flags & IP_CMSG_RECVFRAGSIZE)
		ip_cmsg_recv_fragsize(msg, skb);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);

/* Parse the control messages of @msg into @ipc.
 *
 * SOL_SOCKET messages are delegated to __sock_cmsg_send(); with @allow_ipv6
 * (and CONFIG_IPV6) an IPV6_PKTINFO carrying a v4-mapped address is accepted
 * as well. Messages of any other non-SOL_IP level are skipped.
 *
 * Returns 0 on success, -EINVAL for a malformed message, a bad value or an
 * unknown SOL_IP type, or the error of the helper that failed.
 */
int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
		 bool allow_ipv6)
{
	int err, val;
	struct cmsghdr *cmsg;
	struct net *net = sock_net(sk);

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
#if IS_ENABLED(CONFIG_IPV6)
		if (allow_ipv6 &&
		    cmsg->cmsg_level == SOL_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			struct in6_pktinfo *src_info;

			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
				return -EINVAL;
			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
				return -EINVAL;
			if (src_info->ipi6_ifindex)
				ipc->oif = src_info->ipi6_ifindex;
			/* last 32 bits of the v4-mapped address */
			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
			continue;
		}
#endif
		if (cmsg->cmsg_level == SOL_SOCKET) {
			err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
			if (err)
				return err;
			continue;
		}

		if (cmsg->cmsg_level != SOL_IP)
			continue;
		switch (cmsg->cmsg_type) {
		case IP_RETOPTS:
			/* err temporarily holds the payload length */
			err = cmsg->cmsg_len - sizeof(struct cmsghdr);

			/* Our caller is responsible for freeing ipc->opt */
			err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
					     err < 40 ? err : 40);
			if (err)
				return err;
			break;
		case IP_PKTINFO:
		{
			struct in_pktinfo *info;
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
				return -EINVAL;
			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
			if (info->ipi_ifindex)
				ipc->oif = info->ipi_ifindex;
			ipc->addr = info->ipi_spec_dst.s_addr;
			break;
		}
		case IP_TTL:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
				return -EINVAL;
			val = *(int *)CMSG_DATA(cmsg);
			if (val < 1 || val > 255)
				return -EINVAL;
			ipc->ttl = val;
			break;
		case IP_TOS:
			/* both an int-sized and a byte-sized value are accepted */
			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
				val = *(int *)CMSG_DATA(cmsg);
			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
				val = *(u8 *)CMSG_DATA(cmsg);
			else
				return -EINVAL;
			if (val < 0 || val > 255)
				return -EINVAL;
			ipc->tos = val;
			ipc->priority = rt_tos2priority(ipc->tos);
			break;

		default:
			return -EINVAL;
		}
	}
	return 0;
}

/* RCU callback queued by ip_ra_control(): drop the socket reference saved in
 * ra->saved_sk and free the chain entry.
 */
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);

	sock_put(ra->saved_sk);
	kfree(ra);
}

/* Add (@on != 0) or remove (@on == 0) @sk on the per-netns ra_chain list.
 *
 * The list is modified under net->ipv4.ra_mutex. On add, a reference on @sk
 * is taken and @destructor is stored in the new entry; on removal the stored
 * destructor (if any) is called and the entry is released after an RCU grace
 * period.
 *
 * Returns 0 on success, -EINVAL unless @sk is a SOCK_RAW socket whose
 * inet_num is not IPPROTO_RAW, -ENOMEM if the entry cannot be allocated,
 * -EADDRINUSE if @sk is already on the list when adding, and -ENOBUFS if @sk
 * is not on the list when removing.
 */
int ip_ra_control(struct sock *sk, unsigned char on,
		  void (*destructor)(struct sock *))
{
	struct ip_ra_chain *ra, *new_ra;
	struct ip_ra_chain __rcu **rap;
	struct net *net = sock_net(sk);

	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
		return -EINVAL;

	/* allocate before taking the mutex; only needed when adding */
	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
	if (on && !new_ra)
		return -ENOMEM;

	mutex_lock(&net->ipv4.ra_mutex);
	for (rap = &net->ipv4.ra_chain;
	     (ra = rcu_dereference_protected(*rap,
			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
	     rap = &ra->next) {
		if (ra->sk == sk) {
			if (on) {
				mutex_unlock(&net->ipv4.ra_mutex);
				kfree(new_ra);
				return -EADDRINUSE;
			}
			/* don't let ip_call_ra_chain() use sk again */
			ra->sk = NULL;
			RCU_INIT_POINTER(*rap, ra->next);
			mutex_unlock(&net->ipv4.ra_mutex);

			if (ra->destructor)
				ra->destructor(sk);
			/*
			 * Delay sock_put(sk) and kfree(ra) until after one
			 * RCU grace period. This guarantees that
			 * ip_call_ra_chain() doesn't need to mess with
			 * socket refcounts.
			 */
			ra->saved_sk = sk;
			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
			return 0;
		}
	}
	/* not found: only an add request has something left to do */
	if (!new_ra) {
		mutex_unlock(&net->ipv4.ra_mutex);
		return -ENOBUFS;
	}
	new_ra->sk = sk;
	new_ra->destructor = destructor;

	/* the loop ended with ra == NULL and rap at the tail link, so the
	 * new entry is appended; it is fully set up before being published
	 */
	RCU_INIT_POINTER(new_ra->next, ra);
	rcu_assign_pointer(*rap, new_ra);
	sock_hold(sk);
	mutex_unlock(&net->ipv4.ra_mutex);

	return 0;
}

/* Queue a clone of @skb on the error queue of @sk as an ICMP-origin error.
 *
 * The extended error in SKB_EXT_ERR() of the clone takes ee_type/ee_code
 * from the ICMP header, @err as ee_errno and @info as ee_info; addr_offset
 * is the offset (from the network header) of the daddr of the IP header
 * that follows the ICMP header. The clone is pulled up to @payload before
 * it is queued and is freed if the pull or the queueing fails.
 */
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
		   __be16 port, u32 info, u8 *payload)
{
	struct sock_exterr_skb *serr;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
	serr->ee.ee_type = icmp_hdr(skb)->type;
	serr->ee.ee_code = icmp_hdr(skb)->code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
				   skb_network_header(skb);
	serr->port = port;

	if (skb_pull(skb, payload - skb->data)) {
		if (inet_sk(sk)->recverr_rfc4884)
			ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);

		skb_reset_transport_header(skb);
		if (sock_queue_err_skb(sk, skb) == 0)
			return;
	}
	kfree_skb(skb);
}

/* Queue a locally generated error on the error queue of @sk.
 *
 * Does nothing unless inet->recverr is set. An skb holding only an IP header
 * is allocated so that @daddr can be reported through addr_offset; every
 * byte is then pulled, so the queued skb has no payload. The skb is freed if
 * queueing fails.
 */
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sock_exterr_skb *serr;
	struct iphdr *iph;
	struct sk_buff *skb;

	if (!inet->recverr)
		return;

	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
	if (!skb)
		return;

	skb_put(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->daddr = daddr;

	serr = SKB_EXT_ERR(skb);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = 0;
	serr->ee.ee_pad = 0;
	serr->ee.ee_info = info;
	serr->ee.ee_data = 0;
	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
	serr->port = port;

	/* pull everything between data and tail: the payload length becomes 0 */
	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
	skb_reset_transport_header(skb);

	if (sock_queue_err_skb(sk, skb))
		kfree_skb(skb);
}

/* For some errors we have valid addr_offset even with zero payload and
 * zero port. Also, addr_offset should be supported if port is set.
 */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
{
	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
}

/* IPv4 supports cmsg on all icmp errors and some timestamps
 *
 * Timestamp code paths do not initialize the fields expected by cmsg:
 * the PKTINFO fields in skb->cb[]. Fill those in here.
474 */ 475 static bool ipv4_datagram_support_cmsg(const struct sock *sk, 476 struct sk_buff *skb, 477 int ee_origin) 478 { 479 struct in_pktinfo *info; 480 481 if (ee_origin == SO_EE_ORIGIN_ICMP) 482 return true; 483 484 if (ee_origin == SO_EE_ORIGIN_LOCAL) 485 return false; 486 487 /* Support IP_PKTINFO on tstamp packets if requested, to correlate 488 * timestamp with egress dev. Not possible for packets without iif 489 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). 490 */ 491 info = PKTINFO_SKB_CB(skb); 492 if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || 493 !info->ipi_ifindex) 494 return false; 495 496 info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; 497 return true; 498 } 499 500 /* 501 * Handle MSG_ERRQUEUE 502 */ 503 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) 504 { 505 struct sock_exterr_skb *serr; 506 struct sk_buff *skb; 507 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); 508 struct { 509 struct sock_extended_err ee; 510 struct sockaddr_in offender; 511 } errhdr; 512 int err; 513 int copied; 514 515 err = -EAGAIN; 516 skb = sock_dequeue_err_skb(sk); 517 if (!skb) 518 goto out; 519 520 copied = skb->len; 521 if (copied > len) { 522 msg->msg_flags |= MSG_TRUNC; 523 copied = len; 524 } 525 err = skb_copy_datagram_msg(skb, 0, msg, copied); 526 if (unlikely(err)) { 527 kfree_skb(skb); 528 return err; 529 } 530 sock_recv_timestamp(msg, sk, skb); 531 532 serr = SKB_EXT_ERR(skb); 533 534 if (sin && ipv4_datagram_support_addr(serr)) { 535 sin->sin_family = AF_INET; 536 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + 537 serr->addr_offset); 538 sin->sin_port = serr->port; 539 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 540 *addr_len = sizeof(*sin); 541 } 542 543 memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); 544 sin = &errhdr.offender; 545 memset(sin, 0, sizeof(*sin)); 546 547 if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { 548 sin->sin_family = AF_INET; 549 
sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 550 if (inet_sk(sk)->cmsg_flags) 551 ip_cmsg_recv(msg, skb); 552 } 553 554 put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); 555 556 /* Now we could try to dump offended packet options */ 557 558 msg->msg_flags |= MSG_ERRQUEUE; 559 err = copied; 560 561 consume_skb(skb); 562 out: 563 return err; 564 } 565 566 static void __ip_sock_set_tos(struct sock *sk, int val) 567 { 568 if (sk->sk_type == SOCK_STREAM) { 569 val &= ~INET_ECN_MASK; 570 val |= inet_sk(sk)->tos & INET_ECN_MASK; 571 } 572 if (inet_sk(sk)->tos != val) { 573 inet_sk(sk)->tos = val; 574 sk->sk_priority = rt_tos2priority(val); 575 sk_dst_reset(sk); 576 } 577 } 578 579 void ip_sock_set_tos(struct sock *sk, int val) 580 { 581 lock_sock(sk); 582 __ip_sock_set_tos(sk, val); 583 release_sock(sk); 584 } 585 EXPORT_SYMBOL(ip_sock_set_tos); 586 587 void ip_sock_set_freebind(struct sock *sk) 588 { 589 lock_sock(sk); 590 inet_sk(sk)->freebind = true; 591 release_sock(sk); 592 } 593 EXPORT_SYMBOL(ip_sock_set_freebind); 594 595 void ip_sock_set_recverr(struct sock *sk) 596 { 597 lock_sock(sk); 598 inet_sk(sk)->recverr = true; 599 release_sock(sk); 600 } 601 EXPORT_SYMBOL(ip_sock_set_recverr); 602 603 int ip_sock_set_mtu_discover(struct sock *sk, int val) 604 { 605 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) 606 return -EINVAL; 607 lock_sock(sk); 608 inet_sk(sk)->pmtudisc = val; 609 release_sock(sk); 610 return 0; 611 } 612 EXPORT_SYMBOL(ip_sock_set_mtu_discover); 613 614 void ip_sock_set_pktinfo(struct sock *sk) 615 { 616 lock_sock(sk); 617 inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO; 618 release_sock(sk); 619 } 620 EXPORT_SYMBOL(ip_sock_set_pktinfo); 621 622 /* 623 * Socket option code for IP. This is the end of the line after any 624 * TCP,UDP etc options on an IP socket. 
 */

/* Return true for the multicast membership/filter options; for these,
 * do_ip_setsockopt() takes rtnl_lock() before locking the socket.
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_MSFILTER:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
		return true;
	}
	return false;
}

/* Convert a protocol-independent source filter (@group plus @numsrc entries
 * of @list) into a struct ip_msfilter and apply it with ip_mc_msfilter().
 *
 * Returns -ENOBUFS if the temporary filter cannot be allocated,
 * -EADDRNOTAVAIL if the group or any source is not AF_INET, otherwise the
 * result of ip_mc_msfilter(). The temporary filter is always freed.
 */
static int set_mcast_msfilter(struct sock *sk, int ifindex,
			      int numsrc, int fmode,
			      struct sockaddr_storage *group,
			      struct sockaddr_storage *list)
{
	int msize = IP_MSFILTER_SIZE(numsrc);
	struct ip_msfilter *msf;
	struct sockaddr_in *psin;
	int err, i;

	msf = kmalloc(msize, GFP_KERNEL);
	if (!msf)
		return -ENOBUFS;

	psin = (struct sockaddr_in *)group;
	if (psin->sin_family != AF_INET)
		goto Eaddrnotavail;
	msf->imsf_multiaddr = psin->sin_addr.s_addr;
	/* the interface is selected through ifindex, not by address */
	msf->imsf_interface = 0;
	msf->imsf_fmode = fmode;
	msf->imsf_numsrc = numsrc;
	for (i = 0; i < numsrc; ++i) {
		psin = (struct sockaddr_in *)&list[i];

		if (psin->sin_family != AF_INET)
			goto Eaddrnotavail;
		msf->imsf_slist[i] = psin->sin_addr.s_addr;
	}
	err = ip_mc_msfilter(sk, msf, ifindex);
	kfree(msf);
	return err;

Eaddrnotavail:
	kfree(msf);
	return -EADDRNOTAVAIL;
}

/* Copy a struct group_source_req from user space into @greqs, converting
 * from the compat layout when called from a compat syscall.
 *
 * Returns -EINVAL unless @optlen equals the size of the expected layout,
 * -EFAULT if the copy fails, 0 on success.
 */
static int copy_group_source_from_user(struct group_source_req *greqs,
		void __user *optval, int optlen)
{
	if (in_compat_syscall()) {
		struct compat_group_source_req gr32;

		if (optlen != sizeof(gr32))
			return -EINVAL;
		if (copy_from_user(&gr32, optval, sizeof(gr32)))
			return -EFAULT;
		greqs->gsr_interface = gr32.gsr_interface;
		greqs->gsr_group = gr32.gsr_group;
		greqs->gsr_source = gr32.gsr_source;
	} else {
		if (optlen != sizeof(*greqs))
			return -EINVAL;
		if (copy_from_user(greqs, optval, sizeof(*greqs)))
			return -EFAULT;
	}

	return 0;
}

/* Handle MCAST_{BLOCK,UNBLOCK}_SOURCE and MCAST_{JOIN,LEAVE}_SOURCE_GROUP.
 *
 * The request is translated into an ip_mreq_source plus an interface index
 * and passed to ip_mc_source(). For MCAST_JOIN_SOURCE_GROUP the group is
 * joined first; -EADDRINUSE from that join is tolerated.
 *
 * Returns -EADDRNOTAVAIL if group or source is not AF_INET, otherwise the
 * error of the failing helper or the result of ip_mc_source().
 */
static int do_mcast_group_source(struct sock *sk, int optname,
		void __user *optval, int optlen)
{
	struct group_source_req greqs;
	struct ip_mreq_source mreqs;
	struct sockaddr_in *psin;
	int omode, add, err;

	err = copy_group_source_from_user(&greqs, optval, optlen);
	if (err)
		return err;

	if (greqs.gsr_group.ss_family != AF_INET ||
	    greqs.gsr_source.ss_family != AF_INET)
		return -EADDRNOTAVAIL;

	psin = (struct sockaddr_in *)&greqs.gsr_group;
	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
	psin = (struct sockaddr_in *)&greqs.gsr_source;
	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
	mreqs.imr_interface = 0; /* use index for mc_source */

	if (optname == MCAST_BLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 1;
	} else if (optname == MCAST_UNBLOCK_SOURCE) {
		omode = MCAST_EXCLUDE;
		add = 0;
	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
		struct ip_mreqn mreq;

		psin = (struct sockaddr_in *)&greqs.gsr_group;
		mreq.imr_multiaddr = psin->sin_addr;
		mreq.imr_address.s_addr = 0;
		mreq.imr_ifindex = greqs.gsr_interface;
		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
		if (err && err != -EADDRINUSE)
			return err;
		/* use the ifindex left in mreq by the join */
		greqs.gsr_interface = mreq.imr_ifindex;
		omode = MCAST_INCLUDE;
		add = 1;
	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
		omode = MCAST_INCLUDE;
		add = 0;
	}
	return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
}

/* MCAST_MSFILTER, native layout: copy a struct group_filter from user
 * space, validate its source count and apply it via set_mcast_msfilter().
 *
 * Returns -EINVAL if @optlen is smaller than an empty filter or than the
 * size implied by gf_numsrc, -ENOBUFS if @optlen exceeds sysctl_optmem_max
 * or gf_numsrc exceeds the allowed limits, the memdup_user() error, or the
 * result of set_mcast_msfilter().
 */
static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
		int optlen)
{
	struct group_filter *gsf = NULL;
	int err;

	if (optlen < GROUP_FILTER_SIZE(0))
		return -EINVAL;
	if (optlen > sysctl_optmem_max)
		return -ENOBUFS;

	gsf = memdup_user(optval, optlen);
	if (IS_ERR(gsf))
		return PTR_ERR(gsf);

	/* numsrc >= (4G-140)/128 overflow in 32 bits */
	err = -ENOBUFS;
	if (gsf->gf_numsrc >= 0x1ffffff ||
	    gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
		goto out_free_gsf;

	err = -EINVAL;
	if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
		goto out_free_gsf;

	err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
				 gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
out_free_gsf:
	kfree(gsf);
	return err;
}

/* MCAST_MSFILTER, compat layout: same checks as ip_set_mcast_msfilter(), on
 * a struct compat_group_filter.
 *
 * The user data is copied 4 bytes into the allocation so that ->gf_group
 * and ->gf_slist end up aligned; the allocation itself (p) is what gets
 * freed.
 */
static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
		int optlen)
{
	const int size0 = offsetof(struct compat_group_filter, gf_slist);
	struct compat_group_filter *gf32;
	unsigned int n;
	void *p;
	int err;

	if (optlen < size0)
		return -EINVAL;
	if (optlen > sysctl_optmem_max - 4)
		return -ENOBUFS;

	p = kmalloc(optlen + 4, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */

	err = -EFAULT;
	if (copy_from_user(gf32, optval, optlen))
		goto out_free_gsf;

	/* numsrc >= (4G-140)/128 overflow in 32 bits */
	n = gf32->gf_numsrc;
	err = -ENOBUFS;
	if (n >= 0x1ffffff)
		goto out_free_gsf;

	err = -EINVAL;
	if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
		goto out_free_gsf;

	/* enforce the per-netns limit on the number of sources */
	err = -ENOBUFS;
	if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
		goto out_free_gsf;
	err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
				 &gf32->gf_group, gf32->gf_slist);
out_free_gsf:
	kfree(p);
	return err;
}

/* MCAST_JOIN_GROUP / MCAST_LEAVE_GROUP, native layout.
 *
 * Returns -EINVAL if @optlen is too small or the group is not AF_INET,
 * -EFAULT if the copy from user space fails, otherwise the result of
 * ip_mc_join_group() or ip_mc_leave_group().
 */
static int ip_mcast_join_leave(struct sock *sk, int optname,
		void __user *optval, int optlen)
{
	struct ip_mreqn mreq = { };
	struct sockaddr_in *psin;
	struct group_req greq;

	if (optlen < sizeof(struct group_req))
		return -EINVAL;
	if (copy_from_user(&greq, optval, sizeof(greq)))
		return -EFAULT;

	psin = (struct sockaddr_in *)&greq.gr_group;
	if (psin->sin_family != AF_INET)
		return -EINVAL;
	mreq.imr_multiaddr = psin->sin_addr;
	mreq.imr_ifindex = greq.gr_interface;
	if (optname == MCAST_JOIN_GROUP)
		return ip_mc_join_group(sk, &mreq);
	return ip_mc_leave_group(sk, &mreq);
}

/* MCAST_JOIN_GROUP / MCAST_LEAVE_GROUP, compat layout; same contract as
 * ip_mcast_join_leave() but reading a struct compat_group_req.
 */
static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
		void __user *optval, int optlen)
{
	struct compat_group_req greq;
	struct ip_mreqn mreq = { };
	struct sockaddr_in *psin;

	if (optlen < sizeof(struct compat_group_req))
		return -EINVAL;
	if (copy_from_user(&greq, optval, sizeof(greq)))
		return -EFAULT;

	psin = (struct sockaddr_in *)&greq.gr_group;
	if (psin->sin_family != AF_INET)
		return -EINVAL;
	mreq.imr_multiaddr = psin->sin_addr;
	mreq.imr_ifindex = greq.gr_interface;

	if (optname == MCAST_JOIN_GROUP)
		return ip_mc_join_group(sk, &mreq);
	return ip_mc_leave_group(sk, &mreq);
}

/* Worker for ip_setsockopt().
 *
 * Step 1: for the options listed in the first switch, the value is fetched
 * into val, as an int when @optlen allows it, else as a single byte.
 * Step 2: IP_ROUTER_ALERT and the multicast-routing options are dispatched
 * before any lock is taken.
 * Step 3: everything else runs under the socket lock, preceded by
 * rtnl_lock() when setsockopt_needs_rtnl() says so.
 *
 * Returns 0 on success or a negative errno; unknown options yield
 * -ENOPROTOOPT. @level is not examined here.
 */
static int do_ip_setsockopt(struct sock *sk, int level,
		int optname, char __user *optval, unsigned int optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	int val = 0, err;
	bool needs_rtnl = setsockopt_needs_rtnl(optname);

	switch (optname) {
	case IP_PKTINFO:
	case IP_RECVTTL:
	case IP_RECVOPTS:
	case IP_RECVTOS:
	case IP_RETOPTS:
	case IP_TOS:
	case IP_TTL:
	case IP_HDRINCL:
	case IP_MTU_DISCOVER:
	case IP_RECVERR:
	case IP_ROUTER_ALERT:
	case IP_FREEBIND:
	case IP_PASSSEC:
	case IP_TRANSPARENT:
	case IP_MINTTL:
	case IP_NODEFRAG:
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_UNICAST_IF:
	case IP_MULTICAST_TTL:
	case IP_MULTICAST_ALL:
	case IP_MULTICAST_LOOP:
	case IP_RECVORIGDSTADDR:
	case IP_CHECKSUM:
	case IP_RECVFRAGSIZE:
	case IP_RECVERR_RFC4884:
		if (optlen >= sizeof(int)) {
			if (get_user(val, (int __user *) optval))
				return -EFAULT;
		} else if (optlen >= sizeof(char)) {
			unsigned char ucval;

			if (get_user(ucval, (unsigned char __user *) optval))
				return -EFAULT;
			val = (int) ucval;
		}
	}

	/* If optlen==0, it is equivalent to val == 0 */

	if (optname == IP_ROUTER_ALERT)
		return ip_ra_control(sk, val ? 1 : 0, NULL);
	if (ip_mroute_opt(optname))
		return ip_mroute_setsockopt(sk, optname, optval, optlen);

	err = 0;
	/* rtnl first, socket lock second; released in reverse order below */
	if (needs_rtnl)
		rtnl_lock();
	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		struct ip_options_rcu *old, *opt = NULL;

		if (optlen > 40)
			goto e_inval;
		err = ip_options_get_from_user(sock_net(sk), &opt,
					       optval, optlen);
		if (err)
			break;
		old = rcu_dereference_protected(inet->inet_opt,
						lockdep_sock_is_held(sk));
		if (inet->is_icsk) {
			struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == PF_INET ||
			    (!((1 << sk->sk_state) &
			       (TCPF_LISTEN | TCPF_CLOSE)) &&
			     inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
				/* swap the old option length for the new
				 * one in the extension header length, then
				 * resync the MSS
				 */
				if (old)
					icsk->icsk_ext_hdr_len -= old->opt.optlen;
				if (opt)
					icsk->icsk_ext_hdr_len += opt->opt.optlen;
				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if IS_ENABLED(CONFIG_IPV6)
			}
#endif
		}
		rcu_assign_pointer(inet->inet_opt, opt);
		if (old)
			kfree_rcu(old, rcu);
		break;
	}
	case IP_PKTINFO:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PKTINFO;
		else
			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
		break;
	case IP_RECVTTL:
		if (val)
			inet->cmsg_flags |=  IP_CMSG_TTL;
		else
			inet->cmsg_flags &= ~IP_CMSG_TTL;
		break;
	case IP_RECVTOS:
		if (val)
			inet->cmsg_flags |=  IP_CMSG_TOS;
		else
			inet->cmsg_flags &= ~IP_CMSG_TOS;
		break;
	case IP_RECVOPTS:
		if (val)
			inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
		break;
	case IP_RETOPTS:
		if (val)
			inet->cmsg_flags |= IP_CMSG_RETOPTS;
		else
			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
		break;
	case IP_PASSSEC:
		if (val)
			inet->cmsg_flags |= IP_CMSG_PASSSEC;
		else
			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
		break;
	case IP_RECVORIGDSTADDR:
		if (val)
			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
		else
			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
		break;
	case IP_CHECKSUM:
		/* the inc/dec helpers are only called on an actual flag
		 * transition, so they stay balanced
		 */
		if (val) {
			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
				inet_inc_convert_csum(sk);
				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
			}
		} else {
			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
				inet_dec_convert_csum(sk);
				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
			}
		}
		break;
	case IP_RECVFRAGSIZE:
		if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
			goto e_inval;
		if (val)
			inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
		else
			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
		break;
	case IP_TOS:	/* This sets both TOS and Precedence */
		__ip_sock_set_tos(sk, val);
		break;
	case IP_TTL:
		if (optlen < 1)
			goto e_inval;
		/* -1 is accepted in addition to the 1..255 range */
		if (val != -1 && (val < 1 || val > 255))
			goto e_inval;
		inet->uc_ttl = val;
		break;
	case IP_HDRINCL:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->hdrincl = val ? 1 : 0;
		break;
	case IP_NODEFRAG:
		if (sk->sk_type != SOCK_RAW) {
			err = -ENOPROTOOPT;
			break;
		}
		inet->nodefrag = val ? 1 : 0;
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet->bind_address_no_port = val ? 1 : 0;
		break;
	case IP_MTU_DISCOVER:
		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
			goto e_inval;
		inet->pmtudisc = val;
		break;
	case IP_RECVERR:
		inet->recverr = !!val;
		/* turning the option off drops whatever is still queued */
		if (!val)
			skb_queue_purge(&sk->sk_error_queue);
		break;
	case IP_RECVERR_RFC4884:
		if (val < 0 || val > 1)
			goto e_inval;
		inet->recverr_rfc4884 = !!val;
		break;
	case IP_MULTICAST_TTL:
		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		if (optlen < 1)
			goto e_inval;
		/* -1 is mapped to a TTL of 1 */
		if (val == -1)
			val = 1;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->mc_ttl = val;
		break;
	case IP_MULTICAST_LOOP:
		if (optlen < 1)
			goto e_inval;
		inet->mc_loop = !!val;
		break;
	case IP_UNICAST_IF:
	{
		struct net_device *dev = NULL;
		int ifindex;
		int midx;

		if (optlen != sizeof(int))
			goto e_inval;

		/* the index is passed in network byte order */
		ifindex = (__force int)ntohl((__force __be32)val);
		if (ifindex == 0) {
			inet->uc_index = 0;
			err = 0;
			break;
		}

		dev = dev_get_by_index(sock_net(sk), ifindex);
		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);
		dev_put(dev);

		/* on a bound socket, the device's l3mdev master must be the
		 * bound device
		 */
		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->uc_index = ifindex;
		err = 0;
		break;
	}
	case IP_MULTICAST_IF:
	{
		struct ip_mreqn mreq;
		struct net_device *dev = NULL;
		int midx;

		if (sk->sk_type == SOCK_STREAM)
			goto e_inval;
		/*
		 *	Check the arguments are allowable
		 */

		if (optlen < sizeof(struct in_addr))
			goto e_inval;

		/* three layouts are accepted, selected by optlen:
		 * struct ip_mreqn, struct ip_mreq, or a bare struct in_addr
		 */
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			memset(&mreq, 0, sizeof(mreq));
			if (optlen >= sizeof(struct ip_mreq)) {
				if (copy_from_user(&mreq, optval,
						   sizeof(struct ip_mreq)))
					break;
			} else if (optlen >= sizeof(struct in_addr)) {
				if (copy_from_user(&mreq.imr_address, optval,
						   sizeof(struct in_addr)))
					break;
			}
		}

		if (!mreq.imr_ifindex) {
			/* no index and INADDR_ANY: reset the selection */
			if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
				inet->mc_index = 0;
				inet->mc_addr  = 0;
				err = 0;
				break;
			}
			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
			if (dev)
				mreq.imr_ifindex = dev->ifindex;
		} else
			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);


		err = -EADDRNOTAVAIL;
		if (!dev)
			break;

		midx = l3mdev_master_ifindex(dev);

		dev_put(dev);

		err = -EINVAL;
		if (sk->sk_bound_dev_if &&
		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
		    (!midx || midx != sk->sk_bound_dev_if))
			break;

		inet->mc_index = mreq.imr_ifindex;
		inet->mc_addr  = mreq.imr_address.s_addr;
		err = 0;
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	{
		struct ip_mreqn mreq;

		err = -EPROTO;
		if (inet_sk(sk)->is_icsk)
			break;

		if (optlen < sizeof(struct ip_mreq))
			goto e_inval;
		err = -EFAULT;
		if (optlen >= sizeof(struct ip_mreqn)) {
			if (copy_from_user(&mreq, optval, sizeof(mreq)))
				break;
		} else {
			/* shorter struct ip_mreq: the ifindex stays zero */
			memset(&mreq, 0, sizeof(mreq));
			if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
				break;
		}

		if (optname == IP_ADD_MEMBERSHIP)
			err = ip_mc_join_group(sk, &mreq);
		else
			err = ip_mc_leave_group(sk, &mreq);
		break;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter *msf;

		if (optlen < IP_MSFILTER_SIZE(0))
			goto e_inval;
		if (optlen > sysctl_optmem_max) {
			err = -ENOBUFS;
			break;
		}
		msf = memdup_user(optval, optlen);
		if (IS_ERR(msf)) {
			err = PTR_ERR(msf);
			break;
		}
		/* numsrc >= (1G-4) overflow in 32 bits */
		if (msf->imsf_numsrc >= 0x3ffffffcU ||
		    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
			kfree(msf);
			err = -ENOBUFS;
			break;
		}
		/* the declared source count must fit in what was copied */
		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
			kfree(msf);
			err = -EINVAL;
			break;
		}
		err = ip_mc_msfilter(sk, msf, 0);
		kfree(msf);
		break;
	}
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	{
		struct ip_mreq_source mreqs;
		int omode, add;

		if (optlen != sizeof(struct ip_mreq_source))
			goto e_inval;
		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
			err = -EFAULT;
			break;
		}
		if (optname == IP_BLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 1;
		} else if (optname == IP_UNBLOCK_SOURCE) {
			omode = MCAST_EXCLUDE;
			add = 0;
		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
			struct ip_mreqn mreq;

			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
			mreq.imr_address.s_addr = mreqs.imr_interface;
			mreq.imr_ifindex = 0;
			/* join first; an existing membership is not an error */
			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
			if (err && err != -EADDRINUSE)
				break;
			omode = MCAST_INCLUDE;
			add = 1;
		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
			omode = MCAST_INCLUDE;
			add = 0;
		}
		err = ip_mc_source(add, omode, sk, &mreqs, 0);
		break;
	}
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
		if (in_compat_syscall())
			err = compat_ip_mcast_join_leave(sk, optname, optval,
							 optlen);
		else
			err = ip_mcast_join_leave(sk, optname, optval, optlen);
		break;
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		err = do_mcast_group_source(sk, optname, optval, optlen);
		break;
	case MCAST_MSFILTER:
		if (in_compat_syscall())
			err = compat_ip_set_mcast_msfilter(sk, optval, optlen);
		else
			err = ip_set_mcast_msfilter(sk, optval, optlen);
		break;
	case IP_MULTICAST_ALL:
		if (optlen < 1)
			goto e_inval;
		if (val != 0 && val != 1)
			goto e_inval;
		inet->mc_all = val;
		break;

	case IP_FREEBIND:
		if (optlen < 1)
			goto e_inval;
		inet->freebind = !!val;
		break;

	case IP_IPSEC_POLICY:
	case IP_XFRM_POLICY:
		err = -EPERM;
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			break;
		err = xfrm_user_policy(sk, optname, USER_SOCKPTR(optval),
				       optlen);
		break;

	case IP_TRANSPARENT:
		/* enabling needs CAP_NET_RAW or CAP_NET_ADMIN; clearing the
		 * flag is not capability checked
		 */
		if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			err = -EPERM;
			break;
		}
		if (optlen < 1)
			goto e_inval;
		inet->transparent = !!val;
		break;

	case IP_MINTTL:
		if (optlen < 1)
			goto e_inval;
		if (val < 0 || val > 255)
			goto e_inval;
		inet->min_ttl = val;
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return err;

	/* common -EINVAL exit for paths that already hold the locks */
e_inval:
	release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();
	return -EINVAL;
}

/**
 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
 * @sk: socket
 * @skb: buffer
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
 * This way, receiver doesn't make cache line misses to read rtable.
1372 */ 1373 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) 1374 { 1375 struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); 1376 bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || 1377 ipv6_sk_rxinfo(sk); 1378 1379 if (prepare && skb_rtable(skb)) { 1380 /* skb->cb is overloaded: prior to this point it is IP{6}CB 1381 * which has interface index (iif) as the first member of the 1382 * underlying inet{6}_skb_parm struct. This code then overlays 1383 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first 1384 * element so the iif is picked up from the prior IPCB. If iif 1385 * is the loopback interface, then return the sending interface 1386 * (e.g., process binds socket to eth0 for Tx which is 1387 * redirected to loopback in the rtable/dst). 1388 */ 1389 struct rtable *rt = skb_rtable(skb); 1390 bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); 1391 1392 if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) 1393 pktinfo->ipi_ifindex = inet_iif(skb); 1394 else if (l3slave && rt && rt->rt_iif) 1395 pktinfo->ipi_ifindex = rt->rt_iif; 1396 1397 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); 1398 } else { 1399 pktinfo->ipi_ifindex = 0; 1400 pktinfo->ipi_spec_dst.s_addr = 0; 1401 } 1402 skb_dst_drop(skb); 1403 } 1404 1405 int ip_setsockopt(struct sock *sk, int level, 1406 int optname, char __user *optval, unsigned int optlen) 1407 { 1408 int err; 1409 1410 if (level != SOL_IP) 1411 return -ENOPROTOOPT; 1412 1413 err = do_ip_setsockopt(sk, level, optname, optval, optlen); 1414 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1415 if (optname >= BPFILTER_IPT_SO_SET_REPLACE && 1416 optname < BPFILTER_IPT_SET_MAX) 1417 err = bpfilter_ip_set_sockopt(sk, optname, USER_SOCKPTR(optval), 1418 optlen); 1419 #endif 1420 #ifdef CONFIG_NETFILTER 1421 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1422 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1423 optname != IP_IPSEC_POLICY && 1424 optname != IP_XFRM_POLICY && 1425 
!ip_mroute_opt(optname)) 1426 err = nf_setsockopt(sk, PF_INET, optname, USER_SOCKPTR(optval), 1427 optlen); 1428 #endif 1429 return err; 1430 } 1431 EXPORT_SYMBOL(ip_setsockopt); 1432 1433 /* 1434 * Get the options. Note for future reference. The GET of IP options gets 1435 * the _received_ ones. The set sets the _sent_ ones. 1436 */ 1437 1438 static bool getsockopt_needs_rtnl(int optname) 1439 { 1440 switch (optname) { 1441 case IP_MSFILTER: 1442 case MCAST_MSFILTER: 1443 return true; 1444 } 1445 return false; 1446 } 1447 1448 static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, 1449 int __user *optlen, int len) 1450 { 1451 const int size0 = offsetof(struct group_filter, gf_slist); 1452 struct group_filter __user *p = optval; 1453 struct group_filter gsf; 1454 int num; 1455 int err; 1456 1457 if (len < size0) 1458 return -EINVAL; 1459 if (copy_from_user(&gsf, p, size0)) 1460 return -EFAULT; 1461 1462 num = gsf.gf_numsrc; 1463 err = ip_mc_gsfget(sk, &gsf, p->gf_slist); 1464 if (err) 1465 return err; 1466 if (gsf.gf_numsrc < num) 1467 num = gsf.gf_numsrc; 1468 if (put_user(GROUP_FILTER_SIZE(num), optlen) || 1469 copy_to_user(p, &gsf, size0)) 1470 return -EFAULT; 1471 return 0; 1472 } 1473 1474 static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, 1475 int __user *optlen, int len) 1476 { 1477 const int size0 = offsetof(struct compat_group_filter, gf_slist); 1478 struct compat_group_filter __user *p = optval; 1479 struct compat_group_filter gf32; 1480 struct group_filter gf; 1481 int num; 1482 int err; 1483 1484 if (len < size0) 1485 return -EINVAL; 1486 if (copy_from_user(&gf32, p, size0)) 1487 return -EFAULT; 1488 1489 gf.gf_interface = gf32.gf_interface; 1490 gf.gf_fmode = gf32.gf_fmode; 1491 num = gf.gf_numsrc = gf32.gf_numsrc; 1492 gf.gf_group = gf32.gf_group; 1493 1494 err = ip_mc_gsfget(sk, &gf, p->gf_slist); 1495 if (err) 1496 return err; 1497 if (gf.gf_numsrc < num) 1498 num = gf.gf_numsrc; 1499 len = 
GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); 1500 if (put_user(len, optlen) || 1501 put_user(gf.gf_fmode, &p->gf_fmode) || 1502 put_user(gf.gf_numsrc, &p->gf_numsrc)) 1503 return -EFAULT; 1504 return 0; 1505 } 1506 1507 static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1508 char __user *optval, int __user *optlen) 1509 { 1510 struct inet_sock *inet = inet_sk(sk); 1511 bool needs_rtnl = getsockopt_needs_rtnl(optname); 1512 int val, err = 0; 1513 int len; 1514 1515 if (level != SOL_IP) 1516 return -EOPNOTSUPP; 1517 1518 if (ip_mroute_opt(optname)) 1519 return ip_mroute_getsockopt(sk, optname, optval, optlen); 1520 1521 if (get_user(len, optlen)) 1522 return -EFAULT; 1523 if (len < 0) 1524 return -EINVAL; 1525 1526 if (needs_rtnl) 1527 rtnl_lock(); 1528 lock_sock(sk); 1529 1530 switch (optname) { 1531 case IP_OPTIONS: 1532 { 1533 unsigned char optbuf[sizeof(struct ip_options)+40]; 1534 struct ip_options *opt = (struct ip_options *)optbuf; 1535 struct ip_options_rcu *inet_opt; 1536 1537 inet_opt = rcu_dereference_protected(inet->inet_opt, 1538 lockdep_sock_is_held(sk)); 1539 opt->optlen = 0; 1540 if (inet_opt) 1541 memcpy(optbuf, &inet_opt->opt, 1542 sizeof(struct ip_options) + 1543 inet_opt->opt.optlen); 1544 release_sock(sk); 1545 1546 if (opt->optlen == 0) 1547 return put_user(0, optlen); 1548 1549 ip_options_undo(opt); 1550 1551 len = min_t(unsigned int, len, opt->optlen); 1552 if (put_user(len, optlen)) 1553 return -EFAULT; 1554 if (copy_to_user(optval, opt->__data, len)) 1555 return -EFAULT; 1556 return 0; 1557 } 1558 case IP_PKTINFO: 1559 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; 1560 break; 1561 case IP_RECVTTL: 1562 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; 1563 break; 1564 case IP_RECVTOS: 1565 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; 1566 break; 1567 case IP_RECVOPTS: 1568 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; 1569 break; 1570 case IP_RETOPTS: 1571 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; 1572 
break; 1573 case IP_PASSSEC: 1574 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; 1575 break; 1576 case IP_RECVORIGDSTADDR: 1577 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; 1578 break; 1579 case IP_CHECKSUM: 1580 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; 1581 break; 1582 case IP_RECVFRAGSIZE: 1583 val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; 1584 break; 1585 case IP_TOS: 1586 val = inet->tos; 1587 break; 1588 case IP_TTL: 1589 { 1590 struct net *net = sock_net(sk); 1591 val = (inet->uc_ttl == -1 ? 1592 net->ipv4.sysctl_ip_default_ttl : 1593 inet->uc_ttl); 1594 break; 1595 } 1596 case IP_HDRINCL: 1597 val = inet->hdrincl; 1598 break; 1599 case IP_NODEFRAG: 1600 val = inet->nodefrag; 1601 break; 1602 case IP_BIND_ADDRESS_NO_PORT: 1603 val = inet->bind_address_no_port; 1604 break; 1605 case IP_MTU_DISCOVER: 1606 val = inet->pmtudisc; 1607 break; 1608 case IP_MTU: 1609 { 1610 struct dst_entry *dst; 1611 val = 0; 1612 dst = sk_dst_get(sk); 1613 if (dst) { 1614 val = dst_mtu(dst); 1615 dst_release(dst); 1616 } 1617 if (!val) { 1618 release_sock(sk); 1619 return -ENOTCONN; 1620 } 1621 break; 1622 } 1623 case IP_RECVERR: 1624 val = inet->recverr; 1625 break; 1626 case IP_RECVERR_RFC4884: 1627 val = inet->recverr_rfc4884; 1628 break; 1629 case IP_MULTICAST_TTL: 1630 val = inet->mc_ttl; 1631 break; 1632 case IP_MULTICAST_LOOP: 1633 val = inet->mc_loop; 1634 break; 1635 case IP_UNICAST_IF: 1636 val = (__force int)htonl((__u32) inet->uc_index); 1637 break; 1638 case IP_MULTICAST_IF: 1639 { 1640 struct in_addr addr; 1641 len = min_t(unsigned int, len, sizeof(struct in_addr)); 1642 addr.s_addr = inet->mc_addr; 1643 release_sock(sk); 1644 1645 if (put_user(len, optlen)) 1646 return -EFAULT; 1647 if (copy_to_user(optval, &addr, len)) 1648 return -EFAULT; 1649 return 0; 1650 } 1651 case IP_MSFILTER: 1652 { 1653 struct ip_msfilter msf; 1654 1655 if (len < IP_MSFILTER_SIZE(0)) { 1656 err = -EINVAL; 1657 goto out; 1658 } 1659 if (copy_from_user(&msf, optval, 
IP_MSFILTER_SIZE(0))) { 1660 err = -EFAULT; 1661 goto out; 1662 } 1663 err = ip_mc_msfget(sk, &msf, 1664 (struct ip_msfilter __user *)optval, optlen); 1665 goto out; 1666 } 1667 case MCAST_MSFILTER: 1668 if (in_compat_syscall()) 1669 err = compat_ip_get_mcast_msfilter(sk, optval, optlen, 1670 len); 1671 else 1672 err = ip_get_mcast_msfilter(sk, optval, optlen, len); 1673 goto out; 1674 case IP_MULTICAST_ALL: 1675 val = inet->mc_all; 1676 break; 1677 case IP_PKTOPTIONS: 1678 { 1679 struct msghdr msg; 1680 1681 release_sock(sk); 1682 1683 if (sk->sk_type != SOCK_STREAM) 1684 return -ENOPROTOOPT; 1685 1686 msg.msg_control_is_user = true; 1687 msg.msg_control_user = optval; 1688 msg.msg_controllen = len; 1689 msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0; 1690 1691 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1692 struct in_pktinfo info; 1693 1694 info.ipi_addr.s_addr = inet->inet_rcv_saddr; 1695 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; 1696 info.ipi_ifindex = inet->mc_index; 1697 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1698 } 1699 if (inet->cmsg_flags & IP_CMSG_TTL) { 1700 int hlim = inet->mc_ttl; 1701 put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); 1702 } 1703 if (inet->cmsg_flags & IP_CMSG_TOS) { 1704 int tos = inet->rcv_tos; 1705 put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); 1706 } 1707 len -= msg.msg_controllen; 1708 return put_user(len, optlen); 1709 } 1710 case IP_FREEBIND: 1711 val = inet->freebind; 1712 break; 1713 case IP_TRANSPARENT: 1714 val = inet->transparent; 1715 break; 1716 case IP_MINTTL: 1717 val = inet->min_ttl; 1718 break; 1719 default: 1720 release_sock(sk); 1721 return -ENOPROTOOPT; 1722 } 1723 release_sock(sk); 1724 1725 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1726 unsigned char ucval = (unsigned char)val; 1727 len = 1; 1728 if (put_user(len, optlen)) 1729 return -EFAULT; 1730 if (copy_to_user(optval, &ucval, 1)) 1731 return -EFAULT; 1732 } else { 1733 len = min_t(unsigned int, 
sizeof(int), len); 1734 if (put_user(len, optlen)) 1735 return -EFAULT; 1736 if (copy_to_user(optval, &val, len)) 1737 return -EFAULT; 1738 } 1739 return 0; 1740 1741 out: 1742 release_sock(sk); 1743 if (needs_rtnl) 1744 rtnl_unlock(); 1745 return err; 1746 } 1747 1748 int ip_getsockopt(struct sock *sk, int level, 1749 int optname, char __user *optval, int __user *optlen) 1750 { 1751 int err; 1752 1753 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1754 1755 #if IS_ENABLED(CONFIG_BPFILTER_UMH) 1756 if (optname >= BPFILTER_IPT_SO_GET_INFO && 1757 optname < BPFILTER_IPT_GET_MAX) 1758 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1759 #endif 1760 #ifdef CONFIG_NETFILTER 1761 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1762 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1763 !ip_mroute_opt(optname)) { 1764 int len; 1765 1766 if (get_user(len, optlen)) 1767 return -EFAULT; 1768 1769 err = nf_getsockopt(sk, PF_INET, optname, optval, &len); 1770 if (err >= 0) 1771 err = put_user(len, optlen); 1772 return err; 1773 } 1774 #endif 1775 return err; 1776 } 1777 EXPORT_SYMBOL(ip_getsockopt); 1778