/*
 * inet_diag.c	Module for monitoring INET transport protocol sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
#include <net/netlink.h>

#include <linux/inet.h>
#include <linux/stddef.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct inet_diag_handler **inet_diag_table;

struct inet_diag_entry {
	const __be32 *saddr;
	const __be32 *daddr;
	u16 sport;
	u16 dport;
	u16 family;
	u16 userlocks;
	u32 ifindex;
	u32 mark;
};

static DEFINE_MUTEX(inet_diag_table_mutex);

static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
	if (!inet_diag_table[proto])
		request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			       NETLINK_SOCK_DIAG, AF_INET, proto);

	mutex_lock(&inet_diag_table_mutex);
	if (!inet_diag_table[proto])
		return ERR_PTR(-ENOENT);

	return inet_diag_table[proto];
}

static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
	mutex_unlock(&inet_diag_table_mutex);
}

void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
	r->idiag_family = sk->sk_family;

	r->id.idiag_sport = htons(sk->sk_num);
	r->id.idiag_dport = sk->sk_dport;
	r->id.idiag_if = sk->sk_bound_dev_if;
	sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
	} else
#endif
	{
	memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
	memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

		r->id.idiag_src[0] = sk->sk_rcv_saddr;
		r->id.idiag_dst[0] = sk->sk_daddr;
	}
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);

static size_t inet_sk_attr_size(struct sock *sk,
				const struct inet_diag_req_v2 *req,
				bool net_admin)
{
	const struct inet_diag_handler *handler;
	size_t aux = 0;

	handler = inet_diag_table[req->sdiag_protocol];
	if (handler && handler->idiag_get_aux_size)
		aux = handler->idiag_get_aux_size(sk, net_admin);

	return	  nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
		+ nla_total_size(1) /* INET_DIAG_TOS */
		+ nla_total_size(1) /* INET_DIAG_TCLASS */
		+ nla_total_size(4) /* INET_DIAG_MARK */
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ aux
		+ 64;
}

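/* Fill the attributes that are common to all full sockets: shutdown state,
 * TOS/TCLASS, IPV6_V6ONLY, the netfilter mark (only for CAP_NET_ADMIN
 * requesters), and record the owning UID and inode in *r.  Returns 0 on
 * success and non-zero if the skb ran out of room.
 */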
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
			     struct inet_diag_msg *r, int ext,
			     struct user_namespace *user_ns,
			     bool net_admin)
{
	const struct inet_sock *inet = inet_sk(sk);

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		goto errout;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if (ext & (1 << (INET_DIAG_TOS - 1)))
		if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
			goto errout;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
			if (nla_put_u8(skb, INET_DIAG_TCLASS,
				       inet6_sk(sk)->tclass) < 0)
				goto errout;

		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			goto errout;
	}
#endif

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
		goto errout;

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	return 0;
errout:
	return 1;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);

int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
		      struct user_namespace *user_ns,
		      u32 portid, u32 seq, u16 nlmsg_flags,
		      const struct nlmsghdr *unlh,
		      bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	void *info = NULL;

	handler = inet_diag_table[req->sdiag_protocol];
	BUG_ON(!handler);

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = sk->sk_wmem_queued,
			.idiag_fmem = sk->sk_forward_alloc,
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

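	/* Timer reporting below follows the idiag_timer convention used by
	 * userspace tools such as ss: 1 = retransmit, 2 = keepalive,
	 * 3 = TIME_WAIT (set in inet_twsk_diag_fill()), 4 = zero window
	 * probe, 0 = no timer pending.  Callers that have no connection
	 * socket (icsk == NULL, e.g. the UDP diag module) have no timer
	 * state to report and hand off to the protocol handler directly.
	 */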
	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(sk->sk_timer.expires - jiffies);
	} else {
		r->idiag_timer = 0;
		r->idiag_expires = 0;
	}

	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
			goto errout;

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
		u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif

		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
			goto errout;
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

static int inet_csk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      const struct inet_diag_req_v2 *req,
			      struct user_namespace *user_ns,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh,
			      bool net_admin)
{
	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
				 portid, seq, nlmsg_flags, unlh, net_admin);
}

static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       u32 portid, u32 seq, u16 nlmsg_flags,
			       const struct nlmsghdr *unlh)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	tmo = tw->tw_timer.expires - jiffies;
	if (tmo < 0)
		tmo = 0;

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = tw->tw_substate;
	r->idiag_timer = 3;
	r->idiag_expires = jiffies_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	nlmsg_end(skb, nlh);
	return 0;
}

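/* Report a request socket, i.e. a connection that is still in the
 * three-way-handshake / accept-queue embryo stage.  It is presented to
 * userspace with state TCP_SYN_RECV and the SYN-ACK retransmit timer.
 */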
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark))
		return -EMSGSIZE;

	nlmsg_end(skb, nlh);
	return 0;
}

static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			const struct inet_diag_req_v2 *r,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, portid, seq,
					   nlmsg_flags, unlh);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, portid, seq,
					  nlmsg_flags, unlh, net_admin);

	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
				  nlmsg_flags, unlh, net_admin);
}

struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, NULL, 0,
					 req->id.idiag_dst[3],
					 req->id.idiag_dport,
					 req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

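/* Answer an exact (non-dump) request: look up the single socket described
 * by req->id, build one reply message and unicast it back to the
 * requesting netlink socket.
 */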
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct sk_buff *in_skb,
			    const struct nlmsghdr *nlh,
			    const struct inet_diag_req_v2 *req)
{
	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, req,
			   sk_user_ns(NETLINK_CB(in_skb).sk),
			   NETLINK_CB(in_skb).portid,
			   nlh->nlmsg_seq, 0, nlh, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
			       const struct nlmsghdr *nlh,
			       const struct inet_diag_req_v2 *req)
{
	const struct inet_diag_handler *handler;
	int err;

	handler = inet_diag_lock_handler(req->sdiag_protocol);
	if (IS_ERR(handler))
		err = PTR_ERR(handler);
	else if (cmd == SOCK_DIAG_BY_FAMILY)
		err = handler->dump_one(in_skb, nlh, req);
	else if (cmd == SOCK_DESTROY && handler->destroy)
		err = handler->destroy(in_skb, req);
	else
		err = -EOPNOTSUPP;
	inet_diag_unlock_handler(handler);

	return err;
}

static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int words = bits >> 5;

	bits &= 0x1f;

	if (words) {
		if (memcmp(a1, a2, words << 2))
			return 0;
	}
	if (bits) {
		__be32 w1, w2;
		__be32 mask;

		w1 = a1[words];
		w2 = a2[words];

		mask = htonl((0xffffffff) << (32 - bits));

		if ((w1 ^ w2) & mask)
			return 0;
	}

	return 1;
}

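/* Run the INET_DIAG_REQ_BYTECODE filter program against one socket entry.
 * The program is a sequence of variable-length inet_diag_bc_op structs;
 * each op evaluates a condition (port comparison, address/prefix match,
 * interface, mark, ...) and then advances by op->yes bytes if the
 * condition holds or by op->no bytes if it does not.  A socket is
 * accepted when execution walks exactly off the end of the program
 * (len == 0); jumping past the end rejects it.
 */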
static int inet_diag_bc_run(const struct nlattr *_bc,
			    const struct inet_diag_entry *entry)
{
	const void *bc = nla_data(_bc);
	int len = nla_len(_bc);

	while (len > 0) {
		int yes = 1;
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_NOP:
			break;
		case INET_DIAG_BC_JMP:
			yes = 0;
			break;
		case INET_DIAG_BC_S_GE:
			yes = entry->sport >= op[1].no;
			break;
		case INET_DIAG_BC_S_LE:
			yes = entry->sport <= op[1].no;
			break;
		case INET_DIAG_BC_D_GE:
			yes = entry->dport >= op[1].no;
			break;
		case INET_DIAG_BC_D_LE:
			yes = entry->dport <= op[1].no;
			break;
		case INET_DIAG_BC_AUTO:
			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
			break;
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND: {
			const struct inet_diag_hostcond *cond;
			const __be32 *addr;

			cond = (const struct inet_diag_hostcond *)(op + 1);
			if (cond->port != -1 &&
			    cond->port != (op->code == INET_DIAG_BC_S_COND ?
					   entry->sport : entry->dport)) {
				yes = 0;
				break;
			}

			if (op->code == INET_DIAG_BC_S_COND)
				addr = entry->saddr;
			else
				addr = entry->daddr;

			if (cond->family != AF_UNSPEC &&
			    cond->family != entry->family) {
				if (entry->family == AF_INET6 &&
				    cond->family == AF_INET) {
					if (addr[0] == 0 && addr[1] == 0 &&
					    addr[2] == htonl(0xffff) &&
					    bitstring_match(addr + 3,
							    cond->addr,
							    cond->prefix_len))
						break;
				}
				yes = 0;
				break;
			}

			if (cond->prefix_len == 0)
				break;
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
				break;
			yes = 0;
			break;
		}
		case INET_DIAG_BC_DEV_COND: {
			u32 ifindex;

			ifindex = *((const u32 *)(op + 1));
			if (ifindex != entry->ifindex)
				yes = 0;
			break;
		}
		case INET_DIAG_BC_MARK_COND: {
			struct inet_diag_markcond *cond;

			cond = (struct inet_diag_markcond *)(op + 1);
			if ((entry->mark & cond->mask) != cond->mark)
				yes = 0;
			break;
		}
		}

		if (yes) {
			len -= op->yes;
			bc += op->yes;
		} else {
			len -= op->no;
			bc += op->no;
		}
	}
	return len == 0;
}

/* This helper is available for all sockets (ESTABLISHED, TIMEWAIT, SYN_RECV)
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
			     const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
		entry->daddr = sk->sk_v6_daddr.s6_addr32;
	} else
#endif
	{
		entry->saddr = &sk->sk_rcv_saddr;
		entry->daddr = &sk->sk_daddr;
	}
}

int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.ifindex = sk->sk_bound_dev_if;
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
	if (sk_fullsock(sk))
		entry.mark = sk->sk_mark;
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else
		entry.mark = 0;

	return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);

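/* Check that a conditional jump whose target has @cc bytes remaining lands
 * on the start of an instruction: walk the program from the beginning
 * following each op's yes offset and see whether some op starts at exactly
 * that remaining length.
 */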
static int valid_cc(const void *bc, int len, int cc)
{
	while (len >= 0) {
		const struct inet_diag_bc_op *op = bc;

		if (cc > len)
			return 0;
		if (cc == len)
			return 1;
		if (op->yes < 4 || op->yes & 3)
			return 0;
		len -= op->yes;
		bc  += op->yes;
	}
	return 0;
}

/* data is u32 ifindex */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
			  int *min_len)
{
	/* Check ifindex space. */
	*min_len += sizeof(u32);
	if (len < *min_len)
		return false;

	return true;
}

/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	struct inet_diag_hostcond *cond;
	int addr_len;

	/* Check hostcond space. */
	*min_len += sizeof(struct inet_diag_hostcond);
	if (len < *min_len)
		return false;
	cond = (struct inet_diag_hostcond *)(op + 1);

	/* Check address family and address length. */
	switch (cond->family) {
	case AF_UNSPEC:
		addr_len = 0;
		break;
	case AF_INET:
		addr_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		addr_len = sizeof(struct in6_addr);
		break;
	default:
		return false;
	}
	*min_len += addr_len;
	if (len < *min_len)
		return false;

	/* Check prefix length (in bits) vs address length (in bytes). */
	if (cond->prefix_len > 8 * addr_len)
		return false;

	return true;
}

/* Validate a port comparison operator. */
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
				  int len, int *min_len)
{
	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
	*min_len += sizeof(struct inet_diag_bc_op);
	if (len < *min_len)
		return false;
	return true;
}

static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	*min_len += sizeof(struct inet_diag_markcond);
	return len >= *min_len;
}

static int inet_diag_bc_audit(const struct nlattr *attr,
			      const struct sk_buff *skb)
{
	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
	const void *bytecode, *bc;
	int bytecode_len, len;

	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
		return -EINVAL;

	bytecode = bc = nla_data(attr);
	len = bytecode_len = nla_len(attr);

	while (len > 0) {
		int min_len = sizeof(struct inet_diag_bc_op);
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
			if (!valid_hostcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_DEV_COND:
			if (!valid_devcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
			if (!valid_port_comparison(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_MARK_COND:
			if (!net_admin)
				return -EPERM;
			if (!valid_markcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_JMP:
		case INET_DIAG_BC_NOP:
			break;
		default:
			return -EINVAL;
		}

		if (op->code != INET_DIAG_BC_NOP) {
			if (op->no < min_len || op->no > len + 4 || op->no & 3)
				return -EINVAL;
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
		}

		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
			return -EINVAL;
		bc  += op->yes;
		len -= op->yes;
	}
	return len == 0 ? 0 : -EINVAL;
}

static int inet_csk_diag_dump(struct sock *sk,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      const struct inet_diag_req_v2 *r,
			      const struct nlattr *bc,
			      bool net_admin)
{
	if (!inet_diag_bc_sk(bc, sk))
		return 0;

	return inet_csk_diag_fill(sk, skb, r,
				  sk_user_ns(NETLINK_CB(cb->skb).sk),
				  NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
				  net_admin);
}

static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

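/* Walk the listening hash and then the established hash of @hashinfo and
 * emit one message per matching socket.  The position is saved in the
 * netlink callback so the dump can resume after a partially filled skb:
 * cb->args[0] selects the phase (0 = listening, 1 = established),
 * cb->args[1] the hash bucket and cb->args[2] the index within the bucket.
 * Established sockets are collected in batches of SKARR_SZ while the
 * bucket lock is held and filled in afterwards, so sk_diag_fill() never
 * runs under that lock.
 */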
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
			 struct netlink_callback *cb,
			 const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	int i, num, s_i, s_num;
	struct sock *sk;

	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
			struct inet_listen_hashbucket *ilb;

			num = 0;
			ilb = &hashinfo->listening_hash[i];
			spin_lock(&ilb->lock);
			sk_for_each(sk, &ilb->head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (inet_csk_diag_dump(sk, skb, cb, r,
						       bc, net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

#define SKARR_SZ 16
	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				inet_twsk(sk)->tw_substate : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

			sock_hold(sk);
			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);
		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, r,
					   sk_user_ns(NETLINK_CB(cb->skb).sk),
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   cb->nlh, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;
		cond_resched();
		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);

static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *r,
			    struct nlattr *bc)
{
	const struct inet_diag_handler *handler;
	int err = 0;

	handler = inet_diag_lock_handler(r->sdiag_protocol);
	if (!IS_ERR(handler))
		handler->dump(skb, cb, r, bc);
	else
		err = PTR_ERR(handler);
	inet_diag_unlock_handler(handler);

	return err ? : skb->len;
}

static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int hdrlen = sizeof(struct inet_diag_req_v2);
	struct nlattr *bc = NULL;

	if (nlmsg_attrlen(cb->nlh, hdrlen))
		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);

	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
}

static int inet_diag_type2proto(int type)
{
	switch (type) {
	case TCPDIAG_GETSOCK:
		return IPPROTO_TCP;
	case DCCPDIAG_GETSOCK:
		return IPPROTO_DCCP;
	default:
		return 0;
	}
}

static int inet_diag_dump_compat(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct inet_diag_req *rc = nlmsg_data(cb->nlh);
	int hdrlen = sizeof(struct inet_diag_req);
	struct inet_diag_req_v2 req;
	struct nlattr *bc = NULL;

	req.sdiag_family = AF_UNSPEC; /* compatibility */
	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	if (nlmsg_attrlen(cb->nlh, hdrlen))
		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);

	return __inet_diag_dump(skb, cb, &req, bc);
}

static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
				      const struct nlmsghdr *nlh)
{
	struct inet_diag_req *rc = nlmsg_data(nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = rc->idiag_family;
	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
}

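/* Entry point for the legacy TCPDIAG_GETSOCK/DCCPDIAG_GETSOCK interface,
 * which predates SOCK_DIAG_BY_FAMILY.  The old inet_diag_req header is
 * translated into an inet_diag_req_v2 before being handled.
 */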
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int hdrlen = sizeof(struct inet_diag_req);
	struct net *net = sock_net(skb->sk);

	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
	    nlmsg_len(nlh) < hdrlen)
		return -EINVAL;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		if (nlmsg_attrlen(nlh, hdrlen)) {
			struct nlattr *attr;
			int err;

			attr = nlmsg_find_attr(nlh, hdrlen,
					       INET_DIAG_REQ_BYTECODE);
			err = inet_diag_bc_audit(attr, skb);
			if (err)
				return err;
		}
		{
			struct netlink_dump_control c = {
				.dump = inet_diag_dump_compat,
			};
			return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
		}
	}

	return inet_diag_get_exact_compat(skb, nlh);
}

static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
	int hdrlen = sizeof(struct inet_diag_req_v2);
	struct net *net = sock_net(skb->sk);

	if (nlmsg_len(h) < hdrlen)
		return -EINVAL;

	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
	    h->nlmsg_flags & NLM_F_DUMP) {
		if (nlmsg_attrlen(h, hdrlen)) {
			struct nlattr *attr;
			int err;

			attr = nlmsg_find_attr(h, hdrlen,
					       INET_DIAG_REQ_BYTECODE);
			err = inet_diag_bc_audit(attr, skb);
			if (err)
				return err;
		}
		{
			struct netlink_dump_control c = {
				.dump = inet_diag_dump,
			};
			return netlink_dump_start(net->diag_nlsk, skb, h, &c);
		}
	}

	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
}

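/* Build a minimal inet_diag_msg (plus INET_DIAG_PROTOCOL and, when the
 * protocol handler provides one, an INET_DIAG_INFO block) for an arbitrary
 * socket.  Exposed to the generic sock_diag code through the .get_info
 * callbacks registered below.
 */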
static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
	const struct inet_diag_handler *handler;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	struct inet_diag_msg *r;
	void *info = NULL;
	int err = 0;

	nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
	if (!nlh)
		return -ENOMEM;

	r = nlmsg_data(nlh);
	memset(r, 0, sizeof(*r));
	inet_diag_msg_common_fill(r, sk);
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
		r->id.idiag_sport = inet_sk(sk)->inet_sport;
	r->idiag_state = sk->sk_state;

	if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
		nlmsg_cancel(skb, nlh);
		return err;
	}

	handler = inet_diag_lock_handler(sk->sk_protocol);
	if (IS_ERR(handler)) {
		inet_diag_unlock_handler(handler);
		nlmsg_cancel(skb, nlh);
		return PTR_ERR(handler);
	}

	attr = handler->idiag_info_size
		? nla_reserve_64bit(skb, INET_DIAG_INFO,
				    handler->idiag_info_size,
				    INET_DIAG_PAD)
		: NULL;
	if (attr)
		info = nla_data(attr);

	handler->idiag_get_info(sk, r, info);
	inet_diag_unlock_handler(handler);

	nlmsg_end(skb, nlh);
	return 0;
}

static const struct sock_diag_handler inet_diag_handler = {
	.family = AF_INET,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};

static const struct sock_diag_handler inet6_diag_handler = {
	.family = AF_INET6,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};

int inet_diag_register(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;
	int err = -EINVAL;

	if (type >= IPPROTO_MAX)
		goto out;

	mutex_lock(&inet_diag_table_mutex);
	err = -EEXIST;
	if (!inet_diag_table[type]) {
		inet_diag_table[type] = h;
		err = 0;
	}
	mutex_unlock(&inet_diag_table_mutex);
out:
	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_register);

void inet_diag_unregister(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;

	if (type >= IPPROTO_MAX)
		return;

	mutex_lock(&inet_diag_table_mutex);
	inet_diag_table[type] = NULL;
	mutex_unlock(&inet_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);

static int __init inet_diag_init(void)
{
	const int inet_diag_table_size = (IPPROTO_MAX *
					  sizeof(struct inet_diag_handler *));
	int err = -ENOMEM;

	inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
	if (!inet_diag_table)
		goto out;

	err = sock_diag_register(&inet_diag_handler);
	if (err)
		goto out_free_nl;

	err = sock_diag_register(&inet6_diag_handler);
	if (err)
		goto out_free_inet;

	sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
out:
	return err;

out_free_inet:
	sock_diag_unregister(&inet_diag_handler);
out_free_nl:
	kfree(inet_diag_table);
	goto out;
}

static void __exit inet_diag_exit(void)
{
	sock_diag_unregister(&inet6_diag_handler);
	sock_diag_unregister(&inet_diag_handler);
	sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
	kfree(inet_diag_table);
}

module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);