1 /* Copyright (C) 2017 Cavium, Inc. 2 * 3 * This program is free software; you can redistribute it and/or modify it 4 * under the terms of version 2 of the GNU General Public License 5 * as published by the Free Software Foundation. 6 */ 7 #include <linux/bpf.h> 8 #include <linux/netlink.h> 9 #include <linux/rtnetlink.h> 10 #include <assert.h> 11 #include <errno.h> 12 #include <signal.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <string.h> 16 #include <sys/socket.h> 17 #include <unistd.h> 18 #include "bpf_load.h" 19 #include <bpf/bpf.h> 20 #include <arpa/inet.h> 21 #include <fcntl.h> 22 #include <poll.h> 23 #include <net/if.h> 24 #include <netdb.h> 25 #include <sys/ioctl.h> 26 #include <sys/syscall.h> 27 #include "bpf_util.h" 28 29 int sock, sock_arp, flags = 0; 30 static int total_ifindex; 31 int *ifindex_list; 32 char buf[8192]; 33 34 static int get_route_table(int rtm_family); 35 static void int_exit(int sig) 36 { 37 int i = 0; 38 39 for (i = 0; i < total_ifindex; i++) 40 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 41 exit(0); 42 } 43 44 static void close_and_exit(int sig) 45 { 46 int i = 0; 47 48 close(sock); 49 close(sock_arp); 50 51 for (i = 0; i < total_ifindex; i++) 52 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 53 exit(0); 54 } 55 56 /* Get the mac address of the interface given interface name */ 57 static __be64 getmac(char *iface) 58 { 59 struct ifreq ifr; 60 __be64 mac = 0; 61 int fd, i; 62 63 fd = socket(AF_INET, SOCK_DGRAM, 0); 64 ifr.ifr_addr.sa_family = AF_INET; 65 strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1); 66 if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { 67 printf("ioctl failed leaving....\n"); 68 return -1; 69 } 70 for (i = 0; i < 6 ; i++) 71 *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i]; 72 close(fd); 73 return mac; 74 } 75 76 static int recv_msg(struct sockaddr_nl sock_addr, int sock) 77 { 78 struct nlmsghdr *nh; 79 int len, nll = 0; 80 char *buf_ptr; 81 82 buf_ptr = buf; 83 while (1) { 84 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0); 85 if (len < 0) 86 return len; 87 88 nh = (struct nlmsghdr *)buf_ptr; 89 90 if (nh->nlmsg_type == NLMSG_DONE) 91 break; 92 buf_ptr += len; 93 nll += len; 94 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH) 95 break; 96 97 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE) 98 break; 99 } 100 return nll; 101 } 102 103 /* Function to parse the route entry returned by netlink 104 * Updates the route entry related map entries 105 */ 106 static void read_route(struct nlmsghdr *nh, int nll) 107 { 108 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24]; 109 struct bpf_lpm_trie_key *prefix_key; 110 struct rtattr *rt_attr; 111 struct rtmsg *rt_msg; 112 int rtm_family; 113 int rtl; 114 int i; 115 struct route_table { 116 int dst_len, iface, metric; 117 char *iface_name; 118 __be32 dst, gw; 119 __be64 mac; 120 } route; 121 struct arp_table { 122 __be64 mac; 123 __be32 dst; 124 }; 125 126 struct direct_map { 127 struct arp_table arp; 128 int ifindex; 129 __be64 mac; 130 } direct_entry; 131 132 if (nh->nlmsg_type == RTM_DELROUTE) 133 printf("DELETING Route entry\n"); 134 else if (nh->nlmsg_type == RTM_GETROUTE) 135 printf("READING Route entry\n"); 136 else if (nh->nlmsg_type == RTM_NEWROUTE) 137 printf("NEW Route entry\n"); 138 else 139 printf("%d\n", nh->nlmsg_type); 140 141 memset(&route, 0, sizeof(route)); 142 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n"); 143 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { 144 rt_msg = (struct rtmsg *)NLMSG_DATA(nh); 145 rtm_family = rt_msg->rtm_family; 146 if (rtm_family == AF_INET) 147 if (rt_msg->rtm_table != RT_TABLE_MAIN) 148 continue; 149 rt_attr = (struct rtattr *)RTM_RTA(rt_msg); 150 rtl = RTM_PAYLOAD(nh); 151 152 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { 153 switch (rt_attr->rta_type) { 154 case NDA_DST: 155 sprintf(dsts, "%u", 156 (*((__be32 *)RTA_DATA(rt_attr)))); 157 break; 158 case RTA_GATEWAY: 159 sprintf(gws, "%u", 160 *((__be32 *)RTA_DATA(rt_attr))); 161 break; 162 case RTA_OIF: 163 sprintf(ifs, "%u", 164 *((int *)RTA_DATA(rt_attr))); 165 break; 166 case RTA_METRICS: 167 sprintf(metrics, "%u", 168 *((int *)RTA_DATA(rt_attr))); 169 default: 170 break; 171 } 172 } 173 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len); 174 route.dst = atoi(dsts); 175 route.dst_len = atoi(dsts_len); 176 route.gw = atoi(gws); 177 route.iface = atoi(ifs); 178 route.metric = atoi(metrics); 179 route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); 180 route.iface_name = if_indextoname(route.iface, route.iface_name); 181 route.mac = getmac(route.iface_name); 182 if (route.mac == -1) { 183 int i = 0; 184 185 for (i = 0; i < total_ifindex; i++) 186 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 187 exit(0); 188 } 189 assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); 190 if (rtm_family == AF_INET) { 191 struct trie_value { 192 __u8 prefix[4]; 193 __be64 value; 194 int ifindex; 195 int metric; 196 __be32 gw; 197 } *prefix_value; 198 199 prefix_key = alloca(sizeof(*prefix_key) + 3); 200 prefix_value = alloca(sizeof(*prefix_value)); 201 202 prefix_key->prefixlen = 32; 203 prefix_key->prefixlen = route.dst_len; 204 direct_entry.mac = route.mac & 0xffffffffffff; 205 direct_entry.ifindex = route.iface; 206 direct_entry.arp.mac = 0; 207 direct_entry.arp.dst = 0; 208 if (route.dst_len == 32) { 209 if (nh->nlmsg_type == RTM_DELROUTE) { 210 assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); 211 } else { 212 if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) 213 direct_entry.arp.dst = route.dst; 214 assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); 215 } 216 } 217 for (i = 0; i < 4; i++) 218 prefix_key->data[i] = (route.dst >> i * 8) & 0xff; 219 220 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n", 221 (int)prefix_key->data[0], 222 (int)prefix_key->data[1], 223 (int)prefix_key->data[2], 224 (int)prefix_key->data[3], 225 route.gw, route.dst_len, 226 route.metric, 227 route.iface_name); 228 if (bpf_map_lookup_elem(map_fd[0], prefix_key, 229 prefix_value) < 0) { 230 for (i = 0; i < 4; i++) 231 prefix_value->prefix[i] = prefix_key->data[i]; 232 prefix_value->value = route.mac & 0xffffffffffff; 233 prefix_value->ifindex = route.iface; 234 prefix_value->gw = route.gw; 235 prefix_value->metric = route.metric; 236 237 assert(bpf_map_update_elem(map_fd[0], 238 prefix_key, 239 prefix_value, 0 240 ) == 0); 241 } else { 242 if (nh->nlmsg_type == RTM_DELROUTE) { 243 printf("deleting entry\n"); 244 printf("prefix key=%d.%d.%d.%d/%d", 245 prefix_key->data[0], 246 prefix_key->data[1], 247 prefix_key->data[2], 248 prefix_key->data[3], 249 prefix_key->prefixlen); 250 assert(bpf_map_delete_elem(map_fd[0], 251 prefix_key 252 ) == 0); 253 /* Rereading the route table to check if 254 * there is an entry with the same 255 * prefix but a different metric as the 256 * deleted enty. 257 */ 258 get_route_table(AF_INET); 259 } else if (prefix_key->data[0] == 260 prefix_value->prefix[0] && 261 prefix_key->data[1] == 262 prefix_value->prefix[1] && 263 prefix_key->data[2] == 264 prefix_value->prefix[2] && 265 prefix_key->data[3] == 266 prefix_value->prefix[3] && 267 route.metric >= prefix_value->metric) { 268 continue; 269 } else { 270 for (i = 0; i < 4; i++) 271 prefix_value->prefix[i] = 272 prefix_key->data[i]; 273 prefix_value->value = 274 route.mac & 0xffffffffffff; 275 prefix_value->ifindex = route.iface; 276 prefix_value->gw = route.gw; 277 prefix_value->metric = route.metric; 278 assert(bpf_map_update_elem( 279 map_fd[0], 280 prefix_key, 281 prefix_value, 282 0) == 0); 283 } 284 } 285 } 286 memset(&route, 0, sizeof(route)); 287 memset(dsts, 0, sizeof(dsts)); 288 memset(dsts_len, 0, sizeof(dsts_len)); 289 memset(gws, 0, sizeof(gws)); 290 memset(ifs, 0, sizeof(ifs)); 291 memset(&route, 0, sizeof(route)); 292 } 293 } 294 295 /* Function to read the existing route table when the process is launched*/ 296 static int get_route_table(int rtm_family) 297 { 298 struct sockaddr_nl sa; 299 struct nlmsghdr *nh; 300 int sock, seq = 0; 301 struct msghdr msg; 302 struct iovec iov; 303 int ret = 0; 304 int nll; 305 306 struct { 307 struct nlmsghdr nl; 308 struct rtmsg rt; 309 char buf[8192]; 310 } req; 311 312 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 313 if (sock < 0) { 314 printf("open netlink socket: %s\n", strerror(errno)); 315 return -1; 316 } 317 memset(&sa, 0, sizeof(sa)); 318 sa.nl_family = AF_NETLINK; 319 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { 320 printf("bind to netlink: %s\n", strerror(errno)); 321 ret = -1; 322 goto cleanup; 323 } 324 memset(&req, 0, sizeof(req)); 325 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 326 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; 327 req.nl.nlmsg_type = RTM_GETROUTE; 328 329 req.rt.rtm_family = rtm_family; 330 req.rt.rtm_table = RT_TABLE_MAIN; 331 req.nl.nlmsg_pid = 0; 332 req.nl.nlmsg_seq = ++seq; 333 memset(&msg, 0, sizeof(msg)); 334 iov.iov_base = (void *)&req.nl; 335 iov.iov_len = req.nl.nlmsg_len; 336 msg.msg_iov = &iov; 337 msg.msg_iovlen = 1; 338 ret = sendmsg(sock, &msg, 0); 339 if (ret < 0) { 340 printf("send to netlink: %s\n", strerror(errno)); 341 ret = -1; 342 goto cleanup; 343 } 344 memset(buf, 0, sizeof(buf)); 345 nll = recv_msg(sa, sock); 346 if (nll < 0) { 347 printf("recv from netlink: %s\n", strerror(nll)); 348 ret = -1; 349 goto cleanup; 350 } 351 nh = (struct nlmsghdr *)buf; 352 read_route(nh, nll); 353 cleanup: 354 close(sock); 355 return ret; 356 } 357 358 /* Function to parse the arp entry returned by netlink 359 * Updates the arp entry related map entries 360 */ 361 static void read_arp(struct nlmsghdr *nh, int nll) 362 { 363 struct rtattr *rt_attr; 364 char dsts[24], mac[24]; 365 struct ndmsg *rt_msg; 366 int rtl, ndm_family; 367 368 struct arp_table { 369 __be64 mac; 370 __be32 dst; 371 } arp_entry; 372 struct direct_map { 373 struct arp_table arp; 374 int ifindex; 375 __be64 mac; 376 } direct_entry; 377 378 if (nh->nlmsg_type == RTM_GETNEIGH) 379 printf("READING arp entry\n"); 380 printf("Address\tHwAddress\n"); 381 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { 382 rt_msg = (struct ndmsg *)NLMSG_DATA(nh); 383 rt_attr = (struct rtattr *)RTM_RTA(rt_msg); 384 ndm_family = rt_msg->ndm_family; 385 rtl = RTM_PAYLOAD(nh); 386 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) { 387 switch (rt_attr->rta_type) { 388 case NDA_DST: 389 sprintf(dsts, "%u", 390 *((__be32 *)RTA_DATA(rt_attr))); 391 break; 392 case NDA_LLADDR: 393 sprintf(mac, "%lld", 394 *((__be64 *)RTA_DATA(rt_attr))); 395 break; 396 default: 397 break; 398 } 399 } 400 arp_entry.dst = atoi(dsts); 401 arp_entry.mac = atol(mac); 402 printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); 403 if (ndm_family == AF_INET) { 404 if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, 405 &direct_entry) == 0) { 406 if (nh->nlmsg_type == RTM_DELNEIGH) { 407 direct_entry.arp.dst = 0; 408 direct_entry.arp.mac = 0; 409 } else if (nh->nlmsg_type == RTM_NEWNEIGH) { 410 direct_entry.arp.dst = arp_entry.dst; 411 direct_entry.arp.mac = arp_entry.mac; 412 } 413 assert(bpf_map_update_elem(map_fd[3], 414 &arp_entry.dst, 415 &direct_entry, 0 416 ) == 0); 417 memset(&direct_entry, 0, sizeof(direct_entry)); 418 } 419 if (nh->nlmsg_type == RTM_DELNEIGH) { 420 assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); 421 } else if (nh->nlmsg_type == RTM_NEWNEIGH) { 422 assert(bpf_map_update_elem(map_fd[2], 423 &arp_entry.dst, 424 &arp_entry.mac, 0 425 ) == 0); 426 } 427 } 428 memset(&arp_entry, 0, sizeof(arp_entry)); 429 memset(dsts, 0, sizeof(dsts)); 430 } 431 } 432 433 /* Function to read the existing arp table when the process is launched*/ 434 static int get_arp_table(int rtm_family) 435 { 436 struct sockaddr_nl sa; 437 struct nlmsghdr *nh; 438 int sock, seq = 0; 439 struct msghdr msg; 440 struct iovec iov; 441 int ret = 0; 442 int nll; 443 struct { 444 struct nlmsghdr nl; 445 struct ndmsg rt; 446 char buf[8192]; 447 } req; 448 449 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 450 if (sock < 0) { 451 printf("open netlink socket: %s\n", strerror(errno)); 452 return -1; 453 } 454 memset(&sa, 0, sizeof(sa)); 455 sa.nl_family = AF_NETLINK; 456 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { 457 printf("bind to netlink: %s\n", strerror(errno)); 458 ret = -1; 459 goto cleanup; 460 } 461 memset(&req, 0, sizeof(req)); 462 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 463 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; 464 req.nl.nlmsg_type = RTM_GETNEIGH; 465 req.rt.ndm_state = NUD_REACHABLE; 466 req.rt.ndm_family = rtm_family; 467 req.nl.nlmsg_pid = 0; 468 req.nl.nlmsg_seq = ++seq; 469 memset(&msg, 0, sizeof(msg)); 470 iov.iov_base = (void *)&req.nl; 471 iov.iov_len = req.nl.nlmsg_len; 472 msg.msg_iov = &iov; 473 msg.msg_iovlen = 1; 474 ret = sendmsg(sock, &msg, 0); 475 if (ret < 0) { 476 printf("send to netlink: %s\n", strerror(errno)); 477 ret = -1; 478 goto cleanup; 479 } 480 memset(buf, 0, sizeof(buf)); 481 nll = recv_msg(sa, sock); 482 if (nll < 0) { 483 printf("recv from netlink: %s\n", strerror(nll)); 484 ret = -1; 485 goto cleanup; 486 } 487 nh = (struct nlmsghdr *)buf; 488 read_arp(nh, nll); 489 cleanup: 490 close(sock); 491 return ret; 492 } 493 494 /* Function to keep track and update changes in route and arp table 495 * Give regular statistics of packets forwarded 496 */ 497 static int monitor_route(void) 498 { 499 unsigned int nr_cpus = bpf_num_possible_cpus(); 500 const unsigned int nr_keys = 256; 501 struct pollfd fds_route, fds_arp; 502 __u64 prev[nr_keys][nr_cpus]; 503 struct sockaddr_nl la, lr; 504 __u64 values[nr_cpus]; 505 struct nlmsghdr *nh; 506 int nll, ret = 0; 507 int interval = 5; 508 __u32 key; 509 int i; 510 511 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 512 if (sock < 0) { 513 printf("open netlink socket: %s\n", strerror(errno)); 514 return -1; 515 } 516 517 fcntl(sock, F_SETFL, O_NONBLOCK); 518 memset(&lr, 0, sizeof(lr)); 519 lr.nl_family = AF_NETLINK; 520 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; 521 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) { 522 printf("bind to netlink: %s\n", strerror(errno)); 523 ret = -1; 524 goto cleanup; 525 } 526 fds_route.fd = sock; 527 fds_route.events = POLL_IN; 528 529 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 530 if (sock_arp < 0) { 531 printf("open netlink socket: %s\n", strerror(errno)); 532 return -1; 533 } 534 535 fcntl(sock_arp, F_SETFL, O_NONBLOCK); 536 memset(&la, 0, sizeof(la)); 537 la.nl_family = AF_NETLINK; 538 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; 539 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) { 540 printf("bind to netlink: %s\n", strerror(errno)); 541 ret = -1; 542 goto cleanup; 543 } 544 fds_arp.fd = sock_arp; 545 fds_arp.events = POLL_IN; 546 547 memset(prev, 0, sizeof(prev)); 548 do { 549 signal(SIGINT, close_and_exit); 550 signal(SIGTERM, close_and_exit); 551 552 sleep(interval); 553 for (key = 0; key < nr_keys; key++) { 554 __u64 sum = 0; 555 556 assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); 557 for (i = 0; i < nr_cpus; i++) 558 sum += (values[i] - prev[key][i]); 559 if (sum) 560 printf("proto %u: %10llu pkt/s\n", 561 key, sum / interval); 562 memcpy(prev[key], values, sizeof(values)); 563 } 564 565 memset(buf, 0, sizeof(buf)); 566 if (poll(&fds_route, 1, 3) == POLL_IN) { 567 nll = recv_msg(lr, sock); 568 if (nll < 0) { 569 printf("recv from netlink: %s\n", strerror(nll)); 570 ret = -1; 571 goto cleanup; 572 } 573 574 nh = (struct nlmsghdr *)buf; 575 printf("Routing table updated.\n"); 576 read_route(nh, nll); 577 } 578 memset(buf, 0, sizeof(buf)); 579 if (poll(&fds_arp, 1, 3) == POLL_IN) { 580 nll = recv_msg(la, sock_arp); 581 if (nll < 0) { 582 printf("recv from netlink: %s\n", strerror(nll)); 583 ret = -1; 584 goto cleanup; 585 } 586 587 nh = (struct nlmsghdr *)buf; 588 read_arp(nh, nll); 589 } 590 591 } while (1); 592 cleanup: 593 close(sock); 594 return ret; 595 } 596 597 int main(int ac, char **argv) 598 { 599 char filename[256]; 600 char **ifname_list; 601 int i = 1; 602 603 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 604 if (ac < 2) { 605 printf("usage: %s [-S] Interface name list\n", argv[0]); 606 return 1; 607 } 608 if (!strcmp(argv[1], "-S")) { 609 flags = XDP_FLAGS_SKB_MODE; 610 total_ifindex = ac - 2; 611 ifname_list = (argv + 2); 612 } else { 613 flags = 0; 614 total_ifindex = ac - 1; 615 ifname_list = (argv + 1); 616 } 617 if (load_bpf_file(filename)) { 618 printf("%s", bpf_log_buf); 619 return 1; 620 } 621 printf("\n**************loading bpf file*********************\n\n\n"); 622 if (!prog_fd[0]) { 623 printf("load_bpf_file: %s\n", strerror(errno)); 624 return 1; 625 } 626 ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); 627 for (i = 0; i < total_ifindex; i++) { 628 ifindex_list[i] = if_nametoindex(ifname_list[i]); 629 if (!ifindex_list[i]) { 630 printf("Couldn't translate interface name: %s", 631 strerror(errno)); 632 return 1; 633 } 634 } 635 for (i = 0; i < total_ifindex; i++) { 636 if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { 637 printf("link set xdp fd failed\n"); 638 int recovery_index = i; 639 640 for (i = 0; i < recovery_index; i++) 641 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 642 643 return 1; 644 } 645 printf("Attached to %d\n", ifindex_list[i]); 646 } 647 signal(SIGINT, int_exit); 648 signal(SIGTERM, int_exit); 649 650 printf("*******************ROUTE TABLE*************************\n\n\n"); 651 get_route_table(AF_INET); 652 printf("*******************ARP TABLE***************************\n\n\n"); 653 get_arp_table(AF_INET); 654 if (monitor_route() < 0) { 655 printf("Error in receiving route update"); 656 return 1; 657 } 658 659 return 0; 660 } 661