xref: /openbmc/linux/samples/bpf/xdp_fwd_kern.c (revision 8dd06ef34b6e2f41b29fbf5fc1663780f2524285)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
13fe616055SDavid Ahern #define KBUILD_MODNAME "foo"
14fe616055SDavid Ahern #include <uapi/linux/bpf.h>
15fe616055SDavid Ahern #include <linux/in.h>
16fe616055SDavid Ahern #include <linux/if_ether.h>
17fe616055SDavid Ahern #include <linux/if_packet.h>
18fe616055SDavid Ahern #include <linux/if_vlan.h>
19fe616055SDavid Ahern #include <linux/ip.h>
20fe616055SDavid Ahern #include <linux/ipv6.h>
21fe616055SDavid Ahern 
22*7cf245a3SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
23fe616055SDavid Ahern 
24fe616055SDavid Ahern #define IPV6_FLOWINFO_MASK              cpu_to_be32(0x0FFFFFFF)
25fe616055SDavid Ahern 
26451d1dc8SDaniel T. Lee struct {
27451d1dc8SDaniel T. Lee 	__uint(type, BPF_MAP_TYPE_DEVMAP);
28451d1dc8SDaniel T. Lee 	__uint(key_size, sizeof(int));
29451d1dc8SDaniel T. Lee 	__uint(value_size, sizeof(int));
30451d1dc8SDaniel T. Lee 	__uint(max_entries, 64);
31451d1dc8SDaniel T. Lee } xdp_tx_ports SEC(".maps");
32fe616055SDavid Ahern 
3344edef77SDavid Ahern /* from include/net/ip.h */
ip_decrease_ttl(struct iphdr * iph)3444edef77SDavid Ahern static __always_inline int ip_decrease_ttl(struct iphdr *iph)
3544edef77SDavid Ahern {
3644edef77SDavid Ahern 	u32 check = (__force u32)iph->check;
3744edef77SDavid Ahern 
3844edef77SDavid Ahern 	check += (__force u32)htons(0x0100);
3944edef77SDavid Ahern 	iph->check = (__force __sum16)(check + (check >= 0xFFFF));
4044edef77SDavid Ahern 	return --iph->ttl;
4144edef77SDavid Ahern }
4244edef77SDavid Ahern 
xdp_fwd_flags(struct xdp_md * ctx,u32 flags)43fe616055SDavid Ahern static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
44fe616055SDavid Ahern {
45fe616055SDavid Ahern 	void *data_end = (void *)(long)ctx->data_end;
46fe616055SDavid Ahern 	void *data = (void *)(long)ctx->data;
47fe616055SDavid Ahern 	struct bpf_fib_lookup fib_params;
48fe616055SDavid Ahern 	struct ethhdr *eth = data;
4944edef77SDavid Ahern 	struct ipv6hdr *ip6h;
5044edef77SDavid Ahern 	struct iphdr *iph;
51fe616055SDavid Ahern 	u16 h_proto;
52fe616055SDavid Ahern 	u64 nh_off;
534c79579bSDavid Ahern 	int rc;
54fe616055SDavid Ahern 
55fe616055SDavid Ahern 	nh_off = sizeof(*eth);
56fe616055SDavid Ahern 	if (data + nh_off > data_end)
57fe616055SDavid Ahern 		return XDP_DROP;
58fe616055SDavid Ahern 
59fe616055SDavid Ahern 	__builtin_memset(&fib_params, 0, sizeof(fib_params));
60fe616055SDavid Ahern 
61fe616055SDavid Ahern 	h_proto = eth->h_proto;
62fe616055SDavid Ahern 	if (h_proto == htons(ETH_P_IP)) {
6344edef77SDavid Ahern 		iph = data + nh_off;
64fe616055SDavid Ahern 
65fe616055SDavid Ahern 		if (iph + 1 > data_end)
66fe616055SDavid Ahern 			return XDP_DROP;
67fe616055SDavid Ahern 
6844edef77SDavid Ahern 		if (iph->ttl <= 1)
6944edef77SDavid Ahern 			return XDP_PASS;
7044edef77SDavid Ahern 
71fe616055SDavid Ahern 		fib_params.family	= AF_INET;
72fe616055SDavid Ahern 		fib_params.tos		= iph->tos;
73fe616055SDavid Ahern 		fib_params.l4_protocol	= iph->protocol;
74fe616055SDavid Ahern 		fib_params.sport	= 0;
75fe616055SDavid Ahern 		fib_params.dport	= 0;
76fe616055SDavid Ahern 		fib_params.tot_len	= ntohs(iph->tot_len);
77fe616055SDavid Ahern 		fib_params.ipv4_src	= iph->saddr;
78fe616055SDavid Ahern 		fib_params.ipv4_dst	= iph->daddr;
79fe616055SDavid Ahern 	} else if (h_proto == htons(ETH_P_IPV6)) {
80fe616055SDavid Ahern 		struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
81fe616055SDavid Ahern 		struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
82fe616055SDavid Ahern 
8344edef77SDavid Ahern 		ip6h = data + nh_off;
8444edef77SDavid Ahern 		if (ip6h + 1 > data_end)
85fe616055SDavid Ahern 			return XDP_DROP;
86fe616055SDavid Ahern 
8744edef77SDavid Ahern 		if (ip6h->hop_limit <= 1)
8844edef77SDavid Ahern 			return XDP_PASS;
8944edef77SDavid Ahern 
90fe616055SDavid Ahern 		fib_params.family	= AF_INET6;
91bd3a08aaSDavid Ahern 		fib_params.flowinfo	= *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
9244edef77SDavid Ahern 		fib_params.l4_protocol	= ip6h->nexthdr;
93fe616055SDavid Ahern 		fib_params.sport	= 0;
94fe616055SDavid Ahern 		fib_params.dport	= 0;
9544edef77SDavid Ahern 		fib_params.tot_len	= ntohs(ip6h->payload_len);
9644edef77SDavid Ahern 		*src			= ip6h->saddr;
9744edef77SDavid Ahern 		*dst			= ip6h->daddr;
98fe616055SDavid Ahern 	} else {
99fe616055SDavid Ahern 		return XDP_PASS;
100fe616055SDavid Ahern 	}
101fe616055SDavid Ahern 
102fe616055SDavid Ahern 	fib_params.ifindex = ctx->ingress_ifindex;
103fe616055SDavid Ahern 
1044c79579bSDavid Ahern 	rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
105abcce733SJesper Dangaard Brouer 	/*
106abcce733SJesper Dangaard Brouer 	 * Some rc (return codes) from bpf_fib_lookup() are important,
107abcce733SJesper Dangaard Brouer 	 * to understand how this XDP-prog interacts with network stack.
108abcce733SJesper Dangaard Brouer 	 *
109abcce733SJesper Dangaard Brouer 	 * BPF_FIB_LKUP_RET_NO_NEIGH:
110abcce733SJesper Dangaard Brouer 	 *  Even if route lookup was a success, then the MAC-addresses are also
111abcce733SJesper Dangaard Brouer 	 *  needed.  This is obtained from arp/neighbour table, but if table is
112abcce733SJesper Dangaard Brouer 	 *  (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned.  To avoid
113abcce733SJesper Dangaard Brouer 	 *  doing ARP lookup directly from XDP, then send packet to normal
114abcce733SJesper Dangaard Brouer 	 *  network stack via XDP_PASS and expect it will do ARP resolution.
115abcce733SJesper Dangaard Brouer 	 *
116abcce733SJesper Dangaard Brouer 	 * BPF_FIB_LKUP_RET_FWD_DISABLED:
117abcce733SJesper Dangaard Brouer 	 *  The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
118abcce733SJesper Dangaard Brouer 	 *  setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
119abcce733SJesper Dangaard Brouer 	 *  enabled this on ingress device.
120abcce733SJesper Dangaard Brouer 	 */
121abcce733SJesper Dangaard Brouer 	if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
122a32a32cbSJesper Dangaard Brouer 		/* Verify egress index has been configured as TX-port.
123a32a32cbSJesper Dangaard Brouer 		 * (Note: User can still have inserted an egress ifindex that
124a32a32cbSJesper Dangaard Brouer 		 * doesn't support XDP xmit, which will result in packet drops).
125a32a32cbSJesper Dangaard Brouer 		 *
126a32a32cbSJesper Dangaard Brouer 		 * Note: lookup in devmap supported since 0cdbb4b09a0.
127a32a32cbSJesper Dangaard Brouer 		 * If not supported will fail with:
128a32a32cbSJesper Dangaard Brouer 		 *  cannot pass map_type 14 into func bpf_map_lookup_elem#1:
129a32a32cbSJesper Dangaard Brouer 		 */
130a32a32cbSJesper Dangaard Brouer 		if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
131a32a32cbSJesper Dangaard Brouer 			return XDP_PASS;
132a32a32cbSJesper Dangaard Brouer 
13344edef77SDavid Ahern 		if (h_proto == htons(ETH_P_IP))
13444edef77SDavid Ahern 			ip_decrease_ttl(iph);
13544edef77SDavid Ahern 		else if (h_proto == htons(ETH_P_IPV6))
13644edef77SDavid Ahern 			ip6h->hop_limit--;
13744edef77SDavid Ahern 
138fe616055SDavid Ahern 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
139fe616055SDavid Ahern 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
1403783d437SJesper Dangaard Brouer 		return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
141fe616055SDavid Ahern 	}
142fe616055SDavid Ahern 
143fe616055SDavid Ahern 	return XDP_PASS;
144fe616055SDavid Ahern }
145fe616055SDavid Ahern 
146fe616055SDavid Ahern SEC("xdp_fwd")
xdp_fwd_prog(struct xdp_md * ctx)147fe616055SDavid Ahern int xdp_fwd_prog(struct xdp_md *ctx)
148fe616055SDavid Ahern {
149fe616055SDavid Ahern 	return xdp_fwd_flags(ctx, 0);
150fe616055SDavid Ahern }
151fe616055SDavid Ahern 
152fe616055SDavid Ahern SEC("xdp_fwd_direct")
xdp_fwd_direct_prog(struct xdp_md * ctx)153fe616055SDavid Ahern int xdp_fwd_direct_prog(struct xdp_md *ctx)
154fe616055SDavid Ahern {
155fe616055SDavid Ahern 	return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
156fe616055SDavid Ahern }
157fe616055SDavid Ahern 
158fe616055SDavid Ahern char _license[] SEC("license") = "GPL";
159