xref: /openbmc/linux/samples/bpf/xdp_tx_iptunnel_kern.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program shows how to use bpf_xdp_adjust_head() by
 * encapsulating the incoming packet in an IPv4/v6 header
 * and then XDP_TX it out.
 */
11cdb749ceSJesper Dangaard Brouer #define KBUILD_MODNAME "foo"
1212d8bb64SMartin KaFai Lau #include <uapi/linux/bpf.h>
1312d8bb64SMartin KaFai Lau #include <linux/in.h>
1412d8bb64SMartin KaFai Lau #include <linux/if_ether.h>
1512d8bb64SMartin KaFai Lau #include <linux/if_packet.h>
1612d8bb64SMartin KaFai Lau #include <linux/if_vlan.h>
1712d8bb64SMartin KaFai Lau #include <linux/ip.h>
1812d8bb64SMartin KaFai Lau #include <linux/ipv6.h>
197cf245a3SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
2012d8bb64SMartin KaFai Lau #include "xdp_tx_iptunnel_common.h"
2112d8bb64SMartin KaFai Lau 
/*
 * rxcnt: per-CPU array map of 256 64-bit counters, keyed by a 32-bit index.
 * count_tx() in this file uses the IP protocol / next-header number as the
 * index and increments the matching slot for every encapsulated packet.
 */
22451d1dc8SDaniel T. Lee struct {
23451d1dc8SDaniel T. Lee 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
24451d1dc8SDaniel T. Lee 	__type(key, __u32);
25451d1dc8SDaniel T. Lee 	__type(value, __u64);
26451d1dc8SDaniel T. Lee 	__uint(max_entries, 256);
27451d1dc8SDaniel T. Lee } rxcnt SEC(".maps");
2812d8bb64SMartin KaFai Lau 
/*
 * vip2tnl: hash map from a virtual-IP key (struct vip: family, protocol,
 * destination address and destination port as filled in by handle_ipv4() /
 * handle_ipv6()) to the tunnel parameters (struct iptnl_info) used to build
 * the outer headers.  This file only performs lookups; the entries are
 * presumably installed by the user-space loader -- confirm against it.
 */
29451d1dc8SDaniel T. Lee struct {
30451d1dc8SDaniel T. Lee 	__uint(type, BPF_MAP_TYPE_HASH);
31451d1dc8SDaniel T. Lee 	__type(key, struct vip);
32451d1dc8SDaniel T. Lee 	__type(value, struct iptnl_info);
33451d1dc8SDaniel T. Lee 	__uint(max_entries, MAX_IPTNL_ENTRIES);
34451d1dc8SDaniel T. Lee } vip2tnl SEC(".maps");
3512d8bb64SMartin KaFai Lau 
count_tx(u32 protocol)3612d8bb64SMartin KaFai Lau static __always_inline void count_tx(u32 protocol)
3712d8bb64SMartin KaFai Lau {
3812d8bb64SMartin KaFai Lau 	u64 *rxcnt_count;
3912d8bb64SMartin KaFai Lau 
4012d8bb64SMartin KaFai Lau 	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
4112d8bb64SMartin KaFai Lau 	if (rxcnt_count)
4212d8bb64SMartin KaFai Lau 		*rxcnt_count += 1;
4312d8bb64SMartin KaFai Lau }
4412d8bb64SMartin KaFai Lau 
get_dport(void * trans_data,void * data_end,u8 protocol)4512d8bb64SMartin KaFai Lau static __always_inline int get_dport(void *trans_data, void *data_end,
4612d8bb64SMartin KaFai Lau 				     u8 protocol)
4712d8bb64SMartin KaFai Lau {
4812d8bb64SMartin KaFai Lau 	struct tcphdr *th;
4912d8bb64SMartin KaFai Lau 	struct udphdr *uh;
5012d8bb64SMartin KaFai Lau 
5112d8bb64SMartin KaFai Lau 	switch (protocol) {
5212d8bb64SMartin KaFai Lau 	case IPPROTO_TCP:
5312d8bb64SMartin KaFai Lau 		th = (struct tcphdr *)trans_data;
5412d8bb64SMartin KaFai Lau 		if (th + 1 > data_end)
5512d8bb64SMartin KaFai Lau 			return -1;
5612d8bb64SMartin KaFai Lau 		return th->dest;
5712d8bb64SMartin KaFai Lau 	case IPPROTO_UDP:
5812d8bb64SMartin KaFai Lau 		uh = (struct udphdr *)trans_data;
5912d8bb64SMartin KaFai Lau 		if (uh + 1 > data_end)
6012d8bb64SMartin KaFai Lau 			return -1;
6112d8bb64SMartin KaFai Lau 		return uh->dest;
6212d8bb64SMartin KaFai Lau 	default:
6312d8bb64SMartin KaFai Lau 		return 0;
6412d8bb64SMartin KaFai Lau 	}
6512d8bb64SMartin KaFai Lau }
6612d8bb64SMartin KaFai Lau 
/*
 * set_ethhdr - fill in the Ethernet header of the encapsulated frame.
 * @new_eth: header to write
 * @old_eth: Ethernet header of the packet as it was received
 * @tnl:     tunnel entry supplying the destination MAC (tnl->dmac)
 * @h_proto: EtherType to store, already in network byte order
 *
 * The received frame's destination MAC becomes the new source MAC, and the
 * tunnel's dmac becomes the new destination MAC.
 */
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
				       const struct ethhdr *old_eth,
				       const struct iptnl_info *tnl,
				       __be16 h_proto)
{
	/* Read the old header first, before anything is written to new_eth. */
	memcpy(new_eth->h_source, old_eth->h_dest, ETH_ALEN);
	memcpy(new_eth->h_dest, tnl->dmac, ETH_ALEN);
	new_eth->h_proto = h_proto;
}
7612d8bb64SMartin KaFai Lau 
handle_ipv4(struct xdp_md * xdp)7712d8bb64SMartin KaFai Lau static __always_inline int handle_ipv4(struct xdp_md *xdp)
7812d8bb64SMartin KaFai Lau {
7912d8bb64SMartin KaFai Lau 	void *data_end = (void *)(long)xdp->data_end;
8012d8bb64SMartin KaFai Lau 	void *data = (void *)(long)xdp->data;
8112d8bb64SMartin KaFai Lau 	struct iptnl_info *tnl;
8212d8bb64SMartin KaFai Lau 	struct ethhdr *new_eth;
8312d8bb64SMartin KaFai Lau 	struct ethhdr *old_eth;
8412d8bb64SMartin KaFai Lau 	struct iphdr *iph = data + sizeof(struct ethhdr);
8512d8bb64SMartin KaFai Lau 	u16 *next_iph_u16;
8612d8bb64SMartin KaFai Lau 	u16 payload_len;
8712d8bb64SMartin KaFai Lau 	struct vip vip = {};
8812d8bb64SMartin KaFai Lau 	int dport;
8912d8bb64SMartin KaFai Lau 	u32 csum = 0;
9012d8bb64SMartin KaFai Lau 	int i;
9112d8bb64SMartin KaFai Lau 
9212d8bb64SMartin KaFai Lau 	if (iph + 1 > data_end)
9312d8bb64SMartin KaFai Lau 		return XDP_DROP;
9412d8bb64SMartin KaFai Lau 
9512d8bb64SMartin KaFai Lau 	dport = get_dport(iph + 1, data_end, iph->protocol);
9612d8bb64SMartin KaFai Lau 	if (dport == -1)
9712d8bb64SMartin KaFai Lau 		return XDP_DROP;
9812d8bb64SMartin KaFai Lau 
9912d8bb64SMartin KaFai Lau 	vip.protocol = iph->protocol;
10012d8bb64SMartin KaFai Lau 	vip.family = AF_INET;
10112d8bb64SMartin KaFai Lau 	vip.daddr.v4 = iph->daddr;
10212d8bb64SMartin KaFai Lau 	vip.dport = dport;
10312d8bb64SMartin KaFai Lau 	payload_len = ntohs(iph->tot_len);
10412d8bb64SMartin KaFai Lau 
10512d8bb64SMartin KaFai Lau 	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
10612d8bb64SMartin KaFai Lau 	/* It only does v4-in-v4 */
10712d8bb64SMartin KaFai Lau 	if (!tnl || tnl->family != AF_INET)
10812d8bb64SMartin KaFai Lau 		return XDP_PASS;
10912d8bb64SMartin KaFai Lau 
11012d8bb64SMartin KaFai Lau 	/* The vip key is found.  Add an IP header and send it out */
11112d8bb64SMartin KaFai Lau 
11212d8bb64SMartin KaFai Lau 	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
11312d8bb64SMartin KaFai Lau 		return XDP_DROP;
11412d8bb64SMartin KaFai Lau 
11512d8bb64SMartin KaFai Lau 	data = (void *)(long)xdp->data;
11612d8bb64SMartin KaFai Lau 	data_end = (void *)(long)xdp->data_end;
11712d8bb64SMartin KaFai Lau 
11812d8bb64SMartin KaFai Lau 	new_eth = data;
11912d8bb64SMartin KaFai Lau 	iph = data + sizeof(*new_eth);
12012d8bb64SMartin KaFai Lau 	old_eth = data + sizeof(*iph);
12112d8bb64SMartin KaFai Lau 
12212d8bb64SMartin KaFai Lau 	if (new_eth + 1 > data_end ||
12312d8bb64SMartin KaFai Lau 	    old_eth + 1 > data_end ||
12412d8bb64SMartin KaFai Lau 	    iph + 1 > data_end)
12512d8bb64SMartin KaFai Lau 		return XDP_DROP;
12612d8bb64SMartin KaFai Lau 
12712d8bb64SMartin KaFai Lau 	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
12812d8bb64SMartin KaFai Lau 
12912d8bb64SMartin KaFai Lau 	iph->version = 4;
13012d8bb64SMartin KaFai Lau 	iph->ihl = sizeof(*iph) >> 2;
13112d8bb64SMartin KaFai Lau 	iph->frag_off =	0;
13212d8bb64SMartin KaFai Lau 	iph->protocol = IPPROTO_IPIP;
13312d8bb64SMartin KaFai Lau 	iph->check = 0;
13412d8bb64SMartin KaFai Lau 	iph->tos = 0;
13512d8bb64SMartin KaFai Lau 	iph->tot_len = htons(payload_len + sizeof(*iph));
13612d8bb64SMartin KaFai Lau 	iph->daddr = tnl->daddr.v4;
13712d8bb64SMartin KaFai Lau 	iph->saddr = tnl->saddr.v4;
13812d8bb64SMartin KaFai Lau 	iph->ttl = 8;
13912d8bb64SMartin KaFai Lau 
14012d8bb64SMartin KaFai Lau 	next_iph_u16 = (u16 *)iph;
14112d8bb64SMartin KaFai Lau #pragma clang loop unroll(full)
14212d8bb64SMartin KaFai Lau 	for (i = 0; i < sizeof(*iph) >> 1; i++)
14312d8bb64SMartin KaFai Lau 		csum += *next_iph_u16++;
14412d8bb64SMartin KaFai Lau 
14512d8bb64SMartin KaFai Lau 	iph->check = ~((csum & 0xffff) + (csum >> 16));
14612d8bb64SMartin KaFai Lau 
14712d8bb64SMartin KaFai Lau 	count_tx(vip.protocol);
14812d8bb64SMartin KaFai Lau 
14912d8bb64SMartin KaFai Lau 	return XDP_TX;
15012d8bb64SMartin KaFai Lau }
15112d8bb64SMartin KaFai Lau 
handle_ipv6(struct xdp_md * xdp)15212d8bb64SMartin KaFai Lau static __always_inline int handle_ipv6(struct xdp_md *xdp)
15312d8bb64SMartin KaFai Lau {
15412d8bb64SMartin KaFai Lau 	void *data_end = (void *)(long)xdp->data_end;
15512d8bb64SMartin KaFai Lau 	void *data = (void *)(long)xdp->data;
15612d8bb64SMartin KaFai Lau 	struct iptnl_info *tnl;
15712d8bb64SMartin KaFai Lau 	struct ethhdr *new_eth;
15812d8bb64SMartin KaFai Lau 	struct ethhdr *old_eth;
15912d8bb64SMartin KaFai Lau 	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
16012d8bb64SMartin KaFai Lau 	__u16 payload_len;
16112d8bb64SMartin KaFai Lau 	struct vip vip = {};
16212d8bb64SMartin KaFai Lau 	int dport;
16312d8bb64SMartin KaFai Lau 
16412d8bb64SMartin KaFai Lau 	if (ip6h + 1 > data_end)
16512d8bb64SMartin KaFai Lau 		return XDP_DROP;
16612d8bb64SMartin KaFai Lau 
16712d8bb64SMartin KaFai Lau 	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
16812d8bb64SMartin KaFai Lau 	if (dport == -1)
16912d8bb64SMartin KaFai Lau 		return XDP_DROP;
17012d8bb64SMartin KaFai Lau 
17112d8bb64SMartin KaFai Lau 	vip.protocol = ip6h->nexthdr;
17212d8bb64SMartin KaFai Lau 	vip.family = AF_INET6;
17312d8bb64SMartin KaFai Lau 	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
17412d8bb64SMartin KaFai Lau 	vip.dport = dport;
17512d8bb64SMartin KaFai Lau 	payload_len = ip6h->payload_len;
17612d8bb64SMartin KaFai Lau 
17712d8bb64SMartin KaFai Lau 	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
17812d8bb64SMartin KaFai Lau 	/* It only does v6-in-v6 */
17912d8bb64SMartin KaFai Lau 	if (!tnl || tnl->family != AF_INET6)
18012d8bb64SMartin KaFai Lau 		return XDP_PASS;
18112d8bb64SMartin KaFai Lau 
18212d8bb64SMartin KaFai Lau 	/* The vip key is found.  Add an IP header and send it out */
18312d8bb64SMartin KaFai Lau 
18412d8bb64SMartin KaFai Lau 	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
18512d8bb64SMartin KaFai Lau 		return XDP_DROP;
18612d8bb64SMartin KaFai Lau 
18712d8bb64SMartin KaFai Lau 	data = (void *)(long)xdp->data;
18812d8bb64SMartin KaFai Lau 	data_end = (void *)(long)xdp->data_end;
18912d8bb64SMartin KaFai Lau 
19012d8bb64SMartin KaFai Lau 	new_eth = data;
19112d8bb64SMartin KaFai Lau 	ip6h = data + sizeof(*new_eth);
19212d8bb64SMartin KaFai Lau 	old_eth = data + sizeof(*ip6h);
19312d8bb64SMartin KaFai Lau 
19412d8bb64SMartin KaFai Lau 	if (new_eth + 1 > data_end ||
19512d8bb64SMartin KaFai Lau 	    old_eth + 1 > data_end ||
19612d8bb64SMartin KaFai Lau 	    ip6h + 1 > data_end)
19712d8bb64SMartin KaFai Lau 		return XDP_DROP;
19812d8bb64SMartin KaFai Lau 
19912d8bb64SMartin KaFai Lau 	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
20012d8bb64SMartin KaFai Lau 
20112d8bb64SMartin KaFai Lau 	ip6h->version = 6;
20212d8bb64SMartin KaFai Lau 	ip6h->priority = 0;
20312d8bb64SMartin KaFai Lau 	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
20412d8bb64SMartin KaFai Lau 	ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
20512d8bb64SMartin KaFai Lau 	ip6h->nexthdr = IPPROTO_IPV6;
20612d8bb64SMartin KaFai Lau 	ip6h->hop_limit = 8;
20712d8bb64SMartin KaFai Lau 	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
20812d8bb64SMartin KaFai Lau 	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
20912d8bb64SMartin KaFai Lau 
21012d8bb64SMartin KaFai Lau 	count_tx(vip.protocol);
21112d8bb64SMartin KaFai Lau 
21212d8bb64SMartin KaFai Lau 	return XDP_TX;
21312d8bb64SMartin KaFai Lau }
21412d8bb64SMartin KaFai Lau 
215*77225174SAndy Gospodarek SEC("xdp.frags")
_xdp_tx_iptunnel(struct xdp_md * xdp)21612d8bb64SMartin KaFai Lau int _xdp_tx_iptunnel(struct xdp_md *xdp)
21712d8bb64SMartin KaFai Lau {
21812d8bb64SMartin KaFai Lau 	void *data_end = (void *)(long)xdp->data_end;
21912d8bb64SMartin KaFai Lau 	void *data = (void *)(long)xdp->data;
22012d8bb64SMartin KaFai Lau 	struct ethhdr *eth = data;
22112d8bb64SMartin KaFai Lau 	__u16 h_proto;
22212d8bb64SMartin KaFai Lau 
22312d8bb64SMartin KaFai Lau 	if (eth + 1 > data_end)
22412d8bb64SMartin KaFai Lau 		return XDP_DROP;
22512d8bb64SMartin KaFai Lau 
22612d8bb64SMartin KaFai Lau 	h_proto = eth->h_proto;
22712d8bb64SMartin KaFai Lau 
22812d8bb64SMartin KaFai Lau 	if (h_proto == htons(ETH_P_IP))
22912d8bb64SMartin KaFai Lau 		return handle_ipv4(xdp);
23012d8bb64SMartin KaFai Lau 	else if (h_proto == htons(ETH_P_IPV6))
23112d8bb64SMartin KaFai Lau 
23212d8bb64SMartin KaFai Lau 		return handle_ipv6(xdp);
23312d8bb64SMartin KaFai Lau 	else
23412d8bb64SMartin KaFai Lau 		return XDP_PASS;
23512d8bb64SMartin KaFai Lau }
23612d8bb64SMartin KaFai Lau 
/* License string for this BPF object, emitted into the "license" section. */
23712d8bb64SMartin KaFai Lau char _license[] SEC("license") = "GPL";
238