/* SPDX-License-Identifier: GPL-2.0
 *  Copyright(c) 2018 Jesper Dangaard Brouer.
 *
 * XDP/TC VLAN manipulation example
 *
 * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
 * else the VLAN tags are NOT inlined in the packet payload:
 *
 *  # ethtool -K ixgbe2 rxvlan off
 *
 * Verify setting:
 *  # ethtool -k ixgbe2 | grep rx-vlan-offload
 *  rx-vlan-offload: off
 *
 */
16bd4aed0eSJiong Wang #include <stddef.h>
17bd4aed0eSJiong Wang #include <stdbool.h>
18bd4aed0eSJiong Wang #include <string.h>
19bd4aed0eSJiong Wang #include <linux/bpf.h>
20bd4aed0eSJiong Wang #include <linux/if_ether.h>
21bd4aed0eSJiong Wang #include <linux/if_vlan.h>
22bd4aed0eSJiong Wang #include <linux/in.h>
23bd4aed0eSJiong Wang #include <linux/pkt_cls.h>
24bd4aed0eSJiong Wang 
253e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
263e689141SToke Høiland-Jørgensen #include <bpf/bpf_endian.h>
27bd4aed0eSJiong Wang 
/* linux/if_vlan.h has not exposed this as UAPI, thus mirror it here.
 *
 *	struct _vlan_hdr - VLAN header as laid out in the packet
 *	@h_vlan_TCI: priority and VLAN ID (big-endian)
 *	@h_vlan_encapsulated_proto: packet type ID or len (big-endian)
 */
struct _vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};
/* Bit layout of the 16-bit TCI field, in host byte order:
 * 3 bits priority, 1 bit CFI, 12 bits VLAN ID.
 */
#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
#define VLAN_PRIO_SHIFT		13
#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
#define VLAN_N_VID		4096
44bd4aed0eSJiong Wang 
/* Result of parse_eth_frame().
 *
 * The VLAN fields are only written when the corresponding tag is found, so
 * the struct must be zero-initialized by the caller; a VLAN offset of 0 then
 * means "tag not present".
 */
struct parse_pkt {
	__u16 l3_proto;		/* EtherType after any VLAN tags, host byte order */
	__u16 l3_offset;	/* Bytes from start of Ethernet header to L3 header */
	__u16 vlan_outer;	/* Outer VLAN ID (TCI & VLAN_VID_MASK), host order */
	__u16 vlan_inner;	/* Inner VLAN ID (TCI & VLAN_VID_MASK), host order */
	__u8  vlan_outer_offset; /* Bytes from start of Ethernet header to outer tag */
	__u8  vlan_inner_offset; /* Bytes from start of Ethernet header to inner tag */
};
53bd4aed0eSJiong Wang 
54bd4aed0eSJiong Wang char _license[] SEC("license") = "GPL";
55bd4aed0eSJiong Wang 
56bd4aed0eSJiong Wang static __always_inline
parse_eth_frame(struct ethhdr * eth,void * data_end,struct parse_pkt * pkt)57bd4aed0eSJiong Wang bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
58bd4aed0eSJiong Wang {
59bd4aed0eSJiong Wang 	__u16 eth_type;
60bd4aed0eSJiong Wang 	__u8 offset;
61bd4aed0eSJiong Wang 
62bd4aed0eSJiong Wang 	offset = sizeof(*eth);
63bd4aed0eSJiong Wang 	/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
64bd4aed0eSJiong Wang 	if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
65bd4aed0eSJiong Wang 		return false;
66bd4aed0eSJiong Wang 
67bd4aed0eSJiong Wang 	eth_type = eth->h_proto;
68bd4aed0eSJiong Wang 
69bd4aed0eSJiong Wang 	/* Handle outer VLAN tag */
70bd4aed0eSJiong Wang 	if (eth_type == bpf_htons(ETH_P_8021Q)
71bd4aed0eSJiong Wang 	    || eth_type == bpf_htons(ETH_P_8021AD)) {
72bd4aed0eSJiong Wang 		struct _vlan_hdr *vlan_hdr;
73bd4aed0eSJiong Wang 
74bd4aed0eSJiong Wang 		vlan_hdr = (void *)eth + offset;
75bd4aed0eSJiong Wang 		pkt->vlan_outer_offset = offset;
76bd4aed0eSJiong Wang 		pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
77bd4aed0eSJiong Wang 				& VLAN_VID_MASK;
78bd4aed0eSJiong Wang 		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
79bd4aed0eSJiong Wang 		offset += sizeof(*vlan_hdr);
80bd4aed0eSJiong Wang 	}
81bd4aed0eSJiong Wang 
82bd4aed0eSJiong Wang 	/* Handle inner (double) VLAN tag */
83bd4aed0eSJiong Wang 	if (eth_type == bpf_htons(ETH_P_8021Q)
84bd4aed0eSJiong Wang 	    || eth_type == bpf_htons(ETH_P_8021AD)) {
85bd4aed0eSJiong Wang 		struct _vlan_hdr *vlan_hdr;
86bd4aed0eSJiong Wang 
87bd4aed0eSJiong Wang 		vlan_hdr = (void *)eth + offset;
88bd4aed0eSJiong Wang 		pkt->vlan_inner_offset = offset;
89bd4aed0eSJiong Wang 		pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
90bd4aed0eSJiong Wang 				& VLAN_VID_MASK;
91bd4aed0eSJiong Wang 		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
92bd4aed0eSJiong Wang 		offset += sizeof(*vlan_hdr);
93bd4aed0eSJiong Wang 	}
94bd4aed0eSJiong Wang 
95bd4aed0eSJiong Wang 	pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
96bd4aed0eSJiong Wang 	pkt->l3_offset = offset;
97bd4aed0eSJiong Wang 
98bd4aed0eSJiong Wang 	return true;
99bd4aed0eSJiong Wang }
100bd4aed0eSJiong Wang 
/* Hint, VLANs are chosen to hit network-byte-order issues */
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN  4000 /* 0xFA0 (hint 0x0A0 = 160) */
104bd4aed0eSJiong Wang 
105bd4aed0eSJiong Wang SEC("xdp_drop_vlan_4011")
xdp_prognum0(struct xdp_md * ctx)106bd4aed0eSJiong Wang int  xdp_prognum0(struct xdp_md *ctx)
107bd4aed0eSJiong Wang {
108bd4aed0eSJiong Wang 	void *data_end = (void *)(long)ctx->data_end;
109bd4aed0eSJiong Wang 	void *data     = (void *)(long)ctx->data;
110bd4aed0eSJiong Wang 	struct parse_pkt pkt = { 0 };
111bd4aed0eSJiong Wang 
112bd4aed0eSJiong Wang 	if (!parse_eth_frame(data, data_end, &pkt))
113bd4aed0eSJiong Wang 		return XDP_ABORTED;
114bd4aed0eSJiong Wang 
115bd4aed0eSJiong Wang 	/* Drop specific VLAN ID example */
116bd4aed0eSJiong Wang 	if (pkt.vlan_outer == TESTVLAN)
117bd4aed0eSJiong Wang 		return XDP_ABORTED;
118bd4aed0eSJiong Wang 	/*
119bd4aed0eSJiong Wang 	 * Using XDP_ABORTED makes it possible to record this event,
120bd4aed0eSJiong Wang 	 * via tracepoint xdp:xdp_exception like:
121bd4aed0eSJiong Wang 	 *  # perf record -a -e xdp:xdp_exception
122bd4aed0eSJiong Wang 	 *  # perf script
123bd4aed0eSJiong Wang 	 */
124bd4aed0eSJiong Wang 	return XDP_PASS;
125bd4aed0eSJiong Wang }
/*
Commands to set up a VLAN on Linux to test that packets get dropped:

 export ROOTDEV=ixgbe2
 export VLANID=4011
 ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
 ip link set dev  $ROOTDEV.$VLANID up

 ip link set dev $ROOTDEV mtu 1508
 ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID

Load prog with ip tool:

 ip link set $ROOTDEV xdp off
 ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011

*/
143bd4aed0eSJiong Wang 
/* Changing the VLAN ID to zero has the same practical effect as removing
 * the VLAN.
 */
#define TO_VLAN	0
146bd4aed0eSJiong Wang 
147bd4aed0eSJiong Wang SEC("xdp_vlan_change")
xdp_prognum1(struct xdp_md * ctx)148bd4aed0eSJiong Wang int  xdp_prognum1(struct xdp_md *ctx)
149bd4aed0eSJiong Wang {
150bd4aed0eSJiong Wang 	void *data_end = (void *)(long)ctx->data_end;
151bd4aed0eSJiong Wang 	void *data     = (void *)(long)ctx->data;
152bd4aed0eSJiong Wang 	struct parse_pkt pkt = { 0 };
153bd4aed0eSJiong Wang 
154bd4aed0eSJiong Wang 	if (!parse_eth_frame(data, data_end, &pkt))
155bd4aed0eSJiong Wang 		return XDP_ABORTED;
156bd4aed0eSJiong Wang 
157bd4aed0eSJiong Wang 	/* Change specific VLAN ID */
158bd4aed0eSJiong Wang 	if (pkt.vlan_outer == TESTVLAN) {
159bd4aed0eSJiong Wang 		struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
160bd4aed0eSJiong Wang 
161bd4aed0eSJiong Wang 		/* Modifying VLAN, preserve top 4 bits */
162bd4aed0eSJiong Wang 		vlan_hdr->h_vlan_TCI =
163bd4aed0eSJiong Wang 			bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
164bd4aed0eSJiong Wang 				  | TO_VLAN);
165bd4aed0eSJiong Wang 	}
166bd4aed0eSJiong Wang 
167bd4aed0eSJiong Wang 	return XDP_PASS;
168bd4aed0eSJiong Wang }
169bd4aed0eSJiong Wang 
/*
 * Show that XDP+TC can cooperate to create a VLAN rewriter.
 * 1. Create an XDP prog that can "pop"/remove a VLAN header.
 * 2. Create a TC-bpf prog that, on egress, can add a VLAN header.
 */
175bd4aed0eSJiong Wang 
#ifndef ETH_ALEN /* Ethernet MAC address length */
#define ETH_ALEN	6	/* bytes */
#endif
#define VLAN_HDR_SZ	4	/* bytes */
180bd4aed0eSJiong Wang 
181bd4aed0eSJiong Wang SEC("xdp_vlan_remove_outer")
xdp_prognum2(struct xdp_md * ctx)182bd4aed0eSJiong Wang int  xdp_prognum2(struct xdp_md *ctx)
183bd4aed0eSJiong Wang {
184bd4aed0eSJiong Wang 	void *data_end = (void *)(long)ctx->data_end;
185bd4aed0eSJiong Wang 	void *data     = (void *)(long)ctx->data;
186bd4aed0eSJiong Wang 	struct parse_pkt pkt = { 0 };
187bd4aed0eSJiong Wang 	char *dest;
188bd4aed0eSJiong Wang 
189bd4aed0eSJiong Wang 	if (!parse_eth_frame(data, data_end, &pkt))
190bd4aed0eSJiong Wang 		return XDP_ABORTED;
191bd4aed0eSJiong Wang 
192bd4aed0eSJiong Wang 	/* Skip packet if no outer VLAN was detected */
193bd4aed0eSJiong Wang 	if (pkt.vlan_outer_offset == 0)
194bd4aed0eSJiong Wang 		return XDP_PASS;
195bd4aed0eSJiong Wang 
196bd4aed0eSJiong Wang 	/* Moving Ethernet header, dest overlap with src, memmove handle this */
197bd4aed0eSJiong Wang 	dest = data;
198bd4aed0eSJiong Wang 	dest += VLAN_HDR_SZ;
199bd4aed0eSJiong Wang 	/*
200bd4aed0eSJiong Wang 	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
201bd4aed0eSJiong Wang 	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
202bd4aed0eSJiong Wang 	 */
203bd4aed0eSJiong Wang 	__builtin_memmove(dest, data, ETH_ALEN * 2);
204bd4aed0eSJiong Wang 	/* Note: LLVM built-in memmove inlining require size to be constant */
205bd4aed0eSJiong Wang 
206bd4aed0eSJiong Wang 	/* Move start of packet header seen by Linux kernel stack */
207bd4aed0eSJiong Wang 	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
208bd4aed0eSJiong Wang 
209bd4aed0eSJiong Wang 	return XDP_PASS;
210bd4aed0eSJiong Wang }
211bd4aed0eSJiong Wang 
/* Shift the first 12 bytes at @data forward by 4 bytes, using three 32-bit
 * copies done highest word first so no source word is overwritten before it
 * has been read.
 *
 * @data: buffer with at least 16 accessible bytes; it is accessed as __u32,
 *        so it is assumed to be suitably aligned for that.
 */
static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
	__u32 *p = data;

	/* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
	 * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
	 * The vlan_hdr->h_vlan_encapsulated_proto take over role as
	 * ethhdr->h_proto.
	 */
	p[3] = p[2];
	p[2] = p[1];
	p[1] = p[0];
}
226bd4aed0eSJiong Wang 
227bd4aed0eSJiong Wang SEC("xdp_vlan_remove_outer2")
xdp_prognum3(struct xdp_md * ctx)228bd4aed0eSJiong Wang int  xdp_prognum3(struct xdp_md *ctx)
229bd4aed0eSJiong Wang {
230bd4aed0eSJiong Wang 	void *data_end = (void *)(long)ctx->data_end;
231bd4aed0eSJiong Wang 	void *data     = (void *)(long)ctx->data;
232bd4aed0eSJiong Wang 	struct ethhdr *orig_eth = data;
233bd4aed0eSJiong Wang 	struct parse_pkt pkt = { 0 };
234bd4aed0eSJiong Wang 
235bd4aed0eSJiong Wang 	if (!parse_eth_frame(orig_eth, data_end, &pkt))
236bd4aed0eSJiong Wang 		return XDP_ABORTED;
237bd4aed0eSJiong Wang 
238bd4aed0eSJiong Wang 	/* Skip packet if no outer VLAN was detected */
239bd4aed0eSJiong Wang 	if (pkt.vlan_outer_offset == 0)
240bd4aed0eSJiong Wang 		return XDP_PASS;
241bd4aed0eSJiong Wang 
242bd4aed0eSJiong Wang 	/* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
243bd4aed0eSJiong Wang 	shift_mac_4bytes_32bit(data);
244bd4aed0eSJiong Wang 
245bd4aed0eSJiong Wang 	/* Move start of packet header seen by Linux kernel stack */
246bd4aed0eSJiong Wang 	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
247bd4aed0eSJiong Wang 
248bd4aed0eSJiong Wang 	return XDP_PASS;
249bd4aed0eSJiong Wang }
250bd4aed0eSJiong Wang 
/*=====================================
 *  BELOW: TC-hook based ebpf programs
 * ====================================
 * The TC-clsact eBPF programs (currently) need to be attached via TC commands
 */
256bd4aed0eSJiong Wang 
257bd4aed0eSJiong Wang SEC("tc_vlan_push")
_tc_progA(struct __sk_buff * ctx)258bd4aed0eSJiong Wang int _tc_progA(struct __sk_buff *ctx)
259bd4aed0eSJiong Wang {
260bd4aed0eSJiong Wang 	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
261bd4aed0eSJiong Wang 
262bd4aed0eSJiong Wang 	return TC_ACT_OK;
263bd4aed0eSJiong Wang }
/*
Commands to set up TC to use the above bpf prog:

export ROOTDEV=ixgbe2
export FILE=xdp_vlan01_kern.o

# Re-attach clsact to clear/flush existing rules
tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
tc qdisc add dev $ROOTDEV clsact

# Attach BPF prog EGRESS
tc filter add dev $ROOTDEV egress \
  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push

tc filter show dev $ROOTDEV egress
*/
280