1 #include <stddef.h>
2 #include <inttypes.h>
3 #include <errno.h>
4 #include <linux/seg6_local.h>
5 #include <linux/bpf.h>
6 #include <bpf/bpf_helpers.h>
7 #include <bpf/bpf_endian.h>
8 
/* Packet parsing state machine helpers. */

/*
 * Evaluate to the current value of _cursor (as a void *), then move _cursor
 * forward by _len. _cursor must be a modifiable pointer lvalue. Relies on
 * GNU statement expressions.
 */
#define cursor_advance(_cursor, _len) \
	({ void *_prev = (_cursor); (_cursor) += (_len); _prev; })

/* Bit 4 of the SRH flags byte: the "alert" flag used by the programs below. */
#define SR6_FLAG_ALERT (0x10)

/* Marks the on-wire structures as packed so no padding is inserted. */
#define BPF_PACKET_HEADER __attribute__((packed))
16 
/*
 * Fixed 40-byte IPv6 header as laid out in the packet.
 *
 * Only next_header is read through this structure in this file; get_srh()
 * takes the IP version from the first packet byte directly rather than from
 * the ver bit-field.
 */
struct ip6_t {
	/* First 32-bit word: version, traffic class ("priority"), flow label. */
	unsigned int ver:4;
	unsigned int priority:8;
	unsigned int flow_label:20;
	uint16_t payload_len;
	uint8_t next_header;	/* type of the header following this one */
	uint8_t hop_limit;
	/* Source and destination addresses, each split in two 64-bit halves. */
	unsigned long long src_hi;
	unsigned long long src_lo;
	unsigned long long dst_hi;
	unsigned long long dst_lo;
} BPF_PACKET_HEADER;
29 
/*
 * 128-bit IPv6 address split into two 64-bit halves.
 *
 * The programs in this file store both halves with bpf_cpu_to_be64() and
 * read them back with bpf_be64_to_cpu(), i.e. they are kept in big-endian
 * (network) byte order.
 */
struct ip6_addr_t {
	unsigned long long hi;	/* first 8 bytes of the address */
	unsigned long long lo;	/* last 8 bytes of the address */
} BPF_PACKET_HEADER;
34 
35 struct ip6_srh_t {
36 	unsigned char nexthdr;
37 	unsigned char hdrlen;
38 	unsigned char type;
39 	unsigned char segments_left;
40 	unsigned char first_segment;
41 	unsigned char flags;
42 	unsigned short tag;
43 
44 	struct ip6_addr_t segments[0];
45 } BPF_PACKET_HEADER;
46 
/*
 * Generic SRH TLV header: one type byte, one length byte, then `len` bytes
 * of value. sizeof(struct sr6_tlv_t) is the 2-byte header only; the value
 * is a C99 flexible array member (instead of the GNU zero-length array
 * extension), so the size and field offsets are unchanged.
 */
struct sr6_tlv_t {
	unsigned char type;
	unsigned char len;	/* number of value bytes, header excluded */
	unsigned char value[];
} BPF_PACKET_HEADER;
52 
get_srh(struct __sk_buff * skb)53 static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
54 {
55 	void *cursor, *data_end;
56 	struct ip6_srh_t *srh;
57 	struct ip6_t *ip;
58 	uint8_t *ipver;
59 
60 	data_end = (void *)(long)skb->data_end;
61 	cursor = (void *)(long)skb->data;
62 	ipver = (uint8_t *)cursor;
63 
64 	if ((void *)ipver + sizeof(*ipver) > data_end)
65 		return NULL;
66 
67 	if ((*ipver >> 4) != 6)
68 		return NULL;
69 
70 	ip = cursor_advance(cursor, sizeof(*ip));
71 	if ((void *)ip + sizeof(*ip) > data_end)
72 		return NULL;
73 
74 	if (ip->next_header != 43)
75 		return NULL;
76 
77 	srh = cursor_advance(cursor, sizeof(*srh));
78 	if ((void *)srh + sizeof(*srh) > data_end)
79 		return NULL;
80 
81 	if (srh->type != 4)
82 		return NULL;
83 
84 	return srh;
85 }
86 
87 static __always_inline
update_tlv_pad(struct __sk_buff * skb,uint32_t new_pad,uint32_t old_pad,uint32_t pad_off)88 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
89 		   uint32_t old_pad, uint32_t pad_off)
90 {
91 	int err;
92 
93 	if (new_pad != old_pad) {
94 		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
95 					  (int) new_pad - (int) old_pad);
96 		if (err)
97 			return err;
98 	}
99 
100 	if (new_pad > 0) {
101 		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 					0, 0, 0};
103 		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
104 
105 		pad_tlv->type = SR6_TLV_PADDING;
106 		pad_tlv->len = new_pad - 2;
107 
108 		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
109 					       (void *)pad_tlv_buf, new_pad);
110 		if (err)
111 			return err;
112 	}
113 
114 	return 0;
115 }
116 
117 static __always_inline
is_valid_tlv_boundary(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t * tlv_off,uint32_t * pad_size,uint32_t * pad_off)118 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
119 			  uint32_t *tlv_off, uint32_t *pad_size,
120 			  uint32_t *pad_off)
121 {
122 	uint32_t srh_off, cur_off;
123 	int offset_valid = 0;
124 	int err;
125 
126 	srh_off = (char *)srh - (char *)(long)skb->data;
127 	// cur_off = end of segments, start of possible TLVs
128 	cur_off = srh_off + sizeof(*srh) +
129 		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
130 
131 	*pad_off = 0;
132 
133 	// we can only go as far as ~10 TLVs due to the BPF max stack size
134 	#pragma clang loop unroll(full)
135 	for (int i = 0; i < 10; i++) {
136 		struct sr6_tlv_t tlv;
137 
138 		if (cur_off == *tlv_off)
139 			offset_valid = 1;
140 
141 		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
142 			break;
143 
144 		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
145 		if (err)
146 			return err;
147 
148 		if (tlv.type == SR6_TLV_PADDING) {
149 			*pad_size = tlv.len + sizeof(tlv);
150 			*pad_off = cur_off;
151 
152 			if (*tlv_off == srh_off) {
153 				*tlv_off = cur_off;
154 				offset_valid = 1;
155 			}
156 			break;
157 
158 		} else if (tlv.type == SR6_TLV_HMAC) {
159 			break;
160 		}
161 
162 		cur_off += sizeof(tlv) + tlv.len;
163 	} // we reached the padding or HMAC TLVs, or the end of the SRH
164 
165 	if (*pad_off == 0)
166 		*pad_off = cur_off;
167 
168 	if (*tlv_off == -1)
169 		*tlv_off = cur_off;
170 	else if (!offset_valid)
171 		return -EINVAL;
172 
173 	return 0;
174 }
175 
176 static __always_inline
add_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t tlv_off,struct sr6_tlv_t * itlv,uint8_t tlv_size)177 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
178 	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
179 {
180 	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
181 	uint8_t len_remaining, new_pad;
182 	uint32_t pad_off = 0;
183 	uint32_t pad_size = 0;
184 	uint32_t partial_srh_len;
185 	int err;
186 
187 	if (tlv_off != -1)
188 		tlv_off += srh_off;
189 
190 	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
191 		return -EINVAL;
192 
193 	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
194 	if (err)
195 		return err;
196 
197 	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
198 	if (err)
199 		return err;
200 
201 	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
202 	if (err)
203 		return err;
204 
205 	// the following can't be moved inside update_tlv_pad because the
206 	// bpf verifier has some issues with it
207 	pad_off += sizeof(*itlv) + itlv->len;
208 	partial_srh_len = pad_off - srh_off;
209 	len_remaining = partial_srh_len % 8;
210 	new_pad = 8 - len_remaining;
211 
212 	if (new_pad == 1) // cannot pad for 1 byte only
213 		new_pad = 9;
214 	else if (new_pad == 8)
215 		new_pad = 0;
216 
217 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
218 }
219 
220 static __always_inline
delete_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t tlv_off)221 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
222 	       uint32_t tlv_off)
223 {
224 	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
225 	uint8_t len_remaining, new_pad;
226 	uint32_t partial_srh_len;
227 	uint32_t pad_off = 0;
228 	uint32_t pad_size = 0;
229 	struct sr6_tlv_t tlv;
230 	int err;
231 
232 	tlv_off += srh_off;
233 
234 	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
235 	if (err)
236 		return err;
237 
238 	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
239 	if (err)
240 		return err;
241 
242 	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
243 	if (err)
244 		return err;
245 
246 	pad_off -= sizeof(tlv) + tlv.len;
247 	partial_srh_len = pad_off - srh_off;
248 	len_remaining = partial_srh_len % 8;
249 	new_pad = 8 - len_remaining;
250 	if (new_pad == 1) // cannot pad for 1 byte only
251 		new_pad = 9;
252 	else if (new_pad == 8)
253 		new_pad = 0;
254 
255 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
256 }
257 
258 static __always_inline
has_egr_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh)259 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
260 {
261 	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
262 		((srh->first_segment + 1) << 4);
263 	struct sr6_tlv_t tlv;
264 
265 	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
266 		return 0;
267 
268 	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
269 		struct ip6_addr_t egr_addr;
270 
271 		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
272 			return 0;
273 
274 		// check if egress TLV value is correct
275 		if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 &&
276 		    bpf_be64_to_cpu(egr_addr.lo) == 0x4)
277 			return 1;
278 	}
279 
280 	return 0;
281 }
282 
283 // This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
284 // fd00::4
285 SEC("encap_srh")
__encap_srh(struct __sk_buff * skb)286 int __encap_srh(struct __sk_buff *skb)
287 {
288 	unsigned long long hi = 0xfd00000000000000;
289 	struct ip6_addr_t *seg;
290 	struct ip6_srh_t *srh;
291 	char srh_buf[72]; // room for 4 segments
292 	int err;
293 
294 	srh = (struct ip6_srh_t *)srh_buf;
295 	srh->nexthdr = 0;
296 	srh->hdrlen = 8;
297 	srh->type = 4;
298 	srh->segments_left = 3;
299 	srh->first_segment = 3;
300 	srh->flags = 0;
301 	srh->tag = 0;
302 
303 	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
304 
305 	#pragma clang loop unroll(full)
306 	for (unsigned long long lo = 0; lo < 4; lo++) {
307 		seg->lo = bpf_cpu_to_be64(4 - lo);
308 		seg->hi = bpf_cpu_to_be64(hi);
309 		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
310 	}
311 
312 	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
313 	if (err)
314 		return BPF_DROP;
315 
316 	return BPF_REDIRECT;
317 }
318 
// Add an Egress TLV fd00::4, set the A (alert) flag,
// and apply the End.X action towards fc42::1
321 SEC("add_egr_x")
__add_egr_x(struct __sk_buff * skb)322 int __add_egr_x(struct __sk_buff *skb)
323 {
324 	unsigned long long hi = 0xfc42000000000000;
325 	unsigned long long lo = 0x1;
326 	struct ip6_srh_t *srh = get_srh(skb);
327 	uint8_t new_flags = SR6_FLAG_ALERT;
328 	struct ip6_addr_t addr;
329 	int err, offset;
330 
331 	if (srh == NULL)
332 		return BPF_DROP;
333 
334 	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
335 			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
336 
337 	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
338 		      (struct sr6_tlv_t *)&tlv, 20);
339 	if (err)
340 		return BPF_DROP;
341 
342 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
343 	err = bpf_lwt_seg6_store_bytes(skb, offset,
344 				       (void *)&new_flags, sizeof(new_flags));
345 	if (err)
346 		return BPF_DROP;
347 
348 	addr.lo = bpf_cpu_to_be64(lo);
349 	addr.hi = bpf_cpu_to_be64(hi);
350 	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
351 				  (void *)&addr, sizeof(addr));
352 	if (err)
353 		return BPF_DROP;
354 	return BPF_REDIRECT;
355 }
356 
// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do a
// simple End action
359 SEC("pop_egr")
__pop_egr(struct __sk_buff * skb)360 int __pop_egr(struct __sk_buff *skb)
361 {
362 	struct ip6_srh_t *srh = get_srh(skb);
363 	uint16_t new_tag = bpf_htons(2442);
364 	uint8_t new_flags = 0;
365 	int err, offset;
366 
367 	if (srh == NULL)
368 		return BPF_DROP;
369 
370 	if (srh->flags != SR6_FLAG_ALERT)
371 		return BPF_DROP;
372 
373 	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
374 		return BPF_DROP;
375 
376 	if (!has_egr_tlv(skb, srh))
377 		return BPF_DROP;
378 
379 	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
380 	if (err)
381 		return BPF_DROP;
382 
383 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
384 	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
385 				     sizeof(new_flags)))
386 		return BPF_DROP;
387 
388 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
389 	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
390 				     sizeof(new_tag)))
391 		return BPF_DROP;
392 
393 	return BPF_OK;
394 }
395 
// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
// then apply an End.T action to reach the last segment
398 SEC("inspect_t")
__inspect_t(struct __sk_buff * skb)399 int __inspect_t(struct __sk_buff *skb)
400 {
401 	struct ip6_srh_t *srh = get_srh(skb);
402 	int table = 117;
403 	int err;
404 
405 	if (srh == NULL)
406 		return BPF_DROP;
407 
408 	if (srh->flags != 0)
409 		return BPF_DROP;
410 
411 	if (srh->tag != bpf_htons(2442))
412 		return BPF_DROP;
413 
414 	if (srh->hdrlen != 8) // 4 segments
415 		return BPF_DROP;
416 
417 	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
418 				  (void *)&table, sizeof(table));
419 
420 	if (err)
421 		return BPF_DROP;
422 
423 	return BPF_REDIRECT;
424 }
425 
/* License string of this object, placed in the "license" section. */
char __license[] SEC("license") = "GPL";
427