1 #include <stddef.h> 2 #include <inttypes.h> 3 #include <errno.h> 4 #include <linux/seg6_local.h> 5 #include <linux/bpf.h> 6 #include "bpf_helpers.h" 7 #include "bpf_endian.h" 8 9 #define bpf_printk(fmt, ...) \ 10 ({ \ 11 char ____fmt[] = fmt; \ 12 bpf_trace_printk(____fmt, sizeof(____fmt), \ 13 ##__VA_ARGS__); \ 14 }) 15 16 /* Packet parsing state machine helpers. */ 17 #define cursor_advance(_cursor, _len) \ 18 ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) 19 20 #define SR6_FLAG_ALERT (1 << 4) 21 22 #define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ 23 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) 24 #define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ 25 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) 26 #define BPF_PACKET_HEADER __attribute__((packed)) 27 28 struct ip6_t { 29 unsigned int ver:4; 30 unsigned int priority:8; 31 unsigned int flow_label:20; 32 unsigned short payload_len; 33 unsigned char next_header; 34 unsigned char hop_limit; 35 unsigned long long src_hi; 36 unsigned long long src_lo; 37 unsigned long long dst_hi; 38 unsigned long long dst_lo; 39 } BPF_PACKET_HEADER; 40 41 struct ip6_addr_t { 42 unsigned long long hi; 43 unsigned long long lo; 44 } BPF_PACKET_HEADER; 45 46 struct ip6_srh_t { 47 unsigned char nexthdr; 48 unsigned char hdrlen; 49 unsigned char type; 50 unsigned char segments_left; 51 unsigned char first_segment; 52 unsigned char flags; 53 unsigned short tag; 54 55 struct ip6_addr_t segments[0]; 56 } BPF_PACKET_HEADER; 57 58 struct sr6_tlv_t { 59 unsigned char type; 60 unsigned char len; 61 unsigned char value[0]; 62 } BPF_PACKET_HEADER; 63 64 __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) 65 { 66 void *cursor, *data_end; 67 struct ip6_srh_t *srh; 68 struct ip6_t *ip; 69 uint8_t *ipver; 70 71 data_end = (void *)(long)skb->data_end; 72 cursor = (void *)(long)skb->data; 73 ipver = (uint8_t *)cursor; 74 75 if ((void *)ipver + sizeof(*ipver) > data_end) 76 return NULL; 77 78 if ((*ipver >> 4) != 6) 79 return NULL; 80 81 ip = cursor_advance(cursor, sizeof(*ip)); 82 if ((void *)ip + sizeof(*ip) > data_end) 83 return NULL; 84 85 if (ip->next_header != 43) 86 return NULL; 87 88 srh = cursor_advance(cursor, sizeof(*srh)); 89 if ((void *)srh + sizeof(*srh) > data_end) 90 return NULL; 91 92 if (srh->type != 4) 93 return NULL; 94 95 return srh; 96 } 97 98 __attribute__((always_inline)) 99 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, 100 uint32_t old_pad, uint32_t pad_off) 101 { 102 int err; 103 104 if (new_pad != old_pad) { 105 err = bpf_lwt_seg6_adjust_srh(skb, pad_off, 106 (int) new_pad - (int) old_pad); 107 if (err) 108 return err; 109 } 110 111 if (new_pad > 0) { 112 char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113 0, 0, 0}; 114 struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; 115 116 pad_tlv->type = SR6_TLV_PADDING; 117 pad_tlv->len = new_pad - 2; 118 119 err = bpf_lwt_seg6_store_bytes(skb, pad_off, 120 (void *)pad_tlv_buf, new_pad); 121 if (err) 122 return err; 123 } 124 125 return 0; 126 } 127 128 __attribute__((always_inline)) 129 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, 130 uint32_t *tlv_off, uint32_t *pad_size, 131 uint32_t *pad_off) 132 { 133 uint32_t srh_off, cur_off; 134 int offset_valid = 0; 135 int err; 136 137 srh_off = (char *)srh - (char *)(long)skb->data; 138 // cur_off = end of segments, start of possible TLVs 139 cur_off = srh_off + sizeof(*srh) + 140 sizeof(struct ip6_addr_t) * (srh->first_segment + 1); 141 142 *pad_off = 0; 143 144 // we can only go as far as ~10 TLVs due to the BPF max stack size 145 #pragma clang loop unroll(full) 146 for (int i = 0; i < 10; i++) { 147 struct sr6_tlv_t tlv; 148 149 if (cur_off == *tlv_off) 150 offset_valid = 1; 151 152 if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) 153 break; 154 155 err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); 156 if (err) 157 return err; 158 159 if (tlv.type == SR6_TLV_PADDING) { 160 *pad_size = tlv.len + sizeof(tlv); 161 *pad_off = cur_off; 162 163 if (*tlv_off == srh_off) { 164 *tlv_off = cur_off; 165 offset_valid = 1; 166 } 167 break; 168 169 } else if (tlv.type == SR6_TLV_HMAC) { 170 break; 171 } 172 173 cur_off += sizeof(tlv) + tlv.len; 174 } // we reached the padding or HMAC TLVs, or the end of the SRH 175 176 if (*pad_off == 0) 177 *pad_off = cur_off; 178 179 if (*tlv_off == -1) 180 *tlv_off = cur_off; 181 else if (!offset_valid) 182 return -EINVAL; 183 184 return 0; 185 } 186 187 __attribute__((always_inline)) 188 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, 189 struct sr6_tlv_t *itlv, uint8_t tlv_size) 190 { 191 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 192 uint8_t len_remaining, new_pad; 193 uint32_t pad_off = 0; 194 uint32_t pad_size = 0; 195 uint32_t partial_srh_len; 196 int err; 197 198 if (tlv_off != -1) 199 tlv_off += srh_off; 200 201 if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) 202 return -EINVAL; 203 204 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 205 if (err) 206 return err; 207 208 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); 209 if (err) 210 return err; 211 212 err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); 213 if (err) 214 return err; 215 216 // the following can't be moved inside update_tlv_pad because the 217 // bpf verifier has some issues with it 218 pad_off += sizeof(*itlv) + itlv->len; 219 partial_srh_len = pad_off - srh_off; 220 len_remaining = partial_srh_len % 8; 221 new_pad = 8 - len_remaining; 222 223 if (new_pad == 1) // cannot pad for 1 byte only 224 new_pad = 9; 225 else if (new_pad == 8) 226 new_pad = 0; 227 228 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 229 } 230 231 __attribute__((always_inline)) 232 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, 233 uint32_t tlv_off) 234 { 235 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 236 uint8_t len_remaining, new_pad; 237 uint32_t partial_srh_len; 238 uint32_t pad_off = 0; 239 uint32_t pad_size = 0; 240 struct sr6_tlv_t tlv; 241 int err; 242 243 tlv_off += srh_off; 244 245 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 246 if (err) 247 return err; 248 249 err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); 250 if (err) 251 return err; 252 253 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); 254 if (err) 255 return err; 256 257 pad_off -= sizeof(tlv) + tlv.len; 258 partial_srh_len = pad_off - srh_off; 259 len_remaining = partial_srh_len % 8; 260 new_pad = 8 - len_remaining; 261 if (new_pad == 1) // cannot pad for 1 byte only 262 new_pad = 9; 263 else if (new_pad == 8) 264 new_pad = 0; 265 266 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 267 } 268 269 __attribute__((always_inline)) 270 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) 271 { 272 int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + 273 ((srh->first_segment + 1) << 4); 274 struct sr6_tlv_t tlv; 275 276 if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) 277 return 0; 278 279 if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { 280 struct ip6_addr_t egr_addr; 281 282 if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) 283 return 0; 284 285 // check if egress TLV value is correct 286 if (ntohll(egr_addr.hi) == 0xfd00000000000000 && 287 ntohll(egr_addr.lo) == 0x4) 288 return 1; 289 } 290 291 return 0; 292 } 293 294 // This function will push a SRH with segments fd00::1, fd00::2, fd00::3, 295 // fd00::4 296 SEC("encap_srh") 297 int __encap_srh(struct __sk_buff *skb) 298 { 299 unsigned long long hi = 0xfd00000000000000; 300 struct ip6_addr_t *seg; 301 struct ip6_srh_t *srh; 302 char srh_buf[72]; // room for 4 segments 303 int err; 304 305 srh = (struct ip6_srh_t *)srh_buf; 306 srh->nexthdr = 0; 307 srh->hdrlen = 8; 308 srh->type = 4; 309 srh->segments_left = 3; 310 srh->first_segment = 3; 311 srh->flags = 0; 312 srh->tag = 0; 313 314 seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); 315 316 #pragma clang loop unroll(full) 317 for (unsigned long long lo = 0; lo < 4; lo++) { 318 seg->lo = htonll(4 - lo); 319 seg->hi = htonll(hi); 320 seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); 321 } 322 323 err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); 324 if (err) 325 return BPF_DROP; 326 327 return BPF_REDIRECT; 328 } 329 330 // Add an Egress TLV fc00::4, add the flag A, 331 // and apply End.X action to fc42::1 332 SEC("add_egr_x") 333 int __add_egr_x(struct __sk_buff *skb) 334 { 335 unsigned long long hi = 0xfc42000000000000; 336 unsigned long long lo = 0x1; 337 struct ip6_srh_t *srh = get_srh(skb); 338 uint8_t new_flags = SR6_FLAG_ALERT; 339 struct ip6_addr_t addr; 340 int err, offset; 341 342 if (srh == NULL) 343 return BPF_DROP; 344 345 uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 346 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; 347 348 err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, 349 (struct sr6_tlv_t *)&tlv, 20); 350 if (err) 351 return BPF_DROP; 352 353 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 354 err = bpf_lwt_seg6_store_bytes(skb, offset, 355 (void *)&new_flags, sizeof(new_flags)); 356 if (err) 357 return BPF_DROP; 358 359 addr.lo = htonll(lo); 360 addr.hi = htonll(hi); 361 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, 362 (void *)&addr, sizeof(addr)); 363 if (err) 364 return BPF_DROP; 365 return BPF_REDIRECT; 366 } 367 368 // Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a 369 // simple End action 370 SEC("pop_egr") 371 int __pop_egr(struct __sk_buff *skb) 372 { 373 struct ip6_srh_t *srh = get_srh(skb); 374 uint16_t new_tag = bpf_htons(2442); 375 uint8_t new_flags = 0; 376 int err, offset; 377 378 if (srh == NULL) 379 return BPF_DROP; 380 381 if (srh->flags != SR6_FLAG_ALERT) 382 return BPF_DROP; 383 384 if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV 385 return BPF_DROP; 386 387 if (!has_egr_tlv(skb, srh)) 388 return BPF_DROP; 389 390 err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); 391 if (err) 392 return BPF_DROP; 393 394 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 395 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, 396 sizeof(new_flags))) 397 return BPF_DROP; 398 399 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); 400 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, 401 sizeof(new_tag))) 402 return BPF_DROP; 403 404 return BPF_OK; 405 } 406 407 // Inspect if the Egress TLV and flag have been removed, if the tag is correct, 408 // then apply a End.T action to reach the last segment 409 SEC("inspect_t") 410 int __inspect_t(struct __sk_buff *skb) 411 { 412 struct ip6_srh_t *srh = get_srh(skb); 413 int table = 117; 414 int err; 415 416 if (srh == NULL) 417 return BPF_DROP; 418 419 if (srh->flags != 0) 420 return BPF_DROP; 421 422 if (srh->tag != bpf_htons(2442)) 423 return BPF_DROP; 424 425 if (srh->hdrlen != 8) // 4 segments 426 return BPF_DROP; 427 428 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, 429 (void *)&table, sizeof(table)); 430 431 if (err) 432 return BPF_DROP; 433 434 return BPF_REDIRECT; 435 } 436 437 char __license[] SEC("license") = "GPL"; 438