1 #include <stddef.h> 2 #include <inttypes.h> 3 #include <errno.h> 4 #include <linux/seg6_local.h> 5 #include <linux/bpf.h> 6 #include "bpf_helpers.h" 7 #include "bpf_endian.h" 8 9 /* Packet parsing state machine helpers. */ 10 #define cursor_advance(_cursor, _len) \ 11 ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) 12 13 #define SR6_FLAG_ALERT (1 << 4) 14 15 #define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ 16 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) 17 #define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ 18 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) 19 #define BPF_PACKET_HEADER __attribute__((packed)) 20 21 struct ip6_t { 22 unsigned int ver:4; 23 unsigned int priority:8; 24 unsigned int flow_label:20; 25 unsigned short payload_len; 26 unsigned char next_header; 27 unsigned char hop_limit; 28 unsigned long long src_hi; 29 unsigned long long src_lo; 30 unsigned long long dst_hi; 31 unsigned long long dst_lo; 32 } BPF_PACKET_HEADER; 33 34 struct ip6_addr_t { 35 unsigned long long hi; 36 unsigned long long lo; 37 } BPF_PACKET_HEADER; 38 39 struct ip6_srh_t { 40 unsigned char nexthdr; 41 unsigned char hdrlen; 42 unsigned char type; 43 unsigned char segments_left; 44 unsigned char first_segment; 45 unsigned char flags; 46 unsigned short tag; 47 48 struct ip6_addr_t segments[0]; 49 } BPF_PACKET_HEADER; 50 51 struct sr6_tlv_t { 52 unsigned char type; 53 unsigned char len; 54 unsigned char value[0]; 55 } BPF_PACKET_HEADER; 56 57 static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb) 58 { 59 void *cursor, *data_end; 60 struct ip6_srh_t *srh; 61 struct ip6_t *ip; 62 uint8_t *ipver; 63 64 data_end = (void *)(long)skb->data_end; 65 cursor = (void *)(long)skb->data; 66 ipver = (uint8_t *)cursor; 67 68 if ((void *)ipver + sizeof(*ipver) > data_end) 69 return NULL; 70 71 if ((*ipver >> 4) != 6) 72 return NULL; 73 74 ip = cursor_advance(cursor, sizeof(*ip)); 75 if ((void *)ip + sizeof(*ip) > data_end) 76 return NULL; 77 78 if (ip->next_header != 43) 79 return NULL; 80 81 srh = cursor_advance(cursor, sizeof(*srh)); 82 if ((void *)srh + sizeof(*srh) > data_end) 83 return NULL; 84 85 if (srh->type != 4) 86 return NULL; 87 88 return srh; 89 } 90 91 static __always_inline 92 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, 93 uint32_t old_pad, uint32_t pad_off) 94 { 95 int err; 96 97 if (new_pad != old_pad) { 98 err = bpf_lwt_seg6_adjust_srh(skb, pad_off, 99 (int) new_pad - (int) old_pad); 100 if (err) 101 return err; 102 } 103 104 if (new_pad > 0) { 105 char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0}; 107 struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; 108 109 pad_tlv->type = SR6_TLV_PADDING; 110 pad_tlv->len = new_pad - 2; 111 112 err = bpf_lwt_seg6_store_bytes(skb, pad_off, 113 (void *)pad_tlv_buf, new_pad); 114 if (err) 115 return err; 116 } 117 118 return 0; 119 } 120 121 static __always_inline 122 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, 123 uint32_t *tlv_off, uint32_t *pad_size, 124 uint32_t *pad_off) 125 { 126 uint32_t srh_off, cur_off; 127 int offset_valid = 0; 128 int err; 129 130 srh_off = (char *)srh - (char *)(long)skb->data; 131 // cur_off = end of segments, start of possible TLVs 132 cur_off = srh_off + sizeof(*srh) + 133 sizeof(struct ip6_addr_t) * (srh->first_segment + 1); 134 135 *pad_off = 0; 136 137 // we can only go as far as ~10 TLVs due to the BPF max stack size 138 #pragma clang loop unroll(full) 139 for (int i = 0; i < 10; i++) { 140 struct sr6_tlv_t tlv; 141 142 if (cur_off == *tlv_off) 143 offset_valid = 1; 144 145 if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) 146 break; 147 148 err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); 149 if (err) 150 return err; 151 152 if (tlv.type == SR6_TLV_PADDING) { 153 *pad_size = tlv.len + sizeof(tlv); 154 *pad_off = cur_off; 155 156 if (*tlv_off == srh_off) { 157 *tlv_off = cur_off; 158 offset_valid = 1; 159 } 160 break; 161 162 } else if (tlv.type == SR6_TLV_HMAC) { 163 break; 164 } 165 166 cur_off += sizeof(tlv) + tlv.len; 167 } // we reached the padding or HMAC TLVs, or the end of the SRH 168 169 if (*pad_off == 0) 170 *pad_off = cur_off; 171 172 if (*tlv_off == -1) 173 *tlv_off = cur_off; 174 else if (!offset_valid) 175 return -EINVAL; 176 177 return 0; 178 } 179 180 static __always_inline 181 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, 182 struct sr6_tlv_t *itlv, uint8_t tlv_size) 183 { 184 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 185 uint8_t len_remaining, new_pad; 186 uint32_t pad_off = 0; 187 uint32_t pad_size = 0; 188 uint32_t partial_srh_len; 189 int err; 190 191 if (tlv_off != -1) 192 tlv_off += srh_off; 193 194 if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) 195 return -EINVAL; 196 197 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 198 if (err) 199 return err; 200 201 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); 202 if (err) 203 return err; 204 205 err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); 206 if (err) 207 return err; 208 209 // the following can't be moved inside update_tlv_pad because the 210 // bpf verifier has some issues with it 211 pad_off += sizeof(*itlv) + itlv->len; 212 partial_srh_len = pad_off - srh_off; 213 len_remaining = partial_srh_len % 8; 214 new_pad = 8 - len_remaining; 215 216 if (new_pad == 1) // cannot pad for 1 byte only 217 new_pad = 9; 218 else if (new_pad == 8) 219 new_pad = 0; 220 221 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 222 } 223 224 static __always_inline 225 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, 226 uint32_t tlv_off) 227 { 228 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 229 uint8_t len_remaining, new_pad; 230 uint32_t partial_srh_len; 231 uint32_t pad_off = 0; 232 uint32_t pad_size = 0; 233 struct sr6_tlv_t tlv; 234 int err; 235 236 tlv_off += srh_off; 237 238 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 239 if (err) 240 return err; 241 242 err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); 243 if (err) 244 return err; 245 246 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); 247 if (err) 248 return err; 249 250 pad_off -= sizeof(tlv) + tlv.len; 251 partial_srh_len = pad_off - srh_off; 252 len_remaining = partial_srh_len % 8; 253 new_pad = 8 - len_remaining; 254 if (new_pad == 1) // cannot pad for 1 byte only 255 new_pad = 9; 256 else if (new_pad == 8) 257 new_pad = 0; 258 259 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 260 } 261 262 static __always_inline 263 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) 264 { 265 int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + 266 ((srh->first_segment + 1) << 4); 267 struct sr6_tlv_t tlv; 268 269 if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) 270 return 0; 271 272 if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { 273 struct ip6_addr_t egr_addr; 274 275 if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) 276 return 0; 277 278 // check if egress TLV value is correct 279 if (ntohll(egr_addr.hi) == 0xfd00000000000000 && 280 ntohll(egr_addr.lo) == 0x4) 281 return 1; 282 } 283 284 return 0; 285 } 286 287 // This function will push a SRH with segments fd00::1, fd00::2, fd00::3, 288 // fd00::4 289 SEC("encap_srh") 290 int __encap_srh(struct __sk_buff *skb) 291 { 292 unsigned long long hi = 0xfd00000000000000; 293 struct ip6_addr_t *seg; 294 struct ip6_srh_t *srh; 295 char srh_buf[72]; // room for 4 segments 296 int err; 297 298 srh = (struct ip6_srh_t *)srh_buf; 299 srh->nexthdr = 0; 300 srh->hdrlen = 8; 301 srh->type = 4; 302 srh->segments_left = 3; 303 srh->first_segment = 3; 304 srh->flags = 0; 305 srh->tag = 0; 306 307 seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); 308 309 #pragma clang loop unroll(full) 310 for (unsigned long long lo = 0; lo < 4; lo++) { 311 seg->lo = htonll(4 - lo); 312 seg->hi = htonll(hi); 313 seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); 314 } 315 316 err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); 317 if (err) 318 return BPF_DROP; 319 320 return BPF_REDIRECT; 321 } 322 323 // Add an Egress TLV fc00::4, add the flag A, 324 // and apply End.X action to fc42::1 325 SEC("add_egr_x") 326 int __add_egr_x(struct __sk_buff *skb) 327 { 328 unsigned long long hi = 0xfc42000000000000; 329 unsigned long long lo = 0x1; 330 struct ip6_srh_t *srh = get_srh(skb); 331 uint8_t new_flags = SR6_FLAG_ALERT; 332 struct ip6_addr_t addr; 333 int err, offset; 334 335 if (srh == NULL) 336 return BPF_DROP; 337 338 uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 339 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; 340 341 err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, 342 (struct sr6_tlv_t *)&tlv, 20); 343 if (err) 344 return BPF_DROP; 345 346 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 347 err = bpf_lwt_seg6_store_bytes(skb, offset, 348 (void *)&new_flags, sizeof(new_flags)); 349 if (err) 350 return BPF_DROP; 351 352 addr.lo = htonll(lo); 353 addr.hi = htonll(hi); 354 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, 355 (void *)&addr, sizeof(addr)); 356 if (err) 357 return BPF_DROP; 358 return BPF_REDIRECT; 359 } 360 361 // Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a 362 // simple End action 363 SEC("pop_egr") 364 int __pop_egr(struct __sk_buff *skb) 365 { 366 struct ip6_srh_t *srh = get_srh(skb); 367 uint16_t new_tag = bpf_htons(2442); 368 uint8_t new_flags = 0; 369 int err, offset; 370 371 if (srh == NULL) 372 return BPF_DROP; 373 374 if (srh->flags != SR6_FLAG_ALERT) 375 return BPF_DROP; 376 377 if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV 378 return BPF_DROP; 379 380 if (!has_egr_tlv(skb, srh)) 381 return BPF_DROP; 382 383 err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); 384 if (err) 385 return BPF_DROP; 386 387 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 388 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, 389 sizeof(new_flags))) 390 return BPF_DROP; 391 392 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); 393 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, 394 sizeof(new_tag))) 395 return BPF_DROP; 396 397 return BPF_OK; 398 } 399 400 // Inspect if the Egress TLV and flag have been removed, if the tag is correct, 401 // then apply a End.T action to reach the last segment 402 SEC("inspect_t") 403 int __inspect_t(struct __sk_buff *skb) 404 { 405 struct ip6_srh_t *srh = get_srh(skb); 406 int table = 117; 407 int err; 408 409 if (srh == NULL) 410 return BPF_DROP; 411 412 if (srh->flags != 0) 413 return BPF_DROP; 414 415 if (srh->tag != bpf_htons(2442)) 416 return BPF_DROP; 417 418 if (srh->hdrlen != 8) // 4 segments 419 return BPF_DROP; 420 421 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, 422 (void *)&table, sizeof(table)); 423 424 if (err) 425 return BPF_DROP; 426 427 return BPF_REDIRECT; 428 } 429 430 char __license[] SEC("license") = "GPL"; 431