1 #include <stddef.h> 2 #include <inttypes.h> 3 #include <errno.h> 4 #include <linux/seg6_local.h> 5 #include <linux/bpf.h> 6 #include <bpf/bpf_helpers.h> 7 #include <bpf/bpf_endian.h> 8 9 /* Packet parsing state machine helpers. */ 10 #define cursor_advance(_cursor, _len) \ 11 ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) 12 13 #define SR6_FLAG_ALERT (1 << 4) 14 15 #define BPF_PACKET_HEADER __attribute__((packed)) 16 17 struct ip6_t { 18 unsigned int ver:4; 19 unsigned int priority:8; 20 unsigned int flow_label:20; 21 unsigned short payload_len; 22 unsigned char next_header; 23 unsigned char hop_limit; 24 unsigned long long src_hi; 25 unsigned long long src_lo; 26 unsigned long long dst_hi; 27 unsigned long long dst_lo; 28 } BPF_PACKET_HEADER; 29 30 struct ip6_addr_t { 31 unsigned long long hi; 32 unsigned long long lo; 33 } BPF_PACKET_HEADER; 34 35 struct ip6_srh_t { 36 unsigned char nexthdr; 37 unsigned char hdrlen; 38 unsigned char type; 39 unsigned char segments_left; 40 unsigned char first_segment; 41 unsigned char flags; 42 unsigned short tag; 43 44 struct ip6_addr_t segments[0]; 45 } BPF_PACKET_HEADER; 46 47 struct sr6_tlv_t { 48 unsigned char type; 49 unsigned char len; 50 unsigned char value[0]; 51 } BPF_PACKET_HEADER; 52 53 static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb) 54 { 55 void *cursor, *data_end; 56 struct ip6_srh_t *srh; 57 struct ip6_t *ip; 58 uint8_t *ipver; 59 60 data_end = (void *)(long)skb->data_end; 61 cursor = (void *)(long)skb->data; 62 ipver = (uint8_t *)cursor; 63 64 if ((void *)ipver + sizeof(*ipver) > data_end) 65 return NULL; 66 67 if ((*ipver >> 4) != 6) 68 return NULL; 69 70 ip = cursor_advance(cursor, sizeof(*ip)); 71 if ((void *)ip + sizeof(*ip) > data_end) 72 return NULL; 73 74 if (ip->next_header != 43) 75 return NULL; 76 77 srh = cursor_advance(cursor, sizeof(*srh)); 78 if ((void *)srh + sizeof(*srh) > data_end) 79 return NULL; 80 81 if (srh->type != 4) 82 return NULL; 83 84 return srh; 85 } 86 87 static __always_inline 88 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, 89 uint32_t old_pad, uint32_t pad_off) 90 { 91 int err; 92 93 if (new_pad != old_pad) { 94 err = bpf_lwt_seg6_adjust_srh(skb, pad_off, 95 (int) new_pad - (int) old_pad); 96 if (err) 97 return err; 98 } 99 100 if (new_pad > 0) { 101 char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 0, 0}; 103 struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; 104 105 pad_tlv->type = SR6_TLV_PADDING; 106 pad_tlv->len = new_pad - 2; 107 108 err = bpf_lwt_seg6_store_bytes(skb, pad_off, 109 (void *)pad_tlv_buf, new_pad); 110 if (err) 111 return err; 112 } 113 114 return 0; 115 } 116 117 static __always_inline 118 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, 119 uint32_t *tlv_off, uint32_t *pad_size, 120 uint32_t *pad_off) 121 { 122 uint32_t srh_off, cur_off; 123 int offset_valid = 0; 124 int err; 125 126 srh_off = (char *)srh - (char *)(long)skb->data; 127 // cur_off = end of segments, start of possible TLVs 128 cur_off = srh_off + sizeof(*srh) + 129 sizeof(struct ip6_addr_t) * (srh->first_segment + 1); 130 131 *pad_off = 0; 132 133 // we can only go as far as ~10 TLVs due to the BPF max stack size 134 #pragma clang loop unroll(full) 135 for (int i = 0; i < 10; i++) { 136 struct sr6_tlv_t tlv; 137 138 if (cur_off == *tlv_off) 139 offset_valid = 1; 140 141 if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) 142 break; 143 144 err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); 145 if (err) 146 return err; 147 148 if (tlv.type == SR6_TLV_PADDING) { 149 *pad_size = tlv.len + sizeof(tlv); 150 *pad_off = cur_off; 151 152 if (*tlv_off == srh_off) { 153 *tlv_off = cur_off; 154 offset_valid = 1; 155 } 156 break; 157 158 } else if (tlv.type == SR6_TLV_HMAC) { 159 break; 160 } 161 162 cur_off += sizeof(tlv) + tlv.len; 163 } // we reached the padding or HMAC TLVs, or the end of the SRH 164 165 if (*pad_off == 0) 166 *pad_off = cur_off; 167 168 if (*tlv_off == -1) 169 *tlv_off = cur_off; 170 else if (!offset_valid) 171 return -EINVAL; 172 173 return 0; 174 } 175 176 static __always_inline 177 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, 178 struct sr6_tlv_t *itlv, uint8_t tlv_size) 179 { 180 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 181 uint8_t len_remaining, new_pad; 182 uint32_t pad_off = 0; 183 uint32_t pad_size = 0; 184 uint32_t partial_srh_len; 185 int err; 186 187 if (tlv_off != -1) 188 tlv_off += srh_off; 189 190 if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) 191 return -EINVAL; 192 193 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 194 if (err) 195 return err; 196 197 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); 198 if (err) 199 return err; 200 201 err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); 202 if (err) 203 return err; 204 205 // the following can't be moved inside update_tlv_pad because the 206 // bpf verifier has some issues with it 207 pad_off += sizeof(*itlv) + itlv->len; 208 partial_srh_len = pad_off - srh_off; 209 len_remaining = partial_srh_len % 8; 210 new_pad = 8 - len_remaining; 211 212 if (new_pad == 1) // cannot pad for 1 byte only 213 new_pad = 9; 214 else if (new_pad == 8) 215 new_pad = 0; 216 217 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 218 } 219 220 static __always_inline 221 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, 222 uint32_t tlv_off) 223 { 224 uint32_t srh_off = (char *)srh - (char *)(long)skb->data; 225 uint8_t len_remaining, new_pad; 226 uint32_t partial_srh_len; 227 uint32_t pad_off = 0; 228 uint32_t pad_size = 0; 229 struct sr6_tlv_t tlv; 230 int err; 231 232 tlv_off += srh_off; 233 234 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); 235 if (err) 236 return err; 237 238 err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); 239 if (err) 240 return err; 241 242 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); 243 if (err) 244 return err; 245 246 pad_off -= sizeof(tlv) + tlv.len; 247 partial_srh_len = pad_off - srh_off; 248 len_remaining = partial_srh_len % 8; 249 new_pad = 8 - len_remaining; 250 if (new_pad == 1) // cannot pad for 1 byte only 251 new_pad = 9; 252 else if (new_pad == 8) 253 new_pad = 0; 254 255 return update_tlv_pad(skb, new_pad, pad_size, pad_off); 256 } 257 258 static __always_inline 259 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) 260 { 261 int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + 262 ((srh->first_segment + 1) << 4); 263 struct sr6_tlv_t tlv; 264 265 if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) 266 return 0; 267 268 if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { 269 struct ip6_addr_t egr_addr; 270 271 if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) 272 return 0; 273 274 // check if egress TLV value is correct 275 if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 && 276 bpf_be64_to_cpu(egr_addr.lo) == 0x4) 277 return 1; 278 } 279 280 return 0; 281 } 282 283 // This function will push a SRH with segments fd00::1, fd00::2, fd00::3, 284 // fd00::4 285 SEC("encap_srh") 286 int __encap_srh(struct __sk_buff *skb) 287 { 288 unsigned long long hi = 0xfd00000000000000; 289 struct ip6_addr_t *seg; 290 struct ip6_srh_t *srh; 291 char srh_buf[72]; // room for 4 segments 292 int err; 293 294 srh = (struct ip6_srh_t *)srh_buf; 295 srh->nexthdr = 0; 296 srh->hdrlen = 8; 297 srh->type = 4; 298 srh->segments_left = 3; 299 srh->first_segment = 3; 300 srh->flags = 0; 301 srh->tag = 0; 302 303 seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); 304 305 #pragma clang loop unroll(full) 306 for (unsigned long long lo = 0; lo < 4; lo++) { 307 seg->lo = bpf_cpu_to_be64(4 - lo); 308 seg->hi = bpf_cpu_to_be64(hi); 309 seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); 310 } 311 312 err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); 313 if (err) 314 return BPF_DROP; 315 316 return BPF_REDIRECT; 317 } 318 319 // Add an Egress TLV fc00::4, add the flag A, 320 // and apply End.X action to fc42::1 321 SEC("add_egr_x") 322 int __add_egr_x(struct __sk_buff *skb) 323 { 324 unsigned long long hi = 0xfc42000000000000; 325 unsigned long long lo = 0x1; 326 struct ip6_srh_t *srh = get_srh(skb); 327 uint8_t new_flags = SR6_FLAG_ALERT; 328 struct ip6_addr_t addr; 329 int err, offset; 330 331 if (srh == NULL) 332 return BPF_DROP; 333 334 uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 335 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; 336 337 err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, 338 (struct sr6_tlv_t *)&tlv, 20); 339 if (err) 340 return BPF_DROP; 341 342 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 343 err = bpf_lwt_seg6_store_bytes(skb, offset, 344 (void *)&new_flags, sizeof(new_flags)); 345 if (err) 346 return BPF_DROP; 347 348 addr.lo = bpf_cpu_to_be64(lo); 349 addr.hi = bpf_cpu_to_be64(hi); 350 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, 351 (void *)&addr, sizeof(addr)); 352 if (err) 353 return BPF_DROP; 354 return BPF_REDIRECT; 355 } 356 357 // Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a 358 // simple End action 359 SEC("pop_egr") 360 int __pop_egr(struct __sk_buff *skb) 361 { 362 struct ip6_srh_t *srh = get_srh(skb); 363 uint16_t new_tag = bpf_htons(2442); 364 uint8_t new_flags = 0; 365 int err, offset; 366 367 if (srh == NULL) 368 return BPF_DROP; 369 370 if (srh->flags != SR6_FLAG_ALERT) 371 return BPF_DROP; 372 373 if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV 374 return BPF_DROP; 375 376 if (!has_egr_tlv(skb, srh)) 377 return BPF_DROP; 378 379 err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); 380 if (err) 381 return BPF_DROP; 382 383 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); 384 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, 385 sizeof(new_flags))) 386 return BPF_DROP; 387 388 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); 389 if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, 390 sizeof(new_tag))) 391 return BPF_DROP; 392 393 return BPF_OK; 394 } 395 396 // Inspect if the Egress TLV and flag have been removed, if the tag is correct, 397 // then apply a End.T action to reach the last segment 398 SEC("inspect_t") 399 int __inspect_t(struct __sk_buff *skb) 400 { 401 struct ip6_srh_t *srh = get_srh(skb); 402 int table = 117; 403 int err; 404 405 if (srh == NULL) 406 return BPF_DROP; 407 408 if (srh->flags != 0) 409 return BPF_DROP; 410 411 if (srh->tag != bpf_htons(2442)) 412 return BPF_DROP; 413 414 if (srh->hdrlen != 8) // 4 segments 415 return BPF_DROP; 416 417 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, 418 (void *)&table, sizeof(table)); 419 420 if (err) 421 return BPF_DROP; 422 423 return BPF_REDIRECT; 424 } 425 426 char __license[] SEC("license") = "GPL"; 427