1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2022 Meta 3 4 #include <stddef.h> 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/bpf.h> 8 #include <linux/stddef.h> 9 #include <linux/pkt_cls.h> 10 #include <linux/if_ether.h> 11 #include <linux/in.h> 12 #include <linux/ip.h> 13 #include <linux/ipv6.h> 14 #include <linux/tcp.h> 15 #include <linux/udp.h> 16 #include <bpf/bpf_helpers.h> 17 #include <bpf/bpf_endian.h> 18 19 /* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst 20 * | | 21 * ns_src | ns_fwd | ns_dst 22 * 23 * ns_src and ns_dst: ENDHOST namespace 24 * ns_fwd: Fowarding namespace 25 */ 26 27 #define ctx_ptr(field) (void *)(long)(field) 28 29 #define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */ 30 #define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */ 31 32 #define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 33 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 34 #define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 35 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 36 37 #define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ 38 a.s6_addr32[1] == b.s6_addr32[1] && \ 39 a.s6_addr32[2] == b.s6_addr32[2] && \ 40 a.s6_addr32[3] == b.s6_addr32[3]) 41 42 volatile const __u32 IFINDEX_SRC; 43 volatile const __u32 IFINDEX_DST; 44 45 #define EGRESS_ENDHOST_MAGIC 0x0b9fbeef 46 #define INGRESS_FWDNS_MAGIC 0x1b9fbeef 47 #define EGRESS_FWDNS_MAGIC 0x2b9fbeef 48 49 enum { 50 INGRESS_FWDNS_P100, 51 INGRESS_FWDNS_P101, 52 EGRESS_FWDNS_P100, 53 EGRESS_FWDNS_P101, 54 INGRESS_ENDHOST, 55 EGRESS_ENDHOST, 56 SET_DTIME, 57 __MAX_CNT, 58 }; 59 60 enum { 61 TCP_IP6_CLEAR_DTIME, 62 TCP_IP4, 63 TCP_IP6, 64 UDP_IP4, 65 UDP_IP6, 66 TCP_IP4_RT_FWD, 67 TCP_IP6_RT_FWD, 68 UDP_IP4_RT_FWD, 69 UDP_IP6_RT_FWD, 70 UKN_TEST, 71 __NR_TESTS, 72 }; 73 74 enum { 75 SRC_NS = 1, 76 DST_NS, 77 }; 78 79 __u32 dtimes[__NR_TESTS][__MAX_CNT] = {}; 80 __u32 errs[__NR_TESTS][__MAX_CNT] = {}; 81 __u32 test = 0; 82 83 static void inc_dtimes(__u32 idx) 84 { 85 if (test < __NR_TESTS) 86 dtimes[test][idx]++; 87 else 88 dtimes[UKN_TEST][idx]++; 89 } 90 91 static void inc_errs(__u32 idx) 92 { 93 if (test < __NR_TESTS) 94 errs[test][idx]++; 95 else 96 errs[UKN_TEST][idx]++; 97 } 98 99 static int skb_proto(int type) 100 { 101 return type & 0xff; 102 } 103 104 static int skb_ns(int type) 105 { 106 return (type >> 8) & 0xff; 107 } 108 109 static bool fwdns_clear_dtime(void) 110 { 111 return test == TCP_IP6_CLEAR_DTIME; 112 } 113 114 static bool bpf_fwd(void) 115 { 116 return test < TCP_IP4_RT_FWD; 117 } 118 119 static __u8 get_proto(void) 120 { 121 switch (test) { 122 case UDP_IP4: 123 case UDP_IP6: 124 case UDP_IP4_RT_FWD: 125 case UDP_IP6_RT_FWD: 126 return IPPROTO_UDP; 127 default: 128 return IPPROTO_TCP; 129 } 130 } 131 132 /* -1: parse error: TC_ACT_SHOT 133 * 0: not testing traffic: TC_ACT_OK 134 * >0: first byte is the inet_proto, second byte has the netns 135 * of the sender 136 */ 137 static int skb_get_type(struct __sk_buff *skb) 138 { 139 __u16 dst_ns_port = __bpf_htons(50000 + test); 140 void *data_end = ctx_ptr(skb->data_end); 141 void *data = ctx_ptr(skb->data); 142 __u8 inet_proto = 0, ns = 0; 143 struct ipv6hdr *ip6h; 144 __u16 sport, dport; 145 struct iphdr *iph; 146 struct tcphdr *th; 147 struct udphdr *uh; 148 void *trans; 149 150 switch (skb->protocol) { 151 case __bpf_htons(ETH_P_IP): 152 iph = data + sizeof(struct ethhdr); 153 if (iph + 1 > data_end) 154 return -1; 155 if (iph->saddr == ip4_src) 156 ns = SRC_NS; 157 else if (iph->saddr == ip4_dst) 158 ns = DST_NS; 159 inet_proto = iph->protocol; 160 trans = iph + 1; 161 break; 162 case __bpf_htons(ETH_P_IPV6): 163 ip6h = data + sizeof(struct ethhdr); 164 if (ip6h + 1 > data_end) 165 return -1; 166 if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src)) 167 ns = SRC_NS; 168 else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) 169 ns = DST_NS; 170 inet_proto = ip6h->nexthdr; 171 trans = ip6h + 1; 172 break; 173 default: 174 return 0; 175 } 176 177 /* skb is not from src_ns or dst_ns. 178 * skb is not the testing IPPROTO. 179 */ 180 if (!ns || inet_proto != get_proto()) 181 return 0; 182 183 switch (inet_proto) { 184 case IPPROTO_TCP: 185 th = trans; 186 if (th + 1 > data_end) 187 return -1; 188 sport = th->source; 189 dport = th->dest; 190 break; 191 case IPPROTO_UDP: 192 uh = trans; 193 if (uh + 1 > data_end) 194 return -1; 195 sport = uh->source; 196 dport = uh->dest; 197 break; 198 default: 199 return 0; 200 } 201 202 /* The skb is the testing traffic */ 203 if ((ns == SRC_NS && dport == dst_ns_port) || 204 (ns == DST_NS && sport == dst_ns_port)) 205 return (ns << 8 | inet_proto); 206 207 return 0; 208 } 209 210 /* format: direction@iface@netns 211 * egress@veth_(src|dst)@ns_(src|dst) 212 */ 213 SEC("tc") 214 int egress_host(struct __sk_buff *skb) 215 { 216 int skb_type; 217 218 skb_type = skb_get_type(skb); 219 if (skb_type == -1) 220 return TC_ACT_SHOT; 221 if (!skb_type) 222 return TC_ACT_OK; 223 224 if (skb_proto(skb_type) == IPPROTO_TCP) { 225 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 226 skb->tstamp) 227 inc_dtimes(EGRESS_ENDHOST); 228 else 229 inc_errs(EGRESS_ENDHOST); 230 } else { 231 if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && 232 skb->tstamp) 233 inc_dtimes(EGRESS_ENDHOST); 234 else 235 inc_errs(EGRESS_ENDHOST); 236 } 237 238 skb->tstamp = EGRESS_ENDHOST_MAGIC; 239 240 return TC_ACT_OK; 241 } 242 243 /* ingress@veth_(src|dst)@ns_(src|dst) */ 244 SEC("tc") 245 int ingress_host(struct __sk_buff *skb) 246 { 247 int skb_type; 248 249 skb_type = skb_get_type(skb); 250 if (skb_type == -1) 251 return TC_ACT_SHOT; 252 if (!skb_type) 253 return TC_ACT_OK; 254 255 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 256 skb->tstamp == EGRESS_FWDNS_MAGIC) 257 inc_dtimes(INGRESS_ENDHOST); 258 else 259 inc_errs(INGRESS_ENDHOST); 260 261 return TC_ACT_OK; 262 } 263 264 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 265 SEC("tc") 266 int ingress_fwdns_prio100(struct __sk_buff *skb) 267 { 268 int skb_type; 269 270 skb_type = skb_get_type(skb); 271 if (skb_type == -1) 272 return TC_ACT_SHOT; 273 if (!skb_type) 274 return TC_ACT_OK; 275 276 /* delivery_time is only available to the ingress 277 * if the tc-bpf checks the skb->tstamp_type. 278 */ 279 if (skb->tstamp == EGRESS_ENDHOST_MAGIC) 280 inc_errs(INGRESS_FWDNS_P100); 281 282 if (fwdns_clear_dtime()) 283 skb->tstamp = 0; 284 285 return TC_ACT_UNSPEC; 286 } 287 288 /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 289 SEC("tc") 290 int egress_fwdns_prio100(struct __sk_buff *skb) 291 { 292 int skb_type; 293 294 skb_type = skb_get_type(skb); 295 if (skb_type == -1) 296 return TC_ACT_SHOT; 297 if (!skb_type) 298 return TC_ACT_OK; 299 300 /* delivery_time is always available to egress even 301 * the tc-bpf did not use the tstamp_type. 302 */ 303 if (skb->tstamp == INGRESS_FWDNS_MAGIC) 304 inc_dtimes(EGRESS_FWDNS_P100); 305 else 306 inc_errs(EGRESS_FWDNS_P100); 307 308 if (fwdns_clear_dtime()) 309 skb->tstamp = 0; 310 311 return TC_ACT_UNSPEC; 312 } 313 314 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 315 SEC("tc") 316 int ingress_fwdns_prio101(struct __sk_buff *skb) 317 { 318 __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; 319 int skb_type; 320 321 skb_type = skb_get_type(skb); 322 if (skb_type == -1 || !skb_type) 323 /* Should have handled in prio100 */ 324 return TC_ACT_SHOT; 325 326 if (skb_proto(skb_type) == IPPROTO_UDP) 327 expected_dtime = 0; 328 329 if (skb->tstamp_type) { 330 if (fwdns_clear_dtime() || 331 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 332 skb->tstamp != expected_dtime) 333 inc_errs(INGRESS_FWDNS_P101); 334 else 335 inc_dtimes(INGRESS_FWDNS_P101); 336 } else { 337 if (!fwdns_clear_dtime() && expected_dtime) 338 inc_errs(INGRESS_FWDNS_P101); 339 } 340 341 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 342 skb->tstamp = INGRESS_FWDNS_MAGIC; 343 } else { 344 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 345 BPF_SKB_TSTAMP_DELIVERY_MONO)) 346 inc_errs(SET_DTIME); 347 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 348 BPF_SKB_TSTAMP_UNSPEC)) 349 inc_errs(SET_DTIME); 350 } 351 352 if (skb_ns(skb_type) == SRC_NS) 353 return bpf_fwd() ? 354 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; 355 else 356 return bpf_fwd() ? 357 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; 358 } 359 360 /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 361 SEC("tc") 362 int egress_fwdns_prio101(struct __sk_buff *skb) 363 { 364 int skb_type; 365 366 skb_type = skb_get_type(skb); 367 if (skb_type == -1 || !skb_type) 368 /* Should have handled in prio100 */ 369 return TC_ACT_SHOT; 370 371 if (skb->tstamp_type) { 372 if (fwdns_clear_dtime() || 373 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 374 skb->tstamp != INGRESS_FWDNS_MAGIC) 375 inc_errs(EGRESS_FWDNS_P101); 376 else 377 inc_dtimes(EGRESS_FWDNS_P101); 378 } else { 379 if (!fwdns_clear_dtime()) 380 inc_errs(EGRESS_FWDNS_P101); 381 } 382 383 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 384 skb->tstamp = EGRESS_FWDNS_MAGIC; 385 } else { 386 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, 387 BPF_SKB_TSTAMP_DELIVERY_MONO)) 388 inc_errs(SET_DTIME); 389 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 390 BPF_SKB_TSTAMP_UNSPEC)) 391 inc_errs(SET_DTIME); 392 } 393 394 return TC_ACT_OK; 395 } 396 397 char __license[] SEC("license") = "GPL"; 398