// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2022 Meta

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <sys/socket.h>

/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */

/* Turn a __sk_buff context field (e.g. skb->data) into a pointer. */
#define ctx_ptr(field)		(void *)(long)(field)

/* IPv4 source addresses (network byte order) used to tell which end host
 * sent the packet.
 */
#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */

/* IPv6 counterparts of ip4_src/ip4_dst, written as 16-byte initializers;
 * they are cast to struct in6_addr at the point of use.
 */
#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }

/* Compare two struct in6_addr values one 32-bit word at a time. */
#define v6_equal(a, b)	(a.s6_addr32[0] == b.s6_addr32[0] && \
			 a.s6_addr32[1] == b.s6_addr32[1] && \
			 a.s6_addr32[2] == b.s6_addr32[2] && \
			 a.s6_addr32[3] == b.s6_addr32[3])

/* Interface indexes passed to bpf_redirect_neigh() by ingress_fwdns_prio101.
 * Nothing in this file assigns them; presumably filled in by the user-space
 * loader before the programs are loaded -- TODO confirm.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;

/* Marker values stored in skb->tstamp by one program and looked for by a
 * later one:
 *   EGRESS_ENDHOST_MAGIC: written by egress_host
 *   INGRESS_FWDNS_MAGIC:  written by ingress_fwdns_prio101
 *   EGRESS_FWDNS_MAGIC:   written by egress_fwdns_prio101
 */
#define EGRESS_ENDHOST_MAGIC	0x0b9fbeef
#define INGRESS_FWDNS_MAGIC	0x1b9fbeef
#define EGRESS_FWDNS_MAGIC	0x2b9fbeef

/* Second index of dtimes[][] and errs[][]: the checkpoint that recorded
 * the result.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Values of 'test' and first index of dtimes[][] and errs[][].
 * UKN_TEST is the row used when 'test' is out of range.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Sender namespace, stored in the second byte of skb_get_type()'s result.
 * Starts at 1 so that 0 can mean "neither end host".
 */
enum {
	SRC_NS = 1,
	DST_NS,
};

/* Per-test, per-checkpoint counters: dtimes[][] counts packets that passed
 * a checkpoint's expectation, errs[][] counts packets that violated it.
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Currently selected test case. Nothing in this file writes it; presumably
 * set from user space before traffic is generated -- TODO confirm.
 */
__u32 test = 0;

/* Bump dtimes[test][idx], using the UKN_TEST row if 'test' is out of range.
 * idx itself is not range-checked; every caller in this file passes one of
 * the checkpoint enum constants.
 */
static void inc_dtimes(__u32 idx)
{
	if (test < __NR_TESTS)
		dtimes[test][idx]++;
	else
		dtimes[UKN_TEST][idx]++;
}

/* Same as inc_dtimes() but for the errs[][] table. */
static void inc_errs(__u32 idx)
{
	if (test < __NR_TESTS)
		errs[test][idx]++;
	else
		errs[UKN_TEST][idx]++;
}

/* Extract the inet protocol (low byte) from a skb_get_type() result. */
static int skb_proto(int type)
{
	return type & 0xff;
}

/* Extract the sender namespace (second byte) from a skb_get_type() result. */
static int skb_ns(int type)
{
	return (type >> 8) & 0xff;
}

/* True when the running test is the one in which the forwarding namespace
 * zeroes skb->tstamp.
 */
static bool fwdns_clear_dtime(void)
{
	return test == TCP_IP6_CLEAR_DTIME;
}

/* True for the tests listed before TCP_IP4_RT_FWD in the enum; for those,
 * ingress_fwdns_prio101 forwards with bpf_redirect_neigh() instead of
 * returning TC_ACT_OK.
 */
static bool bpf_fwd(void)
{
	return test < TCP_IP4_RT_FWD;
}

/* Transport protocol exercised by the current test: UDP for the four UDP_*
 * tests, TCP for everything else (including out-of-range values).
 */
static __u8 get_proto(void)
{
	switch (test) {
	case UDP_IP4:
	case UDP_IP6:
	case UDP_IP4_RT_FWD:
	case UDP_IP6_RT_FWD:
		return IPPROTO_UDP;
	default:
		return IPPROTO_TCP;
	}
}

/* -1: parse error: TC_ACT_SHOT
 *  0: not testing traffic: TC_ACT_OK
 * >0: first byte is the inet_proto, second byte has the netns
 *     of the sender
 *
 * A packet counts as testing traffic when its source address is one of the
 * two end-host addresses, its protocol matches get_proto(), and the port on
 * the ns_dst side (dport when sent by ns_src, sport when sent by ns_dst)
 * equals 50000 + test.
 */
static int skb_get_type(struct __sk_buff *skb)
{
	/* Port compared against the on-wire (network byte order) fields. */
	__u16 dst_ns_port = __bpf_htons(50000 + test);
	void *data_end = ctx_ptr(skb->data_end);
	void *data = ctx_ptr(skb->data);
	__u8 inet_proto = 0, ns = 0;
	struct ipv6hdr *ip6h;
	__u16 sport, dport;
	struct iphdr *iph;
	struct tcphdr *th;
	struct udphdr *uh;
	void *trans;

	switch (skb->protocol) {
	case __bpf_htons(ETH_P_IP):
		iph = data + sizeof(struct ethhdr);
		/* Whole IPv4 header must be inside the packet before reading it. */
		if (iph + 1 > data_end)
			return -1;
		if (iph->saddr == ip4_src)
			ns = SRC_NS;
		else if (iph->saddr == ip4_dst)
			ns = DST_NS;
		inet_proto = iph->protocol;
		/* NOTE(review): the transport header is taken to start right
		 * after the fixed-size header; iph->ihl (IPv4 options) is not
		 * consulted.
		 */
		trans = iph + 1;
		break;
	case __bpf_htons(ETH_P_IPV6):
		ip6h = data + sizeof(struct ethhdr);
		/* Whole IPv6 header must be inside the packet before reading it. */
		if (ip6h + 1 > data_end)
			return -1;
		if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
			ns = SRC_NS;
		else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
			ns = DST_NS;
		inet_proto = ip6h->nexthdr;
		/* NOTE(review): extension headers are not walked; nexthdr is
		 * used directly as the transport protocol.
		 */
		trans = ip6h + 1;
		break;
	default:
		/* Neither IPv4 nor IPv6: not testing traffic. */
		return 0;
	}

	/* skb is not from src_ns or dst_ns.
	 * skb is not the testing IPPROTO.
	 */
	if (!ns || inet_proto != get_proto())
		return 0;

	switch (inet_proto) {
	case IPPROTO_TCP:
		th = trans;
		if (th + 1 > data_end)
			return -1;
		sport = th->source;
		dport = th->dest;
		break;
	case IPPROTO_UDP:
		uh = trans;
		if (uh + 1 > data_end)
			return -1;
		sport = uh->source;
		dport = uh->dest;
		break;
	default:
		return 0;
	}

	/* The skb is the testing traffic */
	if ((ns == SRC_NS && dport == dst_ns_port) ||
	    (ns == DST_NS && sport == dst_ns_port))
		return (ns << 8 | inet_proto);

	return 0;
}

/* format: direction@iface@netns
 * egress@veth_(src|dst)@ns_(src|dst)
 *
 * Checks the timestamp the end host put on its outgoing test packet and
 * then overwrites skb->tstamp with EGRESS_ENDHOST_MAGIC:
 *   TCP:       expects tstamp_type DELIVERY_MONO and a non-zero tstamp
 *   otherwise: expects tstamp_type UNSPEC and a non-zero tstamp
 * Result is counted under EGRESS_ENDHOST.
 */
SEC("tc")
int egress_host(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1)
		return TC_ACT_SHOT;
	if (!skb_type)
		return TC_ACT_OK;

	if (skb_proto(skb_type) == IPPROTO_TCP) {
		if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
		    skb->tstamp)
			inc_dtimes(EGRESS_ENDHOST);
		else
			inc_errs(EGRESS_ENDHOST);
	} else {
		if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
		    skb->tstamp)
			inc_dtimes(EGRESS_ENDHOST);
		else
			inc_errs(EGRESS_ENDHOST);
	}

	/* Leave a marker for the forwarding namespace's ingress programs. */
	skb->tstamp = EGRESS_ENDHOST_MAGIC;

	return TC_ACT_OK;
}

/* ingress@veth_(src|dst)@ns_(src|dst)
 *
 * Expects a test packet to arrive with tstamp_type DELIVERY_MONO and
 * skb->tstamp equal to EGRESS_FWDNS_MAGIC (the value egress_fwdns_prio101
 * stores). Result is counted under INGRESS_ENDHOST.
 */
SEC("tc")
int ingress_host(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1)
		return TC_ACT_SHOT;
	if (!skb_type)
		return TC_ACT_OK;

	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
	    skb->tstamp == EGRESS_FWDNS_MAGIC)
		inc_dtimes(INGRESS_ENDHOST);
	else
		inc_errs(INGRESS_ENDHOST);

	return TC_ACT_OK;
}

/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100
 *
 * Reads skb->tstamp without looking at skb->tstamp_type and records an
 * error under INGRESS_FWDNS_P100 if the end host's marker is visible.
 * Non-test traffic returns TC_ACT_OK; test traffic returns TC_ACT_UNSPEC
 * (presumably so that the priority 101 program also runs -- TODO confirm).
 */
SEC("tc")
int ingress_fwdns_prio100(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1)
		return TC_ACT_SHOT;
	if (!skb_type)
		return TC_ACT_OK;

	/* delivery_time is only available to the ingress
	 * if the tc-bpf checks the skb->tstamp_type.
	 */
	if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
		inc_errs(INGRESS_FWDNS_P100);

	/* Only the TCP_IP6_CLEAR_DTIME test zeroes the timestamp here. */
	if (fwdns_clear_dtime())
		skb->tstamp = 0;

	return TC_ACT_UNSPEC;
}

/* egress@veth_(src|dst)_fwd@ns_fwd priority 100
 *
 * Reads skb->tstamp without looking at skb->tstamp_type and expects to see
 * INGRESS_FWDNS_MAGIC (the value ingress_fwdns_prio101 stores). Result is
 * counted under EGRESS_FWDNS_P100. Return values follow the same pattern as
 * ingress_fwdns_prio100.
 */
SEC("tc")
int egress_fwdns_prio100(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1)
		return TC_ACT_SHOT;
	if (!skb_type)
		return TC_ACT_OK;

	/* delivery_time is always available to egress even
	 * if the tc-bpf did not use the tstamp_type.
	 */
	if (skb->tstamp == INGRESS_FWDNS_MAGIC)
		inc_dtimes(EGRESS_FWDNS_P100);
	else
		inc_errs(EGRESS_FWDNS_P100);

	/* Only the TCP_IP6_CLEAR_DTIME test zeroes the timestamp here. */
	if (fwdns_clear_dtime())
		skb->tstamp = 0;

	return TC_ACT_UNSPEC;
}

/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101
 *
 * Checks tstamp_type/tstamp as left by the end host and the priority 100
 * program, stamps the packet with INGRESS_FWDNS_MAGIC, and then either
 * redirects it towards the opposite end host with bpf_redirect_neigh()
 * (when bpf_fwd() is true) or returns TC_ACT_OK.
 * Results are counted under INGRESS_FWDNS_P101 and SET_DTIME.
 */
SEC("tc")
int ingress_fwdns_prio101(struct __sk_buff *skb)
{
	__u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1 || !skb_type)
		/* Should have handled in prio100 */
		return TC_ACT_SHOT;

	/* For UDP the expected timestamp is 0 instead of the end host's marker. */
	if (skb_proto(skb_type) == IPPROTO_UDP)
		expected_dtime = 0;

	if (skb->tstamp_type) {
		/* A typed timestamp is wrong if this is the clear-dtime test,
		 * if the type is not DELIVERY_MONO, or if the value differs
		 * from the expected one.
		 */
		if (fwdns_clear_dtime() ||
		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
		    skb->tstamp != expected_dtime)
			inc_errs(INGRESS_FWDNS_P101);
		else
			inc_dtimes(INGRESS_FWDNS_P101);
	} else {
		/* No timestamp type: an error unless this is the clear-dtime
		 * test or no delivery time was expected (expected_dtime == 0).
		 */
		if (!fwdns_clear_dtime() && expected_dtime)
			inc_errs(INGRESS_FWDNS_P101);
	}

	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
		skb->tstamp = INGRESS_FWDNS_MAGIC;
	} else {
		/* Setting a DELIVERY_MONO timestamp is expected to return 0. */
		if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
				       BPF_SKB_TSTAMP_DELIVERY_MONO))
			inc_errs(SET_DTIME);
		/* Setting a non-zero value with type UNSPEC is expected to be
		 * rejected; a 0 return is counted as an error.
		 */
		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
					BPF_SKB_TSTAMP_UNSPEC))
			inc_errs(SET_DTIME);
	}

	/* Packets sent by ns_src go out IFINDEX_DST, all others IFINDEX_SRC. */
	if (skb_ns(skb_type) == SRC_NS)
		return bpf_fwd() ?
			bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
	else
		return bpf_fwd() ?
			bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
}

/* egress@veth_(src|dst)_fwd@ns_fwd priority 101
 *
 * Checks that the packet still carries INGRESS_FWDNS_MAGIC with type
 * DELIVERY_MONO (or no timestamp type at all in the clear-dtime test), then
 * stamps it with EGRESS_FWDNS_MAGIC for ingress_host to verify.
 * Results are counted under EGRESS_FWDNS_P101 and SET_DTIME.
 */
SEC("tc")
int egress_fwdns_prio101(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1 || !skb_type)
		/* Should have handled in prio100 */
		return TC_ACT_SHOT;

	if (skb->tstamp_type) {
		if (fwdns_clear_dtime() ||
		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
		    skb->tstamp != INGRESS_FWDNS_MAGIC)
			inc_errs(EGRESS_FWDNS_P101);
		else
			inc_dtimes(EGRESS_FWDNS_P101);
	} else {
		/* A missing timestamp type is only acceptable in the
		 * clear-dtime test.
		 */
		if (!fwdns_clear_dtime())
			inc_errs(EGRESS_FWDNS_P101);
	}

	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
		skb->tstamp = EGRESS_FWDNS_MAGIC;
	} else {
		/* Setting a DELIVERY_MONO timestamp is expected to return 0. */
		if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
				       BPF_SKB_TSTAMP_DELIVERY_MONO))
			inc_errs(SET_DTIME);
		/* Setting a non-zero value with type UNSPEC is expected to be
		 * rejected; a 0 return is counted as an error.
		 * NOTE(review): this passes INGRESS_FWDNS_MAGIC rather than
		 * EGRESS_FWDNS_MAGIC. The value would only be stored if the
		 * call unexpectedly succeeded; looks like a copy-paste from
		 * ingress_fwdns_prio101 -- confirm.
		 */
		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
					BPF_SKB_TSTAMP_UNSPEC))
			inc_errs(SET_DTIME);
	}

	return TC_ACT_OK;
}

char __license[] SEC("license") = "GPL";