1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2022 Meta 3 4 #include <stddef.h> 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/bpf.h> 8 #include <linux/stddef.h> 9 #include <linux/pkt_cls.h> 10 #include <linux/if_ether.h> 11 #include <linux/in.h> 12 #include <linux/ip.h> 13 #include <linux/ipv6.h> 14 #include <bpf/bpf_helpers.h> 15 #include <bpf/bpf_endian.h> 16 #include <sys/socket.h> 17 18 /* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst 19 * | | 20 * ns_src | ns_fwd | ns_dst 21 * 22 * ns_src and ns_dst: ENDHOST namespace 23 * ns_fwd: Fowarding namespace 24 */ 25 26 #define ctx_ptr(field) (void *)(long)(field) 27 28 #define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */ 29 #define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */ 30 31 #define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 32 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 33 #define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 34 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 35 36 #define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ 37 a.s6_addr32[1] == b.s6_addr32[1] && \ 38 a.s6_addr32[2] == b.s6_addr32[2] && \ 39 a.s6_addr32[3] == b.s6_addr32[3]) 40 41 volatile const __u32 IFINDEX_SRC; 42 volatile const __u32 IFINDEX_DST; 43 44 #define EGRESS_ENDHOST_MAGIC 0x0b9fbeef 45 #define INGRESS_FWDNS_MAGIC 0x1b9fbeef 46 #define EGRESS_FWDNS_MAGIC 0x2b9fbeef 47 48 enum { 49 INGRESS_FWDNS_P100, 50 INGRESS_FWDNS_P101, 51 EGRESS_FWDNS_P100, 52 EGRESS_FWDNS_P101, 53 INGRESS_ENDHOST, 54 EGRESS_ENDHOST, 55 SET_DTIME, 56 __MAX_CNT, 57 }; 58 59 enum { 60 TCP_IP6_CLEAR_DTIME, 61 TCP_IP4, 62 TCP_IP6, 63 UDP_IP4, 64 UDP_IP6, 65 TCP_IP4_RT_FWD, 66 TCP_IP6_RT_FWD, 67 UDP_IP4_RT_FWD, 68 UDP_IP6_RT_FWD, 69 UKN_TEST, 70 __NR_TESTS, 71 }; 72 73 enum { 74 SRC_NS = 1, 75 DST_NS, 76 }; 77 78 __u32 dtimes[__NR_TESTS][__MAX_CNT] = {}; 79 __u32 errs[__NR_TESTS][__MAX_CNT] = {}; 80 __u32 test = 0; 81 82 static void inc_dtimes(__u32 idx) 83 { 84 if (test < __NR_TESTS) 85 dtimes[test][idx]++; 86 else 87 dtimes[UKN_TEST][idx]++; 88 } 89 90 static void inc_errs(__u32 idx) 91 { 92 if (test < __NR_TESTS) 93 errs[test][idx]++; 94 else 95 errs[UKN_TEST][idx]++; 96 } 97 98 static int skb_proto(int type) 99 { 100 return type & 0xff; 101 } 102 103 static int skb_ns(int type) 104 { 105 return (type >> 8) & 0xff; 106 } 107 108 static bool fwdns_clear_dtime(void) 109 { 110 return test == TCP_IP6_CLEAR_DTIME; 111 } 112 113 static bool bpf_fwd(void) 114 { 115 return test < TCP_IP4_RT_FWD; 116 } 117 118 /* -1: parse error: TC_ACT_SHOT 119 * 0: not testing traffic: TC_ACT_OK 120 * >0: first byte is the inet_proto, second byte has the netns 121 * of the sender 122 */ 123 static int skb_get_type(struct __sk_buff *skb) 124 { 125 void *data_end = ctx_ptr(skb->data_end); 126 void *data = ctx_ptr(skb->data); 127 __u8 inet_proto = 0, ns = 0; 128 struct ipv6hdr *ip6h; 129 struct iphdr *iph; 130 131 switch (skb->protocol) { 132 case __bpf_htons(ETH_P_IP): 133 iph = data + sizeof(struct ethhdr); 134 if (iph + 1 > data_end) 135 return -1; 136 if (iph->saddr == ip4_src) 137 ns = SRC_NS; 138 else if (iph->saddr == ip4_dst) 139 ns = DST_NS; 140 inet_proto = iph->protocol; 141 break; 142 case __bpf_htons(ETH_P_IPV6): 143 ip6h = data + sizeof(struct ethhdr); 144 if (ip6h + 1 > data_end) 145 return -1; 146 if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src)) 147 ns = SRC_NS; 148 else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) 149 ns = DST_NS; 150 inet_proto = ip6h->nexthdr; 151 break; 152 default: 153 return 0; 154 } 155 156 if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) 157 return 0; 158 159 return (ns << 8 | inet_proto); 160 } 161 162 /* format: direction@iface@netns 163 * egress@veth_(src|dst)@ns_(src|dst) 164 */ 165 SEC("tc") 166 int egress_host(struct __sk_buff *skb) 167 { 168 int skb_type; 169 170 skb_type = skb_get_type(skb); 171 if (skb_type == -1) 172 return TC_ACT_SHOT; 173 if (!skb_type) 174 return TC_ACT_OK; 175 176 if (skb_proto(skb_type) == IPPROTO_TCP) { 177 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 178 skb->tstamp) 179 inc_dtimes(EGRESS_ENDHOST); 180 else 181 inc_errs(EGRESS_ENDHOST); 182 } else { 183 if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && 184 skb->tstamp) 185 inc_dtimes(EGRESS_ENDHOST); 186 else 187 inc_errs(EGRESS_ENDHOST); 188 } 189 190 skb->tstamp = EGRESS_ENDHOST_MAGIC; 191 192 return TC_ACT_OK; 193 } 194 195 /* ingress@veth_(src|dst)@ns_(src|dst) */ 196 SEC("tc") 197 int ingress_host(struct __sk_buff *skb) 198 { 199 int skb_type; 200 201 skb_type = skb_get_type(skb); 202 if (skb_type == -1) 203 return TC_ACT_SHOT; 204 if (!skb_type) 205 return TC_ACT_OK; 206 207 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 208 skb->tstamp == EGRESS_FWDNS_MAGIC) 209 inc_dtimes(INGRESS_ENDHOST); 210 else 211 inc_errs(INGRESS_ENDHOST); 212 213 return TC_ACT_OK; 214 } 215 216 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 217 SEC("tc") 218 int ingress_fwdns_prio100(struct __sk_buff *skb) 219 { 220 int skb_type; 221 222 skb_type = skb_get_type(skb); 223 if (skb_type == -1) 224 return TC_ACT_SHOT; 225 if (!skb_type) 226 return TC_ACT_OK; 227 228 /* delivery_time is only available to the ingress 229 * if the tc-bpf checks the skb->tstamp_type. 230 */ 231 if (skb->tstamp == EGRESS_ENDHOST_MAGIC) 232 inc_errs(INGRESS_FWDNS_P100); 233 234 if (fwdns_clear_dtime()) 235 skb->tstamp = 0; 236 237 return TC_ACT_UNSPEC; 238 } 239 240 /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 241 SEC("tc") 242 int egress_fwdns_prio100(struct __sk_buff *skb) 243 { 244 int skb_type; 245 246 skb_type = skb_get_type(skb); 247 if (skb_type == -1) 248 return TC_ACT_SHOT; 249 if (!skb_type) 250 return TC_ACT_OK; 251 252 /* delivery_time is always available to egress even 253 * the tc-bpf did not use the tstamp_type. 254 */ 255 if (skb->tstamp == INGRESS_FWDNS_MAGIC) 256 inc_dtimes(EGRESS_FWDNS_P100); 257 else 258 inc_errs(EGRESS_FWDNS_P100); 259 260 if (fwdns_clear_dtime()) 261 skb->tstamp = 0; 262 263 return TC_ACT_UNSPEC; 264 } 265 266 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 267 SEC("tc") 268 int ingress_fwdns_prio101(struct __sk_buff *skb) 269 { 270 __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; 271 int skb_type; 272 273 skb_type = skb_get_type(skb); 274 if (skb_type == -1 || !skb_type) 275 /* Should have handled in prio100 */ 276 return TC_ACT_SHOT; 277 278 if (skb_proto(skb_type) == IPPROTO_UDP) 279 expected_dtime = 0; 280 281 if (skb->tstamp_type) { 282 if (fwdns_clear_dtime() || 283 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 284 skb->tstamp != expected_dtime) 285 inc_errs(INGRESS_FWDNS_P101); 286 else 287 inc_dtimes(INGRESS_FWDNS_P101); 288 } else { 289 if (!fwdns_clear_dtime() && expected_dtime) 290 inc_errs(INGRESS_FWDNS_P101); 291 } 292 293 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 294 skb->tstamp = INGRESS_FWDNS_MAGIC; 295 } else { 296 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 297 BPF_SKB_TSTAMP_DELIVERY_MONO)) 298 inc_errs(SET_DTIME); 299 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 300 BPF_SKB_TSTAMP_UNSPEC)) 301 inc_errs(SET_DTIME); 302 } 303 304 if (skb_ns(skb_type) == SRC_NS) 305 return bpf_fwd() ? 306 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; 307 else 308 return bpf_fwd() ? 309 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; 310 } 311 312 /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 313 SEC("tc") 314 int egress_fwdns_prio101(struct __sk_buff *skb) 315 { 316 int skb_type; 317 318 skb_type = skb_get_type(skb); 319 if (skb_type == -1 || !skb_type) 320 /* Should have handled in prio100 */ 321 return TC_ACT_SHOT; 322 323 if (skb->tstamp_type) { 324 if (fwdns_clear_dtime() || 325 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 326 skb->tstamp != INGRESS_FWDNS_MAGIC) 327 inc_errs(EGRESS_FWDNS_P101); 328 else 329 inc_dtimes(EGRESS_FWDNS_P101); 330 } else { 331 if (!fwdns_clear_dtime()) 332 inc_errs(EGRESS_FWDNS_P101); 333 } 334 335 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 336 skb->tstamp = EGRESS_FWDNS_MAGIC; 337 } else { 338 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, 339 BPF_SKB_TSTAMP_DELIVERY_MONO)) 340 inc_errs(SET_DTIME); 341 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 342 BPF_SKB_TSTAMP_UNSPEC)) 343 inc_errs(SET_DTIME); 344 } 345 346 return TC_ACT_OK; 347 } 348 349 char __license[] SEC("license") = "GPL"; 350