1 // SPDX-License-Identifier: GPL-2.0 2 #include <string.h> 3 #include <linux/tcp.h> 4 #include <linux/bpf.h> 5 #include <netinet/in.h> 6 #include <bpf/bpf_helpers.h> 7 8 char _license[] SEC("license") = "GPL"; 9 10 int page_size = 0; /* userspace should set it */ 11 12 #ifndef SOL_TCP 13 #define SOL_TCP IPPROTO_TCP 14 #endif 15 16 #define SOL_CUSTOM 0xdeadbeef 17 18 struct sockopt_sk { 19 __u8 val; 20 }; 21 22 struct { 23 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 24 __uint(map_flags, BPF_F_NO_PREALLOC); 25 __type(key, int); 26 __type(value, struct sockopt_sk); 27 } socket_storage_map SEC(".maps"); 28 29 SEC("cgroup/getsockopt") 30 int _getsockopt(struct bpf_sockopt *ctx) 31 { 32 __u8 *optval_end = ctx->optval_end; 33 __u8 *optval = ctx->optval; 34 struct sockopt_sk *storage; 35 struct bpf_sock *sk; 36 37 /* Bypass AF_NETLINK. */ 38 sk = ctx->sk; 39 if (sk && sk->family == AF_NETLINK) 40 return 1; 41 42 /* Make sure bpf_get_netns_cookie is callable. 43 */ 44 if (bpf_get_netns_cookie(NULL) == 0) 45 return 0; 46 47 if (bpf_get_netns_cookie(ctx) == 0) 48 return 0; 49 50 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { 51 /* Not interested in SOL_IP:IP_TOS; 52 * let next BPF program in the cgroup chain or kernel 53 * handle it. 54 */ 55 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ 56 return 1; 57 } 58 59 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { 60 /* Not interested in SOL_SOCKET:SO_SNDBUF; 61 * let next BPF program in the cgroup chain or kernel 62 * handle it. 63 */ 64 return 1; 65 } 66 67 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { 68 /* Not interested in SOL_TCP:TCP_CONGESTION; 69 * let next BPF program in the cgroup chain or kernel 70 * handle it. 71 */ 72 return 1; 73 } 74 75 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { 76 /* Verify that TCP_ZEROCOPY_RECEIVE triggers. 77 * It has a custom implementation for performance 78 * reasons. 79 */ 80 81 /* Check that optval contains address (__u64) */ 82 if (optval + sizeof(__u64) > optval_end) 83 return 0; /* bounds check */ 84 85 if (((struct tcp_zerocopy_receive *)optval)->address != 0) 86 return 0; /* unexpected data */ 87 88 return 1; 89 } 90 91 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 92 if (optval + 1 > optval_end) 93 return 0; /* bounds check */ 94 95 ctx->retval = 0; /* Reset system call return value to zero */ 96 97 /* Always export 0x55 */ 98 optval[0] = 0x55; 99 ctx->optlen = 1; 100 101 /* Userspace buffer is PAGE_SIZE * 2, but BPF 102 * program can only see the first PAGE_SIZE 103 * bytes of data. 104 */ 105 if (optval_end - optval != page_size) 106 return 0; /* unexpected data size */ 107 108 return 1; 109 } 110 111 if (ctx->level != SOL_CUSTOM) 112 return 0; /* deny everything except custom level */ 113 114 if (optval + 1 > optval_end) 115 return 0; /* bounds check */ 116 117 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 118 BPF_SK_STORAGE_GET_F_CREATE); 119 if (!storage) 120 return 0; /* couldn't get sk storage */ 121 122 if (!ctx->retval) 123 return 0; /* kernel should not have handled 124 * SOL_CUSTOM, something is wrong! 125 */ 126 ctx->retval = 0; /* Reset system call return value to zero */ 127 128 optval[0] = storage->val; 129 ctx->optlen = 1; 130 131 return 1; 132 } 133 134 SEC("cgroup/setsockopt") 135 int _setsockopt(struct bpf_sockopt *ctx) 136 { 137 __u8 *optval_end = ctx->optval_end; 138 __u8 *optval = ctx->optval; 139 struct sockopt_sk *storage; 140 struct bpf_sock *sk; 141 142 /* Bypass AF_NETLINK. */ 143 sk = ctx->sk; 144 if (sk && sk->family == AF_NETLINK) 145 return 1; 146 147 /* Make sure bpf_get_netns_cookie is callable. 148 */ 149 if (bpf_get_netns_cookie(NULL) == 0) 150 return 0; 151 152 if (bpf_get_netns_cookie(ctx) == 0) 153 return 0; 154 155 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { 156 /* Not interested in SOL_IP:IP_TOS; 157 * let next BPF program in the cgroup chain or kernel 158 * handle it. 159 */ 160 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ 161 return 1; 162 } 163 164 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { 165 /* Overwrite SO_SNDBUF value */ 166 167 if (optval + sizeof(__u32) > optval_end) 168 return 0; /* bounds check */ 169 170 *(__u32 *)optval = 0x55AA; 171 ctx->optlen = 4; 172 173 return 1; 174 } 175 176 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { 177 /* Always use cubic */ 178 179 if (optval + 5 > optval_end) 180 return 0; /* bounds check */ 181 182 memcpy(optval, "cubic", 5); 183 ctx->optlen = 5; 184 185 return 1; 186 } 187 188 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 189 /* Original optlen is larger than PAGE_SIZE. */ 190 if (ctx->optlen != page_size * 2) 191 return 0; /* unexpected data size */ 192 193 if (optval + 1 > optval_end) 194 return 0; /* bounds check */ 195 196 /* Make sure we can trim the buffer. */ 197 optval[0] = 0; 198 ctx->optlen = 1; 199 200 /* Usepace buffer is PAGE_SIZE * 2, but BPF 201 * program can only see the first PAGE_SIZE 202 * bytes of data. 203 */ 204 if (optval_end - optval != page_size) 205 return 0; /* unexpected data size */ 206 207 return 1; 208 } 209 210 if (ctx->level != SOL_CUSTOM) 211 return 0; /* deny everything except custom level */ 212 213 if (optval + 1 > optval_end) 214 return 0; /* bounds check */ 215 216 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 217 BPF_SK_STORAGE_GET_F_CREATE); 218 if (!storage) 219 return 0; /* couldn't get sk storage */ 220 221 storage->val = optval[0]; 222 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel 223 * setsockopt handler. 224 */ 225 226 return 1; 227 } 228