// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "allowedips.h"
#include "peer.h"

/* Convert a big-endian IPv4 (32-bit) or IPv6 (128-bit) address into
 * host-endian words in dst, so that bit/byte comparisons below operate on
 * native integers. Only bits == 32 or bits == 128 are handled; anything
 * else leaves dst untouched.
 */
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
{
	if (bits == 32) {
		*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
	} else if (bits == 128) {
		((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
		((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
	}
}

/* Store the key and cidr in the node, and precompute the byte index
 * (bit_at_a) and bit shift (bit_at_b) of the first bit *after* the prefix,
 * which CHOOSE_NODE uses to pick a child during trie descent.
 */
static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
				 u8 cidr, u8 bits)
{
	node->cidr = cidr;
	node->bit_at_a = cidr / 8U;
#ifdef __LITTLE_ENDIAN
	/* Keys are stored as host-endian u32/u64 words (see swap_endian), so
	 * on little-endian machines the bytes within each word are reversed;
	 * this xor appears to compensate for that when indexing node->bits
	 * byte-wise — the (bits / 8U - 1U) % 8U term is the word-size mask.
	 */
	node->bit_at_a ^= (bits / 8U - 1U) % 8U;
#endif
	node->bit_at_b = 7U - (cidr % 8U);
	node->bitlen = bits;
	memcpy(node->bits, src, bits / 8U);
}

/* Select the child slot (0 or 1) of parent according to the key's bit just
 * past parent's prefix, using the indices precomputed above.
 */
#define CHOOSE_NODE(parent, key) \
	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]

/* RCU callback: free a single trie node once all readers are done. */
static void node_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct allowedips_node, rcu));
}

/* Push a non-NULL child pointer onto the explicit DFS stack used by the
 * iterative tree walks below. The stack has 128 slots; the WARN_ON fires
 * only in DEBUG builds if that capacity would be exceeded.
 */
static void push_rcu(struct allowedips_node **stack,
		     struct allowedips_node __rcu *p, unsigned int *len)
{
	if (rcu_access_pointer(p)) {
		WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
		stack[(*len)++] = rcu_dereference_raw(p);
	}
}

/* RCU callback: free an entire subtree rooted at the node embedding rcu,
 * using an iterative DFS with an explicit stack (no recursion).
 */
static void root_free_rcu(struct rcu_head *rcu)
{
	struct allowedips_node *node, *stack[128] = {
		container_of(rcu, struct allowedips_node, rcu) };
	unsigned int len = 1;

	while (len > 0 && (node = stack[--len])) {
		push_rcu(stack, node->bit[0], &len);
		push_rcu(stack, node->bit[1], &len);
		kfree(node);
	}
}

/* Walk the whole subtree and unlink every peer-owning node from its peer's
 * allowedips list. Called before the subtree itself is freed via RCU, while
 * the node memory is still valid.
 */
static void root_remove_peer_lists(struct allowedips_node *root)
{
	struct allowedips_node *node, *stack[128] = { root };
	unsigned int len = 1;

	while (len > 0 && (node = stack[--len])) {
		push_rcu(stack, node->bit[0], &len);
		push_rcu(stack, node->bit[1], &len);
		if (rcu_access_pointer(node->peer))
			list_del(&node->peer_list);
	}
}

/* Remove every entry belonging to peer from the trie rooted at *top,
 * collapsing single-child interior nodes as they are vacated. Uses an
 * iterative post-order traversal: the prev pointer tracks which direction
 * the walk came from so each node is visited after both of its children.
 * Requires the table mutex (lock) to be held for all pointer updates.
 */
static void walk_remove_by_peer(struct allowedips_node __rcu **top,
				struct wg_peer *peer, struct mutex *lock)
{
#define REF(p) rcu_access_pointer(p)
#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
#define PUSH(p) ({                                                             \
		WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
		stack[len++] = p;                                              \
	})

	struct allowedips_node __rcu **stack[128], **nptr;
	struct allowedips_node *node, *prev;
	unsigned int len;

	if (unlikely(!peer || !REF(*top)))
		return;

	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
		nptr = stack[len - 1];
		node = DEREF(nptr);
		if (!node) {
			--len;
			continue;
		}
		if (!prev || REF(prev->bit[0]) == node ||
		    REF(prev->bit[1]) == node) {
			/* Arrived from the parent: descend left first. */
			if (REF(node->bit[0]))
				PUSH(&node->bit[0]);
			else if (REF(node->bit[1]))
				PUSH(&node->bit[1]);
		} else if (REF(node->bit[0]) == prev) {
			/* Returned from the left child: descend right. */
			if (REF(node->bit[1]))
				PUSH(&node->bit[1]);
		} else {
			/* Both children done: process this node itself. */
			if (rcu_dereference_protected(node->peer,
				lockdep_is_held(lock)) == peer) {
				RCU_INIT_POINTER(node->peer, NULL);
				list_del_init(&node->peer_list);
				if (!node->bit[0] || !node->bit[1]) {
					/* Splice the lone remaining child (or
					 * NULL) into our slot, then free us
					 * after a grace period.
					 */
					rcu_assign_pointer(*nptr, DEREF(
					       &node->bit[!REF(node->bit[0])]));
					call_rcu(&node->rcu, node_free_rcu);
					node = DEREF(nptr);
				}
			}
			--len;
		}
	}

#undef REF
#undef DEREF
#undef PUSH
}

/* fls over a 128-bit value split into high (a) and low (b) 64-bit halves. */
static unsigned int fls128(u64 a, u64 b)
{
	return a ? fls64(a) + 64U : fls64(b);
}

/* Number of leading bits shared by node->bits and key (both already in
 * host-endian word form). Returns 0 for unsupported bit widths.
 */
static u8 common_bits(const struct allowedips_node *node, const u8 *key,
		      u8 bits)
{
	if (bits == 32)
		return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
	else if (bits == 128)
		return 128U - fls128(
			*(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
			*(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
	return 0;
}

/* Does key fall within node's prefix (i.e. share at least cidr bits)? */
static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
			   u8 bits)
{
	/* This could be much faster if it actually just compared the common
	 * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
	 * the rest, but it turns out that common_bits is already super fast on
	 * modern processors, even taking into account the unfortunate bswap.
	 * So, we just inline it like this instead.
	 */
	return common_bits(node, key, bits) >= node->cidr;
}

/* Longest-prefix-match descent: walk down the trie while the key keeps
 * matching, remembering the deepest node that actually owns a peer.
 * Caller must be in an RCU-bh read-side critical section.
 */
static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
					 const u8 *key)
{
	struct allowedips_node *node = trie, *found = NULL;

	while (node && prefix_matches(node, key, bits)) {
		if (rcu_access_pointer(node->peer))
			found = node;
		if (node->cidr == bits)
			break;
		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
	}
	return found;
}

/* Returns a strong reference to a peer */
static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
			      const void *be_ip)
{
	/* Aligned so it can be passed to fls/fls64 */
	u8 ip[16] __aligned(__alignof(u64));
	struct allowedips_node *node;
	struct wg_peer *peer = NULL;

	swap_endian(ip, be_ip, bits);

	rcu_read_lock_bh();
retry:
	/* If the matched peer's refcount already hit zero (it is being torn
	 * down concurrently), retry the walk — the node may disappear and a
	 * shallower match may apply.
	 */
	node = find_node(rcu_dereference_bh(root), bits, ip);
	if (node) {
		peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
		if (!peer)
			goto retry;
	}
	rcu_read_unlock_bh();
	return peer;
}

/* Find where key/cidr belongs in the trie: *rnode is set to the deepest
 * existing node whose prefix contains key with cidr no larger than the
 * requested one (or NULL). Returns true iff a node with exactly this cidr
 * already exists. Requires the table mutex (lock) held.
 */
static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
			   u8 cidr, u8 bits, struct allowedips_node **rnode,
			   struct mutex *lock)
{
	struct allowedips_node *node = rcu_dereference_protected(trie,
						lockdep_is_held(lock));
	struct allowedips_node *parent = NULL;
	bool exact = false;

	while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
		parent = node;
		if (parent->cidr == cidr) {
			exact = true;
			break;
		}
		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
						 lockdep_is_held(lock));
	}
	*rnode = parent;
	return exact;
}

/* Insert key/cidr -> peer into the trie, creating an intermediate branch
 * node when the new prefix diverges from an existing subtree. New nodes are
 * fully initialized before being published with rcu_assign_pointer, so
 * concurrent lockless readers always see consistent nodes. Requires the
 * table mutex (lock) held. Returns 0, -EINVAL, or -ENOMEM.
 */
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	struct allowedips_node *node, *parent, *down, *newnode;

	if (unlikely(cidr > bits || !peer))
		return -EINVAL;

	/* Empty trie: the new node simply becomes the root. */
	if (!rcu_access_pointer(*trie)) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (unlikely(!node))
			return -ENOMEM;
		RCU_INIT_POINTER(node->peer, peer);
		list_add_tail(&node->peer_list, &peer->allowedips_list);
		copy_and_assign_cidr(node, key, cidr, bits);
		rcu_assign_pointer(*trie, node);
		return 0;
	}
	/* Exact prefix already present: just repoint it at the new peer. */
	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
		rcu_assign_pointer(node->peer, peer);
		list_move_tail(&node->peer_list, &peer->allowedips_list);
		return 0;
	}

	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
	if (unlikely(!newnode))
		return -ENOMEM;
	RCU_INIT_POINTER(newnode->peer, peer);
	list_add_tail(&newnode->peer_list, &peer->allowedips_list);
	copy_and_assign_cidr(newnode, key, cidr, bits);

	/* down is the existing subtree the new node must attach around. */
	if (!node) {
		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
	} else {
		down = rcu_dereference_protected(CHOOSE_NODE(node, key),
						 lockdep_is_held(lock));
		if (!down) {
			/* Free child slot: attach newnode as a leaf. */
			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
			return 0;
		}
	}
	/* Shorten cidr to the prefix shared with the colliding subtree. */
	cidr = min(cidr, common_bits(down, key, bits));
	parent = node;

	if (newnode->cidr == cidr) {
		/* New prefix contains the subtree: newnode becomes its
		 * parent, taking down as the appropriate child.
		 */
		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
		if (!parent)
			rcu_assign_pointer(*trie, newnode);
		else
			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
					   newnode);
	} else {
		/* Prefixes diverge: create a peerless branch node at the
		 * common prefix, with down and newnode as its two children.
		 */
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (unlikely(!node)) {
			kfree(newnode);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&node->peer_list);
		copy_and_assign_cidr(node, newnode->bits, cidr, bits);

		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
		if (!parent)
			rcu_assign_pointer(*trie, node);
		else
			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
					   node);
	}
	return 0;
}

/* Initialize an empty allowedips table; seq starts at 1 so a zeroed
 * cursor elsewhere can never match a fresh table — TODO confirm against
 * the cursor users of table->seq, which are not visible in this file.
 */
void wg_allowedips_init(struct allowedips *table)
{
	table->root4 = table->root6 = NULL;
	table->seq = 1;
}

/* Detach both tries and free them after an RCU grace period, first
 * unlinking all nodes from their peers' lists. Requires the table mutex
 * (lock) held.
 */
void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
{
	struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;

	++table->seq;
	RCU_INIT_POINTER(table->root4, NULL);
	RCU_INIT_POINTER(table->root6, NULL);
	if (rcu_access_pointer(old4)) {
		struct allowedips_node *node = rcu_dereference_protected(old4,
							lockdep_is_held(lock));

		root_remove_peer_lists(node);
		call_rcu(&node->rcu, root_free_rcu);
	}
	if (rcu_access_pointer(old6)) {
		struct allowedips_node *node = rcu_dereference_protected(old6,
							lockdep_is_held(lock));

		root_remove_peer_lists(node);
		call_rcu(&node->rcu, root_free_rcu);
	}
}

/* Insert an IPv4 allowed-ip entry for peer. Requires the table mutex held. */
int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	/* Aligned so it can be passed to fls */
	u8 key[4] __aligned(__alignof(u32));

	++table->seq;
	swap_endian(key, (const u8 *)ip, 32);
	return add(&table->root4, 32, key, cidr, peer, lock);
}

/* Insert an IPv6 allowed-ip entry for peer. Requires the table mutex held. */
int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	/* Aligned so it can be passed to fls64 */
	u8 key[16] __aligned(__alignof(u64));

	++table->seq;
	swap_endian(key, (const u8 *)ip, 128);
	return add(&table->root6, 128, key, cidr, peer, lock);
}

/* Remove all of peer's entries from both tries. Requires the table mutex
 * held.
 */
void wg_allowedips_remove_by_peer(struct allowedips *table,
				  struct wg_peer *peer, struct mutex *lock)
{
	++table->seq;
	walk_remove_by_peer(&table->root4, peer, lock);
	walk_remove_by_peer(&table->root6, peer, lock);
}

/* Export a node's prefix back to big-endian form in ip[16], zeroing all
 * bits beyond the cidr, and report the cidr and address family.
 */
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
{
	const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);
	swap_endian(ip, node->bits, node->bitlen);
	memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
	if (node->cidr)
		ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);

	*cidr = node->cidr;
	return node->bitlen == 32 ? AF_INET : AF_INET6;
}

/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
					 struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
	else if (skb->protocol == htons(ETH_P_IPV6))
		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
	return NULL;
}

/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
					 struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
	else if (skb->protocol == htons(ETH_P_IPV6))
		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
	return NULL;
}

#include "selftest/allowedips.c"