1 /* 2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 * 8 * Development of this code funded by Astaro AG (http://www.astaro.com/) 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/init.h> 13 #include <linux/module.h> 14 #include <linux/list.h> 15 #include <linux/rbtree.h> 16 #include <linux/netlink.h> 17 #include <linux/netfilter.h> 18 #include <linux/netfilter/nf_tables.h> 19 #include <net/netfilter/nf_tables.h> 20 21 struct nft_rbtree { 22 struct rb_root root; 23 rwlock_t lock; 24 seqcount_t count; 25 }; 26 27 struct nft_rbtree_elem { 28 struct rb_node node; 29 struct nft_set_ext ext; 30 }; 31 32 static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) 33 { 34 return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && 35 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); 36 } 37 38 static bool nft_rbtree_equal(const struct nft_set *set, const void *this, 39 const struct nft_rbtree_elem *interval) 40 { 41 return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; 42 } 43 44 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 45 const u32 *key, const struct nft_set_ext **ext, 46 unsigned int seq) 47 { 48 struct nft_rbtree *priv = nft_set_priv(set); 49 const struct nft_rbtree_elem *rbe, *interval = NULL; 50 u8 genmask = nft_genmask_cur(net); 51 const struct rb_node *parent; 52 const void *this; 53 int d; 54 55 parent = rcu_dereference_raw(priv->root.rb_node); 56 while (parent != NULL) { 57 if (read_seqcount_retry(&priv->count, seq)) 58 return false; 59 60 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 61 62 this = nft_set_ext_key(&rbe->ext); 63 d = memcmp(this, key, set->klen); 64 if (d < 0) { 65 parent = rcu_dereference_raw(parent->rb_left); 66 if (interval && 67 nft_rbtree_equal(set, this, interval) && 68 nft_rbtree_interval_end(this) && 69 !nft_rbtree_interval_end(interval)) 70 continue; 71 interval = rbe; 72 } else if (d > 0) 73 parent = rcu_dereference_raw(parent->rb_right); 74 else { 75 if (!nft_set_elem_active(&rbe->ext, genmask)) { 76 parent = rcu_dereference_raw(parent->rb_left); 77 continue; 78 } 79 if (nft_rbtree_interval_end(rbe)) 80 goto out; 81 82 *ext = &rbe->ext; 83 return true; 84 } 85 } 86 87 if (set->flags & NFT_SET_INTERVAL && interval != NULL && 88 nft_set_elem_active(&interval->ext, genmask) && 89 !nft_rbtree_interval_end(interval)) { 90 *ext = &interval->ext; 91 return true; 92 } 93 out: 94 return false; 95 } 96 97 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 98 const u32 *key, const struct nft_set_ext **ext) 99 { 100 struct nft_rbtree *priv = nft_set_priv(set); 101 unsigned int seq = read_seqcount_begin(&priv->count); 102 bool ret; 103 104 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 105 if (ret || !read_seqcount_retry(&priv->count, seq)) 106 return ret; 107 108 read_lock_bh(&priv->lock); 109 seq = read_seqcount_begin(&priv->count); 110 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 111 read_unlock_bh(&priv->lock); 112 113 return ret; 114 } 115 116 static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, 117 const u32 *key, struct nft_rbtree_elem **elem, 118 unsigned int seq, unsigned int flags, u8 genmask) 119 { 120 struct nft_rbtree_elem *rbe, *interval = NULL; 121 struct nft_rbtree *priv = nft_set_priv(set); 122 const struct rb_node *parent; 123 const void *this; 124 int d; 125 126 parent = rcu_dereference_raw(priv->root.rb_node); 127 while (parent != NULL) { 128 if (read_seqcount_retry(&priv->count, seq)) 129 return false; 130 131 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 132 133 this = nft_set_ext_key(&rbe->ext); 134 d = memcmp(this, key, set->klen); 135 if (d < 0) { 136 parent = rcu_dereference_raw(parent->rb_left); 137 interval = rbe; 138 } else if (d > 0) { 139 parent = rcu_dereference_raw(parent->rb_right); 140 } else { 141 if (!nft_set_elem_active(&rbe->ext, genmask)) 142 parent = rcu_dereference_raw(parent->rb_left); 143 144 if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || 145 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == 146 (flags & NFT_SET_ELEM_INTERVAL_END)) { 147 *elem = rbe; 148 return true; 149 } 150 return false; 151 } 152 } 153 154 if (set->flags & NFT_SET_INTERVAL && interval != NULL && 155 nft_set_elem_active(&interval->ext, genmask) && 156 !nft_rbtree_interval_end(interval)) { 157 *elem = interval; 158 return true; 159 } 160 161 return false; 162 } 163 164 static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, 165 const struct nft_set_elem *elem, unsigned int flags) 166 { 167 struct nft_rbtree *priv = nft_set_priv(set); 168 unsigned int seq = read_seqcount_begin(&priv->count); 169 struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); 170 const u32 *key = (const u32 *)&elem->key.val; 171 u8 genmask = nft_genmask_cur(net); 172 bool ret; 173 174 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); 175 if (ret || !read_seqcount_retry(&priv->count, seq)) 176 return rbe; 177 178 read_lock_bh(&priv->lock); 179 seq = read_seqcount_begin(&priv->count); 180 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); 181 if (!ret) 182 rbe = ERR_PTR(-ENOENT); 183 read_unlock_bh(&priv->lock); 184 185 return rbe; 186 } 187 188 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 189 struct nft_rbtree_elem *new, 190 struct nft_set_ext **ext) 191 { 192 struct nft_rbtree *priv = nft_set_priv(set); 193 u8 genmask = nft_genmask_next(net); 194 struct nft_rbtree_elem *rbe; 195 struct rb_node *parent, **p; 196 int d; 197 198 parent = NULL; 199 p = &priv->root.rb_node; 200 while (*p != NULL) { 201 parent = *p; 202 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 203 d = memcmp(nft_set_ext_key(&rbe->ext), 204 nft_set_ext_key(&new->ext), 205 set->klen); 206 if (d < 0) 207 p = &parent->rb_left; 208 else if (d > 0) 209 p = &parent->rb_right; 210 else { 211 if (nft_rbtree_interval_end(rbe) && 212 !nft_rbtree_interval_end(new)) { 213 p = &parent->rb_left; 214 } else if (!nft_rbtree_interval_end(rbe) && 215 nft_rbtree_interval_end(new)) { 216 p = &parent->rb_right; 217 } else if (nft_set_elem_active(&rbe->ext, genmask)) { 218 *ext = &rbe->ext; 219 return -EEXIST; 220 } else { 221 p = &parent->rb_left; 222 } 223 } 224 } 225 rb_link_node_rcu(&new->node, parent, p); 226 rb_insert_color(&new->node, &priv->root); 227 return 0; 228 } 229 230 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, 231 const struct nft_set_elem *elem, 232 struct nft_set_ext **ext) 233 { 234 struct nft_rbtree *priv = nft_set_priv(set); 235 struct nft_rbtree_elem *rbe = elem->priv; 236 int err; 237 238 write_lock_bh(&priv->lock); 239 write_seqcount_begin(&priv->count); 240 err = __nft_rbtree_insert(net, set, rbe, ext); 241 write_seqcount_end(&priv->count); 242 write_unlock_bh(&priv->lock); 243 244 return err; 245 } 246 247 static void nft_rbtree_remove(const struct net *net, 248 const struct nft_set *set, 249 const struct nft_set_elem *elem) 250 { 251 struct nft_rbtree *priv = nft_set_priv(set); 252 struct nft_rbtree_elem *rbe = elem->priv; 253 254 write_lock_bh(&priv->lock); 255 write_seqcount_begin(&priv->count); 256 rb_erase(&rbe->node, &priv->root); 257 write_seqcount_end(&priv->count); 258 write_unlock_bh(&priv->lock); 259 } 260 261 static void nft_rbtree_activate(const struct net *net, 262 const struct nft_set *set, 263 const struct nft_set_elem *elem) 264 { 265 struct nft_rbtree_elem *rbe = elem->priv; 266 267 nft_set_elem_change_active(net, set, &rbe->ext); 268 } 269 270 static bool nft_rbtree_flush(const struct net *net, 271 const struct nft_set *set, void *priv) 272 { 273 struct nft_rbtree_elem *rbe = priv; 274 275 nft_set_elem_change_active(net, set, &rbe->ext); 276 return true; 277 } 278 279 static void *nft_rbtree_deactivate(const struct net *net, 280 const struct nft_set *set, 281 const struct nft_set_elem *elem) 282 { 283 const struct nft_rbtree *priv = nft_set_priv(set); 284 const struct rb_node *parent = priv->root.rb_node; 285 struct nft_rbtree_elem *rbe, *this = elem->priv; 286 u8 genmask = nft_genmask_next(net); 287 int d; 288 289 while (parent != NULL) { 290 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 291 292 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, 293 set->klen); 294 if (d < 0) 295 parent = parent->rb_left; 296 else if (d > 0) 297 parent = parent->rb_right; 298 else { 299 if (!nft_set_elem_active(&rbe->ext, genmask)) { 300 parent = parent->rb_left; 301 continue; 302 } 303 if (nft_rbtree_interval_end(rbe) && 304 !nft_rbtree_interval_end(this)) { 305 parent = parent->rb_left; 306 continue; 307 } else if (!nft_rbtree_interval_end(rbe) && 308 nft_rbtree_interval_end(this)) { 309 parent = parent->rb_right; 310 continue; 311 } 312 nft_rbtree_flush(net, set, rbe); 313 return rbe; 314 } 315 } 316 return NULL; 317 } 318 319 static void nft_rbtree_walk(const struct nft_ctx *ctx, 320 struct nft_set *set, 321 struct nft_set_iter *iter) 322 { 323 struct nft_rbtree *priv = nft_set_priv(set); 324 struct nft_rbtree_elem *rbe; 325 struct nft_set_elem elem; 326 struct rb_node *node; 327 328 read_lock_bh(&priv->lock); 329 for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { 330 rbe = rb_entry(node, struct nft_rbtree_elem, node); 331 332 if (iter->count < iter->skip) 333 goto cont; 334 if (!nft_set_elem_active(&rbe->ext, iter->genmask)) 335 goto cont; 336 337 elem.priv = rbe; 338 339 iter->err = iter->fn(ctx, set, iter, &elem); 340 if (iter->err < 0) { 341 read_unlock_bh(&priv->lock); 342 return; 343 } 344 cont: 345 iter->count++; 346 } 347 read_unlock_bh(&priv->lock); 348 } 349 350 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], 351 const struct nft_set_desc *desc) 352 { 353 return sizeof(struct nft_rbtree); 354 } 355 356 static int nft_rbtree_init(const struct nft_set *set, 357 const struct nft_set_desc *desc, 358 const struct nlattr * const nla[]) 359 { 360 struct nft_rbtree *priv = nft_set_priv(set); 361 362 rwlock_init(&priv->lock); 363 seqcount_init(&priv->count); 364 priv->root = RB_ROOT; 365 return 0; 366 } 367 368 static void nft_rbtree_destroy(const struct nft_set *set) 369 { 370 struct nft_rbtree *priv = nft_set_priv(set); 371 struct nft_rbtree_elem *rbe; 372 struct rb_node *node; 373 374 while ((node = priv->root.rb_node) != NULL) { 375 rb_erase(node, &priv->root); 376 rbe = rb_entry(node, struct nft_rbtree_elem, node); 377 nft_set_elem_destroy(set, rbe, true); 378 } 379 } 380 381 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, 382 struct nft_set_estimate *est) 383 { 384 if (desc->size) 385 est->size = sizeof(struct nft_rbtree) + 386 desc->size * sizeof(struct nft_rbtree_elem); 387 else 388 est->size = ~0; 389 390 est->lookup = NFT_SET_CLASS_O_LOG_N; 391 est->space = NFT_SET_CLASS_O_N; 392 393 return true; 394 } 395 396 static struct nft_set_type nft_rbtree_type; 397 static struct nft_set_ops nft_rbtree_ops __read_mostly = { 398 .type = &nft_rbtree_type, 399 .privsize = nft_rbtree_privsize, 400 .elemsize = offsetof(struct nft_rbtree_elem, ext), 401 .estimate = nft_rbtree_estimate, 402 .init = nft_rbtree_init, 403 .destroy = nft_rbtree_destroy, 404 .insert = nft_rbtree_insert, 405 .remove = nft_rbtree_remove, 406 .deactivate = nft_rbtree_deactivate, 407 .flush = nft_rbtree_flush, 408 .activate = nft_rbtree_activate, 409 .lookup = nft_rbtree_lookup, 410 .walk = nft_rbtree_walk, 411 .get = nft_rbtree_get, 412 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 413 }; 414 415 static struct nft_set_type nft_rbtree_type __read_mostly = { 416 .ops = &nft_rbtree_ops, 417 .owner = THIS_MODULE, 418 }; 419 420 static int __init nft_rbtree_module_init(void) 421 { 422 return nft_register_set(&nft_rbtree_type); 423 } 424 425 static void __exit nft_rbtree_module_exit(void) 426 { 427 nft_unregister_set(&nft_rbtree_type); 428 } 429 430 module_init(nft_rbtree_module_init); 431 module_exit(nft_rbtree_module_exit); 432 433 MODULE_LICENSE("GPL"); 434 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 435 MODULE_ALIAS_NFT_SET(); 436