1 /* 2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 * 8 * Development of this code funded by Astaro AG (http://www.astaro.com/) 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/init.h> 13 #include <linux/module.h> 14 #include <linux/list.h> 15 #include <linux/rbtree.h> 16 #include <linux/netlink.h> 17 #include <linux/netfilter.h> 18 #include <linux/netfilter/nf_tables.h> 19 #include <net/netfilter/nf_tables.h> 20 21 struct nft_rbtree { 22 struct rb_root root; 23 rwlock_t lock; 24 seqcount_t count; 25 }; 26 27 struct nft_rbtree_elem { 28 struct rb_node node; 29 struct nft_set_ext ext; 30 }; 31 32 static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) 33 { 34 return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && 35 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); 36 } 37 38 static bool nft_rbtree_equal(const struct nft_set *set, const void *this, 39 const struct nft_rbtree_elem *interval) 40 { 41 return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; 42 } 43 44 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 45 const u32 *key, const struct nft_set_ext **ext, 46 unsigned int seq) 47 { 48 struct nft_rbtree *priv = nft_set_priv(set); 49 const struct nft_rbtree_elem *rbe, *interval = NULL; 50 u8 genmask = nft_genmask_cur(net); 51 const struct rb_node *parent; 52 const void *this; 53 int d; 54 55 parent = rcu_dereference_raw(priv->root.rb_node); 56 while (parent != NULL) { 57 if (read_seqcount_retry(&priv->count, seq)) 58 return false; 59 60 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 61 62 this = nft_set_ext_key(&rbe->ext); 63 d = memcmp(this, key, set->klen); 64 if (d < 0) { 65 parent = rcu_dereference_raw(parent->rb_left); 66 if (interval && 67 nft_rbtree_equal(set, this, interval) && 68 nft_rbtree_interval_end(this) && 69 !nft_rbtree_interval_end(interval)) 70 continue; 71 interval = rbe; 72 } else if (d > 0) 73 parent = rcu_dereference_raw(parent->rb_right); 74 else { 75 if (!nft_set_elem_active(&rbe->ext, genmask)) { 76 parent = rcu_dereference_raw(parent->rb_left); 77 continue; 78 } 79 if (nft_rbtree_interval_end(rbe)) 80 goto out; 81 82 *ext = &rbe->ext; 83 return true; 84 } 85 } 86 87 if (set->flags & NFT_SET_INTERVAL && interval != NULL && 88 nft_set_elem_active(&interval->ext, genmask) && 89 !nft_rbtree_interval_end(interval)) { 90 *ext = &interval->ext; 91 return true; 92 } 93 out: 94 return false; 95 } 96 97 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 98 const u32 *key, const struct nft_set_ext **ext) 99 { 100 struct nft_rbtree *priv = nft_set_priv(set); 101 unsigned int seq = read_seqcount_begin(&priv->count); 102 bool ret; 103 104 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 105 if (ret || !read_seqcount_retry(&priv->count, seq)) 106 return ret; 107 108 read_lock_bh(&priv->lock); 109 seq = read_seqcount_begin(&priv->count); 110 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 111 read_unlock_bh(&priv->lock); 112 113 return ret; 114 } 115 116 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 117 struct nft_rbtree_elem *new, 118 struct nft_set_ext **ext) 119 { 120 struct nft_rbtree *priv = nft_set_priv(set); 121 u8 genmask = nft_genmask_next(net); 122 struct nft_rbtree_elem *rbe; 123 struct rb_node *parent, **p; 124 int d; 125 126 parent = NULL; 127 p = &priv->root.rb_node; 128 while (*p != NULL) { 129 parent = *p; 130 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 131 d = memcmp(nft_set_ext_key(&rbe->ext), 132 nft_set_ext_key(&new->ext), 133 set->klen); 134 if (d < 0) 135 p = &parent->rb_left; 136 else if (d > 0) 137 p = &parent->rb_right; 138 else { 139 if (nft_rbtree_interval_end(rbe) && 140 !nft_rbtree_interval_end(new)) { 141 p = &parent->rb_left; 142 } else if (!nft_rbtree_interval_end(rbe) && 143 nft_rbtree_interval_end(new)) { 144 p = &parent->rb_right; 145 } else if (nft_set_elem_active(&rbe->ext, genmask)) { 146 *ext = &rbe->ext; 147 return -EEXIST; 148 } else { 149 p = &parent->rb_left; 150 } 151 } 152 } 153 rb_link_node_rcu(&new->node, parent, p); 154 rb_insert_color(&new->node, &priv->root); 155 return 0; 156 } 157 158 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, 159 const struct nft_set_elem *elem, 160 struct nft_set_ext **ext) 161 { 162 struct nft_rbtree *priv = nft_set_priv(set); 163 struct nft_rbtree_elem *rbe = elem->priv; 164 int err; 165 166 write_lock_bh(&priv->lock); 167 write_seqcount_begin(&priv->count); 168 err = __nft_rbtree_insert(net, set, rbe, ext); 169 write_seqcount_end(&priv->count); 170 write_unlock_bh(&priv->lock); 171 172 return err; 173 } 174 175 static void nft_rbtree_remove(const struct net *net, 176 const struct nft_set *set, 177 const struct nft_set_elem *elem) 178 { 179 struct nft_rbtree *priv = nft_set_priv(set); 180 struct nft_rbtree_elem *rbe = elem->priv; 181 182 write_lock_bh(&priv->lock); 183 write_seqcount_begin(&priv->count); 184 rb_erase(&rbe->node, &priv->root); 185 write_seqcount_end(&priv->count); 186 write_unlock_bh(&priv->lock); 187 } 188 189 static void nft_rbtree_activate(const struct net *net, 190 const struct nft_set *set, 191 const struct nft_set_elem *elem) 192 { 193 struct nft_rbtree_elem *rbe = elem->priv; 194 195 nft_set_elem_change_active(net, set, &rbe->ext); 196 } 197 198 static bool nft_rbtree_flush(const struct net *net, 199 const struct nft_set *set, void *priv) 200 { 201 struct nft_rbtree_elem *rbe = priv; 202 203 nft_set_elem_change_active(net, set, &rbe->ext); 204 return true; 205 } 206 207 static void *nft_rbtree_deactivate(const struct net *net, 208 const struct nft_set *set, 209 const struct nft_set_elem *elem) 210 { 211 const struct nft_rbtree *priv = nft_set_priv(set); 212 const struct rb_node *parent = priv->root.rb_node; 213 struct nft_rbtree_elem *rbe, *this = elem->priv; 214 u8 genmask = nft_genmask_next(net); 215 int d; 216 217 while (parent != NULL) { 218 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 219 220 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, 221 set->klen); 222 if (d < 0) 223 parent = parent->rb_left; 224 else if (d > 0) 225 parent = parent->rb_right; 226 else { 227 if (!nft_set_elem_active(&rbe->ext, genmask)) { 228 parent = parent->rb_left; 229 continue; 230 } 231 if (nft_rbtree_interval_end(rbe) && 232 !nft_rbtree_interval_end(this)) { 233 parent = parent->rb_left; 234 continue; 235 } else if (!nft_rbtree_interval_end(rbe) && 236 nft_rbtree_interval_end(this)) { 237 parent = parent->rb_right; 238 continue; 239 } 240 nft_rbtree_flush(net, set, rbe); 241 return rbe; 242 } 243 } 244 return NULL; 245 } 246 247 static void nft_rbtree_walk(const struct nft_ctx *ctx, 248 struct nft_set *set, 249 struct nft_set_iter *iter) 250 { 251 struct nft_rbtree *priv = nft_set_priv(set); 252 struct nft_rbtree_elem *rbe; 253 struct nft_set_elem elem; 254 struct rb_node *node; 255 256 read_lock_bh(&priv->lock); 257 for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { 258 rbe = rb_entry(node, struct nft_rbtree_elem, node); 259 260 if (iter->count < iter->skip) 261 goto cont; 262 if (!nft_set_elem_active(&rbe->ext, iter->genmask)) 263 goto cont; 264 265 elem.priv = rbe; 266 267 iter->err = iter->fn(ctx, set, iter, &elem); 268 if (iter->err < 0) { 269 read_unlock_bh(&priv->lock); 270 return; 271 } 272 cont: 273 iter->count++; 274 } 275 read_unlock_bh(&priv->lock); 276 } 277 278 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], 279 const struct nft_set_desc *desc) 280 { 281 return sizeof(struct nft_rbtree); 282 } 283 284 static int nft_rbtree_init(const struct nft_set *set, 285 const struct nft_set_desc *desc, 286 const struct nlattr * const nla[]) 287 { 288 struct nft_rbtree *priv = nft_set_priv(set); 289 290 rwlock_init(&priv->lock); 291 seqcount_init(&priv->count); 292 priv->root = RB_ROOT; 293 return 0; 294 } 295 296 static void nft_rbtree_destroy(const struct nft_set *set) 297 { 298 struct nft_rbtree *priv = nft_set_priv(set); 299 struct nft_rbtree_elem *rbe; 300 struct rb_node *node; 301 302 while ((node = priv->root.rb_node) != NULL) { 303 rb_erase(node, &priv->root); 304 rbe = rb_entry(node, struct nft_rbtree_elem, node); 305 nft_set_elem_destroy(set, rbe, true); 306 } 307 } 308 309 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, 310 struct nft_set_estimate *est) 311 { 312 if (desc->size) 313 est->size = sizeof(struct nft_rbtree) + 314 desc->size * sizeof(struct nft_rbtree_elem); 315 else 316 est->size = ~0; 317 318 est->lookup = NFT_SET_CLASS_O_LOG_N; 319 est->space = NFT_SET_CLASS_O_N; 320 321 return true; 322 } 323 324 static struct nft_set_type nft_rbtree_type; 325 static struct nft_set_ops nft_rbtree_ops __read_mostly = { 326 .type = &nft_rbtree_type, 327 .privsize = nft_rbtree_privsize, 328 .elemsize = offsetof(struct nft_rbtree_elem, ext), 329 .estimate = nft_rbtree_estimate, 330 .init = nft_rbtree_init, 331 .destroy = nft_rbtree_destroy, 332 .insert = nft_rbtree_insert, 333 .remove = nft_rbtree_remove, 334 .deactivate = nft_rbtree_deactivate, 335 .flush = nft_rbtree_flush, 336 .activate = nft_rbtree_activate, 337 .lookup = nft_rbtree_lookup, 338 .walk = nft_rbtree_walk, 339 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 340 }; 341 342 static struct nft_set_type nft_rbtree_type __read_mostly = { 343 .ops = &nft_rbtree_ops, 344 .owner = THIS_MODULE, 345 }; 346 347 static int __init nft_rbtree_module_init(void) 348 { 349 return nft_register_set(&nft_rbtree_type); 350 } 351 352 static void __exit nft_rbtree_module_exit(void) 353 { 354 nft_unregister_set(&nft_rbtree_type); 355 } 356 357 module_init(nft_rbtree_module_init); 358 module_exit(nft_rbtree_module_exit); 359 360 MODULE_LICENSE("GPL"); 361 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 362 MODULE_ALIAS_NFT_SET(); 363