/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

#ifndef _IP_SET_HASH_GEN_H
#define _IP_SET_HASH_GEN_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>

#define __ipset_dereference(p)		\
	rcu_dereference_protected(p, 1)
#define ipset_dereference_nfnl(p)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
#define ipset_dereference_set(p, set)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
		lockdep_is_held(&(set)->lock))
#define ipset_dereference_bh_nfnl(p)	\
	rcu_dereference_bh_check(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))

/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32).
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			2
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(6 * AHASH_INIT_SIZE)
/* Max number of elements in the array block when tuned */
#define AHASH_MAX_TUNED			64
#define AHASH_MAX(h)			((h)->bucketsize)

/* A hash bucket */
struct hbucket {
	struct rcu_head rcu;	/* for call_rcu */
	/* Which positions are used in the array */
	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
	unsigned char value[]	/* the array of the values */
		__aligned(__alignof__(u64));
};

/* Region size for locking == 2^HTABLE_REGION_BITS */
#define HTABLE_REGION_BITS	10
#define ahash_numof_locks(htable_bits)		\
	((htable_bits) < HTABLE_REGION_BITS ? 1	\
		: jhash_size((htable_bits) - HTABLE_REGION_BITS))
#define ahash_sizeof_regions(htable_bits)		\
	(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
#define ahash_region(n, htable_bits)		\
	((n) % ahash_numof_locks(htable_bits))
#define ahash_bucket_start(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? 0	\
		: (h) * jhash_size(HTABLE_REGION_BITS))
#define ahash_bucket_end(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits)	\
		: ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
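
/* Example of the region arithmetic above (illustrative only): with
 * htable_bits = 12 the table has jhash_size(12) = 4096 buckets and,
 * since 12 - HTABLE_REGION_BITS = 2, ahash_numof_locks() yields
 * 2^2 = 4 regions. The per-region walkers below (gc, flush, resize)
 * then visit buckets [h * 1024, (h + 1) * 1024) for region h via
 * ahash_bucket_start()/ahash_bucket_end().
 */
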
struct htable_gc {
	struct delayed_work dwork;
	struct ip_set *set;	/* Set the gc belongs to */
	u32 region;		/* Last gc run position */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	atomic_t ref;		/* References for resizing */
	atomic_t uref;		/* References for dumping and gc */
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	u32 maxelem;		/* Maxelem per region */
	struct ip_set_region *hregion;	/* Region locks and ext sizes */
	struct hbucket __rcu *bucket[]; /* hashtable buckets */
};

#define hbucket(h, i)		((h)->bucket[i])
#define ext_size(n, dsize)	\
	(sizeof(struct hbucket) + (n) * (dsize))

#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT		1
#endif

/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
	u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
	u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
};

/* Compute the hash table size */
static size_t
htable_size(u8 hbits)
{
	size_t hsize;

	/* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
	if (hbits > 31)
		return 0;
	hsize = jhash_size(hbits);
	if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
	    < hsize)
		return 0;

	return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
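
/* Worked example (illustrative only): hbits = 10 gives
 * hsize = jhash_size(10) = 1024, so on a 64-bit kernel htable_size()
 * returns 1024 * sizeof(struct hbucket *) + sizeof(struct htable)
 * bytes. For hbits > 31, or when the bucket pointer array alone would
 * exceed INT_MAX, it returns 0 and the callers (mtype_resize() and
 * the create function below) bail out.
 */
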
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		(cidr[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif

/* cidr + 1 is stored in net_prefixes to support /0 */
#define NCIDR_PUT(cidr)		((cidr) + 1)
#define NCIDR_GET(cidr)		((cidr) - 1)

#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
#define DCIDR_PUT(cidr)		((cidr) - 1)
#define DCIDR_GET(cidr, i)	(__CIDR(cidr, i) + 1)
#else
#define DCIDR_PUT(cidr)		(cidr)
#define DCIDR_GET(cidr, i)	__CIDR(cidr, i)
#endif

#define INIT_CIDR(cidr, host_mask)	\
	DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

#ifdef IP_SET_HASH_WITH_NET0
/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
#define NLEN			(HOST_MASK + 1)
#define CIDR_POS(c)		((c) - 1)
#else
/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
#define NLEN			HOST_MASK
#define CIDR_POS(c)		((c) - 2)
#endif

#else
#define NLEN			0
#endif /* IP_SET_HASH_WITH_NETS */

#define SET_ELEM_EXPIRED(set, d)	\
	(SET_WITH_TIMEOUT(set) &&	\
	 ip_set_timeout_expired(ext_timeout(d, set)))

#endif /* _IP_SET_HASH_GEN_H */

#ifndef MTYPE
#error "MTYPE is not defined!"
#endif

#ifndef HTYPE
#error "HTYPE is not defined!"
#endif

#ifndef HOST_MASK
#error "HOST_MASK is not defined!"
#endif

/* Family dependent templates */

#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt

#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
#undef mtype_resize
#undef mtype_ext_size
#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
#undef mtype_data_match

#undef htype
#undef HKEY

#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)

#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)

#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_uref		IPSET_TOKEN(MTYPE, _uref)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_ext_size		IPSET_TOKEN(MTYPE, _ext_size)
#define mtype_resize_ad		IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc_do		IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)

#ifndef HKEY_DATALEN
#define HKEY_DATALEN		sizeof(struct mtype_elem)
#endif

#define htype			MTYPE

#define HKEY(data, initval, htable_bits)			\
({								\
	const u32 *__k = (const u32 *)data;			\
	u32 __l = HKEY_DATALEN / sizeof(u32);			\
								\
	BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);		\
								\
	jhash2(__k, __l, initval) & jhash_mask(htable_bits);	\
})
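
/* Example (illustrative only): for a plain IPv4 element whose
 * struct mtype_elem is a single __be32, HKEY_DATALEN is 4, so one
 * u32 word is fed to jhash2() and the result is masked with
 * jhash_mask(htable_bits) to a bucket index in [0, 2^htable_bits).
 */
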
/* The generic hash structure */
struct htype {
	struct htable __rcu *table; /* the hash table */
	struct htable_gc gc;	/* gc workqueue */
	u32 maxelem;		/* max elements in the hash */
	u32 initval;		/* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;		/* markmask value for mark mask to store */
#endif
	u8 bucketsize;		/* max elements in an array block */
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
	struct list_head ad;	/* Resize add|del backlist */
	struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
	struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};

/* ADD|DEL entries saved during resize */
struct mtype_resize_ad {
	struct list_head list;
	enum ipset_adt ad;	/* ADD|DEL element */
	struct mtype_elem d;	/* Element value */
	struct ip_set_ext ext;	/* Extensions for ADD */
	struct ip_set_ext mext;	/* Target extensions for ADD */
	u32 flags;		/* Flags for ADD */
};

#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book-keeping when the hash stores different
 * sized networks. cidr == real cidr + 1 to support /0.
 */
static void
mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	int i, j;

	spin_lock_bh(&set->lock);
	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
		if (j != -1) {
			continue;
		} else if (h->nets[i].cidr[n] < cidr) {
			j = i;
		} else if (h->nets[i].cidr[n] == cidr) {
			h->nets[CIDR_POS(cidr)].nets[n]++;
			goto unlock;
		}
	}
	if (j != -1) {
		for (; i > j; i--)
			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
	}
	h->nets[i].cidr[n] = cidr;
	h->nets[CIDR_POS(cidr)].nets[n] = 1;
unlock:
	spin_unlock_bh(&set->lock);
}

static void
mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	u8 i, j, net_end = NLEN - 1;

	spin_lock_bh(&set->lock);
	for (i = 0; i < NLEN; i++) {
		if (h->nets[i].cidr[n] != cidr)
			continue;
		h->nets[CIDR_POS(cidr)].nets[n]--;
		if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
			goto unlock;
		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
		h->nets[j].cidr[n] = 0;
		goto unlock;
	}
unlock:
	spin_unlock_bh(&set->lock);
}
#endif
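
/* Example of the book-keeping above (illustrative only): without
 * IP_SET_HASH_WITH_NET0, adding the first /24 network calls
 * mtype_add_cidr() with cidr == NCIDR_PUT(24) == 25; its element
 * counter lives in h->nets[CIDR_POS(25)].nets[n] == h->nets[23].nets[n],
 * while the cidr[] values themselves are kept sorted, largest cidr
 * first, so lookups can probe the more specific networks first.
 */
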
/* Calculate the actual memory size of the set data */
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
	return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}

/* Get the ith element from the array block n */
#define ahash_data(n, i, dsize)	\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))

static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
	int i;

	for (i = 0; i < n->pos; i++)
		if (test_bit(i, n->used))
			ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}

/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
	struct htype *h = set->data;
	struct htable *t;
	struct hbucket *n;
	u32 r, i;

	t = ipset_dereference_nfnl(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		spin_lock_bh(&t->hregion[r].lock);
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = __ipset_dereference(hbucket(t, i));
			if (!n)
				continue;
			if (set->extensions & IPSET_EXT_DESTROY)
				mtype_ext_cleanup(set, n);
			/* FIXME: use slab cache */
			rcu_assign_pointer(hbucket(t, i), NULL);
			kfree_rcu(n, rcu);
		}
		t->hregion[r].ext_size = 0;
		t->hregion[r].elements = 0;
		spin_unlock_bh(&t->hregion[r].lock);
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(h->nets));
#endif
}

/* Destroy the hashtable part of the set */
static void
mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
			mtype_ext_cleanup(set, n);
		/* FIXME: use slab cache */
		kfree(n);
	}

	ip_set_free(t->hregion);
	ip_set_free(t);
}

/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
{
	struct htype *h = set->data;
	struct list_head *l, *lt;

	if (SET_WITH_TIMEOUT(set))
		cancel_delayed_work_sync(&h->gc.dwork);

	mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
	list_for_each_safe(l, lt, &h->ad) {
		list_del(l);
		kfree(l);
	}
	kfree(h);

	set->data = NULL;
}

static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
	const struct htype *x = a->data;
	const struct htype *y = b->data;

	/* Resizing changes htable_bits, so we ignore it */
	return x->maxelem == y->maxelem &&
	       a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
	       x->netmask == y->netmask &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	       x->markmask == y->markmask &&
#endif
	       a->extensions == b->extensions;
}
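
/* Sketch of the expiry pass below, per region r: every expired entry
 * is cleared from the used bitmap and, once a bucket has at least
 * AHASH_INIT_SIZE free slots, the bucket is either freed completely
 * (when empty) or reallocated AHASH_INIT_SIZE entries smaller with
 * the surviving entries compacted into the new block; the old bucket
 * is released with kfree_rcu() so RCU readers can still walk it.
 */
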
static void
mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
	struct hbucket *n, *tmp;
	struct mtype_elem *data;
	u32 i, j, d;
	size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 k;
#endif
	u8 htable_bits = t->htable_bits;

	spin_lock_bh(&t->hregion[r].lock);
	for (i = ahash_bucket_start(r, htable_bits);
	     i < ahash_bucket_end(r, htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		for (j = 0, d = 0; j < n->pos; j++) {
			if (!test_bit(j, n->used)) {
				d++;
				continue;
			}
			data = ahash_data(n, j, dsize);
			if (!ip_set_timeout_expired(ext_timeout(data, set)))
				continue;
			pr_debug("expired %u/%u\n", i, j);
			clear_bit(j, n->used);
			smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
			for (k = 0; k < IPSET_NET_COUNT; k++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, k)),
					k);
#endif
			t->hregion[r].elements--;
			ip_set_ext_destroy(set, data);
			d++;
		}
		if (d >= AHASH_INIT_SIZE) {
			if (d >= n->size) {
				t->hregion[r].ext_size -=
					ext_size(n->size, dsize);
				rcu_assign_pointer(hbucket(t, i), NULL);
				kfree_rcu(n, rcu);
				continue;
			}
			tmp = kzalloc(sizeof(*tmp) +
				      (n->size - AHASH_INIT_SIZE) * dsize,
				      GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements. */
				continue;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, d = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + d * dsize,
				       data, dsize);
				set_bit(d, tmp->used);
				d++;
			}
			tmp->pos = d;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, i), tmp);
			kfree_rcu(n, rcu);
		}
	}
	spin_unlock_bh(&t->hregion[r].lock);
}

static void
mtype_gc(struct work_struct *work)
{
	struct htable_gc *gc;
	struct ip_set *set;
	struct htype *h;
	struct htable *t;
	u32 r, numof_locks;
	unsigned int next_run;

	gc = container_of(work, struct htable_gc, dwork.work);
	set = gc->set;
	h = set->data;

	spin_lock_bh(&set->lock);
	t = ipset_dereference_set(h->table, set);
	atomic_inc(&t->uref);
	numof_locks = ahash_numof_locks(t->htable_bits);
	r = gc->region++;
	if (r >= numof_locks) {
		r = gc->region = 0;
	}
	next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
	if (next_run < HZ/10)
		next_run = HZ/10;
	spin_unlock_bh(&set->lock);

	mtype_gc_do(set, h, t, r);

	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by expire: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}

	queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
}
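
/* Re-arm example (illustrative only): each run of the gc worker above
 * handles exactly one region and re-queues itself after
 * (IPSET_GC_PERIOD(timeout) * HZ) / numof_locks jiffies, clamped to
 * at least HZ/10, so a full sweep of all regions takes roughly one
 * gc period regardless of the table size.
 */
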
static void
mtype_gc_init(struct htable_gc *gc)
{
	INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
	queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}

static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);

/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressure.
 */
static int
mtype_resize(struct ip_set *set, bool retried)
{
	struct htype *h = set->data;
	struct htable *t, *orig;
	u8 htable_bits;
	size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 flags;
	struct mtype_elem *tmp;
#endif
	struct mtype_elem *data;
	struct mtype_elem *d;
	struct hbucket *n, *m;
	struct list_head *l, *lt;
	struct mtype_resize_ad *x;
	u32 i, j, r, nr, key;
	int ret;

#ifdef IP_SET_HASH_WITH_NETS
	tmp = kmalloc(dsize, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;
#endif
	orig = ipset_dereference_bh_nfnl(h->table);
	htable_bits = orig->htable_bits;

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits)
		goto hbwarn;
	hsize = htable_size(htable_bits);
	if (!hsize)
		goto hbwarn;
	t = ip_set_alloc(hsize);
	if (!t) {
		ret = -ENOMEM;
		goto out;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
	if (!t->hregion) {
		ip_set_free(t);
		ret = -ENOMEM;
		goto out;
	}
	t->htable_bits = htable_bits;
	t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
	for (i = 0; i < ahash_numof_locks(htable_bits); i++)
		spin_lock_init(&t->hregion[i].lock);

	/* There can't be another parallel resizing,
	 * but dumping, gc, kernel side add/del are possible
	 */
	orig = ipset_dereference_bh_nfnl(h->table);
	atomic_set(&orig->ref, 1);
	atomic_inc(&orig->uref);
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
		/* Expire may replace a hbucket with another one */
		rcu_read_lock_bh();
		for (i = ahash_bucket_start(r, orig->htable_bits);
		     i < ahash_bucket_end(r, orig->htable_bits); i++) {
			n = __ipset_dereference(hbucket(orig, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				if (SET_ELEM_EXPIRED(set, data))
					continue;
#ifdef IP_SET_HASH_WITH_NETS
				/* We have readers running parallel with us,
				 * so the live data cannot be modified.
				 */
				flags = 0;
				memcpy(tmp, data, dsize);
				data = tmp;
				mtype_data_reset_flags(data, &flags);
#endif
				key = HKEY(data, h->initval, htable_bits);
				m = __ipset_dereference(hbucket(t, key));
				nr = ahash_region(key, htable_bits);
				if (!m) {
					m = kzalloc(sizeof(*m) +
					    AHASH_INIT_SIZE * dsize,
					    GFP_ATOMIC);
					if (!m) {
						ret = -ENOMEM;
						goto cleanup;
					}
					m->size = AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					RCU_INIT_POINTER(hbucket(t, key), m);
				} else if (m->pos >= m->size) {
					struct hbucket *ht;

					if (m->size >= AHASH_MAX(h)) {
						ret = -EAGAIN;
					} else {
						ht = kzalloc(sizeof(*ht) +
						(m->size + AHASH_INIT_SIZE)
						* dsize,
						GFP_ATOMIC);
						if (!ht)
							ret = -ENOMEM;
					}
					if (ret < 0)
						goto cleanup;
					memcpy(ht, m, sizeof(struct hbucket) +
					       m->size * dsize);
					ht->size = m->size + AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					kfree(m);
					m = ht;
					RCU_INIT_POINTER(hbucket(t, key), ht);
				}
				d = ahash_data(m, m->pos, dsize);
				memcpy(d, data, dsize);
				set_bit(m->pos++, m->used);
				t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
				mtype_data_reset_flags(d, &flags);
#endif
			}
		}
		rcu_read_unlock_bh();
	}

	/* There can't be any other writer. */
	rcu_assign_pointer(h->table, t);

	/* Give time to other readers of the set */
	synchronize_rcu();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	/* Add/delete elements processed by the SET target during resize.
	 * Kernel-side add cannot trigger a resize and userspace actions
	 * are serialized by the mutex.
	 */
	list_for_each_safe(l, lt, &h->ad) {
		x = list_entry(l, struct mtype_resize_ad, list);
		if (x->ad == IPSET_ADD) {
			mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
		} else {
			mtype_del(set, &x->d, NULL, NULL, 0);
		}
		list_del(l);
		kfree(l);
	}
	/* If there's nobody else using the table, destroy it */
	if (atomic_dec_and_test(&orig->uref)) {
		pr_debug("Table destroy by resize %p\n", orig);
		mtype_ahash_destroy(set, orig, false);
	}

out:
#ifdef IP_SET_HASH_WITH_NETS
	kfree(tmp);
#endif
	return ret;

cleanup:
	rcu_read_unlock_bh();
	atomic_set(&orig->ref, 0);
	atomic_dec(&orig->uref);
	mtype_ahash_destroy(set, t, false);
	if (ret == -EAGAIN)
		goto retry;
	goto out;

hbwarn:
	/* In case we have plenty of memory :-) */
	pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
	ret = -IPSET_ERR_HASH_FULL;
	goto out;
}
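
/* Reference counting sketch (as used above and in the add/del/gc/dump
 * paths): t->ref flags the old table as "replaced by resize", while
 * t->uref counts gc and dump users still walking it; whoever drops
 * the last uref while ref is set frees the old table, so late readers
 * never see it disappear under them.
 */
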
/* Get the current number of elements and ext_size in the set */
static void
mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
{
	struct htype *h = set->data;
	const struct htable *t;
	u32 i, j, r;
	struct hbucket *n;
	struct mtype_elem *data;

	t = rcu_dereference_bh(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = rcu_dereference_bh(hbucket(t, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, set->dsize);
				if (!SET_ELEM_EXPIRED(set, data))
					(*elements)++;
			}
		}
		*ext_size += t->hregion[r].ext_size;
	}
}

/* Add an element to a hash and update the internal counters when it
 * succeeds, otherwise report the proper error code.
 */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n, *old = ERR_PTR(-ENOENT);
	int i, j = -1, ret;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	bool deleted = false, forceadd = false, reuse = false;
	u32 r, key, multi = 0, elements, maxelem;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	elements = t->hregion[r].elements;
	maxelem = t->maxelem;
	if (elements >= maxelem) {
		u32 e;
		if (SET_WITH_TIMEOUT(set)) {
			rcu_read_unlock_bh();
			mtype_gc_do(set, h, t, r);
			rcu_read_lock_bh();
		}
		maxelem = h->maxelem;
		elements = 0;
		for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
			elements += t->hregion[e].elements;
		if (elements >= maxelem && SET_WITH_FORCEADD(set))
			forceadd = true;
	}
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		if (forceadd || elements >= maxelem)
			goto set_full;
		old = NULL;
		n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		n->size = AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
		goto copy_elem;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			/* Reuse first deleted entry */
			if (j == -1) {
				deleted = reuse = true;
				j = i;
			}
			continue;
		}
		data = ahash_data(n, i, set->dsize);
		if (mtype_data_equal(data, d, &multi)) {
			if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
				/* Just the extensions could be overwritten */
				j = i;
				goto overwrite_extensions;
			}
			ret = -IPSET_ERR_EXIST;
			goto unlock;
		}
		/* Reuse first timed out entry */
		if (SET_ELEM_EXPIRED(set, data) && j == -1) {
			j = i;
			reuse = true;
		}
	}
	if (reuse || forceadd) {
		if (j == -1)
			j = 0;
		data = ahash_data(n, j, set->dsize);
		if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
			for (i = 0; i < IPSET_NET_COUNT; i++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, i)),
					i);
#endif
			ip_set_ext_destroy(set, data);
			t->hregion[r].elements--;
		}
		goto copy_data;
	}
	if (elements >= maxelem)
		goto set_full;
	/* Create a new slot */
	if (n->pos >= n->size) {
#ifdef IP_SET_HASH_WITH_MULTI
		if (h->bucketsize >= AHASH_MAX_TUNED)
			goto set_full;
		else if (h->bucketsize < multi)
			h->bucketsize += AHASH_INIT_SIZE;
#endif
		if (n->size >= AHASH_MAX(h)) {
			/* Trigger rehashing */
			mtype_data_next(&h->next, d);
			ret = -EAGAIN;
			goto resize;
		}
		old = n;
		n = kzalloc(sizeof(*n) +
			    (old->size + AHASH_INIT_SIZE) * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		memcpy(n, old, sizeof(struct hbucket) +
		       old->size * set->dsize);
		n->size = old->size + AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
	}

copy_elem:
	j = n->pos++;
	data = ahash_data(n, j, set->dsize);
copy_data:
	t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
	for (i = 0; i < IPSET_NET_COUNT; i++)
		mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
	memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
	mtype_data_set_flags(data, flags);
#endif
	if (SET_WITH_COUNTER(set))
		ip_set_init_counter(ext_counter(data, set), ext);
	if (SET_WITH_COMMENT(set))
		ip_set_init_comment(set, ext_comment(data, set), ext);
	if (SET_WITH_SKBINFO(set))
		ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
	/* Must come last for the case when timed out entry is reused */
	if (SET_WITH_TIMEOUT(set))
		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
	smp_mb__before_atomic();
	set_bit(j, n->used);
	if (old != ERR_PTR(-ENOENT)) {
		rcu_assign_pointer(hbucket(t, key), n);
		if (old)
			kfree_rcu(old, rcu);
	}
	ret = 0;
resize:
	spin_unlock_bh(&t->hregion[r].lock);
	if (atomic_read(&t->ref) && ext->target) {
		/* Resize is in process and kernel side add, save values */
		struct mtype_resize_ad *x;

		x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
		if (!x)
			/* Don't bother */
			goto out;
		x->ad = IPSET_ADD;
		memcpy(&x->d, value, sizeof(struct mtype_elem));
		memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
		memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
		x->flags = flags;
		spin_lock_bh(&set->lock);
		list_add_tail(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	goto out;

set_full:
	if (net_ratelimit())
		pr_warn("Set %s is full, maxelem %u reached\n",
			set->name, maxelem);
	ret = -IPSET_ERR_HASH_FULL;
unlock:
	spin_unlock_bh(&t->hregion[r].lock);
out:
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by add: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}
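
/* Error convention of mtype_add() above: -IPSET_ERR_EXIST for a
 * duplicate without IPSET_FLAG_EXIST, -IPSET_ERR_HASH_FULL when
 * maxelem is reached, and -EAGAIN when the target bucket cannot grow
 * any further, which (assuming the usual ip_set core behaviour) makes
 * the caller resize the table and retry the add.
 */
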
/* Delete an element from the hash and free up space if possible.
 */
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	struct mtype_resize_ad *x = NULL;
	int i, j, k, r, ret = -IPSET_ERR_EXIST;
	u32 key, multi = 0;
	size_t dsize = set->dsize;

	/* Userspace add and resize are excluded by the mutex.
	 * Kernelspace add does not trigger resize.
	 */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n)
		goto out;
	for (i = 0, k = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			k++;
			continue;
		}
		data = ahash_data(n, i, dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		if (SET_ELEM_EXPIRED(set, data))
			goto out;

		ret = 0;
		clear_bit(i, n->used);
		smp_mb__after_atomic();
		if (i + 1 == n->pos)
			n->pos--;
		t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
		for (j = 0; j < IPSET_NET_COUNT; j++)
			mtype_del_cidr(set, h,
				       NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
		ip_set_ext_destroy(set, data);

		if (atomic_read(&t->ref) && ext->target) {
			/* Resize is in process and kernel side del,
			 * save values
			 */
			x = kzalloc(sizeof(struct mtype_resize_ad),
				    GFP_ATOMIC);
			if (x) {
				x->ad = IPSET_DEL;
				memcpy(&x->d, value,
				       sizeof(struct mtype_elem));
				x->flags = flags;
			}
		}
		for (; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				k++;
		}
		if (n->pos == 0 && k == 0) {
			t->hregion[r].ext_size -= ext_size(n->size, dsize);
			rcu_assign_pointer(hbucket(t, key), NULL);
			kfree_rcu(n, rcu);
		} else if (k >= AHASH_INIT_SIZE) {
			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
					(n->size - AHASH_INIT_SIZE) * dsize,
					GFP_ATOMIC);
			if (!tmp)
				goto out;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, k = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + k * dsize, data, dsize);
				set_bit(k, tmp->used);
				k++;
			}
			tmp->pos = k;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, key), tmp);
			kfree_rcu(n, rcu);
		}
		goto out;
	}

out:
	spin_unlock_bh(&t->hregion[r].lock);
	if (x) {
		spin_lock_bh(&set->lock);
		list_add(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by del: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}

static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
	if (!ip_set_match_extensions(set, ext, mext, flags, data))
		return 0;
	/* nomatch entries return -ENOTEMPTY */
	return mtype_do_data_match(data);
}
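
/* Match result convention (as implemented above): 0 means "no match",
 * a positive value means "match", and mtype_do_data_match() reports a
 * matching nomatch-flagged entry as -ENOTEMPTY, per the comment
 * above, so the callers can propagate the exception to the core.
 */
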
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct hbucket *n;
	struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
	struct mtype_elem orig = *d;
	int ret, i, j = 0, k;
#else
	int ret, i, j = 0;
#endif
	u32 key, multi = 0;

	pr_debug("test by nets\n");
	for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
		for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
		     k++) {
			mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
					   true);
#else
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
		key = HKEY(d, h->initval, t->htable_bits);
		n = rcu_dereference_bh(hbucket(t, key));
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			data = ahash_data(n, i, set->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			ret = mtype_data_match(data, ext, mext, set, flags);
			if (ret != 0)
				return ret;
#ifdef IP_SET_HASH_WITH_MULTI
			/* No match, reset multiple match flag */
			multi = 0;
#endif
		}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif
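
/* Probe example (illustrative only): if the set stores /24 and /16
 * networks, mtype_test_cidrs() masks the tested address to /24 and
 * then to /16 and looks up each masked key in turn, i.e. at most one
 * hash lookup per distinct stored prefix length.
 */
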
/* Test whether the element is added to the set */
static int
mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	   struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	struct mtype_elem *d = value;
	struct hbucket *n;
	struct mtype_elem *data;
	int i, ret = 0;
	u32 key, multi = 0;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes
	 */
	for (i = 0; i < IPSET_NET_COUNT; i++)
		if (DCIDR_GET(d->cidr, i) != HOST_MASK)
			break;
	if (i == IPSET_NET_COUNT) {
		ret = mtype_test_cidrs(set, d, ext, mext, flags);
		goto out;
	}
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used))
			continue;
		data = ahash_data(n, i, set->dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		ret = mtype_data_match(data, ext, mext, set, flags);
		if (ret != 0)
			goto out;
	}
out:
	rcu_read_unlock_bh();
	return ret;
}

/* Reply to a HEADER request: fill out the header part of the set */
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
	struct htype *h = set->data;
	const struct htable *t;
	struct nlattr *nested;
	size_t memsize;
	u32 elements = 0;
	size_t ext_size = 0;
	u8 htable_bits;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	mtype_ext_size(set, &elements, &ext_size);
	memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
	htable_bits = t->htable_bits;
	rcu_read_unlock_bh();

	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
		goto nla_put_failure;
#endif
	if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) {
		if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) ||
		    nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval)))
			goto nla_put_failure;
	}
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
		goto nla_put_failure;
	if (unlikely(ip_set_put_flags(skb, set)))
		goto nla_put_failure;
	nla_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Make it possible to run dumping in parallel with resizing */
static void
mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
{
	struct htype *h = set->data;
	struct htable *t;

	if (start) {
		rcu_read_lock_bh();
		t = ipset_dereference_bh_nfnl(h->table);
		atomic_inc(&t->uref);
		cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
		rcu_read_unlock_bh();
	} else if (cb->args[IPSET_CB_PRIVATE]) {
		t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
		if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
			pr_debug("Table destroy after resize by dump: %p\n",
				 t);
			mtype_ahash_destroy(set, t, false);
		}
		cb->args[IPSET_CB_PRIVATE] = 0;
	}
}
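
/* Dump lifetime sketch: the start call above takes a uref and caches
 * the table pointer in the netlink callback, so a resize running
 * between two dump chunks cannot free the table the dump is walking;
 * the final call drops the uref and destroys the table if a resize
 * has already detached it (t->ref set).
 */
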
/* Reply to a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
	   struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct htable *t;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct mtype_elem *e;
	u32 first = cb->args[IPSET_CB_ARG0];
	/* We assume that one hash bucket fits into one page */
	void *incomplete;
	int i, ret = 0;

	atd = nla_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;

	pr_debug("list hash set %s\n", set->name);
	t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
	/* Expire may replace a hbucket with another one */
	rcu_read_lock();
	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
	     cb->args[IPSET_CB_ARG0]++) {
		cond_resched_rcu();
		incomplete = skb_tail_pointer(skb);
		n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
			 cb->args[IPSET_CB_ARG0], t, n);
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			e = ahash_data(n, i, set->dsize);
			if (SET_ELEM_EXPIRED(set, e))
				continue;
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[IPSET_CB_ARG0], n, i, e);
			nested = nla_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[IPSET_CB_ARG0] == first) {
					nla_nest_cancel(skb, atd);
					ret = -EMSGSIZE;
					goto out;
				}
				goto nla_put_failure;
			}
			if (mtype_data_list(skb, e))
				goto nla_put_failure;
			if (ip_set_put_extensions(skb, set, e, true))
				goto nla_put_failure;
			nla_nest_end(skb, nested);
		}
	}
	nla_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[IPSET_CB_ARG0] = 0;

	goto out;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
		pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
			set->name);
		cb->args[IPSET_CB_ARG0] = 0;
		ret = -EMSGSIZE;
	} else {
		nla_nest_end(skb, atd);
	}
out:
	rcu_read_unlock();
	return ret;
}

static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
			  const struct xt_action_param *par,
			  enum ipset_adt adt, struct ip_set_adt_opt *opt);

static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
			  enum ipset_adt adt, u32 *lineno, u32 flags,
			  bool retried);

static const struct ip_set_type_variant mtype_variant = {
	.kadt	= mtype_kadt,
	.uadt	= mtype_uadt,
	.adt	= {
		[IPSET_ADD] = mtype_add,
		[IPSET_DEL] = mtype_del,
		[IPSET_TEST] = mtype_test,
	},
	.destroy = mtype_destroy,
	.flush	= mtype_flush,
	.head	= mtype_head,
	.list	= mtype_list,
	.uref	= mtype_uref,
	.resize	= mtype_resize,
	.same_set = mtype_same_set,
	.region_lock = true,
};

#ifdef IP_SET_EMIT_CREATE
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;
#endif
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct htype *h;
	struct htable *t;
	u32 i;

	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifdef IP_SET_PROTO_UNDEF
	if (set->family != NFPROTO_UNSPEC)
		return -IPSET_ERR_INVALID_FAMILY;
#else
	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
	/* Separated condition in order to avoid directive in argument list */
	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
		return -IPSET_ERR_PROTOCOL;

	markmask = 0xffffffff;
	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
		if (markmask == 0)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

	hsize = sizeof(*h);
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	/* Compute htable_bits from the user input parameter hashsize.
	 * Assume that hashsize == 2^htable_bits,
	 * otherwise round up to the first 2^n value.
	 */
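	/* Example (illustrative only): hashsize 1024 gives
	 * hbits = fls(1023) = 10, i.e. exactly 1024 buckets, while
	 * hashsize 1500 gives fls(1499) = 11 and is rounded up to
	 * 2048 buckets.
	 */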
	hbits = fls(hashsize - 1);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	t = ip_set_alloc(hsize);
	if (!t) {
		kfree(h);
		return -ENOMEM;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
	if (!t->hregion) {
		ip_set_free(t);
		kfree(h);
		return -ENOMEM;
	}
	h->gc.set = set;
	for (i = 0; i < ahash_numof_locks(hbits); i++)
		spin_lock_init(&t->hregion[i].lock);
	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	if (tb[IPSET_ATTR_INITVAL])
		h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
	else
		get_random_bytes(&h->initval, sizeof(h->initval));
	h->bucketsize = AHASH_MAX_SIZE;
	if (tb[IPSET_ATTR_BUCKETSIZE]) {
		h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
		if (h->bucketsize < AHASH_INIT_SIZE)
			h->bucketsize = AHASH_INIT_SIZE;
		else if (h->bucketsize > AHASH_MAX_SIZE)
			h->bucketsize = AHASH_MAX_SIZE;
		else if (h->bucketsize % 2)
			h->bucketsize += 1;
	}
	t->htable_bits = hbits;
	t->maxelem = h->maxelem / ahash_numof_locks(hbits);
	RCU_INIT_POINTER(h->table, t);

	INIT_LIST_HEAD(&h->ad);
	set->data = h;
#ifndef IP_SET_PROTO_UNDEF
	if (set->family == NFPROTO_IPV4) {
#endif
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
#endif
	set->timeout = IPSET_NO_TIMEOUT;
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
		if (set->family == NFPROTO_IPV4)
#endif
			IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
	}
	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(t->htable_bits),
		 t->htable_bits, h->maxelem, set->data, t);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */

#undef HKEY_DATALEN