/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

#ifndef _IP_SET_HASH_GEN_H
#define _IP_SET_HASH_GEN_H

#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>

#define __ipset_dereference(p)		\
	rcu_dereference_protected(p, 1)
#define ipset_dereference_nfnl(p)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
#define ipset_dereference_set(p, set)	\
	rcu_dereference_protected(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) ||	\
		lockdep_is_held(&(set)->lock))
#define ipset_dereference_bh_nfnl(p)	\
	rcu_dereference_bh_check(p,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))

/* Hashing which uses arrays to resolve clashes. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32).
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */

/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			2
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(6 * AHASH_INIT_SIZE)
/* Max number of elements in the array block when tuned */
#define AHASH_MAX_TUNED			64

#define AHASH_MAX(h)			((h)->bucketsize)

/* The max number of elements in an array block can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
static u8
tune_bucketsize(u8 curr, u32 multi)
{
	u32 n;

	if (multi < curr)
		return curr;

	n = curr + AHASH_INIT_SIZE;
	/* Currently, when listing, one whole hash bucket must fit into
	 * a message. Therefore we have a hard limit here.
	 */
	return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
}
#define TUNE_BUCKETSIZE(h, multi)	\
	((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi))
#else
#define TUNE_BUCKETSIZE(h, multi)
#endif

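/* Worked example with the defaults above: a bucket array starts with
 * AHASH_INIT_SIZE == 2 slots and may hold at most AHASH_MAX_SIZE == 12.
 * tune_bucketsize() only ever raises the limit in AHASH_INIT_SIZE steps
 * (12 -> 14 -> ...) and never above AHASH_MAX_TUNED == 64, because one
 * whole bucket must still fit into a single netlink message at listing.
 */
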
/* A hash bucket */
struct hbucket {
	struct rcu_head rcu;	/* for call_rcu */
	/* Which positions are used in the array */
	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
	unsigned char value[]	/* the array of the values */
		__aligned(__alignof__(u64));
};

/* Region size for locking == 2^HTABLE_REGION_BITS */
#define HTABLE_REGION_BITS	10
#define ahash_numof_locks(htable_bits)		\
	((htable_bits) < HTABLE_REGION_BITS ? 1	\
		: jhash_size((htable_bits) - HTABLE_REGION_BITS))
#define ahash_sizeof_regions(htable_bits)	\
	(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
#define ahash_region(n, htable_bits)		\
	((n) % ahash_numof_locks(htable_bits))
#define ahash_bucket_start(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? 0	\
		: (h) * jhash_size(HTABLE_REGION_BITS))
#define ahash_bucket_end(h, htable_bits)	\
	((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits)	\
		: ((h) + 1) * jhash_size(HTABLE_REGION_BITS))

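/* Worked example: with htable_bits == 12 and HTABLE_REGION_BITS == 10,
 * ahash_numof_locks() yields jhash_size(12 - 10) == 4 region locks and
 * ahash_bucket_start()/end() give region r the buckets
 * [r * 1024, (r + 1) * 1024). Tables with fewer than 2^10 buckets use
 * a single region covering everything.
 */
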
struct htable_gc {
	struct delayed_work dwork;
	struct ip_set *set;	/* Set the gc belongs to */
	u32 region;		/* Last gc run position */
};

/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
	atomic_t ref;		/* References for resizing */
	atomic_t uref;		/* References for dumping and gc */
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	u32 maxelem;		/* Maxelem per region */
	struct ip_set_region *hregion;	/* Region locks and ext sizes */
	struct hbucket __rcu *bucket[];	/* hashtable buckets */
};

#define hbucket(h, i)		((h)->bucket[i])
#define ext_size(n, dsize)	\
	(sizeof(struct hbucket) + (n) * (dsize))

#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT		1
#endif

/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
	u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
	u8 cidr[IPSET_NET_COUNT];  /* the cidr value */
};

/* Compute the hash table size */
static size_t
htable_size(u8 hbits)
{
	size_t hsize;

	/* We must fit both into u32 in jhash and size_t */
	if (hbits > 31)
		return 0;
	hsize = jhash_size(hbits);
	if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
	    < hsize)
		return 0;

	return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}

#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		(cidr[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif

/* cidr + 1 is stored in net_prefixes to support /0 */
#define NCIDR_PUT(cidr)		((cidr) + 1)
#define NCIDR_GET(cidr)		((cidr) - 1)

#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
#define DCIDR_PUT(cidr)		((cidr) - 1)
#define DCIDR_GET(cidr, i)	(__CIDR(cidr, i) + 1)
#else
#define DCIDR_PUT(cidr)		(cidr)
#define DCIDR_GET(cidr, i)	__CIDR(cidr, i)
#endif

#define INIT_CIDR(cidr, host_mask)	\
	DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))

#ifdef IP_SET_HASH_WITH_NET0
/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
#define NLEN			(HOST_MASK + 1)
#define CIDR_POS(c)		((c) - 1)
#else
/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
#define NLEN			HOST_MASK
#define CIDR_POS(c)		((c) - 2)
#endif

#else
#define NLEN			0
#endif /* IP_SET_HASH_WITH_NETS */

#define SET_ELEM_EXPIRED(set, d)	\
	(SET_WITH_TIMEOUT(set) &&	\
	 ip_set_timeout_expired(ext_timeout(d, set)))

#endif /* _IP_SET_HASH_GEN_H */

#ifndef MTYPE
#error "MTYPE is not defined!"
#endif

#ifndef HTYPE
#error "HTYPE is not defined!"
#endif

#ifndef HOST_MASK
#error "HOST_MASK is not defined!"
#endif

/* Family dependent templates */

#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt

#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
#undef mtype_resize
#undef mtype_ext_size
#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
#undef mtype_data_match

#undef htype
#undef HKEY

#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)

#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)

#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_uref		IPSET_TOKEN(MTYPE, _uref)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_ext_size		IPSET_TOKEN(MTYPE, _ext_size)
#define mtype_resize_ad		IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc_do		IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)

#ifndef HKEY_DATALEN
#define HKEY_DATALEN		sizeof(struct mtype_elem)
#endif

#define htype			MTYPE

#define HKEY(data, initval, htable_bits)			\
({								\
	const u32 *__k = (const u32 *)data;			\
	u32 __l = HKEY_DATALEN / sizeof(u32);			\
								\
	BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);		\
								\
	jhash2(__k, __l, initval) & jhash_mask(htable_bits);	\
})

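/* HKEY treats the element as an array of u32 words and hashes
 * HKEY_DATALEN / sizeof(u32) of them; e.g. the IPv4 hash:ip element is
 * a single __be32, so jhash2() runs over one word. The result is masked
 * down to the current table size, so keys are recomputed after a resize.
 */
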
/* The generic hash structure */
struct htype {
	struct htable __rcu *table;	/* the hash table */
	struct htable_gc gc;		/* gc workqueue */
	u32 maxelem;			/* max elements in the hash */
	u32 initval;			/* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;			/* markmask value for mark mask to store */
#endif
	u8 bucketsize;			/* max elements in an array block */
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;			/* netmask value for subnets to store */
#endif
	struct list_head ad;		/* Resize add|del backlist */
	struct mtype_elem next;		/* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
	struct net_prefixes nets[NLEN];	/* book-keeping of prefixes */
#endif
};

/* ADD|DEL entries saved during resize */
struct mtype_resize_ad {
	struct list_head list;
	enum ipset_adt ad;	/* ADD|DEL element */
	struct mtype_elem d;	/* Element value */
	struct ip_set_ext ext;	/* Extensions for ADD */
	struct ip_set_ext mext;	/* Target extensions for ADD */
	u32 flags;		/* Flags for ADD */
};

#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size bookkeeping when the hash stores different
 * sized networks. cidr == real cidr + 1 to support /0.
 */
static void
mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	int i, j;

	spin_lock_bh(&set->lock);
	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
		if (j != -1) {
			continue;
		} else if (h->nets[i].cidr[n] < cidr) {
			j = i;
		} else if (h->nets[i].cidr[n] == cidr) {
			h->nets[CIDR_POS(cidr)].nets[n]++;
			goto unlock;
		}
	}
	if (j != -1) {
		for (; i > j; i--)
			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
	}
	h->nets[i].cidr[n] = cidr;
	h->nets[CIDR_POS(cidr)].nets[n] = 1;
unlock:
	spin_unlock_bh(&set->lock);
}

static void
mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
	u8 i, j, net_end = NLEN - 1;

	spin_lock_bh(&set->lock);
	for (i = 0; i < NLEN; i++) {
		if (h->nets[i].cidr[n] != cidr)
			continue;
		h->nets[CIDR_POS(cidr)].nets[n]--;
		if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
			goto unlock;
		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
		h->nets[j].cidr[n] = 0;
		goto unlock;
	}
unlock:
	spin_unlock_bh(&set->lock);
}
#endif

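/* Example: adding a /24 and then a /16 element to an IPv4 set stores
 * the values 25 and 17 (cidr + 1) in nets[].cidr[] in this order, as
 * larger cidr, i.e. longer prefix, comes first; nets[CIDR_POS(c)].nets[]
 * counts how many elements use each prefix length.
 */
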
/* Calculate the actual memory size of the set data */
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
	return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}

/* Get the ith element from the array block n */
#define ahash_data(n, i, dsize)	\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))

static void
mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
{
	int i;

	for (i = 0; i < n->pos; i++)
		if (test_bit(i, n->used))
			ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}

/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
	struct htype *h = set->data;
	struct htable *t;
	struct hbucket *n;
	u32 r, i;

	t = ipset_dereference_nfnl(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		spin_lock_bh(&t->hregion[r].lock);
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = __ipset_dereference(hbucket(t, i));
			if (!n)
				continue;
			if (set->extensions & IPSET_EXT_DESTROY)
				mtype_ext_cleanup(set, n);
			/* FIXME: use slab cache */
			rcu_assign_pointer(hbucket(t, i), NULL);
			kfree_rcu(n, rcu);
		}
		t->hregion[r].ext_size = 0;
		t->hregion[r].elements = 0;
		spin_unlock_bh(&t->hregion[r].lock);
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(h->nets));
#endif
}

/* Destroy the hashtable part of the set */
static void
mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
{
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
			mtype_ext_cleanup(set, n);
		/* FIXME: use slab cache */
		kfree(n);
	}

	ip_set_free(t->hregion);
	ip_set_free(t);
}

/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
{
	struct htype *h = set->data;
	struct list_head *l, *lt;

	if (SET_WITH_TIMEOUT(set))
		cancel_delayed_work_sync(&h->gc.dwork);

	mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
	list_for_each_safe(l, lt, &h->ad) {
		list_del(l);
		kfree(l);
	}
	kfree(h);

	set->data = NULL;
}

static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
	const struct htype *x = a->data;
	const struct htype *y = b->data;

	/* Resizing changes htable_bits, so we ignore it */
	return x->maxelem == y->maxelem &&
	       a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
	       x->netmask == y->netmask &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	       x->markmask == y->markmask &&
#endif
	       a->extensions == b->extensions;
}

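/* Garbage collection below walks one locked region at a time. When at
 * least AHASH_INIT_SIZE slots of a bucket have been freed, the bucket
 * is either released entirely (if it became empty) or reallocated into
 * a smaller array, so expired entries also give memory back.
 */
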
static void
mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
	struct hbucket *n, *tmp;
	struct mtype_elem *data;
	u32 i, j, d;
	size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 k;
#endif
	u8 htable_bits = t->htable_bits;

	spin_lock_bh(&t->hregion[r].lock);
	for (i = ahash_bucket_start(r, htable_bits);
	     i < ahash_bucket_end(r, htable_bits); i++) {
		n = __ipset_dereference(hbucket(t, i));
		if (!n)
			continue;
		for (j = 0, d = 0; j < n->pos; j++) {
			if (!test_bit(j, n->used)) {
				d++;
				continue;
			}
			data = ahash_data(n, j, dsize);
			if (!ip_set_timeout_expired(ext_timeout(data, set)))
				continue;
			pr_debug("expired %u/%u\n", i, j);
			clear_bit(j, n->used);
			smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
			for (k = 0; k < IPSET_NET_COUNT; k++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, k)),
					k);
#endif
			t->hregion[r].elements--;
			ip_set_ext_destroy(set, data);
			d++;
		}
		if (d >= AHASH_INIT_SIZE) {
			if (d >= n->size) {
				t->hregion[r].ext_size -=
					ext_size(n->size, dsize);
				rcu_assign_pointer(hbucket(t, i), NULL);
				kfree_rcu(n, rcu);
				continue;
			}
			tmp = kzalloc(sizeof(*tmp) +
				      (n->size - AHASH_INIT_SIZE) * dsize,
				      GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements. */
				continue;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, d = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + d * dsize, data, dsize);
				set_bit(d, tmp->used);
				d++;
			}
			tmp->pos = d;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, i), tmp);
			kfree_rcu(n, rcu);
		}
	}
	spin_unlock_bh(&t->hregion[r].lock);
}

static void
mtype_gc(struct work_struct *work)
{
	struct htable_gc *gc;
	struct ip_set *set;
	struct htype *h;
	struct htable *t;
	u32 r, numof_locks;
	unsigned int next_run;

	gc = container_of(work, struct htable_gc, dwork.work);
	set = gc->set;
	h = set->data;

	spin_lock_bh(&set->lock);
	t = ipset_dereference_set(h->table, set);
	atomic_inc(&t->uref);
	numof_locks = ahash_numof_locks(t->htable_bits);
	r = gc->region++;
	if (r >= numof_locks)
		r = gc->region = 0;
	next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
	if (next_run < HZ / 10)
		next_run = HZ / 10;
	spin_unlock_bh(&set->lock);

	mtype_gc_do(set, h, t, r);

	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by expire: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}

	queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
}

static void
mtype_gc_init(struct htable_gc *gc)
{
	INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
	queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}

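/* GC pacing: one region is processed per work invocation, and the delay
 * IPSET_GC_PERIOD(timeout) * HZ is spread over all regions, clamped to
 * at least HZ/10. A full sweep of all regions therefore takes roughly
 * one whole GC period.
 */
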
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags);

/* Resize a hash: create a new hash table with double the hashsize
 * and insert the elements into it. Repeat until we succeed or
 * fail due to memory pressure.
 */
static int
mtype_resize(struct ip_set *set, bool retried)
{
	struct htype *h = set->data;
	struct htable *t, *orig;
	u8 htable_bits;
	size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
	u8 flags;
	struct mtype_elem *tmp;
#endif
	struct mtype_elem *data;
	struct mtype_elem *d;
	struct hbucket *n, *m;
	struct list_head *l, *lt;
	struct mtype_resize_ad *x;
	u32 i, j, r, nr, key;
	int ret;

#ifdef IP_SET_HASH_WITH_NETS
	tmp = kmalloc(dsize, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;
#endif
	orig = ipset_dereference_bh_nfnl(h->table);
	htable_bits = orig->htable_bits;

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits)
		goto hbwarn;
	hsize = htable_size(htable_bits);
	if (!hsize)
		goto hbwarn;
	t = ip_set_alloc(hsize);
	if (!t) {
		ret = -ENOMEM;
		goto out;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
	if (!t->hregion) {
		ip_set_free(t);
		ret = -ENOMEM;
		goto out;
	}
	t->htable_bits = htable_bits;
	t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
	for (i = 0; i < ahash_numof_locks(htable_bits); i++)
		spin_lock_init(&t->hregion[i].lock);

	/* There can't be another parallel resizing,
	 * but dumping, gc, kernel side add/del are possible
	 */
	orig = ipset_dereference_bh_nfnl(h->table);
	atomic_set(&orig->ref, 1);
	atomic_inc(&orig->uref);
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
		/* Expire may replace a hbucket with another one */
		rcu_read_lock_bh();
		for (i = ahash_bucket_start(r, orig->htable_bits);
		     i < ahash_bucket_end(r, orig->htable_bits); i++) {
			n = __ipset_dereference(hbucket(orig, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				if (SET_ELEM_EXPIRED(set, data))
					continue;
#ifdef IP_SET_HASH_WITH_NETS
				/* We have readers running parallel with us,
				 * so the live data cannot be modified.
				 */
				flags = 0;
				memcpy(tmp, data, dsize);
				data = tmp;
				mtype_data_reset_flags(data, &flags);
#endif
				key = HKEY(data, h->initval, htable_bits);
				m = __ipset_dereference(hbucket(t, key));
				nr = ahash_region(key, htable_bits);
				if (!m) {
					m = kzalloc(sizeof(*m) +
					    AHASH_INIT_SIZE * dsize,
					    GFP_ATOMIC);
					if (!m) {
						ret = -ENOMEM;
						goto cleanup;
					}
					m->size = AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					RCU_INIT_POINTER(hbucket(t, key), m);
				} else if (m->pos >= m->size) {
					struct hbucket *ht;

					if (m->size >= AHASH_MAX(h)) {
						ret = -EAGAIN;
					} else {
						ht = kzalloc(sizeof(*ht) +
						(m->size + AHASH_INIT_SIZE)
						* dsize,
						GFP_ATOMIC);
						if (!ht)
							ret = -ENOMEM;
					}
					if (ret < 0)
						goto cleanup;
					memcpy(ht, m, sizeof(struct hbucket) +
					       m->size * dsize);
					ht->size = m->size + AHASH_INIT_SIZE;
					t->hregion[nr].ext_size +=
						ext_size(AHASH_INIT_SIZE,
							 dsize);
					kfree(m);
					m = ht;
					RCU_INIT_POINTER(hbucket(t, key), ht);
				}
				d = ahash_data(m, m->pos, dsize);
				memcpy(d, data, dsize);
				set_bit(m->pos++, m->used);
				t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
				mtype_data_reset_flags(d, &flags);
#endif
			}
		}
		rcu_read_unlock_bh();
	}

	/* There can't be any other writer. */
	rcu_assign_pointer(h->table, t);

	/* Give time to other readers of the set */
	synchronize_rcu();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	/* Add/delete elements processed by the SET target during resize.
	 * Kernel-side add cannot trigger a resize and userspace actions
	 * are serialized by the mutex.
	 */
	list_for_each_safe(l, lt, &h->ad) {
		x = list_entry(l, struct mtype_resize_ad, list);
		if (x->ad == IPSET_ADD) {
			mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
		} else {
			mtype_del(set, &x->d, NULL, NULL, 0);
		}
		list_del(l);
		kfree(l);
	}
	/* If there's nobody else using the table, destroy it */
	if (atomic_dec_and_test(&orig->uref)) {
		pr_debug("Table destroy by resize %p\n", orig);
		mtype_ahash_destroy(set, orig, false);
	}

out:
#ifdef IP_SET_HASH_WITH_NETS
	kfree(tmp);
#endif
	return ret;

cleanup:
	rcu_read_unlock_bh();
	atomic_set(&orig->ref, 0);
	atomic_dec(&orig->uref);
	mtype_ahash_destroy(set, t, false);
	if (ret == -EAGAIN)
		goto retry;
	goto out;

hbwarn:
	/* In case we have plenty of memory :-) */
	pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
	ret = -IPSET_ERR_HASH_FULL;
	goto out;
}

/* Get the current number of elements and ext_size in the set */
static void
mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
{
	struct htype *h = set->data;
	const struct htable *t;
	u32 i, j, r;
	struct hbucket *n;
	struct mtype_elem *data;

	t = rcu_dereference_bh(h->table);
	for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
		for (i = ahash_bucket_start(r, t->htable_bits);
		     i < ahash_bucket_end(r, t->htable_bits); i++) {
			n = rcu_dereference_bh(hbucket(t, i));
			if (!n)
				continue;
			for (j = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, set->dsize);
				if (!SET_ELEM_EXPIRED(set, data))
					(*elements)++;
			}
		}
		*ext_size += t->hregion[r].ext_size;
	}
}

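/* Note on retrying above: if an element does not fit into any bucket of
 * the new table even at AHASH_MAX(h) slots, the half-built table is
 * freed on the cleanup path and the resize restarts with htable_bits + 1,
 * i.e. the table size doubles again (the -EAGAIN case).
 */
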
/* Add an element to a hash and update the internal counters when it
 * succeeds, otherwise report the proper error code.
 */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n, *old = ERR_PTR(-ENOENT);
	int i, j = -1, ret;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	bool deleted = false, forceadd = false, reuse = false;
	u32 r, key, multi = 0, elements, maxelem;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	elements = t->hregion[r].elements;
	maxelem = t->maxelem;
	if (elements >= maxelem) {
		u32 e;
		if (SET_WITH_TIMEOUT(set)) {
			rcu_read_unlock_bh();
			mtype_gc_do(set, h, t, r);
			rcu_read_lock_bh();
		}
		maxelem = h->maxelem;
		elements = 0;
		for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
			elements += t->hregion[e].elements;
		if (elements >= maxelem && SET_WITH_FORCEADD(set))
			forceadd = true;
	}
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		if (forceadd || elements >= maxelem)
			goto set_full;
		old = NULL;
		n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		n->size = AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
		goto copy_elem;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			/* Reuse first deleted entry */
			if (j == -1) {
				deleted = reuse = true;
				j = i;
			}
			continue;
		}
		data = ahash_data(n, i, set->dsize);
		if (mtype_data_equal(data, d, &multi)) {
			if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
				/* Just the extensions could be overwritten */
				j = i;
				goto overwrite_extensions;
			}
			ret = -IPSET_ERR_EXIST;
			goto unlock;
		}
		/* Reuse first timed out entry */
		if (SET_ELEM_EXPIRED(set, data) && j == -1) {
			j = i;
			reuse = true;
		}
	}
	if (reuse || forceadd) {
		if (j == -1)
			j = 0;
		data = ahash_data(n, j, set->dsize);
		if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
			for (i = 0; i < IPSET_NET_COUNT; i++)
				mtype_del_cidr(set, h,
					NCIDR_PUT(DCIDR_GET(data->cidr, i)),
					i);
#endif
			ip_set_ext_destroy(set, data);
			t->hregion[r].elements--;
		}
		goto copy_data;
	}
	if (elements >= maxelem)
		goto set_full;
	/* Create a new slot */
	if (n->pos >= n->size) {
		TUNE_BUCKETSIZE(h, multi);
		if (n->size >= AHASH_MAX(h)) {
			/* Trigger rehashing */
			mtype_data_next(&h->next, d);
			ret = -EAGAIN;
			goto resize;
		}
		old = n;
		n = kzalloc(sizeof(*n) +
			    (old->size + AHASH_INIT_SIZE) * set->dsize,
			    GFP_ATOMIC);
		if (!n) {
			ret = -ENOMEM;
			goto unlock;
		}
		memcpy(n, old, sizeof(struct hbucket) +
		       old->size * set->dsize);
		n->size = old->size + AHASH_INIT_SIZE;
		t->hregion[r].ext_size +=
			ext_size(AHASH_INIT_SIZE, set->dsize);
	}

copy_elem:
	j = n->pos++;
	data = ahash_data(n, j, set->dsize);
copy_data:
	t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
	for (i = 0; i < IPSET_NET_COUNT; i++)
		mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
	memcpy(data, d, sizeof(struct mtype_elem));

overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
	mtype_data_set_flags(data, flags);
#endif
	if (SET_WITH_COUNTER(set))
		ip_set_init_counter(ext_counter(data, set), ext);
	if (SET_WITH_COMMENT(set))
		ip_set_init_comment(set, ext_comment(data, set), ext);
	if (SET_WITH_SKBINFO(set))
		ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
	/* Must come last for the case when timed out entry is reused */
	if (SET_WITH_TIMEOUT(set))
		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
	smp_mb__before_atomic();
	set_bit(j, n->used);
	if (old != ERR_PTR(-ENOENT)) {
		rcu_assign_pointer(hbucket(t, key), n);
		if (old)
			kfree_rcu(old, rcu);
	}
	ret = 0;
resize:
	spin_unlock_bh(&t->hregion[r].lock);
	if (atomic_read(&t->ref) && ext->target) {
		/* Resize is in process and kernel side add, save values */
		struct mtype_resize_ad *x;

		x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
		if (!x)
			/* Don't bother */
			goto out;
		x->ad = IPSET_ADD;
		memcpy(&x->d, value, sizeof(struct mtype_elem));
		memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
		memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
		x->flags = flags;
		spin_lock_bh(&set->lock);
		list_add_tail(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	goto out;

set_full:
	if (net_ratelimit())
		pr_warn("Set %s is full, maxelem %u reached\n",
			set->name, maxelem);
	ret = -IPSET_ERR_HASH_FULL;
unlock:
	spin_unlock_bh(&t->hregion[r].lock);
out:
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by add: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}

/* Delete an element from the hash and free up space if possible. */
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	struct mtype_resize_ad *x = NULL;
	int i, j, k, r, ret = -IPSET_ERR_EXIST;
	u32 key, multi = 0;
	size_t dsize = set->dsize;

	/* Userspace add and resize are excluded by the mutex.
	 * Kernelspace add does not trigger a resize.
	 */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	r = ahash_region(key, t->htable_bits);
	atomic_inc(&t->uref);
	rcu_read_unlock_bh();

	spin_lock_bh(&t->hregion[r].lock);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n)
		goto out;
	for (i = 0, k = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used)) {
			k++;
			continue;
		}
		data = ahash_data(n, i, dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		if (SET_ELEM_EXPIRED(set, data))
			goto out;

		ret = 0;
		clear_bit(i, n->used);
		smp_mb__after_atomic();
		if (i + 1 == n->pos)
			n->pos--;
		t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
		for (j = 0; j < IPSET_NET_COUNT; j++)
			mtype_del_cidr(set, h,
				       NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
		ip_set_ext_destroy(set, data);

		if (atomic_read(&t->ref) && ext->target) {
			/* Resize is in process and kernel side del,
			 * save values
			 */
			x = kzalloc(sizeof(struct mtype_resize_ad),
				    GFP_ATOMIC);
			if (x) {
				x->ad = IPSET_DEL;
				memcpy(&x->d, value,
				       sizeof(struct mtype_elem));
				x->flags = flags;
			}
		}
		for (; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				k++;
		}
		if (n->pos == 0 && k == 0) {
			t->hregion[r].ext_size -= ext_size(n->size, dsize);
			rcu_assign_pointer(hbucket(t, key), NULL);
			kfree_rcu(n, rcu);
		} else if (k >= AHASH_INIT_SIZE) {
			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
					(n->size - AHASH_INIT_SIZE) * dsize,
					GFP_ATOMIC);
			if (!tmp)
				goto out;
			tmp->size = n->size - AHASH_INIT_SIZE;
			for (j = 0, k = 0; j < n->pos; j++) {
				if (!test_bit(j, n->used))
					continue;
				data = ahash_data(n, j, dsize);
				memcpy(tmp->value + k * dsize, data, dsize);
				set_bit(k, tmp->used);
				k++;
			}
			tmp->pos = k;
			t->hregion[r].ext_size -=
				ext_size(AHASH_INIT_SIZE, dsize);
			rcu_assign_pointer(hbucket(t, key), tmp);
			kfree_rcu(n, rcu);
		}
		goto out;
	}

out:
	spin_unlock_bh(&t->hregion[r].lock);
	if (x) {
		spin_lock_bh(&set->lock);
		list_add(&x->list, &h->ad);
		spin_unlock_bh(&set->lock);
	}
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		pr_debug("Table destroy after resize by del: %p\n", t);
		mtype_ahash_destroy(set, t, false);
	}
	return ret;
}

static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
	if (!ip_set_match_extensions(set, ext, mext, flags, data))
		return 0;
	/* nomatch entries return -ENOTEMPTY */
	return mtype_do_data_match(data);
}

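/* The match helpers return 1 on match, 0 on no match and -ENOTEMPTY
 * for a "nomatch" entry, which lets the net types punch holes into
 * broader stored networks.
 */
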
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct hbucket *n;
	struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
	struct mtype_elem orig = *d;
	int ret, i, j = 0, k;
#else
	int ret, i, j = 0;
#endif
	u32 key, multi = 0;

	pr_debug("test by nets\n");
	for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
		for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
		     k++) {
			mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
					   true);
#else
		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
		key = HKEY(d, h->initval, t->htable_bits);
		n = rcu_dereference_bh(hbucket(t, key));
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			data = ahash_data(n, i, set->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			ret = mtype_data_match(data, ext, mext, set, flags);
			if (ret != 0)
				return ret;
#ifdef IP_SET_HASH_WITH_MULTI
			/* No match, reset multiple match flag */
			multi = 0;
#endif
		}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif

/* Test whether the element is added to the set */
static int
mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	   struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	struct mtype_elem *d = value;
	struct hbucket *n;
	struct mtype_elem *data;
	int i, ret = 0;
	u32 key, multi = 0;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes
	 */
	for (i = 0; i < IPSET_NET_COUNT; i++)
		if (DCIDR_GET(d->cidr, i) != HOST_MASK)
			break;
	if (i == IPSET_NET_COUNT) {
		ret = mtype_test_cidrs(set, d, ext, mext, flags);
		goto out;
	}
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = rcu_dereference_bh(hbucket(t, key));
	if (!n) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < n->pos; i++) {
		if (!test_bit(i, n->used))
			continue;
		data = ahash_data(n, i, set->dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		ret = mtype_data_match(data, ext, mext, set, flags);
		if (ret != 0)
			goto out;
	}
out:
	rcu_read_unlock_bh();
	return ret;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
	struct htype *h = set->data;
	const struct htable *t;
	struct nlattr *nested;
	size_t memsize;
	u32 elements = 0;
	size_t ext_size = 0;
	u8 htable_bits;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	mtype_ext_size(set, &elements, &ext_size);
	memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
	htable_bits = t->htable_bits;
	rcu_read_unlock_bh();

	nested = nla_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
		goto nla_put_failure;
#endif
	if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) {
		if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) ||
		    nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval)))
			goto nla_put_failure;
	}
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
		goto nla_put_failure;
	if (unlikely(ip_set_put_flags(skb, set)))
		goto nla_put_failure;
	nla_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Make possible to run dumping parallel with resizing */
static void
mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
{
	struct htype *h = set->data;
	struct htable *t;

	if (start) {
		rcu_read_lock_bh();
		t = ipset_dereference_bh_nfnl(h->table);
		atomic_inc(&t->uref);
		cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
		rcu_read_unlock_bh();
	} else if (cb->args[IPSET_CB_PRIVATE]) {
		t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
		if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
			pr_debug("Table destroy after resize by dump: %p\n",
				 t);
			mtype_ahash_destroy(set, t, false);
		}
		cb->args[IPSET_CB_PRIVATE] = 0;
	}
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
	   struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct htable *t;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct mtype_elem *e;
	u32 first = cb->args[IPSET_CB_ARG0];
	/* We assume that one hash bucket fits into one page */
	void *incomplete;
	int i, ret = 0;

	atd = nla_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;

	pr_debug("list hash set %s\n", set->name);
	t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
	/* Expire may replace a hbucket with another one */
	rcu_read_lock();
	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
	     cb->args[IPSET_CB_ARG0]++) {
		cond_resched_rcu();
		incomplete = skb_tail_pointer(skb);
		n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
			 cb->args[IPSET_CB_ARG0], t, n);
		if (!n)
			continue;
		for (i = 0; i < n->pos; i++) {
			if (!test_bit(i, n->used))
				continue;
			e = ahash_data(n, i, set->dsize);
			if (SET_ELEM_EXPIRED(set, e))
				continue;
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[IPSET_CB_ARG0], n, i, e);
			nested = nla_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[IPSET_CB_ARG0] == first) {
					nla_nest_cancel(skb, atd);
					ret = -EMSGSIZE;
					goto out;
				}
				goto nla_put_failure;
			}
			if (mtype_data_list(skb, e))
				goto nla_put_failure;
			if (ip_set_put_extensions(skb, set, e, true))
				goto nla_put_failure;
			nla_nest_end(skb, nested);
		}
	}
	nla_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[IPSET_CB_ARG0] = 0;

	goto out;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
		pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
			set->name);
		cb->args[IPSET_CB_ARG0] = 0;
		ret = -EMSGSIZE;
	} else {
		nla_nest_end(skb, atd);
	}
out:
	rcu_read_unlock();
	return ret;
}

static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
			  const struct xt_action_param *par,
			  enum ipset_adt adt, struct ip_set_adt_opt *opt);

static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
			  enum ipset_adt adt, u32 *lineno, u32 flags,
			  bool retried);

static const struct ip_set_type_variant mtype_variant = {
	.kadt	= mtype_kadt,
	.uadt	= mtype_uadt,
	.adt	= {
		[IPSET_ADD] = mtype_add,
		[IPSET_DEL] = mtype_del,
		[IPSET_TEST] = mtype_test,
	},
	.destroy = mtype_destroy,
	.flush	= mtype_flush,
	.head	= mtype_head,
	.list	= mtype_list,
	.uref	= mtype_uref,
	.resize	= mtype_resize,
	.same_set = mtype_same_set,
	.region_lock = true,
};

#ifdef IP_SET_EMIT_CREATE
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;
#endif
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct htype *h;
	struct htable *t;
	u32 i;

	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");

#ifdef IP_SET_PROTO_UNDEF
	if (set->family != NFPROTO_UNSPEC)
		return -IPSET_ERR_INVALID_FAMILY;
#else
	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

#ifdef IP_SET_HASH_WITH_MARKMASK
	/* Separated condition in order to avoid directive in argument list */
	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
		return -IPSET_ERR_PROTOCOL;

	markmask = 0xffffffff;
	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
		if (markmask == 0)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

	hsize = sizeof(*h);
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	/* Compute htable_bits from the user input parameter hashsize.
	 * Assume that hashsize == 2^htable_bits,
	 * otherwise round up to the first 2^n value.
	 */
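	/* E.g. hashsize 1000 gives hbits == fls(999) == 10, i.e. 1024
	 * buckets, while an exact power of two such as 1024 maps to
	 * itself.
	 */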
	hbits = fls(hashsize - 1);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	t = ip_set_alloc(hsize);
	if (!t) {
		kfree(h);
		return -ENOMEM;
	}
	t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
	if (!t->hregion) {
		ip_set_free(t);
		kfree(h);
		return -ENOMEM;
	}
	h->gc.set = set;
	for (i = 0; i < ahash_numof_locks(hbits); i++)
		spin_lock_init(&t->hregion[i].lock);
	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	if (tb[IPSET_ATTR_INITVAL])
		h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
	else
		get_random_bytes(&h->initval, sizeof(h->initval));
	h->bucketsize = AHASH_MAX_SIZE;
	if (tb[IPSET_ATTR_BUCKETSIZE]) {
		h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
		if (h->bucketsize < AHASH_INIT_SIZE)
			h->bucketsize = AHASH_INIT_SIZE;
		else if (h->bucketsize > AHASH_MAX_SIZE)
			h->bucketsize = AHASH_MAX_SIZE;
		else if (h->bucketsize % 2)
			h->bucketsize += 1;
	}
	t->htable_bits = hbits;
	t->maxelem = h->maxelem / ahash_numof_locks(hbits);
	RCU_INIT_POINTER(h->table, t);

	INIT_LIST_HEAD(&h->ad);
	set->data = h;
#ifndef IP_SET_PROTO_UNDEF
	if (set->family == NFPROTO_IPV4) {
#endif
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
#endif
	set->timeout = IPSET_NO_TIMEOUT;
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
#ifndef IP_SET_PROTO_UNDEF
		if (set->family == NFPROTO_IPV4)
#endif
			IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
	}
	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(t->htable_bits),
		 t->htable_bits, h->maxelem, set->data, t);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */

#undef HKEY_DATALEN
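
/* Usage sketch (not part of this header): a concrete set type defines
 * the template parameters and then includes this file, e.g. roughly:
 *
 *	#define HTYPE		hash_ip
 *	#define MTYPE		hash_ip4
 *	#define HOST_MASK	32
 *	#include "ip_set_hash_gen.h"
 *
 * which instantiates the hash_ip4_* variant; see the hash:* modules in
 * net/netfilter/ipset/ for the real definitions.
 */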