1 /* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License version 2 as 5 * published by the Free Software Foundation. 6 */ 7 8 /* Kernel module implementing an IP set type: the list:set type */ 9 10 #include <linux/module.h> 11 #include <linux/ip.h> 12 #include <linux/rculist.h> 13 #include <linux/skbuff.h> 14 #include <linux/errno.h> 15 16 #include <linux/netfilter/ipset/ip_set.h> 17 #include <linux/netfilter/ipset/ip_set_list.h> 18 19 #define IPSET_TYPE_REV_MIN 0 20 /* 1 Counters support added */ 21 /* 2 Comments support added */ 22 #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ 23 24 MODULE_LICENSE("GPL"); 25 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 26 IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); 27 MODULE_ALIAS("ip_set_list:set"); 28 29 /* Member elements */ 30 struct set_elem { 31 struct rcu_head rcu; 32 struct list_head list; 33 struct ip_set *set; /* Sigh, in order to cleanup reference */ 34 ip_set_id_t id; 35 } __aligned(__alignof__(u64)); 36 37 struct set_adt_elem { 38 ip_set_id_t id; 39 ip_set_id_t refid; 40 int before; 41 }; 42 43 /* Type structure */ 44 struct list_set { 45 u32 size; /* size of set list array */ 46 struct timer_list gc; /* garbage collection */ 47 struct ip_set *set; /* attached to this ip_set */ 48 struct net *net; /* namespace */ 49 struct list_head members; /* the set members */ 50 }; 51 52 static int 53 list_set_ktest(struct ip_set *set, const struct sk_buff *skb, 54 const struct xt_action_param *par, 55 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 56 { 57 struct list_set *map = set->data; 58 struct ip_set_ext *mext = &opt->ext; 59 struct set_elem *e; 60 u32 flags = opt->cmdflags; 61 int ret; 62 63 /* Don't lookup sub-counters at all */ 64 opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; 65 if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) 66 opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; 67 list_for_each_entry_rcu(e, &map->members, list) { 68 ret = ip_set_test(e->id, skb, par, opt); 69 if (ret <= 0) 70 continue; 71 if (ip_set_match_extensions(set, ext, mext, flags, e)) 72 return 1; 73 } 74 return 0; 75 } 76 77 static int 78 list_set_kadd(struct ip_set *set, const struct sk_buff *skb, 79 const struct xt_action_param *par, 80 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 81 { 82 struct list_set *map = set->data; 83 struct set_elem *e; 84 int ret; 85 86 list_for_each_entry(e, &map->members, list) { 87 if (SET_WITH_TIMEOUT(set) && 88 ip_set_timeout_expired(ext_timeout(e, set))) 89 continue; 90 ret = ip_set_add(e->id, skb, par, opt); 91 if (ret == 0) 92 return ret; 93 } 94 return 0; 95 } 96 97 static int 98 list_set_kdel(struct ip_set *set, const struct sk_buff *skb, 99 const struct xt_action_param *par, 100 struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) 101 { 102 struct list_set *map = set->data; 103 struct set_elem *e; 104 int ret; 105 106 list_for_each_entry(e, &map->members, list) { 107 if (SET_WITH_TIMEOUT(set) && 108 ip_set_timeout_expired(ext_timeout(e, set))) 109 continue; 110 ret = ip_set_del(e->id, skb, par, opt); 111 if (ret == 0) 112 return ret; 113 } 114 return 0; 115 } 116 117 static int 118 list_set_kadt(struct ip_set *set, const struct sk_buff *skb, 119 const struct xt_action_param *par, 120 enum ipset_adt adt, struct ip_set_adt_opt *opt) 121 { 122 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 123 int ret = -EINVAL; 124 125 rcu_read_lock(); 126 switch (adt) { 127 case IPSET_TEST: 128 ret = list_set_ktest(set, skb, par, opt, &ext); 129 break; 130 case IPSET_ADD: 131 ret = list_set_kadd(set, skb, par, opt, &ext); 132 break; 133 case IPSET_DEL: 134 ret = list_set_kdel(set, skb, par, opt, &ext); 135 break; 136 default: 137 break; 138 } 139 rcu_read_unlock(); 140 141 return ret; 142 } 143 144 /* Userspace interfaces: we are protected by the nfnl mutex */ 145 146 static void 147 __list_set_del_rcu(struct rcu_head * rcu) 148 { 149 struct set_elem *e = container_of(rcu, struct set_elem, rcu); 150 struct ip_set *set = e->set; 151 struct list_set *map = set->data; 152 153 ip_set_put_byindex(map->net, e->id); 154 ip_set_ext_destroy(set, e); 155 kfree(e); 156 } 157 158 static inline void 159 list_set_del(struct ip_set *set, struct set_elem *e) 160 { 161 set->elements--; 162 list_del_rcu(&e->list); 163 call_rcu(&e->rcu, __list_set_del_rcu); 164 } 165 166 static inline void 167 list_set_replace(struct set_elem *e, struct set_elem *old) 168 { 169 list_replace_rcu(&old->list, &e->list); 170 call_rcu(&old->rcu, __list_set_del_rcu); 171 } 172 173 static void 174 set_cleanup_entries(struct ip_set *set) 175 { 176 struct list_set *map = set->data; 177 struct set_elem *e, *n; 178 179 list_for_each_entry_safe(e, n, &map->members, list) 180 if (ip_set_timeout_expired(ext_timeout(e, set))) 181 list_set_del(set, e); 182 } 183 184 static int 185 list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, 186 struct ip_set_ext *mext, u32 flags) 187 { 188 struct list_set *map = set->data; 189 struct set_adt_elem *d = value; 190 struct set_elem *e, *next, *prev = NULL; 191 int ret; 192 193 list_for_each_entry(e, &map->members, list) { 194 if (SET_WITH_TIMEOUT(set) && 195 ip_set_timeout_expired(ext_timeout(e, set))) 196 continue; 197 else if (e->id != d->id) { 198 prev = e; 199 continue; 200 } 201 202 if (d->before == 0) { 203 ret = 1; 204 } else if (d->before > 0) { 205 next = list_next_entry(e, list); 206 ret = !list_is_last(&e->list, &map->members) && 207 next->id == d->refid; 208 } else { 209 ret = prev && prev->id == d->refid; 210 } 211 return ret; 212 } 213 return 0; 214 } 215 216 static void 217 list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, 218 struct set_elem *e) 219 { 220 if (SET_WITH_COUNTER(set)) 221 ip_set_init_counter(ext_counter(e, set), ext); 222 if (SET_WITH_COMMENT(set)) 223 ip_set_init_comment(set, ext_comment(e, set), ext); 224 if (SET_WITH_SKBINFO(set)) 225 ip_set_init_skbinfo(ext_skbinfo(e, set), ext); 226 /* Update timeout last */ 227 if (SET_WITH_TIMEOUT(set)) 228 ip_set_timeout_set(ext_timeout(e, set), ext->timeout); 229 } 230 231 static int 232 list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, 233 struct ip_set_ext *mext, u32 flags) 234 { 235 struct list_set *map = set->data; 236 struct set_adt_elem *d = value; 237 struct set_elem *e, *n, *prev, *next; 238 bool flag_exist = flags & IPSET_FLAG_EXIST; 239 240 /* Find where to add the new entry */ 241 n = prev = next = NULL; 242 list_for_each_entry(e, &map->members, list) { 243 if (SET_WITH_TIMEOUT(set) && 244 ip_set_timeout_expired(ext_timeout(e, set))) 245 continue; 246 else if (d->id == e->id) 247 n = e; 248 else if (d->before == 0 || e->id != d->refid) 249 continue; 250 else if (d->before > 0) 251 next = e; 252 else 253 prev = e; 254 } 255 256 /* If before/after is used on an empty set */ 257 if ((d->before > 0 && !next) || 258 (d->before < 0 && !prev)) 259 return -IPSET_ERR_REF_EXIST; 260 261 /* Re-add already existing element */ 262 if (n) { 263 if (!flag_exist) 264 return -IPSET_ERR_EXIST; 265 /* Update extensions */ 266 ip_set_ext_destroy(set, n); 267 list_set_init_extensions(set, ext, n); 268 269 /* Set is already added to the list */ 270 ip_set_put_byindex(map->net, d->id); 271 return 0; 272 } 273 /* Add new entry */ 274 if (d->before == 0) { 275 /* Append */ 276 n = list_empty(&map->members) ? NULL : 277 list_last_entry(&map->members, struct set_elem, list); 278 } else if (d->before > 0) { 279 /* Insert after next element */ 280 if (!list_is_last(&next->list, &map->members)) 281 n = list_next_entry(next, list); 282 } else { 283 /* Insert before prev element */ 284 if (prev->list.prev != &map->members) 285 n = list_prev_entry(prev, list); 286 } 287 /* Can we replace a timed out entry? */ 288 if (n && 289 !(SET_WITH_TIMEOUT(set) && 290 ip_set_timeout_expired(ext_timeout(n, set)))) 291 n = NULL; 292 293 e = kzalloc(set->dsize, GFP_ATOMIC); 294 if (!e) 295 return -ENOMEM; 296 e->id = d->id; 297 e->set = set; 298 INIT_LIST_HEAD(&e->list); 299 list_set_init_extensions(set, ext, e); 300 if (n) 301 list_set_replace(e, n); 302 else if (next) 303 list_add_tail_rcu(&e->list, &next->list); 304 else if (prev) 305 list_add_rcu(&e->list, &prev->list); 306 else 307 list_add_tail_rcu(&e->list, &map->members); 308 set->elements++; 309 310 return 0; 311 } 312 313 static int 314 list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, 315 struct ip_set_ext *mext, u32 flags) 316 { 317 struct list_set *map = set->data; 318 struct set_adt_elem *d = value; 319 struct set_elem *e, *next, *prev = NULL; 320 321 list_for_each_entry(e, &map->members, list) { 322 if (SET_WITH_TIMEOUT(set) && 323 ip_set_timeout_expired(ext_timeout(e, set))) 324 continue; 325 else if (e->id != d->id) { 326 prev = e; 327 continue; 328 } 329 330 if (d->before > 0) { 331 next = list_next_entry(e, list); 332 if (list_is_last(&e->list, &map->members) || 333 next->id != d->refid) 334 return -IPSET_ERR_REF_EXIST; 335 } else if (d->before < 0) { 336 if (!prev || prev->id != d->refid) 337 return -IPSET_ERR_REF_EXIST; 338 } 339 list_set_del(set, e); 340 return 0; 341 } 342 return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; 343 } 344 345 static int 346 list_set_uadt(struct ip_set *set, struct nlattr *tb[], 347 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 348 { 349 struct list_set *map = set->data; 350 ipset_adtfn adtfn = set->variant->adt[adt]; 351 struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; 352 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 353 struct ip_set *s; 354 int ret = 0; 355 356 if (tb[IPSET_ATTR_LINENO]) 357 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 358 359 if (unlikely(!tb[IPSET_ATTR_NAME] || 360 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 361 return -IPSET_ERR_PROTOCOL; 362 363 ret = ip_set_get_extensions(set, tb, &ext); 364 if (ret) 365 return ret; 366 e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); 367 if (e.id == IPSET_INVALID_ID) 368 return -IPSET_ERR_NAME; 369 /* "Loop detection" */ 370 if (s->type->features & IPSET_TYPE_NAME) { 371 ret = -IPSET_ERR_LOOP; 372 goto finish; 373 } 374 375 if (tb[IPSET_ATTR_CADT_FLAGS]) { 376 u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 377 378 e.before = f & IPSET_FLAG_BEFORE; 379 } 380 381 if (e.before && !tb[IPSET_ATTR_NAMEREF]) { 382 ret = -IPSET_ERR_BEFORE; 383 goto finish; 384 } 385 386 if (tb[IPSET_ATTR_NAMEREF]) { 387 e.refid = ip_set_get_byname(map->net, 388 nla_data(tb[IPSET_ATTR_NAMEREF]), 389 &s); 390 if (e.refid == IPSET_INVALID_ID) { 391 ret = -IPSET_ERR_NAMEREF; 392 goto finish; 393 } 394 if (!e.before) 395 e.before = -1; 396 } 397 if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set)) 398 set_cleanup_entries(set); 399 400 ret = adtfn(set, &e, &ext, &ext, flags); 401 402 finish: 403 if (e.refid != IPSET_INVALID_ID) 404 ip_set_put_byindex(map->net, e.refid); 405 if (adt != IPSET_ADD || ret) 406 ip_set_put_byindex(map->net, e.id); 407 408 return ip_set_eexist(ret, flags) ? 0 : ret; 409 } 410 411 static void 412 list_set_flush(struct ip_set *set) 413 { 414 struct list_set *map = set->data; 415 struct set_elem *e, *n; 416 417 list_for_each_entry_safe(e, n, &map->members, list) 418 list_set_del(set, e); 419 set->elements = 0; 420 set->ext_size = 0; 421 } 422 423 static void 424 list_set_destroy(struct ip_set *set) 425 { 426 struct list_set *map = set->data; 427 struct set_elem *e, *n; 428 429 if (SET_WITH_TIMEOUT(set)) 430 del_timer_sync(&map->gc); 431 432 list_for_each_entry_safe(e, n, &map->members, list) { 433 list_del(&e->list); 434 ip_set_put_byindex(map->net, e->id); 435 ip_set_ext_destroy(set, e); 436 kfree(e); 437 } 438 kfree(map); 439 440 set->data = NULL; 441 } 442 443 /* Calculate the actual memory size of the set data */ 444 static size_t 445 list_set_memsize(const struct list_set *map, size_t dsize) 446 { 447 struct set_elem *e; 448 u32 n = 0; 449 450 rcu_read_lock(); 451 list_for_each_entry_rcu(e, &map->members, list) 452 n++; 453 rcu_read_unlock(); 454 455 return (sizeof(*map) + n * dsize); 456 } 457 458 static int 459 list_set_head(struct ip_set *set, struct sk_buff *skb) 460 { 461 const struct list_set *map = set->data; 462 struct nlattr *nested; 463 size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; 464 465 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 466 if (!nested) 467 goto nla_put_failure; 468 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || 469 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 470 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || 471 nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) 472 goto nla_put_failure; 473 if (unlikely(ip_set_put_flags(skb, set))) 474 goto nla_put_failure; 475 ipset_nest_end(skb, nested); 476 477 return 0; 478 nla_put_failure: 479 return -EMSGSIZE; 480 } 481 482 static int 483 list_set_list(const struct ip_set *set, 484 struct sk_buff *skb, struct netlink_callback *cb) 485 { 486 const struct list_set *map = set->data; 487 struct nlattr *atd, *nested; 488 u32 i = 0, first = cb->args[IPSET_CB_ARG0]; 489 struct set_elem *e; 490 int ret = 0; 491 492 atd = ipset_nest_start(skb, IPSET_ATTR_ADT); 493 if (!atd) 494 return -EMSGSIZE; 495 496 rcu_read_lock(); 497 list_for_each_entry_rcu(e, &map->members, list) { 498 if (i < first || 499 (SET_WITH_TIMEOUT(set) && 500 ip_set_timeout_expired(ext_timeout(e, set)))) { 501 i++; 502 continue; 503 } 504 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 505 if (!nested) 506 goto nla_put_failure; 507 if (nla_put_string(skb, IPSET_ATTR_NAME, 508 ip_set_name_byindex(map->net, e->id))) 509 goto nla_put_failure; 510 if (ip_set_put_extensions(skb, set, e, true)) 511 goto nla_put_failure; 512 ipset_nest_end(skb, nested); 513 i++; 514 } 515 516 ipset_nest_end(skb, atd); 517 /* Set listing finished */ 518 cb->args[IPSET_CB_ARG0] = 0; 519 goto out; 520 521 nla_put_failure: 522 nla_nest_cancel(skb, nested); 523 if (unlikely(i == first)) { 524 nla_nest_cancel(skb, atd); 525 cb->args[IPSET_CB_ARG0] = 0; 526 ret = -EMSGSIZE; 527 } else { 528 cb->args[IPSET_CB_ARG0] = i; 529 } 530 ipset_nest_end(skb, atd); 531 out: 532 rcu_read_unlock(); 533 return ret; 534 } 535 536 static bool 537 list_set_same_set(const struct ip_set *a, const struct ip_set *b) 538 { 539 const struct list_set *x = a->data; 540 const struct list_set *y = b->data; 541 542 return x->size == y->size && 543 a->timeout == b->timeout && 544 a->extensions == b->extensions; 545 } 546 547 static const struct ip_set_type_variant set_variant = { 548 .kadt = list_set_kadt, 549 .uadt = list_set_uadt, 550 .adt = { 551 [IPSET_ADD] = list_set_uadd, 552 [IPSET_DEL] = list_set_udel, 553 [IPSET_TEST] = list_set_utest, 554 }, 555 .destroy = list_set_destroy, 556 .flush = list_set_flush, 557 .head = list_set_head, 558 .list = list_set_list, 559 .same_set = list_set_same_set, 560 }; 561 562 static void 563 list_set_gc(struct timer_list *t) 564 { 565 struct list_set *map = from_timer(map, t, gc); 566 struct ip_set *set = map->set; 567 568 spin_lock_bh(&set->lock); 569 set_cleanup_entries(set); 570 spin_unlock_bh(&set->lock); 571 572 map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 573 add_timer(&map->gc); 574 } 575 576 static void 577 list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) 578 { 579 struct list_set *map = set->data; 580 581 timer_setup(&map->gc, gc, 0); 582 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 583 } 584 585 /* Create list:set type of sets */ 586 587 static bool 588 init_list_set(struct net *net, struct ip_set *set, u32 size) 589 { 590 struct list_set *map; 591 592 map = kzalloc(sizeof(*map), GFP_KERNEL); 593 if (!map) 594 return false; 595 596 map->size = size; 597 map->net = net; 598 map->set = set; 599 INIT_LIST_HEAD(&map->members); 600 set->data = map; 601 602 return true; 603 } 604 605 static int 606 list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], 607 u32 flags) 608 { 609 u32 size = IP_SET_LIST_DEFAULT_SIZE; 610 611 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || 612 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 613 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 614 return -IPSET_ERR_PROTOCOL; 615 616 if (tb[IPSET_ATTR_SIZE]) 617 size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); 618 if (size < IP_SET_LIST_MIN_SIZE) 619 size = IP_SET_LIST_MIN_SIZE; 620 621 set->variant = &set_variant; 622 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), 623 __alignof__(struct set_elem)); 624 if (!init_list_set(net, set, size)) 625 return -ENOMEM; 626 if (tb[IPSET_ATTR_TIMEOUT]) { 627 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 628 list_set_gc_init(set, list_set_gc); 629 } 630 return 0; 631 } 632 633 static struct ip_set_type list_set_type __read_mostly = { 634 .name = "list:set", 635 .protocol = IPSET_PROTOCOL, 636 .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, 637 .dimension = IPSET_DIM_ONE, 638 .family = NFPROTO_UNSPEC, 639 .revision_min = IPSET_TYPE_REV_MIN, 640 .revision_max = IPSET_TYPE_REV_MAX, 641 .create = list_set_create, 642 .create_policy = { 643 [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, 644 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 645 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 646 }, 647 .adt_policy = { 648 [IPSET_ATTR_NAME] = { .type = NLA_STRING, 649 .len = IPSET_MAXNAMELEN }, 650 [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, 651 .len = IPSET_MAXNAMELEN }, 652 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 653 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 654 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 655 [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, 656 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, 657 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, 658 .len = IPSET_MAX_COMMENT_SIZE }, 659 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, 660 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, 661 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, 662 }, 663 .me = THIS_MODULE, 664 }; 665 666 static int __init 667 list_set_init(void) 668 { 669 return ip_set_type_register(&list_set_type); 670 } 671 672 static void __exit 673 list_set_fini(void) 674 { 675 rcu_barrier(); 676 ip_set_type_unregister(&list_set_type); 677 } 678 679 module_init(list_set_init); 680 module_exit(list_set_fini); 681