1 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 2 * Patrick Schaaf <bof@bof.de> 3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation. 8 */ 9 10 /* Kernel module for IP set management */ 11 12 #include <linux/init.h> 13 #include <linux/module.h> 14 #include <linux/moduleparam.h> 15 #include <linux/ip.h> 16 #include <linux/skbuff.h> 17 #include <linux/spinlock.h> 18 #include <linux/rculist.h> 19 #include <net/netlink.h> 20 #include <net/net_namespace.h> 21 #include <net/netns/generic.h> 22 23 #include <linux/netfilter.h> 24 #include <linux/netfilter/x_tables.h> 25 #include <linux/netfilter/nfnetlink.h> 26 #include <linux/netfilter/ipset/ip_set.h> 27 28 static LIST_HEAD(ip_set_type_list); /* all registered set types */ 29 static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ 30 static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ 31 32 struct ip_set_net { 33 struct ip_set * __rcu *ip_set_list; /* all individual sets */ 34 ip_set_id_t ip_set_max; /* max number of sets */ 35 bool is_deleted; /* deleted by ip_set_net_exit */ 36 bool is_destroyed; /* all sets are destroyed */ 37 }; 38 39 static unsigned int ip_set_net_id __read_mostly; 40 41 static inline struct ip_set_net *ip_set_pernet(struct net *net) 42 { 43 return net_generic(net, ip_set_net_id); 44 } 45 46 #define IP_SET_INC 64 47 #define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) 48 49 static unsigned int max_sets; 50 51 module_param(max_sets, int, 0600); 52 MODULE_PARM_DESC(max_sets, "maximal number of sets"); 53 MODULE_LICENSE("GPL"); 54 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 55 MODULE_DESCRIPTION("core IP set support"); 56 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); 57 58 /* When the nfnl mutex is held: */ 59 #define ip_set_dereference(p) \ 60 rcu_dereference_protected(p, 1) 61 #define ip_set(inst, id) \ 62 ip_set_dereference((inst)->ip_set_list)[id] 63 64 /* The set types are implemented in modules and registered set types 65 * can be found in ip_set_type_list. Adding/deleting types is 66 * serialized by ip_set_type_mutex. 67 */ 68 69 static inline void 70 ip_set_type_lock(void) 71 { 72 mutex_lock(&ip_set_type_mutex); 73 } 74 75 static inline void 76 ip_set_type_unlock(void) 77 { 78 mutex_unlock(&ip_set_type_mutex); 79 } 80 81 /* Register and deregister settype */ 82 83 static struct ip_set_type * 84 find_set_type(const char *name, u8 family, u8 revision) 85 { 86 struct ip_set_type *type; 87 88 list_for_each_entry_rcu(type, &ip_set_type_list, list) 89 if (STRNCMP(type->name, name) && 90 (type->family == family || 91 type->family == NFPROTO_UNSPEC) && 92 revision >= type->revision_min && 93 revision <= type->revision_max) 94 return type; 95 return NULL; 96 } 97 98 /* Unlock, try to load a set type module and lock again */ 99 static bool 100 load_settype(const char *name) 101 { 102 nfnl_unlock(NFNL_SUBSYS_IPSET); 103 pr_debug("try to load ip_set_%s\n", name); 104 if (request_module("ip_set_%s", name) < 0) { 105 pr_warn("Can't find ip_set type %s\n", name); 106 nfnl_lock(NFNL_SUBSYS_IPSET); 107 return false; 108 } 109 nfnl_lock(NFNL_SUBSYS_IPSET); 110 return true; 111 } 112 113 /* Find a set type and reference it */ 114 #define find_set_type_get(name, family, revision, found) \ 115 __find_set_type_get(name, family, revision, found, false) 116 117 static int 118 __find_set_type_get(const char *name, u8 family, u8 revision, 119 struct ip_set_type **found, bool retry) 120 { 121 struct ip_set_type *type; 122 int err; 123 124 if (retry && !load_settype(name)) 125 return -IPSET_ERR_FIND_TYPE; 126 127 rcu_read_lock(); 128 *found = find_set_type(name, family, revision); 129 if (*found) { 130 err = !try_module_get((*found)->me) ? -EFAULT : 0; 131 goto unlock; 132 } 133 /* Make sure the type is already loaded 134 * but we don't support the revision 135 */ 136 list_for_each_entry_rcu(type, &ip_set_type_list, list) 137 if (STRNCMP(type->name, name)) { 138 err = -IPSET_ERR_FIND_TYPE; 139 goto unlock; 140 } 141 rcu_read_unlock(); 142 143 return retry ? -IPSET_ERR_FIND_TYPE : 144 __find_set_type_get(name, family, revision, found, true); 145 146 unlock: 147 rcu_read_unlock(); 148 return err; 149 } 150 151 /* Find a given set type by name and family. 152 * If we succeeded, the supported minimal and maximum revisions are 153 * filled out. 154 */ 155 #define find_set_type_minmax(name, family, min, max) \ 156 __find_set_type_minmax(name, family, min, max, false) 157 158 static int 159 __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, 160 bool retry) 161 { 162 struct ip_set_type *type; 163 bool found = false; 164 165 if (retry && !load_settype(name)) 166 return -IPSET_ERR_FIND_TYPE; 167 168 *min = 255; *max = 0; 169 rcu_read_lock(); 170 list_for_each_entry_rcu(type, &ip_set_type_list, list) 171 if (STRNCMP(type->name, name) && 172 (type->family == family || 173 type->family == NFPROTO_UNSPEC)) { 174 found = true; 175 if (type->revision_min < *min) 176 *min = type->revision_min; 177 if (type->revision_max > *max) 178 *max = type->revision_max; 179 } 180 rcu_read_unlock(); 181 if (found) 182 return 0; 183 184 return retry ? -IPSET_ERR_FIND_TYPE : 185 __find_set_type_minmax(name, family, min, max, true); 186 } 187 188 #define family_name(f) ((f) == NFPROTO_IPV4 ? "inet" : \ 189 (f) == NFPROTO_IPV6 ? "inet6" : "any") 190 191 /* Register a set type structure. The type is identified by 192 * the unique triple of name, family and revision. 193 */ 194 int 195 ip_set_type_register(struct ip_set_type *type) 196 { 197 int ret = 0; 198 199 if (type->protocol != IPSET_PROTOCOL) { 200 pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n", 201 type->name, family_name(type->family), 202 type->revision_min, type->revision_max, 203 type->protocol, IPSET_PROTOCOL); 204 return -EINVAL; 205 } 206 207 ip_set_type_lock(); 208 if (find_set_type(type->name, type->family, type->revision_min)) { 209 /* Duplicate! */ 210 pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", 211 type->name, family_name(type->family), 212 type->revision_min); 213 ip_set_type_unlock(); 214 return -EINVAL; 215 } 216 list_add_rcu(&type->list, &ip_set_type_list); 217 pr_debug("type %s, family %s, revision %u:%u registered.\n", 218 type->name, family_name(type->family), 219 type->revision_min, type->revision_max); 220 ip_set_type_unlock(); 221 222 return ret; 223 } 224 EXPORT_SYMBOL_GPL(ip_set_type_register); 225 226 /* Unregister a set type. There's a small race with ip_set_create */ 227 void 228 ip_set_type_unregister(struct ip_set_type *type) 229 { 230 ip_set_type_lock(); 231 if (!find_set_type(type->name, type->family, type->revision_min)) { 232 pr_warn("ip_set type %s, family %s with revision min %u not registered\n", 233 type->name, family_name(type->family), 234 type->revision_min); 235 ip_set_type_unlock(); 236 return; 237 } 238 list_del_rcu(&type->list); 239 pr_debug("type %s, family %s with revision min %u unregistered.\n", 240 type->name, family_name(type->family), type->revision_min); 241 ip_set_type_unlock(); 242 243 synchronize_rcu(); 244 } 245 EXPORT_SYMBOL_GPL(ip_set_type_unregister); 246 247 /* Utility functions */ 248 void * 249 ip_set_alloc(size_t size) 250 { 251 void *members = NULL; 252 253 if (size < KMALLOC_MAX_SIZE) 254 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 255 256 if (members) { 257 pr_debug("%p: allocated with kmalloc\n", members); 258 return members; 259 } 260 261 members = vzalloc(size); 262 if (!members) 263 return NULL; 264 pr_debug("%p: allocated with vmalloc\n", members); 265 266 return members; 267 } 268 EXPORT_SYMBOL_GPL(ip_set_alloc); 269 270 void 271 ip_set_free(void *members) 272 { 273 pr_debug("%p: free with %s\n", members, 274 is_vmalloc_addr(members) ? "vfree" : "kfree"); 275 kvfree(members); 276 } 277 EXPORT_SYMBOL_GPL(ip_set_free); 278 279 static inline bool 280 flag_nested(const struct nlattr *nla) 281 { 282 return nla->nla_type & NLA_F_NESTED; 283 } 284 285 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { 286 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, 287 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, 288 .len = sizeof(struct in6_addr) }, 289 }; 290 291 int 292 ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) 293 { 294 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; 295 296 if (unlikely(!flag_nested(nla))) 297 return -IPSET_ERR_PROTOCOL; 298 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, 299 ipaddr_policy, NULL)) 300 return -IPSET_ERR_PROTOCOL; 301 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) 302 return -IPSET_ERR_PROTOCOL; 303 304 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); 305 return 0; 306 } 307 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); 308 309 int 310 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) 311 { 312 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; 313 314 if (unlikely(!flag_nested(nla))) 315 return -IPSET_ERR_PROTOCOL; 316 317 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, 318 ipaddr_policy, NULL)) 319 return -IPSET_ERR_PROTOCOL; 320 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) 321 return -IPSET_ERR_PROTOCOL; 322 323 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), 324 sizeof(struct in6_addr)); 325 return 0; 326 } 327 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 328 329 typedef void (*destroyer)(struct ip_set *, void *); 330 /* ipset data extension types, in size order */ 331 332 const struct ip_set_ext_type ip_set_extensions[] = { 333 [IPSET_EXT_ID_COUNTER] = { 334 .type = IPSET_EXT_COUNTER, 335 .flag = IPSET_FLAG_WITH_COUNTERS, 336 .len = sizeof(struct ip_set_counter), 337 .align = __alignof__(struct ip_set_counter), 338 }, 339 [IPSET_EXT_ID_TIMEOUT] = { 340 .type = IPSET_EXT_TIMEOUT, 341 .len = sizeof(unsigned long), 342 .align = __alignof__(unsigned long), 343 }, 344 [IPSET_EXT_ID_SKBINFO] = { 345 .type = IPSET_EXT_SKBINFO, 346 .flag = IPSET_FLAG_WITH_SKBINFO, 347 .len = sizeof(struct ip_set_skbinfo), 348 .align = __alignof__(struct ip_set_skbinfo), 349 }, 350 [IPSET_EXT_ID_COMMENT] = { 351 .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, 352 .flag = IPSET_FLAG_WITH_COMMENT, 353 .len = sizeof(struct ip_set_comment), 354 .align = __alignof__(struct ip_set_comment), 355 .destroy = (destroyer) ip_set_comment_free, 356 }, 357 }; 358 EXPORT_SYMBOL_GPL(ip_set_extensions); 359 360 static inline bool 361 add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) 362 { 363 return ip_set_extensions[id].flag ? 364 (flags & ip_set_extensions[id].flag) : 365 !!tb[IPSET_ATTR_TIMEOUT]; 366 } 367 368 size_t 369 ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, 370 size_t align) 371 { 372 enum ip_set_ext_id id; 373 u32 cadt_flags = 0; 374 375 if (tb[IPSET_ATTR_CADT_FLAGS]) 376 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 377 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) 378 set->flags |= IPSET_CREATE_FLAG_FORCEADD; 379 if (!align) 380 align = 1; 381 for (id = 0; id < IPSET_EXT_ID_MAX; id++) { 382 if (!add_extension(id, cadt_flags, tb)) 383 continue; 384 len = ALIGN(len, ip_set_extensions[id].align); 385 set->offset[id] = len; 386 set->extensions |= ip_set_extensions[id].type; 387 len += ip_set_extensions[id].len; 388 } 389 return ALIGN(len, align); 390 } 391 EXPORT_SYMBOL_GPL(ip_set_elem_len); 392 393 int 394 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], 395 struct ip_set_ext *ext) 396 { 397 u64 fullmark; 398 399 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 400 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || 401 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || 402 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || 403 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || 404 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) 405 return -IPSET_ERR_PROTOCOL; 406 407 if (tb[IPSET_ATTR_TIMEOUT]) { 408 if (!SET_WITH_TIMEOUT(set)) 409 return -IPSET_ERR_TIMEOUT; 410 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 411 } 412 if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { 413 if (!SET_WITH_COUNTER(set)) 414 return -IPSET_ERR_COUNTER; 415 if (tb[IPSET_ATTR_BYTES]) 416 ext->bytes = be64_to_cpu(nla_get_be64( 417 tb[IPSET_ATTR_BYTES])); 418 if (tb[IPSET_ATTR_PACKETS]) 419 ext->packets = be64_to_cpu(nla_get_be64( 420 tb[IPSET_ATTR_PACKETS])); 421 } 422 if (tb[IPSET_ATTR_COMMENT]) { 423 if (!SET_WITH_COMMENT(set)) 424 return -IPSET_ERR_COMMENT; 425 ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); 426 } 427 if (tb[IPSET_ATTR_SKBMARK]) { 428 if (!SET_WITH_SKBINFO(set)) 429 return -IPSET_ERR_SKBINFO; 430 fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); 431 ext->skbinfo.skbmark = fullmark >> 32; 432 ext->skbinfo.skbmarkmask = fullmark & 0xffffffff; 433 } 434 if (tb[IPSET_ATTR_SKBPRIO]) { 435 if (!SET_WITH_SKBINFO(set)) 436 return -IPSET_ERR_SKBINFO; 437 ext->skbinfo.skbprio = 438 be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO])); 439 } 440 if (tb[IPSET_ATTR_SKBQUEUE]) { 441 if (!SET_WITH_SKBINFO(set)) 442 return -IPSET_ERR_SKBINFO; 443 ext->skbinfo.skbqueue = 444 be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE])); 445 } 446 return 0; 447 } 448 EXPORT_SYMBOL_GPL(ip_set_get_extensions); 449 450 int 451 ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, 452 const void *e, bool active) 453 { 454 if (SET_WITH_TIMEOUT(set)) { 455 unsigned long *timeout = ext_timeout(e, set); 456 457 if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, 458 htonl(active ? ip_set_timeout_get(timeout) 459 : *timeout))) 460 return -EMSGSIZE; 461 } 462 if (SET_WITH_COUNTER(set) && 463 ip_set_put_counter(skb, ext_counter(e, set))) 464 return -EMSGSIZE; 465 if (SET_WITH_COMMENT(set) && 466 ip_set_put_comment(skb, ext_comment(e, set))) 467 return -EMSGSIZE; 468 if (SET_WITH_SKBINFO(set) && 469 ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) 470 return -EMSGSIZE; 471 return 0; 472 } 473 EXPORT_SYMBOL_GPL(ip_set_put_extensions); 474 475 /* Creating/destroying/renaming/swapping affect the existence and 476 * the properties of a set. All of these can be executed from userspace 477 * only and serialized by the nfnl mutex indirectly from nfnetlink. 478 * 479 * Sets are identified by their index in ip_set_list and the index 480 * is used by the external references (set/SET netfilter modules). 481 * 482 * The set behind an index may change by swapping only, from userspace. 483 */ 484 485 static inline void 486 __ip_set_get(struct ip_set *set) 487 { 488 write_lock_bh(&ip_set_ref_lock); 489 set->ref++; 490 write_unlock_bh(&ip_set_ref_lock); 491 } 492 493 static inline void 494 __ip_set_put(struct ip_set *set) 495 { 496 write_lock_bh(&ip_set_ref_lock); 497 BUG_ON(set->ref == 0); 498 set->ref--; 499 write_unlock_bh(&ip_set_ref_lock); 500 } 501 502 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need 503 * a separate reference counter 504 */ 505 static inline void 506 __ip_set_put_netlink(struct ip_set *set) 507 { 508 write_lock_bh(&ip_set_ref_lock); 509 BUG_ON(set->ref_netlink == 0); 510 set->ref_netlink--; 511 write_unlock_bh(&ip_set_ref_lock); 512 } 513 514 /* Add, del and test set entries from kernel. 515 * 516 * The set behind the index must exist and must be referenced 517 * so it can't be destroyed (or changed) under our foot. 518 */ 519 520 static inline struct ip_set * 521 ip_set_rcu_get(struct net *net, ip_set_id_t index) 522 { 523 struct ip_set *set; 524 struct ip_set_net *inst = ip_set_pernet(net); 525 526 rcu_read_lock(); 527 /* ip_set_list itself needs to be protected */ 528 set = rcu_dereference(inst->ip_set_list)[index]; 529 rcu_read_unlock(); 530 531 return set; 532 } 533 534 int 535 ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 536 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 537 { 538 struct ip_set *set = ip_set_rcu_get(xt_net(par), index); 539 int ret = 0; 540 541 BUG_ON(!set); 542 pr_debug("set %s, index %u\n", set->name, index); 543 544 if (opt->dim < set->type->dimension || 545 !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) 546 return 0; 547 548 rcu_read_lock_bh(); 549 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); 550 rcu_read_unlock_bh(); 551 552 if (ret == -EAGAIN) { 553 /* Type requests element to be completed */ 554 pr_debug("element must be completed, ADD is triggered\n"); 555 spin_lock_bh(&set->lock); 556 set->variant->kadt(set, skb, par, IPSET_ADD, opt); 557 spin_unlock_bh(&set->lock); 558 ret = 1; 559 } else { 560 /* --return-nomatch: invert matched element */ 561 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) && 562 (set->type->features & IPSET_TYPE_NOMATCH) && 563 (ret > 0 || ret == -ENOTEMPTY)) 564 ret = -ret; 565 } 566 567 /* Convert error codes to nomatch */ 568 return (ret < 0 ? 0 : ret); 569 } 570 EXPORT_SYMBOL_GPL(ip_set_test); 571 572 int 573 ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 574 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 575 { 576 struct ip_set *set = ip_set_rcu_get(xt_net(par), index); 577 int ret; 578 579 BUG_ON(!set); 580 pr_debug("set %s, index %u\n", set->name, index); 581 582 if (opt->dim < set->type->dimension || 583 !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) 584 return -IPSET_ERR_TYPE_MISMATCH; 585 586 spin_lock_bh(&set->lock); 587 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); 588 spin_unlock_bh(&set->lock); 589 590 return ret; 591 } 592 EXPORT_SYMBOL_GPL(ip_set_add); 593 594 int 595 ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 596 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 597 { 598 struct ip_set *set = ip_set_rcu_get(xt_net(par), index); 599 int ret = 0; 600 601 BUG_ON(!set); 602 pr_debug("set %s, index %u\n", set->name, index); 603 604 if (opt->dim < set->type->dimension || 605 !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) 606 return -IPSET_ERR_TYPE_MISMATCH; 607 608 spin_lock_bh(&set->lock); 609 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); 610 spin_unlock_bh(&set->lock); 611 612 return ret; 613 } 614 EXPORT_SYMBOL_GPL(ip_set_del); 615 616 /* Find set by name, reference it once. The reference makes sure the 617 * thing pointed to, does not go away under our feet. 618 * 619 */ 620 ip_set_id_t 621 ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) 622 { 623 ip_set_id_t i, index = IPSET_INVALID_ID; 624 struct ip_set *s; 625 struct ip_set_net *inst = ip_set_pernet(net); 626 627 rcu_read_lock(); 628 for (i = 0; i < inst->ip_set_max; i++) { 629 s = rcu_dereference(inst->ip_set_list)[i]; 630 if (s && STRNCMP(s->name, name)) { 631 __ip_set_get(s); 632 index = i; 633 *set = s; 634 break; 635 } 636 } 637 rcu_read_unlock(); 638 639 return index; 640 } 641 EXPORT_SYMBOL_GPL(ip_set_get_byname); 642 643 /* If the given set pointer points to a valid set, decrement 644 * reference count by 1. The caller shall not assume the index 645 * to be valid, after calling this function. 646 * 647 */ 648 649 static inline void 650 __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) 651 { 652 struct ip_set *set; 653 654 rcu_read_lock(); 655 set = rcu_dereference(inst->ip_set_list)[index]; 656 if (set) 657 __ip_set_put(set); 658 rcu_read_unlock(); 659 } 660 661 void 662 ip_set_put_byindex(struct net *net, ip_set_id_t index) 663 { 664 struct ip_set_net *inst = ip_set_pernet(net); 665 666 __ip_set_put_byindex(inst, index); 667 } 668 EXPORT_SYMBOL_GPL(ip_set_put_byindex); 669 670 /* Get the name of a set behind a set index. 671 * We assume the set is referenced, so it does exist and 672 * can't be destroyed. The set cannot be renamed due to 673 * the referencing either. 674 * 675 */ 676 const char * 677 ip_set_name_byindex(struct net *net, ip_set_id_t index) 678 { 679 const struct ip_set *set = ip_set_rcu_get(net, index); 680 681 BUG_ON(!set); 682 BUG_ON(set->ref == 0); 683 684 /* Referenced, so it's safe */ 685 return set->name; 686 } 687 EXPORT_SYMBOL_GPL(ip_set_name_byindex); 688 689 /* Routines to call by external subsystems, which do not 690 * call nfnl_lock for us. 691 */ 692 693 /* Find set by index, reference it once. The reference makes sure the 694 * thing pointed to, does not go away under our feet. 695 * 696 * The nfnl mutex is used in the function. 697 */ 698 ip_set_id_t 699 ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) 700 { 701 struct ip_set *set; 702 struct ip_set_net *inst = ip_set_pernet(net); 703 704 if (index >= inst->ip_set_max) 705 return IPSET_INVALID_ID; 706 707 nfnl_lock(NFNL_SUBSYS_IPSET); 708 set = ip_set(inst, index); 709 if (set) 710 __ip_set_get(set); 711 else 712 index = IPSET_INVALID_ID; 713 nfnl_unlock(NFNL_SUBSYS_IPSET); 714 715 return index; 716 } 717 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); 718 719 /* If the given set pointer points to a valid set, decrement 720 * reference count by 1. The caller shall not assume the index 721 * to be valid, after calling this function. 722 * 723 * The nfnl mutex is used in the function. 724 */ 725 void 726 ip_set_nfnl_put(struct net *net, ip_set_id_t index) 727 { 728 struct ip_set *set; 729 struct ip_set_net *inst = ip_set_pernet(net); 730 731 nfnl_lock(NFNL_SUBSYS_IPSET); 732 if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ 733 set = ip_set(inst, index); 734 if (set) 735 __ip_set_put(set); 736 } 737 nfnl_unlock(NFNL_SUBSYS_IPSET); 738 } 739 EXPORT_SYMBOL_GPL(ip_set_nfnl_put); 740 741 /* Communication protocol with userspace over netlink. 742 * 743 * The commands are serialized by the nfnl mutex. 744 */ 745 746 static inline bool 747 protocol_failed(const struct nlattr * const tb[]) 748 { 749 return !tb[IPSET_ATTR_PROTOCOL] || 750 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; 751 } 752 753 static inline u32 754 flag_exist(const struct nlmsghdr *nlh) 755 { 756 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; 757 } 758 759 static struct nlmsghdr * 760 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, 761 enum ipset_cmd cmd) 762 { 763 struct nlmsghdr *nlh; 764 struct nfgenmsg *nfmsg; 765 766 nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), 767 sizeof(*nfmsg), flags); 768 if (!nlh) 769 return NULL; 770 771 nfmsg = nlmsg_data(nlh); 772 nfmsg->nfgen_family = NFPROTO_IPV4; 773 nfmsg->version = NFNETLINK_V0; 774 nfmsg->res_id = 0; 775 776 return nlh; 777 } 778 779 /* Create a set */ 780 781 static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { 782 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 783 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 784 .len = IPSET_MAXNAMELEN - 1 }, 785 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 786 .len = IPSET_MAXNAMELEN - 1}, 787 [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, 788 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 789 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 790 }; 791 792 static struct ip_set * 793 find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) 794 { 795 struct ip_set *set = NULL; 796 ip_set_id_t i; 797 798 *id = IPSET_INVALID_ID; 799 for (i = 0; i < inst->ip_set_max; i++) { 800 set = ip_set(inst, i); 801 if (set && STRNCMP(set->name, name)) { 802 *id = i; 803 break; 804 } 805 } 806 return (*id == IPSET_INVALID_ID ? NULL : set); 807 } 808 809 static inline struct ip_set * 810 find_set(struct ip_set_net *inst, const char *name) 811 { 812 ip_set_id_t id; 813 814 return find_set_and_id(inst, name, &id); 815 } 816 817 static int 818 find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, 819 struct ip_set **set) 820 { 821 struct ip_set *s; 822 ip_set_id_t i; 823 824 *index = IPSET_INVALID_ID; 825 for (i = 0; i < inst->ip_set_max; i++) { 826 s = ip_set(inst, i); 827 if (!s) { 828 if (*index == IPSET_INVALID_ID) 829 *index = i; 830 } else if (STRNCMP(name, s->name)) { 831 /* Name clash */ 832 *set = s; 833 return -EEXIST; 834 } 835 } 836 if (*index == IPSET_INVALID_ID) 837 /* No free slot remained */ 838 return -IPSET_ERR_MAX_SETS; 839 return 0; 840 } 841 842 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, 843 const struct nlmsghdr *nlh, 844 const struct nlattr * const attr[], 845 struct netlink_ext_ack *extack) 846 { 847 return -EOPNOTSUPP; 848 } 849 850 static int ip_set_create(struct net *net, struct sock *ctnl, 851 struct sk_buff *skb, const struct nlmsghdr *nlh, 852 const struct nlattr * const attr[], 853 struct netlink_ext_ack *extack) 854 { 855 struct ip_set_net *inst = ip_set_pernet(net); 856 struct ip_set *set, *clash = NULL; 857 ip_set_id_t index = IPSET_INVALID_ID; 858 struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; 859 const char *name, *typename; 860 u8 family, revision; 861 u32 flags = flag_exist(nlh); 862 int ret = 0; 863 864 if (unlikely(protocol_failed(attr) || 865 !attr[IPSET_ATTR_SETNAME] || 866 !attr[IPSET_ATTR_TYPENAME] || 867 !attr[IPSET_ATTR_REVISION] || 868 !attr[IPSET_ATTR_FAMILY] || 869 (attr[IPSET_ATTR_DATA] && 870 !flag_nested(attr[IPSET_ATTR_DATA])))) 871 return -IPSET_ERR_PROTOCOL; 872 873 name = nla_data(attr[IPSET_ATTR_SETNAME]); 874 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 875 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 876 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); 877 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", 878 name, typename, family_name(family), revision); 879 880 /* First, and without any locks, allocate and initialize 881 * a normal base set structure. 882 */ 883 set = kzalloc(sizeof(*set), GFP_KERNEL); 884 if (!set) 885 return -ENOMEM; 886 spin_lock_init(&set->lock); 887 strlcpy(set->name, name, IPSET_MAXNAMELEN); 888 set->family = family; 889 set->revision = revision; 890 891 /* Next, check that we know the type, and take 892 * a reference on the type, to make sure it stays available 893 * while constructing our new set. 894 * 895 * After referencing the type, we try to create the type 896 * specific part of the set without holding any locks. 897 */ 898 ret = find_set_type_get(typename, family, revision, &set->type); 899 if (ret) 900 goto out; 901 902 /* Without holding any locks, create private part. */ 903 if (attr[IPSET_ATTR_DATA] && 904 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], 905 set->type->create_policy, NULL)) { 906 ret = -IPSET_ERR_PROTOCOL; 907 goto put_out; 908 } 909 910 ret = set->type->create(net, set, tb, flags); 911 if (ret != 0) 912 goto put_out; 913 914 /* BTW, ret==0 here. */ 915 916 /* Here, we have a valid, constructed set and we are protected 917 * by the nfnl mutex. Find the first free index in ip_set_list 918 * and check clashing. 919 */ 920 ret = find_free_id(inst, set->name, &index, &clash); 921 if (ret == -EEXIST) { 922 /* If this is the same set and requested, ignore error */ 923 if ((flags & IPSET_FLAG_EXIST) && 924 STRNCMP(set->type->name, clash->type->name) && 925 set->type->family == clash->type->family && 926 set->type->revision_min == clash->type->revision_min && 927 set->type->revision_max == clash->type->revision_max && 928 set->variant->same_set(set, clash)) 929 ret = 0; 930 goto cleanup; 931 } else if (ret == -IPSET_ERR_MAX_SETS) { 932 struct ip_set **list, **tmp; 933 ip_set_id_t i = inst->ip_set_max + IP_SET_INC; 934 935 if (i < inst->ip_set_max || i == IPSET_INVALID_ID) 936 /* Wraparound */ 937 goto cleanup; 938 939 list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); 940 if (!list) 941 goto cleanup; 942 /* nfnl mutex is held, both lists are valid */ 943 tmp = ip_set_dereference(inst->ip_set_list); 944 memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); 945 rcu_assign_pointer(inst->ip_set_list, list); 946 /* Make sure all current packets have passed through */ 947 synchronize_net(); 948 /* Use new list */ 949 index = inst->ip_set_max; 950 inst->ip_set_max = i; 951 kfree(tmp); 952 ret = 0; 953 } else if (ret) { 954 goto cleanup; 955 } 956 957 /* Finally! Add our shiny new set to the list, and be done. */ 958 pr_debug("create: '%s' created with index %u!\n", set->name, index); 959 ip_set(inst, index) = set; 960 961 return ret; 962 963 cleanup: 964 set->variant->destroy(set); 965 put_out: 966 module_put(set->type->me); 967 out: 968 kfree(set); 969 return ret; 970 } 971 972 /* Destroy sets */ 973 974 static const struct nla_policy 975 ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { 976 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 977 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 978 .len = IPSET_MAXNAMELEN - 1 }, 979 }; 980 981 static void 982 ip_set_destroy_set(struct ip_set *set) 983 { 984 pr_debug("set: %s\n", set->name); 985 986 /* Must call it without holding any lock */ 987 set->variant->destroy(set); 988 module_put(set->type->me); 989 kfree(set); 990 } 991 992 static int ip_set_destroy(struct net *net, struct sock *ctnl, 993 struct sk_buff *skb, const struct nlmsghdr *nlh, 994 const struct nlattr * const attr[], 995 struct netlink_ext_ack *extack) 996 { 997 struct ip_set_net *inst = ip_set_pernet(net); 998 struct ip_set *s; 999 ip_set_id_t i; 1000 int ret = 0; 1001 1002 if (unlikely(protocol_failed(attr))) 1003 return -IPSET_ERR_PROTOCOL; 1004 1005 /* Must wait for flush to be really finished in list:set */ 1006 rcu_barrier(); 1007 1008 /* Commands are serialized and references are 1009 * protected by the ip_set_ref_lock. 1010 * External systems (i.e. xt_set) must call 1011 * ip_set_put|get_nfnl_* functions, that way we 1012 * can safely check references here. 1013 * 1014 * list:set timer can only decrement the reference 1015 * counter, so if it's already zero, we can proceed 1016 * without holding the lock. 1017 */ 1018 read_lock_bh(&ip_set_ref_lock); 1019 if (!attr[IPSET_ATTR_SETNAME]) { 1020 for (i = 0; i < inst->ip_set_max; i++) { 1021 s = ip_set(inst, i); 1022 if (s && (s->ref || s->ref_netlink)) { 1023 ret = -IPSET_ERR_BUSY; 1024 goto out; 1025 } 1026 } 1027 inst->is_destroyed = true; 1028 read_unlock_bh(&ip_set_ref_lock); 1029 for (i = 0; i < inst->ip_set_max; i++) { 1030 s = ip_set(inst, i); 1031 if (s) { 1032 ip_set(inst, i) = NULL; 1033 ip_set_destroy_set(s); 1034 } 1035 } 1036 /* Modified by ip_set_destroy() only, which is serialized */ 1037 inst->is_destroyed = false; 1038 } else { 1039 s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), 1040 &i); 1041 if (!s) { 1042 ret = -ENOENT; 1043 goto out; 1044 } else if (s->ref || s->ref_netlink) { 1045 ret = -IPSET_ERR_BUSY; 1046 goto out; 1047 } 1048 ip_set(inst, i) = NULL; 1049 read_unlock_bh(&ip_set_ref_lock); 1050 1051 ip_set_destroy_set(s); 1052 } 1053 return 0; 1054 out: 1055 read_unlock_bh(&ip_set_ref_lock); 1056 return ret; 1057 } 1058 1059 /* Flush sets */ 1060 1061 static void 1062 ip_set_flush_set(struct ip_set *set) 1063 { 1064 pr_debug("set: %s\n", set->name); 1065 1066 spin_lock_bh(&set->lock); 1067 set->variant->flush(set); 1068 spin_unlock_bh(&set->lock); 1069 } 1070 1071 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1072 const struct nlmsghdr *nlh, 1073 const struct nlattr * const attr[], 1074 struct netlink_ext_ack *extack) 1075 { 1076 struct ip_set_net *inst = ip_set_pernet(net); 1077 struct ip_set *s; 1078 ip_set_id_t i; 1079 1080 if (unlikely(protocol_failed(attr))) 1081 return -IPSET_ERR_PROTOCOL; 1082 1083 if (!attr[IPSET_ATTR_SETNAME]) { 1084 for (i = 0; i < inst->ip_set_max; i++) { 1085 s = ip_set(inst, i); 1086 if (s) 1087 ip_set_flush_set(s); 1088 } 1089 } else { 1090 s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1091 if (!s) 1092 return -ENOENT; 1093 1094 ip_set_flush_set(s); 1095 } 1096 1097 return 0; 1098 } 1099 1100 /* Rename a set */ 1101 1102 static const struct nla_policy 1103 ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { 1104 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1105 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 1106 .len = IPSET_MAXNAMELEN - 1 }, 1107 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, 1108 .len = IPSET_MAXNAMELEN - 1 }, 1109 }; 1110 1111 static int ip_set_rename(struct net *net, struct sock *ctnl, 1112 struct sk_buff *skb, const struct nlmsghdr *nlh, 1113 const struct nlattr * const attr[], 1114 struct netlink_ext_ack *extack) 1115 { 1116 struct ip_set_net *inst = ip_set_pernet(net); 1117 struct ip_set *set, *s; 1118 const char *name2; 1119 ip_set_id_t i; 1120 int ret = 0; 1121 1122 if (unlikely(protocol_failed(attr) || 1123 !attr[IPSET_ATTR_SETNAME] || 1124 !attr[IPSET_ATTR_SETNAME2])) 1125 return -IPSET_ERR_PROTOCOL; 1126 1127 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1128 if (!set) 1129 return -ENOENT; 1130 1131 read_lock_bh(&ip_set_ref_lock); 1132 if (set->ref != 0) { 1133 ret = -IPSET_ERR_REFERENCED; 1134 goto out; 1135 } 1136 1137 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); 1138 for (i = 0; i < inst->ip_set_max; i++) { 1139 s = ip_set(inst, i); 1140 if (s && STRNCMP(s->name, name2)) { 1141 ret = -IPSET_ERR_EXIST_SETNAME2; 1142 goto out; 1143 } 1144 } 1145 strncpy(set->name, name2, IPSET_MAXNAMELEN); 1146 1147 out: 1148 read_unlock_bh(&ip_set_ref_lock); 1149 return ret; 1150 } 1151 1152 /* Swap two sets so that name/index points to the other. 1153 * References and set names are also swapped. 1154 * 1155 * The commands are serialized by the nfnl mutex and references are 1156 * protected by the ip_set_ref_lock. The kernel interfaces 1157 * do not hold the mutex but the pointer settings are atomic 1158 * so the ip_set_list always contains valid pointers to the sets. 1159 */ 1160 1161 static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1162 const struct nlmsghdr *nlh, 1163 const struct nlattr * const attr[], 1164 struct netlink_ext_ack *extack) 1165 { 1166 struct ip_set_net *inst = ip_set_pernet(net); 1167 struct ip_set *from, *to; 1168 ip_set_id_t from_id, to_id; 1169 char from_name[IPSET_MAXNAMELEN]; 1170 1171 if (unlikely(protocol_failed(attr) || 1172 !attr[IPSET_ATTR_SETNAME] || 1173 !attr[IPSET_ATTR_SETNAME2])) 1174 return -IPSET_ERR_PROTOCOL; 1175 1176 from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), 1177 &from_id); 1178 if (!from) 1179 return -ENOENT; 1180 1181 to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), 1182 &to_id); 1183 if (!to) 1184 return -IPSET_ERR_EXIST_SETNAME2; 1185 1186 /* Features must not change. 1187 * Not an artifical restriction anymore, as we must prevent 1188 * possible loops created by swapping in setlist type of sets. 1189 */ 1190 if (!(from->type->features == to->type->features && 1191 from->family == to->family)) 1192 return -IPSET_ERR_TYPE_MISMATCH; 1193 1194 if (from->ref_netlink || to->ref_netlink) 1195 return -EBUSY; 1196 1197 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 1198 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 1199 strncpy(to->name, from_name, IPSET_MAXNAMELEN); 1200 1201 write_lock_bh(&ip_set_ref_lock); 1202 swap(from->ref, to->ref); 1203 ip_set(inst, from_id) = to; 1204 ip_set(inst, to_id) = from; 1205 write_unlock_bh(&ip_set_ref_lock); 1206 1207 return 0; 1208 } 1209 1210 /* List/save set data */ 1211 1212 #define DUMP_INIT 0 1213 #define DUMP_ALL 1 1214 #define DUMP_ONE 2 1215 #define DUMP_LAST 3 1216 1217 #define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF) 1218 #define DUMP_FLAGS(arg) (((u32)(arg)) >> 16) 1219 1220 static int 1221 ip_set_dump_done(struct netlink_callback *cb) 1222 { 1223 if (cb->args[IPSET_CB_ARG0]) { 1224 struct ip_set_net *inst = 1225 (struct ip_set_net *)cb->args[IPSET_CB_NET]; 1226 ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; 1227 struct ip_set *set = ip_set(inst, index); 1228 1229 if (set->variant->uref) 1230 set->variant->uref(set, cb, false); 1231 pr_debug("release set %s\n", set->name); 1232 __ip_set_put_netlink(set); 1233 } 1234 return 0; 1235 } 1236 1237 static inline void 1238 dump_attrs(struct nlmsghdr *nlh) 1239 { 1240 const struct nlattr *attr; 1241 int rem; 1242 1243 pr_debug("dump nlmsg\n"); 1244 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { 1245 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len); 1246 } 1247 } 1248 1249 static int 1250 dump_init(struct netlink_callback *cb, struct ip_set_net *inst) 1251 { 1252 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 1253 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 1254 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; 1255 struct nlattr *attr = (void *)nlh + min_len; 1256 u32 dump_type; 1257 ip_set_id_t index; 1258 1259 /* Second pass, so parser can't fail */ 1260 nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, 1261 ip_set_setname_policy, NULL); 1262 1263 if (cda[IPSET_ATTR_SETNAME]) { 1264 struct ip_set *set; 1265 1266 set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), 1267 &index); 1268 if (!set) 1269 return -ENOENT; 1270 1271 dump_type = DUMP_ONE; 1272 cb->args[IPSET_CB_INDEX] = index; 1273 } else { 1274 dump_type = DUMP_ALL; 1275 } 1276 1277 if (cda[IPSET_ATTR_FLAGS]) { 1278 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); 1279 1280 dump_type |= (f << 16); 1281 } 1282 cb->args[IPSET_CB_NET] = (unsigned long)inst; 1283 cb->args[IPSET_CB_DUMP] = dump_type; 1284 1285 return 0; 1286 } 1287 1288 static int 1289 ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) 1290 { 1291 ip_set_id_t index = IPSET_INVALID_ID, max; 1292 struct ip_set *set = NULL; 1293 struct nlmsghdr *nlh = NULL; 1294 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; 1295 struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); 1296 u32 dump_type, dump_flags; 1297 bool is_destroyed; 1298 int ret = 0; 1299 1300 if (!cb->args[IPSET_CB_DUMP]) { 1301 ret = dump_init(cb, inst); 1302 if (ret < 0) { 1303 nlh = nlmsg_hdr(cb->skb); 1304 /* We have to create and send the error message 1305 * manually :-( 1306 */ 1307 if (nlh->nlmsg_flags & NLM_F_ACK) 1308 netlink_ack(cb->skb, nlh, ret, NULL); 1309 return ret; 1310 } 1311 } 1312 1313 if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) 1314 goto out; 1315 1316 dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); 1317 dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); 1318 max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 1319 : inst->ip_set_max; 1320 dump_last: 1321 pr_debug("dump type, flag: %u %u index: %ld\n", 1322 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); 1323 for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { 1324 index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; 1325 write_lock_bh(&ip_set_ref_lock); 1326 set = ip_set(inst, index); 1327 is_destroyed = inst->is_destroyed; 1328 if (!set || is_destroyed) { 1329 write_unlock_bh(&ip_set_ref_lock); 1330 if (dump_type == DUMP_ONE) { 1331 ret = -ENOENT; 1332 goto out; 1333 } 1334 if (is_destroyed) { 1335 /* All sets are just being destroyed */ 1336 ret = 0; 1337 goto out; 1338 } 1339 continue; 1340 } 1341 /* When dumping all sets, we must dump "sorted" 1342 * so that lists (unions of sets) are dumped last. 1343 */ 1344 if (dump_type != DUMP_ONE && 1345 ((dump_type == DUMP_ALL) == 1346 !!(set->type->features & IPSET_DUMP_LAST))) { 1347 write_unlock_bh(&ip_set_ref_lock); 1348 continue; 1349 } 1350 pr_debug("List set: %s\n", set->name); 1351 if (!cb->args[IPSET_CB_ARG0]) { 1352 /* Start listing: make sure set won't be destroyed */ 1353 pr_debug("reference set\n"); 1354 set->ref_netlink++; 1355 } 1356 write_unlock_bh(&ip_set_ref_lock); 1357 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, 1358 cb->nlh->nlmsg_seq, flags, 1359 IPSET_CMD_LIST); 1360 if (!nlh) { 1361 ret = -EMSGSIZE; 1362 goto release_refcount; 1363 } 1364 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || 1365 nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) 1366 goto nla_put_failure; 1367 if (dump_flags & IPSET_FLAG_LIST_SETNAME) 1368 goto next_set; 1369 switch (cb->args[IPSET_CB_ARG0]) { 1370 case 0: 1371 /* Core header data */ 1372 if (nla_put_string(skb, IPSET_ATTR_TYPENAME, 1373 set->type->name) || 1374 nla_put_u8(skb, IPSET_ATTR_FAMILY, 1375 set->family) || 1376 nla_put_u8(skb, IPSET_ATTR_REVISION, 1377 set->revision)) 1378 goto nla_put_failure; 1379 ret = set->variant->head(set, skb); 1380 if (ret < 0) 1381 goto release_refcount; 1382 if (dump_flags & IPSET_FLAG_LIST_HEADER) 1383 goto next_set; 1384 if (set->variant->uref) 1385 set->variant->uref(set, cb, true); 1386 /* Fall through and add elements */ 1387 default: 1388 rcu_read_lock_bh(); 1389 ret = set->variant->list(set, skb, cb); 1390 rcu_read_unlock_bh(); 1391 if (!cb->args[IPSET_CB_ARG0]) 1392 /* Set is done, proceed with next one */ 1393 goto next_set; 1394 goto release_refcount; 1395 } 1396 } 1397 /* If we dump all sets, continue with dumping last ones */ 1398 if (dump_type == DUMP_ALL) { 1399 dump_type = DUMP_LAST; 1400 cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); 1401 cb->args[IPSET_CB_INDEX] = 0; 1402 if (set && set->variant->uref) 1403 set->variant->uref(set, cb, false); 1404 goto dump_last; 1405 } 1406 goto out; 1407 1408 nla_put_failure: 1409 ret = -EFAULT; 1410 next_set: 1411 if (dump_type == DUMP_ONE) 1412 cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; 1413 else 1414 cb->args[IPSET_CB_INDEX]++; 1415 release_refcount: 1416 /* If there was an error or set is done, release set */ 1417 if (ret || !cb->args[IPSET_CB_ARG0]) { 1418 set = ip_set(inst, index); 1419 if (set->variant->uref) 1420 set->variant->uref(set, cb, false); 1421 pr_debug("release set %s\n", set->name); 1422 __ip_set_put_netlink(set); 1423 cb->args[IPSET_CB_ARG0] = 0; 1424 } 1425 out: 1426 if (nlh) { 1427 nlmsg_end(skb, nlh); 1428 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); 1429 dump_attrs(nlh); 1430 } 1431 1432 return ret < 0 ? ret : skb->len; 1433 } 1434 1435 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1436 const struct nlmsghdr *nlh, 1437 const struct nlattr * const attr[], 1438 struct netlink_ext_ack *extack) 1439 { 1440 if (unlikely(protocol_failed(attr))) 1441 return -IPSET_ERR_PROTOCOL; 1442 1443 { 1444 struct netlink_dump_control c = { 1445 .dump = ip_set_dump_start, 1446 .done = ip_set_dump_done, 1447 }; 1448 return netlink_dump_start(ctnl, skb, nlh, &c); 1449 } 1450 } 1451 1452 /* Add, del and test */ 1453 1454 static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { 1455 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1456 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, 1457 .len = IPSET_MAXNAMELEN - 1 }, 1458 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 1459 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 1460 [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, 1461 }; 1462 1463 static int 1464 call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, 1465 struct nlattr *tb[], enum ipset_adt adt, 1466 u32 flags, bool use_lineno) 1467 { 1468 int ret; 1469 u32 lineno = 0; 1470 bool eexist = flags & IPSET_FLAG_EXIST, retried = false; 1471 1472 do { 1473 spin_lock_bh(&set->lock); 1474 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); 1475 spin_unlock_bh(&set->lock); 1476 retried = true; 1477 } while (ret == -EAGAIN && 1478 set->variant->resize && 1479 (ret = set->variant->resize(set, retried)) == 0); 1480 1481 if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) 1482 return 0; 1483 if (lineno && use_lineno) { 1484 /* Error in restore/batch mode: send back lineno */ 1485 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); 1486 struct sk_buff *skb2; 1487 struct nlmsgerr *errmsg; 1488 size_t payload = min(SIZE_MAX, 1489 sizeof(*errmsg) + nlmsg_len(nlh)); 1490 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 1491 struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; 1492 struct nlattr *cmdattr; 1493 u32 *errline; 1494 1495 skb2 = nlmsg_new(payload, GFP_KERNEL); 1496 if (!skb2) 1497 return -ENOMEM; 1498 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, 1499 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); 1500 errmsg = nlmsg_data(rep); 1501 errmsg->error = ret; 1502 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); 1503 cmdattr = (void *)&errmsg->msg + min_len; 1504 1505 nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, 1506 nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); 1507 1508 errline = nla_data(cda[IPSET_ATTR_LINENO]); 1509 1510 *errline = lineno; 1511 1512 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, 1513 MSG_DONTWAIT); 1514 /* Signal netlink not to send its ACK/errmsg. */ 1515 return -EINTR; 1516 } 1517 1518 return ret; 1519 } 1520 1521 static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1522 const struct nlmsghdr *nlh, 1523 const struct nlattr * const attr[], 1524 struct netlink_ext_ack *extack) 1525 { 1526 struct ip_set_net *inst = ip_set_pernet(net); 1527 struct ip_set *set; 1528 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; 1529 const struct nlattr *nla; 1530 u32 flags = flag_exist(nlh); 1531 bool use_lineno; 1532 int ret = 0; 1533 1534 if (unlikely(protocol_failed(attr) || 1535 !attr[IPSET_ATTR_SETNAME] || 1536 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1537 (attr[IPSET_ATTR_ADT] != NULL)) || 1538 (attr[IPSET_ATTR_DATA] && 1539 !flag_nested(attr[IPSET_ATTR_DATA])) || 1540 (attr[IPSET_ATTR_ADT] && 1541 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1542 !attr[IPSET_ATTR_LINENO])))) 1543 return -IPSET_ERR_PROTOCOL; 1544 1545 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1546 if (!set) 1547 return -ENOENT; 1548 1549 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1550 if (attr[IPSET_ATTR_DATA]) { 1551 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1552 attr[IPSET_ATTR_DATA], 1553 set->type->adt_policy, NULL)) 1554 return -IPSET_ERR_PROTOCOL; 1555 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, 1556 use_lineno); 1557 } else { 1558 int nla_rem; 1559 1560 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1561 memset(tb, 0, sizeof(tb)); 1562 if (nla_type(nla) != IPSET_ATTR_DATA || 1563 !flag_nested(nla) || 1564 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1565 set->type->adt_policy, NULL)) 1566 return -IPSET_ERR_PROTOCOL; 1567 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, 1568 flags, use_lineno); 1569 if (ret < 0) 1570 return ret; 1571 } 1572 } 1573 return ret; 1574 } 1575 1576 static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1577 const struct nlmsghdr *nlh, 1578 const struct nlattr * const attr[], 1579 struct netlink_ext_ack *extack) 1580 { 1581 struct ip_set_net *inst = ip_set_pernet(net); 1582 struct ip_set *set; 1583 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; 1584 const struct nlattr *nla; 1585 u32 flags = flag_exist(nlh); 1586 bool use_lineno; 1587 int ret = 0; 1588 1589 if (unlikely(protocol_failed(attr) || 1590 !attr[IPSET_ATTR_SETNAME] || 1591 !((attr[IPSET_ATTR_DATA] != NULL) ^ 1592 (attr[IPSET_ATTR_ADT] != NULL)) || 1593 (attr[IPSET_ATTR_DATA] && 1594 !flag_nested(attr[IPSET_ATTR_DATA])) || 1595 (attr[IPSET_ATTR_ADT] && 1596 (!flag_nested(attr[IPSET_ATTR_ADT]) || 1597 !attr[IPSET_ATTR_LINENO])))) 1598 return -IPSET_ERR_PROTOCOL; 1599 1600 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1601 if (!set) 1602 return -ENOENT; 1603 1604 use_lineno = !!attr[IPSET_ATTR_LINENO]; 1605 if (attr[IPSET_ATTR_DATA]) { 1606 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, 1607 attr[IPSET_ATTR_DATA], 1608 set->type->adt_policy, NULL)) 1609 return -IPSET_ERR_PROTOCOL; 1610 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, 1611 use_lineno); 1612 } else { 1613 int nla_rem; 1614 1615 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { 1616 memset(tb, 0, sizeof(*tb)); 1617 if (nla_type(nla) != IPSET_ATTR_DATA || 1618 !flag_nested(nla) || 1619 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, 1620 set->type->adt_policy, NULL)) 1621 return -IPSET_ERR_PROTOCOL; 1622 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, 1623 flags, use_lineno); 1624 if (ret < 0) 1625 return ret; 1626 } 1627 } 1628 return ret; 1629 } 1630 1631 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1632 const struct nlmsghdr *nlh, 1633 const struct nlattr * const attr[], 1634 struct netlink_ext_ack *extack) 1635 { 1636 struct ip_set_net *inst = ip_set_pernet(net); 1637 struct ip_set *set; 1638 struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; 1639 int ret = 0; 1640 1641 if (unlikely(protocol_failed(attr) || 1642 !attr[IPSET_ATTR_SETNAME] || 1643 !attr[IPSET_ATTR_DATA] || 1644 !flag_nested(attr[IPSET_ATTR_DATA]))) 1645 return -IPSET_ERR_PROTOCOL; 1646 1647 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1648 if (!set) 1649 return -ENOENT; 1650 1651 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], 1652 set->type->adt_policy, NULL)) 1653 return -IPSET_ERR_PROTOCOL; 1654 1655 rcu_read_lock_bh(); 1656 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); 1657 rcu_read_unlock_bh(); 1658 /* Userspace can't trigger element to be re-added */ 1659 if (ret == -EAGAIN) 1660 ret = 1; 1661 1662 return ret > 0 ? 0 : -IPSET_ERR_EXIST; 1663 } 1664 1665 /* Get headed data of a set */ 1666 1667 static int ip_set_header(struct net *net, struct sock *ctnl, 1668 struct sk_buff *skb, const struct nlmsghdr *nlh, 1669 const struct nlattr * const attr[], 1670 struct netlink_ext_ack *extack) 1671 { 1672 struct ip_set_net *inst = ip_set_pernet(net); 1673 const struct ip_set *set; 1674 struct sk_buff *skb2; 1675 struct nlmsghdr *nlh2; 1676 int ret = 0; 1677 1678 if (unlikely(protocol_failed(attr) || 1679 !attr[IPSET_ATTR_SETNAME])) 1680 return -IPSET_ERR_PROTOCOL; 1681 1682 set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); 1683 if (!set) 1684 return -ENOENT; 1685 1686 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1687 if (!skb2) 1688 return -ENOMEM; 1689 1690 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1691 IPSET_CMD_HEADER); 1692 if (!nlh2) 1693 goto nlmsg_failure; 1694 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || 1695 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || 1696 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || 1697 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || 1698 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision)) 1699 goto nla_put_failure; 1700 nlmsg_end(skb2, nlh2); 1701 1702 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 1703 if (ret < 0) 1704 return ret; 1705 1706 return 0; 1707 1708 nla_put_failure: 1709 nlmsg_cancel(skb2, nlh2); 1710 nlmsg_failure: 1711 kfree_skb(skb2); 1712 return -EMSGSIZE; 1713 } 1714 1715 /* Get type data */ 1716 1717 static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { 1718 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1719 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, 1720 .len = IPSET_MAXNAMELEN - 1 }, 1721 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, 1722 }; 1723 1724 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, 1725 const struct nlmsghdr *nlh, 1726 const struct nlattr * const attr[], 1727 struct netlink_ext_ack *extack) 1728 { 1729 struct sk_buff *skb2; 1730 struct nlmsghdr *nlh2; 1731 u8 family, min, max; 1732 const char *typename; 1733 int ret = 0; 1734 1735 if (unlikely(protocol_failed(attr) || 1736 !attr[IPSET_ATTR_TYPENAME] || 1737 !attr[IPSET_ATTR_FAMILY])) 1738 return -IPSET_ERR_PROTOCOL; 1739 1740 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); 1741 typename = nla_data(attr[IPSET_ATTR_TYPENAME]); 1742 ret = find_set_type_minmax(typename, family, &min, &max); 1743 if (ret) 1744 return ret; 1745 1746 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1747 if (!skb2) 1748 return -ENOMEM; 1749 1750 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1751 IPSET_CMD_TYPE); 1752 if (!nlh2) 1753 goto nlmsg_failure; 1754 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || 1755 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || 1756 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || 1757 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || 1758 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min)) 1759 goto nla_put_failure; 1760 nlmsg_end(skb2, nlh2); 1761 1762 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); 1763 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 1764 if (ret < 0) 1765 return ret; 1766 1767 return 0; 1768 1769 nla_put_failure: 1770 nlmsg_cancel(skb2, nlh2); 1771 nlmsg_failure: 1772 kfree_skb(skb2); 1773 return -EMSGSIZE; 1774 } 1775 1776 /* Get protocol version */ 1777 1778 static const struct nla_policy 1779 ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { 1780 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, 1781 }; 1782 1783 static int ip_set_protocol(struct net *net, struct sock *ctnl, 1784 struct sk_buff *skb, const struct nlmsghdr *nlh, 1785 const struct nlattr * const attr[], 1786 struct netlink_ext_ack *extack) 1787 { 1788 struct sk_buff *skb2; 1789 struct nlmsghdr *nlh2; 1790 int ret = 0; 1791 1792 if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) 1793 return -IPSET_ERR_PROTOCOL; 1794 1795 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1796 if (!skb2) 1797 return -ENOMEM; 1798 1799 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 1800 IPSET_CMD_PROTOCOL); 1801 if (!nlh2) 1802 goto nlmsg_failure; 1803 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) 1804 goto nla_put_failure; 1805 nlmsg_end(skb2, nlh2); 1806 1807 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 1808 if (ret < 0) 1809 return ret; 1810 1811 return 0; 1812 1813 nla_put_failure: 1814 nlmsg_cancel(skb2, nlh2); 1815 nlmsg_failure: 1816 kfree_skb(skb2); 1817 return -EMSGSIZE; 1818 } 1819 1820 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { 1821 [IPSET_CMD_NONE] = { 1822 .call = ip_set_none, 1823 .attr_count = IPSET_ATTR_CMD_MAX, 1824 }, 1825 [IPSET_CMD_CREATE] = { 1826 .call = ip_set_create, 1827 .attr_count = IPSET_ATTR_CMD_MAX, 1828 .policy = ip_set_create_policy, 1829 }, 1830 [IPSET_CMD_DESTROY] = { 1831 .call = ip_set_destroy, 1832 .attr_count = IPSET_ATTR_CMD_MAX, 1833 .policy = ip_set_setname_policy, 1834 }, 1835 [IPSET_CMD_FLUSH] = { 1836 .call = ip_set_flush, 1837 .attr_count = IPSET_ATTR_CMD_MAX, 1838 .policy = ip_set_setname_policy, 1839 }, 1840 [IPSET_CMD_RENAME] = { 1841 .call = ip_set_rename, 1842 .attr_count = IPSET_ATTR_CMD_MAX, 1843 .policy = ip_set_setname2_policy, 1844 }, 1845 [IPSET_CMD_SWAP] = { 1846 .call = ip_set_swap, 1847 .attr_count = IPSET_ATTR_CMD_MAX, 1848 .policy = ip_set_setname2_policy, 1849 }, 1850 [IPSET_CMD_LIST] = { 1851 .call = ip_set_dump, 1852 .attr_count = IPSET_ATTR_CMD_MAX, 1853 .policy = ip_set_setname_policy, 1854 }, 1855 [IPSET_CMD_SAVE] = { 1856 .call = ip_set_dump, 1857 .attr_count = IPSET_ATTR_CMD_MAX, 1858 .policy = ip_set_setname_policy, 1859 }, 1860 [IPSET_CMD_ADD] = { 1861 .call = ip_set_uadd, 1862 .attr_count = IPSET_ATTR_CMD_MAX, 1863 .policy = ip_set_adt_policy, 1864 }, 1865 [IPSET_CMD_DEL] = { 1866 .call = ip_set_udel, 1867 .attr_count = IPSET_ATTR_CMD_MAX, 1868 .policy = ip_set_adt_policy, 1869 }, 1870 [IPSET_CMD_TEST] = { 1871 .call = ip_set_utest, 1872 .attr_count = IPSET_ATTR_CMD_MAX, 1873 .policy = ip_set_adt_policy, 1874 }, 1875 [IPSET_CMD_HEADER] = { 1876 .call = ip_set_header, 1877 .attr_count = IPSET_ATTR_CMD_MAX, 1878 .policy = ip_set_setname_policy, 1879 }, 1880 [IPSET_CMD_TYPE] = { 1881 .call = ip_set_type, 1882 .attr_count = IPSET_ATTR_CMD_MAX, 1883 .policy = ip_set_type_policy, 1884 }, 1885 [IPSET_CMD_PROTOCOL] = { 1886 .call = ip_set_protocol, 1887 .attr_count = IPSET_ATTR_CMD_MAX, 1888 .policy = ip_set_protocol_policy, 1889 }, 1890 }; 1891 1892 static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { 1893 .name = "ip_set", 1894 .subsys_id = NFNL_SUBSYS_IPSET, 1895 .cb_count = IPSET_MSG_MAX, 1896 .cb = ip_set_netlink_subsys_cb, 1897 }; 1898 1899 /* Interface to iptables/ip6tables */ 1900 1901 static int 1902 ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) 1903 { 1904 unsigned int *op; 1905 void *data; 1906 int copylen = *len, ret = 0; 1907 struct net *net = sock_net(sk); 1908 struct ip_set_net *inst = ip_set_pernet(net); 1909 1910 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1911 return -EPERM; 1912 if (optval != SO_IP_SET) 1913 return -EBADF; 1914 if (*len < sizeof(unsigned int)) 1915 return -EINVAL; 1916 1917 data = vmalloc(*len); 1918 if (!data) 1919 return -ENOMEM; 1920 if (copy_from_user(data, user, *len) != 0) { 1921 ret = -EFAULT; 1922 goto done; 1923 } 1924 op = data; 1925 1926 if (*op < IP_SET_OP_VERSION) { 1927 /* Check the version at the beginning of operations */ 1928 struct ip_set_req_version *req_version = data; 1929 1930 if (*len < sizeof(struct ip_set_req_version)) { 1931 ret = -EINVAL; 1932 goto done; 1933 } 1934 1935 if (req_version->version != IPSET_PROTOCOL) { 1936 ret = -EPROTO; 1937 goto done; 1938 } 1939 } 1940 1941 switch (*op) { 1942 case IP_SET_OP_VERSION: { 1943 struct ip_set_req_version *req_version = data; 1944 1945 if (*len != sizeof(struct ip_set_req_version)) { 1946 ret = -EINVAL; 1947 goto done; 1948 } 1949 1950 req_version->version = IPSET_PROTOCOL; 1951 ret = copy_to_user(user, req_version, 1952 sizeof(struct ip_set_req_version)); 1953 goto done; 1954 } 1955 case IP_SET_OP_GET_BYNAME: { 1956 struct ip_set_req_get_set *req_get = data; 1957 ip_set_id_t id; 1958 1959 if (*len != sizeof(struct ip_set_req_get_set)) { 1960 ret = -EINVAL; 1961 goto done; 1962 } 1963 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; 1964 nfnl_lock(NFNL_SUBSYS_IPSET); 1965 find_set_and_id(inst, req_get->set.name, &id); 1966 req_get->set.index = id; 1967 nfnl_unlock(NFNL_SUBSYS_IPSET); 1968 goto copy; 1969 } 1970 case IP_SET_OP_GET_FNAME: { 1971 struct ip_set_req_get_set_family *req_get = data; 1972 ip_set_id_t id; 1973 1974 if (*len != sizeof(struct ip_set_req_get_set_family)) { 1975 ret = -EINVAL; 1976 goto done; 1977 } 1978 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; 1979 nfnl_lock(NFNL_SUBSYS_IPSET); 1980 find_set_and_id(inst, req_get->set.name, &id); 1981 req_get->set.index = id; 1982 if (id != IPSET_INVALID_ID) 1983 req_get->family = ip_set(inst, id)->family; 1984 nfnl_unlock(NFNL_SUBSYS_IPSET); 1985 goto copy; 1986 } 1987 case IP_SET_OP_GET_BYINDEX: { 1988 struct ip_set_req_get_set *req_get = data; 1989 struct ip_set *set; 1990 1991 if (*len != sizeof(struct ip_set_req_get_set) || 1992 req_get->set.index >= inst->ip_set_max) { 1993 ret = -EINVAL; 1994 goto done; 1995 } 1996 nfnl_lock(NFNL_SUBSYS_IPSET); 1997 set = ip_set(inst, req_get->set.index); 1998 strncpy(req_get->set.name, set ? set->name : "", 1999 IPSET_MAXNAMELEN); 2000 nfnl_unlock(NFNL_SUBSYS_IPSET); 2001 goto copy; 2002 } 2003 default: 2004 ret = -EBADMSG; 2005 goto done; 2006 } /* end of switch(op) */ 2007 2008 copy: 2009 ret = copy_to_user(user, data, copylen); 2010 2011 done: 2012 vfree(data); 2013 if (ret > 0) 2014 ret = 0; 2015 return ret; 2016 } 2017 2018 static struct nf_sockopt_ops so_set __read_mostly = { 2019 .pf = PF_INET, 2020 .get_optmin = SO_IP_SET, 2021 .get_optmax = SO_IP_SET + 1, 2022 .get = ip_set_sockfn_get, 2023 .owner = THIS_MODULE, 2024 }; 2025 2026 static int __net_init 2027 ip_set_net_init(struct net *net) 2028 { 2029 struct ip_set_net *inst = ip_set_pernet(net); 2030 struct ip_set **list; 2031 2032 inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; 2033 if (inst->ip_set_max >= IPSET_INVALID_ID) 2034 inst->ip_set_max = IPSET_INVALID_ID - 1; 2035 2036 list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); 2037 if (!list) 2038 return -ENOMEM; 2039 inst->is_deleted = false; 2040 inst->is_destroyed = false; 2041 rcu_assign_pointer(inst->ip_set_list, list); 2042 return 0; 2043 } 2044 2045 static void __net_exit 2046 ip_set_net_exit(struct net *net) 2047 { 2048 struct ip_set_net *inst = ip_set_pernet(net); 2049 2050 struct ip_set *set = NULL; 2051 ip_set_id_t i; 2052 2053 inst->is_deleted = true; /* flag for ip_set_nfnl_put */ 2054 2055 for (i = 0; i < inst->ip_set_max; i++) { 2056 set = ip_set(inst, i); 2057 if (set) { 2058 ip_set(inst, i) = NULL; 2059 ip_set_destroy_set(set); 2060 } 2061 } 2062 kfree(rcu_dereference_protected(inst->ip_set_list, 1)); 2063 } 2064 2065 static struct pernet_operations ip_set_net_ops = { 2066 .init = ip_set_net_init, 2067 .exit = ip_set_net_exit, 2068 .id = &ip_set_net_id, 2069 .size = sizeof(struct ip_set_net) 2070 }; 2071 2072 static int __init 2073 ip_set_init(void) 2074 { 2075 int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 2076 2077 if (ret != 0) { 2078 pr_err("ip_set: cannot register with nfnetlink.\n"); 2079 return ret; 2080 } 2081 ret = nf_register_sockopt(&so_set); 2082 if (ret != 0) { 2083 pr_err("SO_SET registry failed: %d\n", ret); 2084 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 2085 return ret; 2086 } 2087 ret = register_pernet_subsys(&ip_set_net_ops); 2088 if (ret) { 2089 pr_err("ip_set: cannot register pernet_subsys.\n"); 2090 nf_unregister_sockopt(&so_set); 2091 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 2092 return ret; 2093 } 2094 pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); 2095 return 0; 2096 } 2097 2098 static void __exit 2099 ip_set_fini(void) 2100 { 2101 unregister_pernet_subsys(&ip_set_net_ops); 2102 nf_unregister_sockopt(&so_set); 2103 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 2104 pr_debug("these are the famous last words\n"); 2105 } 2106 2107 module_init(ip_set_init); 2108 module_exit(ip_set_fini); 2109