// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#include "flow.h"
#include "datapath.h"
#include "flow_netlink.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>

#define TBL_MIN_BUCKETS		1024
#define MASK_ARRAY_SIZE_MIN	16
#define REHASH_INTERVAL		(10 * 60 * HZ)

#define MC_HASH_SHIFT		8
#define MC_HASH_ENTRIES		(1u << MC_HASH_SHIFT)
#define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)

static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;

static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	return range->end - range->start;
}

void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       bool full, const struct sw_flow_mask *mask)
{
	int start = full ? 0 : mask->range.start;
	int len = full ? sizeof *dst : range_n_bytes(&mask->range);
	const long *m = (const long *)((const u8 *)&mask->key + start);
	const long *s = (const long *)((const u8 *)src + start);
	long *d = (long *)((u8 *)dst + start);
	int i;

	/* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
	 * if 'full' is false the memory outside of the 'mask->range' is left
	 * uninitialized. This can be used as an optimization when further
	 * operations on 'dst' only use contents within 'mask->range'.
	 */
	for (i = 0; i < len; i += sizeof(long))
		*d++ = *s++ & *m++;
}
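
/* Allocate a new flow and its default statistics node. The first stats
 * node is always allocated (preferably on node 0) and CPU 0 is marked in
 * 'cpu_used_mask' so that the default node is always considered when the
 * flow is freed.
 */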
struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *flow;
	struct sw_flow_stats *stats;

	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	flow->stats_last_writer = -1;

	/* Initialize the default stat node. */
	stats = kmem_cache_alloc_node(flow_stats_cache,
				      GFP_KERNEL | __GFP_ZERO,
				      node_online(0) ? 0 : NUMA_NO_NODE);
	if (!stats)
		goto err;

	spin_lock_init(&stats->lock);

	RCU_INIT_POINTER(flow->stats[0], stats);

	cpumask_set_cpu(0, &flow->cpu_used_mask);

	return flow;
err:
	kmem_cache_free(flow_cache, flow);
	return ERR_PTR(-ENOMEM);
}

int ovs_flow_tbl_count(const struct flow_table *table)
{
	return table->count;
}

static void flow_free(struct sw_flow *flow)
{
	int cpu;

	if (ovs_identifier_is_key(&flow->id))
		kfree(flow->id.unmasked_key);
	if (flow->sf_acts)
		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
	/* We open code this to make sure cpu 0 is always considered */
	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
		if (flow->stats[cpu])
			kmem_cache_free(flow_stats_cache,
					(struct sw_flow_stats __force *)flow->stats[cpu]);
	kmem_cache_free(flow_cache, flow);
}

static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	flow_free(flow);
}

void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		flow_free(flow);
}

static void __table_instance_destroy(struct table_instance *ti)
{
	kvfree(ti->buckets);
	kfree(ti);
}

static struct table_instance *table_instance_alloc(int new_size)
{
	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
	int i;

	if (!ti)
		return NULL;

	ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
				     GFP_KERNEL);
	if (!ti->buckets) {
		kfree(ti);
		return NULL;
	}

	for (i = 0; i < new_size; i++)
		INIT_HLIST_HEAD(&ti->buckets[i]);

	ti->n_buckets = new_size;
	ti->node_ver = 0;
	ti->keep_flows = false;
	get_random_bytes(&ti->hash_seed, sizeof(u32));

	return ti;
}

static struct mask_array *tbl_mask_array_alloc(int size)
{
	struct mask_array *new;

	size = max(MASK_ARRAY_SIZE_MIN, size);
	new = kzalloc(sizeof(struct mask_array) +
		      sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
	if (!new)
		return NULL;

	new->count = 0;
	new->max = size;

	return new;
}

static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
{
	struct mask_array *old;
	struct mask_array *new;

	new = tbl_mask_array_alloc(size);
	if (!new)
		return -ENOMEM;

	old = ovsl_dereference(tbl->mask_array);
	if (old) {
		int i;

		for (i = 0; i < old->max; i++) {
			if (ovsl_dereference(old->masks[i]))
				new->masks[new->count++] = old->masks[i];
		}
	}

	rcu_assign_pointer(tbl->mask_array, new);
	kfree_rcu(old, rcu);

	return 0;
}
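
/* Set up an empty flow table: one table instance hashed by masked flow key,
 * one hashed by unique flow ID (UFID), an empty mask array and the per-CPU
 * mask cache used to speed up lookups.
 */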
int ovs_flow_tbl_init(struct flow_table *table)
{
	struct table_instance *ti, *ufid_ti;
	struct mask_array *ma;

	table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
					   MC_HASH_ENTRIES,
					   __alignof__(struct mask_cache_entry));
	if (!table->mask_cache)
		return -ENOMEM;

	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
	if (!ma)
		goto free_mask_cache;

	ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!ti)
		goto free_mask_array;

	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!ufid_ti)
		goto free_ti;

	rcu_assign_pointer(table->ti, ti);
	rcu_assign_pointer(table->ufid_ti, ufid_ti);
	rcu_assign_pointer(table->mask_array, ma);
	table->last_rehash = jiffies;
	table->count = 0;
	table->ufid_count = 0;
	return 0;

free_ti:
	__table_instance_destroy(ti);
free_mask_array:
	kfree(ma);
free_mask_cache:
	free_percpu(table->mask_cache);
	return -ENOMEM;
}

static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct table_instance *ti = container_of(rcu, struct table_instance, rcu);

	__table_instance_destroy(ti);
}

static void table_instance_destroy(struct table_instance *ti,
				   struct table_instance *ufid_ti,
				   bool deferred)
{
	int i;

	if (!ti)
		return;

	BUG_ON(!ufid_ti);
	if (ti->keep_flows)
		goto skip_flows;

	for (i = 0; i < ti->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = &ti->buckets[i];
		struct hlist_node *n;
		int ver = ti->node_ver;
		int ufid_ver = ufid_ti->node_ver;

		hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
			hlist_del_rcu(&flow->flow_table.node[ver]);
			if (ovs_identifier_is_ufid(&flow->id))
				hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
			ovs_flow_free(flow, deferred);
		}
	}

skip_flows:
	if (deferred) {
		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
	} else {
		__table_instance_destroy(ti);
		__table_instance_destroy(ufid_ti);
	}
}

/* No need for locking; this function is called from an RCU callback or the
 * error path.
 */
void ovs_flow_tbl_destroy(struct flow_table *table)
{
	struct table_instance *ti = rcu_dereference_raw(table->ti);
	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);

	free_percpu(table->mask_cache);
	kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
	table_instance_destroy(ti, ufid_ti, false);
}

struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
				       u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver;
	int i;

	ver = ti->node_ver;
	while (*bucket < ti->n_buckets) {
		i = 0;
		head = &ti->buckets[*bucket];
		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
			if (i < *last) {
				i++;
				continue;
			}
			*last = i + 1;
			return flow;
		}
		(*bucket)++;
		*last = 0;
	}

	return NULL;
}

static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
	hash = jhash_1word(hash, ti->hash_seed);
	return &ti->buckets[hash & (ti->n_buckets - 1)];
}

static void table_instance_insert(struct table_instance *ti,
				  struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(ti, flow->flow_table.hash);
	hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
}

static void ufid_table_instance_insert(struct table_instance *ti,
				       struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(ti, flow->ufid_table.hash);
	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
}
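
/* Each flow carries two sets of hash list nodes, selected by 'node_ver'.
 * When a table instance is rehashed or expanded, the flows are re-linked
 * into the new instance using the other node version and the old instance
 * is marked 'keep_flows' so that destroying it does not free the flows.
 */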
static void flow_table_copy_flows(struct table_instance *old,
				  struct table_instance *new, bool ufid)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table. */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = &old->buckets[i];

		if (ufid)
			hlist_for_each_entry(flow, head,
					     ufid_table.node[old_ver])
				ufid_table_instance_insert(new, flow);
		else
			hlist_for_each_entry(flow, head,
					     flow_table.node[old_ver])
				table_instance_insert(new, flow);
	}

	old->keep_flows = true;
}

static struct table_instance *table_instance_rehash(struct table_instance *ti,
						    int n_buckets, bool ufid)
{
	struct table_instance *new_ti;

	new_ti = table_instance_alloc(n_buckets);
	if (!new_ti)
		return NULL;

	flow_table_copy_flows(ti, new_ti, ufid);

	return new_ti;
}

int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
	struct table_instance *old_ti, *new_ti;
	struct table_instance *old_ufid_ti, *new_ufid_ti;

	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ti)
		return -ENOMEM;
	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ufid_ti)
		goto err_free_ti;

	old_ti = ovsl_dereference(flow_table->ti);
	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);

	rcu_assign_pointer(flow_table->ti, new_ti);
	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
	flow_table->last_rehash = jiffies;
	flow_table->count = 0;
	flow_table->ufid_count = 0;

	table_instance_destroy(old_ti, old_ufid_ti, true);
	return 0;

err_free_ti:
	__table_instance_destroy(new_ti);
	return -ENOMEM;
}
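
/* Hash only the u32 words of 'key' that fall within 'range'; callers pass
 * the masked key together with the corresponding mask range.
 */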
static u32 flow_hash(const struct sw_flow_key *key,
		     const struct sw_flow_key_range *range)
{
	int key_start = range->start;
	int key_end = range->end;
	const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
	int hash_u32s = (key_end - key_start) >> 2;

	/* Make sure the number of hash bytes is a multiple of u32. */
	BUILD_BUG_ON(sizeof(long) % sizeof(u32));

	return jhash2(hash_key, hash_u32s, 0);
}

static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_proto)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
				 sizeof(long));
}

static bool cmp_key(const struct sw_flow_key *key1,
		    const struct sw_flow_key *key2,
		    int key_start, int key_end)
{
	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
	long diffs = 0;
	int i;

	for (i = key_start; i < key_end; i += sizeof(long))
		diffs |= *cp1++ ^ *cp2++;

	return diffs == 0;
}

static bool flow_cmp_masked_key(const struct sw_flow *flow,
				const struct sw_flow_key *key,
				const struct sw_flow_key_range *range)
{
	return cmp_key(&flow->key, key, range->start, range->end);
}

static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
				      const struct sw_flow_match *match)
{
	struct sw_flow_key *key = match->key;
	int key_start = flow_key_start(key);
	int key_end = match->range.end;

	BUG_ON(ovs_identifier_is_ufid(&flow->id));
	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}

static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
					  const struct sw_flow_key *unmasked,
					  const struct sw_flow_mask *mask,
					  u32 *n_mask_hit)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
	hash = flow_hash(&masked_key, &mask->range);
	head = find_bucket(ti, hash);
	(*n_mask_hit)++;

	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
		if (flow->mask == mask && flow->flow_table.hash == hash &&
		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
			return flow;
	}
	return NULL;
}
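
/* Try every mask in the mask array, doing one masked lookup per mask, until
 * a flow matches. On success the index of the matching mask is returned via
 * '*index' so that the caller can seed the per-CPU mask cache with it.
 */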
static struct sw_flow *flow_lookup(struct flow_table *tbl,
				   struct table_instance *ti,
				   struct mask_array *ma,
				   const struct sw_flow_key *key,
				   u32 *n_mask_hit,
				   u32 *index)
{
	struct sw_flow *flow;
	int i;

	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *mask;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (mask) {
			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
			if (flow) { /* Found */
				*index = i;
				return flow;
			}
		}
	}

	return NULL;
}

/* mask_cache maps a flow to a probable mask. The cache is not tightly
 * coupled to the mask list, so updates to the mask list can leave stale
 * entries behind in the cache.
 * The cache is per CPU and is divided into MC_HASH_SEGS segments. On a
 * hash collision the entry is hashed into the next segment.
 */
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
					  const struct sw_flow_key *key,
					  u32 skb_hash,
					  u32 *n_mask_hit)
{
	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
	struct mask_cache_entry *entries, *ce, *del;
	struct sw_flow *flow;
	u32 hash = skb_hash;
	int seg;

	*n_mask_hit = 0;
	if (unlikely(!skb_hash)) {
		u32 __always_unused mask_index;

		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
	}

	del = NULL;
	entries = this_cpu_ptr(tbl->mask_cache);

	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		int index;

		index = hash & (MC_HASH_ENTRIES - 1);
		ce = &entries[index];

		if (ce->skb_hash == skb_hash) {
			struct sw_flow_mask *mask;
			struct sw_flow *flow;

			mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
			if (mask) {
				flow = masked_flow_lookup(ti, key, mask,
							  n_mask_hit);
				if (flow) /* Found */
					return flow;
			}

			del = ce;
			break;
		}

		if (!del || (del->skb_hash && !ce->skb_hash) ||
		    (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
		     !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
			del = ce;
		}

		hash >>= MC_HASH_SHIFT;
	}

	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);

	if (flow)
		del->skb_hash = skb_hash;

	return flow;
}

struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
				    const struct sw_flow_key *key)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);

	u32 __always_unused n_mask_hit;
	u32 __always_unused index;

	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
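
/* Find a flow whose unmasked key matches 'match' exactly. Only flows that
 * are identified by key (rather than by UFID) can be returned here.
 */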
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
					  const struct sw_flow_match *match)
{
	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
	int i;

	/* Always called under ovs-mutex. */
	for (i = 0; i < ma->max; i++) {
		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
		u32 __always_unused n_mask_hit;
		struct sw_flow_mask *mask;
		struct sw_flow *flow;

		mask = ovsl_dereference(ma->masks[i]);
		if (!mask)
			continue;

		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
		if (flow && ovs_identifier_is_key(&flow->id) &&
		    ovs_flow_cmp_unmasked_key(flow, match)) {
			return flow;
		}
	}

	return NULL;
}

static u32 ufid_hash(const struct sw_flow_id *sfid)
{
	return jhash(sfid->ufid, sfid->ufid_len, 0);
}

static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
			      const struct sw_flow_id *sfid)
{
	if (flow->id.ufid_len != sfid->ufid_len)
		return false;

	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
}

bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return flow_cmp_masked_key(flow, match->key, &match->range);

	return ovs_flow_cmp_unmasked_key(flow, match);
}

struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
					 const struct sw_flow_id *ufid)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;

	hash = ufid_hash(ufid);
	head = find_bucket(ti, hash);
	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
		if (flow->ufid_table.hash == hash &&
		    ovs_flow_cmp_ufid(flow, ufid))
			return flow;
	}
	return NULL;
}

int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);

	return ma->count;
}

static struct table_instance *table_instance_expand(struct table_instance *ti,
						    bool ufid)
{
	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}

/* Remove 'mask' from the mask list, if it is not needed any more. */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
	if (mask) {
		/* ovs-lock is required to protect mask-refcount and
		 * mask list.
		 */
		ASSERT_OVSL();
		BUG_ON(!mask->ref_count);
		mask->ref_count--;

		if (!mask->ref_count) {
			struct mask_array *ma;
			int i;

			ma = ovsl_dereference(tbl->mask_array);
			for (i = 0; i < ma->max; i++) {
				if (mask == ovsl_dereference(ma->masks[i])) {
					RCU_INIT_POINTER(ma->masks[i], NULL);
					ma->count--;
					kfree_rcu(mask, rcu);
					return;
				}
			}
			BUG();
		}
	}
}

/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti = ovsl_dereference(table->ti);
	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);

	BUG_ON(table->count == 0);
	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
	table->count--;
	if (ovs_identifier_is_ufid(&flow->id)) {
		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
		table->ufid_count--;
	}

	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
	 * accessible as long as the RCU read lock is held.
	 */
	flow_mask_remove(table, flow->mask);
}
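
/* Flow masks are reference counted and shared: flows that use an identical
 * mask point at the same sw_flow_mask entry in the mask array, and the
 * entry is freed (via RCU) only when its last user goes away.
 */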
static struct sw_flow_mask *mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 1;

	return mask;
}

static bool mask_equal(const struct sw_flow_mask *a,
		       const struct sw_flow_mask *b)
{
	const u8 *a_ = (const u8 *)&a->key + a->range.start;
	const u8 *b_ = (const u8 *)&b->key + b->range.start;

	return (a->range.end == b->range.end)
		&& (a->range.start == b->range.start)
		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct mask_array *ma;
	int i;

	ma = ovsl_dereference(tbl->mask_array);
	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *t;

		t = ovsl_dereference(ma->masks[i]);
		if (t && mask_equal(mask, t))
			return t;
	}

	return NULL;
}

/* Add 'mask' into the mask list, if it is not already there. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
			    const struct sw_flow_mask *new)
{
	struct sw_flow_mask *mask;

	mask = flow_mask_find(tbl, new);
	if (!mask) {
		struct mask_array *ma;
		int i;

		/* Allocate a new mask if none exists. */
		mask = mask_alloc();
		if (!mask)
			return -ENOMEM;
		mask->key = new->key;
		mask->range = new->range;

		/* Add mask to mask-list. */
		ma = ovsl_dereference(tbl->mask_array);
		if (ma->count >= ma->max) {
			int err;

			err = tbl_mask_array_realloc(tbl, ma->max +
						     MASK_ARRAY_SIZE_MIN);
			if (err) {
				kfree(mask);
				return err;
			}

			ma = ovsl_dereference(tbl->mask_array);
		}

		for (i = 0; i < ma->max; i++) {
			const struct sw_flow_mask *t;

			t = ovsl_dereference(ma->masks[i]);
			if (!t) {
				rcu_assign_pointer(ma->masks[i], mask);
				ma->count++;
				break;
			}
		}
	} else {
		BUG_ON(!mask->ref_count);
		mask->ref_count++;
	}

	flow->mask = mask;
	return 0;
}

/* Must be called with OVS mutex held. */
static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *new_ti = NULL;
	struct table_instance *ti;

	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
	ti = ovsl_dereference(table->ti);
	table_instance_insert(ti, flow);
	table->count++;

	/* Expand table, if necessary, to make room. */
	if (table->count > ti->n_buckets)
		new_ti = table_instance_expand(ti, false);
	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
		new_ti = table_instance_rehash(ti, ti->n_buckets, false);

	if (new_ti) {
		rcu_assign_pointer(table->ti, new_ti);
		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		table->last_rehash = jiffies;
	}
}
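
/* Insert 'flow' into the UFID hash table. Unlike the key table above, the
 * UFID table is only expanded when it outgrows its buckets; it is never
 * periodically rehashed.
 */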
/* Must be called with OVS mutex held. */
static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti;

	flow->ufid_table.hash = ufid_hash(&flow->id);
	ti = ovsl_dereference(table->ufid_ti);
	ufid_table_instance_insert(ti, flow);
	table->ufid_count++;

	/* Expand table, if necessary, to make room. */
	if (table->ufid_count > ti->n_buckets) {
		struct table_instance *new_ti;

		new_ti = table_instance_expand(ti, true);
		if (new_ti) {
			rcu_assign_pointer(table->ufid_ti, new_ti);
			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		}
	}
}

/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
			const struct sw_flow_mask *mask)
{
	int err;

	err = flow_mask_insert(table, flow, mask);
	if (err)
		return err;
	flow_key_insert(table, flow);
	if (ovs_identifier_is_ufid(&flow->id))
		flow_ufid_insert(table, flow);

	return 0;
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code.
 */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
				       + (nr_cpu_ids
					  * sizeof(struct sw_flow_stats *)),
				       0, 0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	flow_stats_cache
		= kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
				    0, SLAB_HWCACHE_ALIGN, NULL);
	if (flow_stats_cache == NULL) {
		kmem_cache_destroy(flow_cache);
		flow_cache = NULL;
		return -ENOMEM;
	}

	return 0;
}

/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_stats_cache);
	kmem_cache_destroy(flow_cache);
}