1 /* 2 * Copyright (c) 2007-2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #include "flow.h" 20 #include "datapath.h" 21 #include <linux/uaccess.h> 22 #include <linux/netdevice.h> 23 #include <linux/etherdevice.h> 24 #include <linux/if_ether.h> 25 #include <linux/if_vlan.h> 26 #include <net/llc_pdu.h> 27 #include <linux/kernel.h> 28 #include <linux/jhash.h> 29 #include <linux/jiffies.h> 30 #include <linux/llc.h> 31 #include <linux/module.h> 32 #include <linux/in.h> 33 #include <linux/rcupdate.h> 34 #include <linux/if_arp.h> 35 #include <linux/ip.h> 36 #include <linux/ipv6.h> 37 #include <linux/sctp.h> 38 #include <linux/tcp.h> 39 #include <linux/udp.h> 40 #include <linux/icmp.h> 41 #include <linux/icmpv6.h> 42 #include <linux/rculist.h> 43 #include <net/ip.h> 44 #include <net/ip_tunnels.h> 45 #include <net/ipv6.h> 46 #include <net/ndisc.h> 47 48 static struct kmem_cache *flow_cache; 49 50 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, 51 struct sw_flow_key_range *range, u8 val); 52 53 static void update_range__(struct sw_flow_match *match, 54 size_t offset, size_t size, bool is_mask) 55 { 56 struct sw_flow_key_range *range = NULL; 57 size_t start = rounddown(offset, sizeof(long)); 58 size_t end = roundup(offset + size, sizeof(long)); 59 60 if (!is_mask) 61 range = &match->range; 62 else if (match->mask) 63 range = &match->mask->range; 64 65 if (!range) 66 return; 67 68 if (range->start == range->end) { 69 range->start = start; 70 range->end = end; 71 return; 72 } 73 74 if (range->start > start) 75 range->start = start; 76 77 if (range->end < end) 78 range->end = end; 79 } 80 81 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 82 do { \ 83 update_range__(match, offsetof(struct sw_flow_key, field), \ 84 sizeof((match)->key->field), is_mask); \ 85 if (is_mask) { \ 86 if ((match)->mask) \ 87 (match)->mask->key.field = value; \ 88 } else { \ 89 (match)->key->field = value; \ 90 } \ 91 } while (0) 92 93 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 94 do { \ 95 update_range__(match, offsetof(struct sw_flow_key, field), \ 96 len, is_mask); \ 97 if (is_mask) { \ 98 if ((match)->mask) \ 99 memcpy(&(match)->mask->key.field, value_p, len);\ 100 } else { \ 101 memcpy(&(match)->key->field, value_p, len); \ 102 } \ 103 } while (0) 104 105 static u16 range_n_bytes(const struct sw_flow_key_range *range) 106 { 107 return range->end - range->start; 108 } 109 110 void ovs_match_init(struct sw_flow_match *match, 111 struct sw_flow_key *key, 112 struct sw_flow_mask *mask) 113 { 114 memset(match, 0, sizeof(*match)); 115 match->key = key; 116 match->mask = mask; 117 118 memset(key, 0, sizeof(*key)); 119 120 if (mask) { 121 memset(&mask->key, 0, sizeof(mask->key)); 122 mask->range.start = mask->range.end = 0; 123 } 124 } 125 126 static bool ovs_match_validate(const struct sw_flow_match *match, 127 u64 key_attrs, u64 mask_attrs) 128 { 129 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 130 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 131 132 /* The following mask attributes allowed only if they 133 * pass the validation tests. */ 134 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 135 | (1 << OVS_KEY_ATTR_IPV6) 136 | (1 << OVS_KEY_ATTR_TCP) 137 | (1 << OVS_KEY_ATTR_UDP) 138 | (1 << OVS_KEY_ATTR_SCTP) 139 | (1 << OVS_KEY_ATTR_ICMP) 140 | (1 << OVS_KEY_ATTR_ICMPV6) 141 | (1 << OVS_KEY_ATTR_ARP) 142 | (1 << OVS_KEY_ATTR_ND)); 143 144 /* Always allowed mask fields. */ 145 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 146 | (1 << OVS_KEY_ATTR_IN_PORT) 147 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 148 149 /* Check key attributes. */ 150 if (match->key->eth.type == htons(ETH_P_ARP) 151 || match->key->eth.type == htons(ETH_P_RARP)) { 152 key_expected |= 1 << OVS_KEY_ATTR_ARP; 153 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 154 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 155 } 156 157 if (match->key->eth.type == htons(ETH_P_IP)) { 158 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 159 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 160 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 161 162 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 163 if (match->key->ip.proto == IPPROTO_UDP) { 164 key_expected |= 1 << OVS_KEY_ATTR_UDP; 165 if (match->mask && (match->mask->key.ip.proto == 0xff)) 166 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 167 } 168 169 if (match->key->ip.proto == IPPROTO_SCTP) { 170 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 171 if (match->mask && (match->mask->key.ip.proto == 0xff)) 172 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 173 } 174 175 if (match->key->ip.proto == IPPROTO_TCP) { 176 key_expected |= 1 << OVS_KEY_ATTR_TCP; 177 if (match->mask && (match->mask->key.ip.proto == 0xff)) 178 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 179 } 180 181 if (match->key->ip.proto == IPPROTO_ICMP) { 182 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 183 if (match->mask && (match->mask->key.ip.proto == 0xff)) 184 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 185 } 186 } 187 } 188 189 if (match->key->eth.type == htons(ETH_P_IPV6)) { 190 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 191 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 192 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 193 194 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 195 if (match->key->ip.proto == IPPROTO_UDP) { 196 key_expected |= 1 << OVS_KEY_ATTR_UDP; 197 if (match->mask && (match->mask->key.ip.proto == 0xff)) 198 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 199 } 200 201 if (match->key->ip.proto == IPPROTO_SCTP) { 202 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 203 if (match->mask && (match->mask->key.ip.proto == 0xff)) 204 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 205 } 206 207 if (match->key->ip.proto == IPPROTO_TCP) { 208 key_expected |= 1 << OVS_KEY_ATTR_TCP; 209 if (match->mask && (match->mask->key.ip.proto == 0xff)) 210 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 211 } 212 213 if (match->key->ip.proto == IPPROTO_ICMPV6) { 214 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 215 if (match->mask && (match->mask->key.ip.proto == 0xff)) 216 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 217 218 if (match->key->ipv6.tp.src == 219 htons(NDISC_NEIGHBOUR_SOLICITATION) || 220 match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 221 key_expected |= 1 << OVS_KEY_ATTR_ND; 222 if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) 223 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 224 } 225 } 226 } 227 } 228 229 if ((key_attrs & key_expected) != key_expected) { 230 /* Key attributes check failed. */ 231 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 232 key_attrs, key_expected); 233 return false; 234 } 235 236 if ((mask_attrs & mask_allowed) != mask_attrs) { 237 /* Mask attributes check failed. */ 238 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 239 mask_attrs, mask_allowed); 240 return false; 241 } 242 243 return true; 244 } 245 246 static int check_header(struct sk_buff *skb, int len) 247 { 248 if (unlikely(skb->len < len)) 249 return -EINVAL; 250 if (unlikely(!pskb_may_pull(skb, len))) 251 return -ENOMEM; 252 return 0; 253 } 254 255 static bool arphdr_ok(struct sk_buff *skb) 256 { 257 return pskb_may_pull(skb, skb_network_offset(skb) + 258 sizeof(struct arp_eth_header)); 259 } 260 261 static int check_iphdr(struct sk_buff *skb) 262 { 263 unsigned int nh_ofs = skb_network_offset(skb); 264 unsigned int ip_len; 265 int err; 266 267 err = check_header(skb, nh_ofs + sizeof(struct iphdr)); 268 if (unlikely(err)) 269 return err; 270 271 ip_len = ip_hdrlen(skb); 272 if (unlikely(ip_len < sizeof(struct iphdr) || 273 skb->len < nh_ofs + ip_len)) 274 return -EINVAL; 275 276 skb_set_transport_header(skb, nh_ofs + ip_len); 277 return 0; 278 } 279 280 static bool tcphdr_ok(struct sk_buff *skb) 281 { 282 int th_ofs = skb_transport_offset(skb); 283 int tcp_len; 284 285 if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr)))) 286 return false; 287 288 tcp_len = tcp_hdrlen(skb); 289 if (unlikely(tcp_len < sizeof(struct tcphdr) || 290 skb->len < th_ofs + tcp_len)) 291 return false; 292 293 return true; 294 } 295 296 static bool udphdr_ok(struct sk_buff *skb) 297 { 298 return pskb_may_pull(skb, skb_transport_offset(skb) + 299 sizeof(struct udphdr)); 300 } 301 302 static bool sctphdr_ok(struct sk_buff *skb) 303 { 304 return pskb_may_pull(skb, skb_transport_offset(skb) + 305 sizeof(struct sctphdr)); 306 } 307 308 static bool icmphdr_ok(struct sk_buff *skb) 309 { 310 return pskb_may_pull(skb, skb_transport_offset(skb) + 311 sizeof(struct icmphdr)); 312 } 313 314 u64 ovs_flow_used_time(unsigned long flow_jiffies) 315 { 316 struct timespec cur_ts; 317 u64 cur_ms, idle_ms; 318 319 ktime_get_ts(&cur_ts); 320 idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); 321 cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + 322 cur_ts.tv_nsec / NSEC_PER_MSEC; 323 324 return cur_ms - idle_ms; 325 } 326 327 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) 328 { 329 unsigned int nh_ofs = skb_network_offset(skb); 330 unsigned int nh_len; 331 int payload_ofs; 332 struct ipv6hdr *nh; 333 uint8_t nexthdr; 334 __be16 frag_off; 335 int err; 336 337 err = check_header(skb, nh_ofs + sizeof(*nh)); 338 if (unlikely(err)) 339 return err; 340 341 nh = ipv6_hdr(skb); 342 nexthdr = nh->nexthdr; 343 payload_ofs = (u8 *)(nh + 1) - skb->data; 344 345 key->ip.proto = NEXTHDR_NONE; 346 key->ip.tos = ipv6_get_dsfield(nh); 347 key->ip.ttl = nh->hop_limit; 348 key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); 349 key->ipv6.addr.src = nh->saddr; 350 key->ipv6.addr.dst = nh->daddr; 351 352 payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off); 353 if (unlikely(payload_ofs < 0)) 354 return -EINVAL; 355 356 if (frag_off) { 357 if (frag_off & htons(~0x7)) 358 key->ip.frag = OVS_FRAG_TYPE_LATER; 359 else 360 key->ip.frag = OVS_FRAG_TYPE_FIRST; 361 } 362 363 nh_len = payload_ofs - nh_ofs; 364 skb_set_transport_header(skb, nh_ofs + nh_len); 365 key->ip.proto = nexthdr; 366 return nh_len; 367 } 368 369 static bool icmp6hdr_ok(struct sk_buff *skb) 370 { 371 return pskb_may_pull(skb, skb_transport_offset(skb) + 372 sizeof(struct icmp6hdr)); 373 } 374 375 void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, 376 const struct sw_flow_mask *mask) 377 { 378 const long *m = (long *)((u8 *)&mask->key + mask->range.start); 379 const long *s = (long *)((u8 *)src + mask->range.start); 380 long *d = (long *)((u8 *)dst + mask->range.start); 381 int i; 382 383 /* The memory outside of the 'mask->range' are not set since 384 * further operations on 'dst' only uses contents within 385 * 'mask->range'. 386 */ 387 for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) 388 *d++ = *s++ & *m++; 389 } 390 391 #define TCP_FLAGS_OFFSET 13 392 #define TCP_FLAG_MASK 0x3f 393 394 void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) 395 { 396 u8 tcp_flags = 0; 397 398 if ((flow->key.eth.type == htons(ETH_P_IP) || 399 flow->key.eth.type == htons(ETH_P_IPV6)) && 400 flow->key.ip.proto == IPPROTO_TCP && 401 likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { 402 u8 *tcp = (u8 *)tcp_hdr(skb); 403 tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; 404 } 405 406 spin_lock(&flow->lock); 407 flow->used = jiffies; 408 flow->packet_count++; 409 flow->byte_count += skb->len; 410 flow->tcp_flags |= tcp_flags; 411 spin_unlock(&flow->lock); 412 } 413 414 struct sw_flow_actions *ovs_flow_actions_alloc(int size) 415 { 416 struct sw_flow_actions *sfa; 417 418 if (size > MAX_ACTIONS_BUFSIZE) 419 return ERR_PTR(-EINVAL); 420 421 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 422 if (!sfa) 423 return ERR_PTR(-ENOMEM); 424 425 sfa->actions_len = 0; 426 return sfa; 427 } 428 429 struct sw_flow *ovs_flow_alloc(void) 430 { 431 struct sw_flow *flow; 432 433 flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); 434 if (!flow) 435 return ERR_PTR(-ENOMEM); 436 437 spin_lock_init(&flow->lock); 438 flow->sf_acts = NULL; 439 flow->mask = NULL; 440 441 return flow; 442 } 443 444 static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) 445 { 446 hash = jhash_1word(hash, table->hash_seed); 447 return flex_array_get(table->buckets, 448 (hash & (table->n_buckets - 1))); 449 } 450 451 static struct flex_array *alloc_buckets(unsigned int n_buckets) 452 { 453 struct flex_array *buckets; 454 int i, err; 455 456 buckets = flex_array_alloc(sizeof(struct hlist_head), 457 n_buckets, GFP_KERNEL); 458 if (!buckets) 459 return NULL; 460 461 err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); 462 if (err) { 463 flex_array_free(buckets); 464 return NULL; 465 } 466 467 for (i = 0; i < n_buckets; i++) 468 INIT_HLIST_HEAD((struct hlist_head *) 469 flex_array_get(buckets, i)); 470 471 return buckets; 472 } 473 474 static void free_buckets(struct flex_array *buckets) 475 { 476 flex_array_free(buckets); 477 } 478 479 static struct flow_table *__flow_tbl_alloc(int new_size) 480 { 481 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); 482 483 if (!table) 484 return NULL; 485 486 table->buckets = alloc_buckets(new_size); 487 488 if (!table->buckets) { 489 kfree(table); 490 return NULL; 491 } 492 table->n_buckets = new_size; 493 table->count = 0; 494 table->node_ver = 0; 495 table->keep_flows = false; 496 get_random_bytes(&table->hash_seed, sizeof(u32)); 497 table->mask_list = NULL; 498 499 return table; 500 } 501 502 static void __flow_tbl_destroy(struct flow_table *table) 503 { 504 int i; 505 506 if (table->keep_flows) 507 goto skip_flows; 508 509 for (i = 0; i < table->n_buckets; i++) { 510 struct sw_flow *flow; 511 struct hlist_head *head = flex_array_get(table->buckets, i); 512 struct hlist_node *n; 513 int ver = table->node_ver; 514 515 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 516 hlist_del(&flow->hash_node[ver]); 517 ovs_flow_free(flow, false); 518 } 519 } 520 521 BUG_ON(!list_empty(table->mask_list)); 522 kfree(table->mask_list); 523 524 skip_flows: 525 free_buckets(table->buckets); 526 kfree(table); 527 } 528 529 struct flow_table *ovs_flow_tbl_alloc(int new_size) 530 { 531 struct flow_table *table = __flow_tbl_alloc(new_size); 532 533 if (!table) 534 return NULL; 535 536 table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); 537 if (!table->mask_list) { 538 table->keep_flows = true; 539 __flow_tbl_destroy(table); 540 return NULL; 541 } 542 INIT_LIST_HEAD(table->mask_list); 543 544 return table; 545 } 546 547 static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) 548 { 549 struct flow_table *table = container_of(rcu, struct flow_table, rcu); 550 551 __flow_tbl_destroy(table); 552 } 553 554 void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) 555 { 556 if (!table) 557 return; 558 559 if (deferred) 560 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); 561 else 562 __flow_tbl_destroy(table); 563 } 564 565 struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) 566 { 567 struct sw_flow *flow; 568 struct hlist_head *head; 569 int ver; 570 int i; 571 572 ver = table->node_ver; 573 while (*bucket < table->n_buckets) { 574 i = 0; 575 head = flex_array_get(table->buckets, *bucket); 576 hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { 577 if (i < *last) { 578 i++; 579 continue; 580 } 581 *last = i + 1; 582 return flow; 583 } 584 (*bucket)++; 585 *last = 0; 586 } 587 588 return NULL; 589 } 590 591 static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) 592 { 593 struct hlist_head *head; 594 595 head = find_bucket(table, flow->hash); 596 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); 597 598 table->count++; 599 } 600 601 static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) 602 { 603 int old_ver; 604 int i; 605 606 old_ver = old->node_ver; 607 new->node_ver = !old_ver; 608 609 /* Insert in new table. */ 610 for (i = 0; i < old->n_buckets; i++) { 611 struct sw_flow *flow; 612 struct hlist_head *head; 613 614 head = flex_array_get(old->buckets, i); 615 616 hlist_for_each_entry(flow, head, hash_node[old_ver]) 617 __tbl_insert(new, flow); 618 } 619 620 new->mask_list = old->mask_list; 621 old->keep_flows = true; 622 } 623 624 static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) 625 { 626 struct flow_table *new_table; 627 628 new_table = __flow_tbl_alloc(n_buckets); 629 if (!new_table) 630 return ERR_PTR(-ENOMEM); 631 632 flow_table_copy_flows(table, new_table); 633 634 return new_table; 635 } 636 637 struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) 638 { 639 return __flow_tbl_rehash(table, table->n_buckets); 640 } 641 642 struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) 643 { 644 return __flow_tbl_rehash(table, table->n_buckets * 2); 645 } 646 647 static void __flow_free(struct sw_flow *flow) 648 { 649 kfree((struct sf_flow_acts __force *)flow->sf_acts); 650 kmem_cache_free(flow_cache, flow); 651 } 652 653 static void rcu_free_flow_callback(struct rcu_head *rcu) 654 { 655 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 656 657 __flow_free(flow); 658 } 659 660 void ovs_flow_free(struct sw_flow *flow, bool deferred) 661 { 662 if (!flow) 663 return; 664 665 ovs_sw_flow_mask_del_ref(flow->mask, deferred); 666 667 if (deferred) 668 call_rcu(&flow->rcu, rcu_free_flow_callback); 669 else 670 __flow_free(flow); 671 } 672 673 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 674 * The caller must hold rcu_read_lock for this to be sensible. */ 675 void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) 676 { 677 kfree_rcu(sf_acts, rcu); 678 } 679 680 static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) 681 { 682 struct qtag_prefix { 683 __be16 eth_type; /* ETH_P_8021Q */ 684 __be16 tci; 685 }; 686 struct qtag_prefix *qp; 687 688 if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) 689 return 0; 690 691 if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + 692 sizeof(__be16)))) 693 return -ENOMEM; 694 695 qp = (struct qtag_prefix *) skb->data; 696 key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); 697 __skb_pull(skb, sizeof(struct qtag_prefix)); 698 699 return 0; 700 } 701 702 static __be16 parse_ethertype(struct sk_buff *skb) 703 { 704 struct llc_snap_hdr { 705 u8 dsap; /* Always 0xAA */ 706 u8 ssap; /* Always 0xAA */ 707 u8 ctrl; 708 u8 oui[3]; 709 __be16 ethertype; 710 }; 711 struct llc_snap_hdr *llc; 712 __be16 proto; 713 714 proto = *(__be16 *) skb->data; 715 __skb_pull(skb, sizeof(__be16)); 716 717 if (ntohs(proto) >= ETH_P_802_3_MIN) 718 return proto; 719 720 if (skb->len < sizeof(struct llc_snap_hdr)) 721 return htons(ETH_P_802_2); 722 723 if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr)))) 724 return htons(0); 725 726 llc = (struct llc_snap_hdr *) skb->data; 727 if (llc->dsap != LLC_SAP_SNAP || 728 llc->ssap != LLC_SAP_SNAP || 729 (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) 730 return htons(ETH_P_802_2); 731 732 __skb_pull(skb, sizeof(struct llc_snap_hdr)); 733 734 if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) 735 return llc->ethertype; 736 737 return htons(ETH_P_802_2); 738 } 739 740 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, 741 int nh_len) 742 { 743 struct icmp6hdr *icmp = icmp6_hdr(skb); 744 745 /* The ICMPv6 type and code fields use the 16-bit transport port 746 * fields, so we need to store them in 16-bit network byte order. 747 */ 748 key->ipv6.tp.src = htons(icmp->icmp6_type); 749 key->ipv6.tp.dst = htons(icmp->icmp6_code); 750 751 if (icmp->icmp6_code == 0 && 752 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || 753 icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) { 754 int icmp_len = skb->len - skb_transport_offset(skb); 755 struct nd_msg *nd; 756 int offset; 757 758 /* In order to process neighbor discovery options, we need the 759 * entire packet. 760 */ 761 if (unlikely(icmp_len < sizeof(*nd))) 762 return 0; 763 764 if (unlikely(skb_linearize(skb))) 765 return -ENOMEM; 766 767 nd = (struct nd_msg *)skb_transport_header(skb); 768 key->ipv6.nd.target = nd->target; 769 770 icmp_len -= sizeof(*nd); 771 offset = 0; 772 while (icmp_len >= 8) { 773 struct nd_opt_hdr *nd_opt = 774 (struct nd_opt_hdr *)(nd->opt + offset); 775 int opt_len = nd_opt->nd_opt_len * 8; 776 777 if (unlikely(!opt_len || opt_len > icmp_len)) 778 return 0; 779 780 /* Store the link layer address if the appropriate 781 * option is provided. It is considered an error if 782 * the same link layer option is specified twice. 783 */ 784 if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR 785 && opt_len == 8) { 786 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) 787 goto invalid; 788 memcpy(key->ipv6.nd.sll, 789 &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); 790 } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR 791 && opt_len == 8) { 792 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) 793 goto invalid; 794 memcpy(key->ipv6.nd.tll, 795 &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); 796 } 797 798 icmp_len -= opt_len; 799 offset += opt_len; 800 } 801 } 802 803 return 0; 804 805 invalid: 806 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); 807 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); 808 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); 809 810 return 0; 811 } 812 813 /** 814 * ovs_flow_extract - extracts a flow key from an Ethernet frame. 815 * @skb: sk_buff that contains the frame, with skb->data pointing to the 816 * Ethernet header 817 * @in_port: port number on which @skb was received. 818 * @key: output flow key 819 * 820 * The caller must ensure that skb->len >= ETH_HLEN. 821 * 822 * Returns 0 if successful, otherwise a negative errno value. 823 * 824 * Initializes @skb header pointers as follows: 825 * 826 * - skb->mac_header: the Ethernet header. 827 * 828 * - skb->network_header: just past the Ethernet header, or just past the 829 * VLAN header, to the first byte of the Ethernet payload. 830 * 831 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 832 * on output, then just past the IP header, if one is present and 833 * of a correct length, otherwise the same as skb->network_header. 834 * For other key->eth.type values it is left untouched. 835 */ 836 int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) 837 { 838 int error; 839 struct ethhdr *eth; 840 841 memset(key, 0, sizeof(*key)); 842 843 key->phy.priority = skb->priority; 844 if (OVS_CB(skb)->tun_key) 845 memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); 846 key->phy.in_port = in_port; 847 key->phy.skb_mark = skb->mark; 848 849 skb_reset_mac_header(skb); 850 851 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 852 * header in the linear data area. 853 */ 854 eth = eth_hdr(skb); 855 memcpy(key->eth.src, eth->h_source, ETH_ALEN); 856 memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); 857 858 __skb_pull(skb, 2 * ETH_ALEN); 859 /* We are going to push all headers that we pull, so no need to 860 * update skb->csum here. 861 */ 862 863 if (vlan_tx_tag_present(skb)) 864 key->eth.tci = htons(skb->vlan_tci); 865 else if (eth->h_proto == htons(ETH_P_8021Q)) 866 if (unlikely(parse_vlan(skb, key))) 867 return -ENOMEM; 868 869 key->eth.type = parse_ethertype(skb); 870 if (unlikely(key->eth.type == htons(0))) 871 return -ENOMEM; 872 873 skb_reset_network_header(skb); 874 __skb_push(skb, skb->data - skb_mac_header(skb)); 875 876 /* Network layer. */ 877 if (key->eth.type == htons(ETH_P_IP)) { 878 struct iphdr *nh; 879 __be16 offset; 880 881 error = check_iphdr(skb); 882 if (unlikely(error)) { 883 if (error == -EINVAL) { 884 skb->transport_header = skb->network_header; 885 error = 0; 886 } 887 return error; 888 } 889 890 nh = ip_hdr(skb); 891 key->ipv4.addr.src = nh->saddr; 892 key->ipv4.addr.dst = nh->daddr; 893 894 key->ip.proto = nh->protocol; 895 key->ip.tos = nh->tos; 896 key->ip.ttl = nh->ttl; 897 898 offset = nh->frag_off & htons(IP_OFFSET); 899 if (offset) { 900 key->ip.frag = OVS_FRAG_TYPE_LATER; 901 return 0; 902 } 903 if (nh->frag_off & htons(IP_MF) || 904 skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 905 key->ip.frag = OVS_FRAG_TYPE_FIRST; 906 907 /* Transport layer. */ 908 if (key->ip.proto == IPPROTO_TCP) { 909 if (tcphdr_ok(skb)) { 910 struct tcphdr *tcp = tcp_hdr(skb); 911 key->ipv4.tp.src = tcp->source; 912 key->ipv4.tp.dst = tcp->dest; 913 } 914 } else if (key->ip.proto == IPPROTO_UDP) { 915 if (udphdr_ok(skb)) { 916 struct udphdr *udp = udp_hdr(skb); 917 key->ipv4.tp.src = udp->source; 918 key->ipv4.tp.dst = udp->dest; 919 } 920 } else if (key->ip.proto == IPPROTO_SCTP) { 921 if (sctphdr_ok(skb)) { 922 struct sctphdr *sctp = sctp_hdr(skb); 923 key->ipv4.tp.src = sctp->source; 924 key->ipv4.tp.dst = sctp->dest; 925 } 926 } else if (key->ip.proto == IPPROTO_ICMP) { 927 if (icmphdr_ok(skb)) { 928 struct icmphdr *icmp = icmp_hdr(skb); 929 /* The ICMP type and code fields use the 16-bit 930 * transport port fields, so we need to store 931 * them in 16-bit network byte order. */ 932 key->ipv4.tp.src = htons(icmp->type); 933 key->ipv4.tp.dst = htons(icmp->code); 934 } 935 } 936 937 } else if ((key->eth.type == htons(ETH_P_ARP) || 938 key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { 939 struct arp_eth_header *arp; 940 941 arp = (struct arp_eth_header *)skb_network_header(skb); 942 943 if (arp->ar_hrd == htons(ARPHRD_ETHER) 944 && arp->ar_pro == htons(ETH_P_IP) 945 && arp->ar_hln == ETH_ALEN 946 && arp->ar_pln == 4) { 947 948 /* We only match on the lower 8 bits of the opcode. */ 949 if (ntohs(arp->ar_op) <= 0xff) 950 key->ip.proto = ntohs(arp->ar_op); 951 memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); 952 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); 953 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); 954 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); 955 } 956 } else if (key->eth.type == htons(ETH_P_IPV6)) { 957 int nh_len; /* IPv6 Header + Extensions */ 958 959 nh_len = parse_ipv6hdr(skb, key); 960 if (unlikely(nh_len < 0)) { 961 if (nh_len == -EINVAL) { 962 skb->transport_header = skb->network_header; 963 error = 0; 964 } else { 965 error = nh_len; 966 } 967 return error; 968 } 969 970 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 971 return 0; 972 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 973 key->ip.frag = OVS_FRAG_TYPE_FIRST; 974 975 /* Transport layer. */ 976 if (key->ip.proto == NEXTHDR_TCP) { 977 if (tcphdr_ok(skb)) { 978 struct tcphdr *tcp = tcp_hdr(skb); 979 key->ipv6.tp.src = tcp->source; 980 key->ipv6.tp.dst = tcp->dest; 981 } 982 } else if (key->ip.proto == NEXTHDR_UDP) { 983 if (udphdr_ok(skb)) { 984 struct udphdr *udp = udp_hdr(skb); 985 key->ipv6.tp.src = udp->source; 986 key->ipv6.tp.dst = udp->dest; 987 } 988 } else if (key->ip.proto == NEXTHDR_SCTP) { 989 if (sctphdr_ok(skb)) { 990 struct sctphdr *sctp = sctp_hdr(skb); 991 key->ipv6.tp.src = sctp->source; 992 key->ipv6.tp.dst = sctp->dest; 993 } 994 } else if (key->ip.proto == NEXTHDR_ICMP) { 995 if (icmp6hdr_ok(skb)) { 996 error = parse_icmpv6(skb, key, nh_len); 997 if (error) 998 return error; 999 } 1000 } 1001 } 1002 1003 return 0; 1004 } 1005 1006 static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, 1007 int key_end) 1008 { 1009 u32 *hash_key = (u32 *)((u8 *)key + key_start); 1010 int hash_u32s = (key_end - key_start) >> 2; 1011 1012 /* Make sure number of hash bytes are multiple of u32. */ 1013 BUILD_BUG_ON(sizeof(long) % sizeof(u32)); 1014 1015 return jhash2(hash_key, hash_u32s, 0); 1016 } 1017 1018 static int flow_key_start(const struct sw_flow_key *key) 1019 { 1020 if (key->tun_key.ipv4_dst) 1021 return 0; 1022 else 1023 return rounddown(offsetof(struct sw_flow_key, phy), 1024 sizeof(long)); 1025 } 1026 1027 static bool __cmp_key(const struct sw_flow_key *key1, 1028 const struct sw_flow_key *key2, int key_start, int key_end) 1029 { 1030 const long *cp1 = (long *)((u8 *)key1 + key_start); 1031 const long *cp2 = (long *)((u8 *)key2 + key_start); 1032 long diffs = 0; 1033 int i; 1034 1035 for (i = key_start; i < key_end; i += sizeof(long)) 1036 diffs |= *cp1++ ^ *cp2++; 1037 1038 return diffs == 0; 1039 } 1040 1041 static bool __flow_cmp_masked_key(const struct sw_flow *flow, 1042 const struct sw_flow_key *key, int key_start, int key_end) 1043 { 1044 return __cmp_key(&flow->key, key, key_start, key_end); 1045 } 1046 1047 static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, 1048 const struct sw_flow_key *key, int key_start, int key_end) 1049 { 1050 return __cmp_key(&flow->unmasked_key, key, key_start, key_end); 1051 } 1052 1053 bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 1054 const struct sw_flow_key *key, int key_end) 1055 { 1056 int key_start; 1057 key_start = flow_key_start(key); 1058 1059 return __flow_cmp_unmasked_key(flow, key, key_start, key_end); 1060 1061 } 1062 1063 struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, 1064 struct sw_flow_match *match) 1065 { 1066 struct sw_flow_key *unmasked = match->key; 1067 int key_end = match->range.end; 1068 struct sw_flow *flow; 1069 1070 flow = ovs_flow_lookup(table, unmasked); 1071 if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) 1072 flow = NULL; 1073 1074 return flow; 1075 } 1076 1077 static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, 1078 const struct sw_flow_key *unmasked, 1079 struct sw_flow_mask *mask) 1080 { 1081 struct sw_flow *flow; 1082 struct hlist_head *head; 1083 int key_start = mask->range.start; 1084 int key_end = mask->range.end; 1085 u32 hash; 1086 struct sw_flow_key masked_key; 1087 1088 ovs_flow_key_mask(&masked_key, unmasked, mask); 1089 hash = ovs_flow_hash(&masked_key, key_start, key_end); 1090 head = find_bucket(table, hash); 1091 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 1092 if (flow->mask == mask && 1093 __flow_cmp_masked_key(flow, &masked_key, 1094 key_start, key_end)) 1095 return flow; 1096 } 1097 return NULL; 1098 } 1099 1100 struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, 1101 const struct sw_flow_key *key) 1102 { 1103 struct sw_flow *flow = NULL; 1104 struct sw_flow_mask *mask; 1105 1106 list_for_each_entry_rcu(mask, tbl->mask_list, list) { 1107 flow = ovs_masked_flow_lookup(tbl, key, mask); 1108 if (flow) /* Found */ 1109 break; 1110 } 1111 1112 return flow; 1113 } 1114 1115 1116 void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) 1117 { 1118 flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, 1119 flow->mask->range.end); 1120 __tbl_insert(table, flow); 1121 } 1122 1123 void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) 1124 { 1125 BUG_ON(table->count == 0); 1126 hlist_del_rcu(&flow->hash_node[table->node_ver]); 1127 table->count--; 1128 } 1129 1130 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 1131 const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 1132 [OVS_KEY_ATTR_ENCAP] = -1, 1133 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 1134 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 1135 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 1136 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 1137 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 1138 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 1139 [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 1140 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 1141 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 1142 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 1143 [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 1144 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 1145 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 1146 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 1147 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 1148 [OVS_KEY_ATTR_TUNNEL] = -1, 1149 }; 1150 1151 static bool is_all_zero(const u8 *fp, size_t size) 1152 { 1153 int i; 1154 1155 if (!fp) 1156 return false; 1157 1158 for (i = 0; i < size; i++) 1159 if (fp[i]) 1160 return false; 1161 1162 return true; 1163 } 1164 1165 static int __parse_flow_nlattrs(const struct nlattr *attr, 1166 const struct nlattr *a[], 1167 u64 *attrsp, bool nz) 1168 { 1169 const struct nlattr *nla; 1170 u32 attrs; 1171 int rem; 1172 1173 attrs = *attrsp; 1174 nla_for_each_nested(nla, attr, rem) { 1175 u16 type = nla_type(nla); 1176 int expected_len; 1177 1178 if (type > OVS_KEY_ATTR_MAX) { 1179 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 1180 type, OVS_KEY_ATTR_MAX); 1181 } 1182 1183 if (attrs & (1 << type)) { 1184 OVS_NLERR("Duplicate key attribute (type %d).\n", type); 1185 return -EINVAL; 1186 } 1187 1188 expected_len = ovs_key_lens[type]; 1189 if (nla_len(nla) != expected_len && expected_len != -1) { 1190 OVS_NLERR("Key attribute has unexpected length (type=%d" 1191 ", length=%d, expected=%d).\n", type, 1192 nla_len(nla), expected_len); 1193 return -EINVAL; 1194 } 1195 1196 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 1197 attrs |= 1 << type; 1198 a[type] = nla; 1199 } 1200 } 1201 if (rem) { 1202 OVS_NLERR("Message has %d unknown bytes.\n", rem); 1203 return -EINVAL; 1204 } 1205 1206 *attrsp = attrs; 1207 return 0; 1208 } 1209 1210 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 1211 const struct nlattr *a[], u64 *attrsp) 1212 { 1213 return __parse_flow_nlattrs(attr, a, attrsp, true); 1214 } 1215 1216 static int parse_flow_nlattrs(const struct nlattr *attr, 1217 const struct nlattr *a[], u64 *attrsp) 1218 { 1219 return __parse_flow_nlattrs(attr, a, attrsp, false); 1220 } 1221 1222 int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 1223 struct sw_flow_match *match, bool is_mask) 1224 { 1225 struct nlattr *a; 1226 int rem; 1227 bool ttl = false; 1228 __be16 tun_flags = 0; 1229 1230 nla_for_each_nested(a, attr, rem) { 1231 int type = nla_type(a); 1232 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 1233 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 1234 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 1235 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 1236 [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 1237 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 1238 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 1239 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 1240 }; 1241 1242 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 1243 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 1244 type, OVS_TUNNEL_KEY_ATTR_MAX); 1245 return -EINVAL; 1246 } 1247 1248 if (ovs_tunnel_key_lens[type] != nla_len(a)) { 1249 OVS_NLERR("IPv4 tunnel attribute type has unexpected " 1250 " length (type=%d, length=%d, expected=%d).\n", 1251 type, nla_len(a), ovs_tunnel_key_lens[type]); 1252 return -EINVAL; 1253 } 1254 1255 switch (type) { 1256 case OVS_TUNNEL_KEY_ATTR_ID: 1257 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 1258 nla_get_be64(a), is_mask); 1259 tun_flags |= TUNNEL_KEY; 1260 break; 1261 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 1262 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 1263 nla_get_be32(a), is_mask); 1264 break; 1265 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 1266 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 1267 nla_get_be32(a), is_mask); 1268 break; 1269 case OVS_TUNNEL_KEY_ATTR_TOS: 1270 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 1271 nla_get_u8(a), is_mask); 1272 break; 1273 case OVS_TUNNEL_KEY_ATTR_TTL: 1274 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 1275 nla_get_u8(a), is_mask); 1276 ttl = true; 1277 break; 1278 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 1279 tun_flags |= TUNNEL_DONT_FRAGMENT; 1280 break; 1281 case OVS_TUNNEL_KEY_ATTR_CSUM: 1282 tun_flags |= TUNNEL_CSUM; 1283 break; 1284 default: 1285 return -EINVAL; 1286 } 1287 } 1288 1289 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 1290 1291 if (rem > 0) { 1292 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); 1293 return -EINVAL; 1294 } 1295 1296 if (!is_mask) { 1297 if (!match->key->tun_key.ipv4_dst) { 1298 OVS_NLERR("IPv4 tunnel destination address is zero.\n"); 1299 return -EINVAL; 1300 } 1301 1302 if (!ttl) { 1303 OVS_NLERR("IPv4 tunnel TTL not specified.\n"); 1304 return -EINVAL; 1305 } 1306 } 1307 1308 return 0; 1309 } 1310 1311 int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 1312 const struct ovs_key_ipv4_tunnel *tun_key, 1313 const struct ovs_key_ipv4_tunnel *output) 1314 { 1315 struct nlattr *nla; 1316 1317 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 1318 if (!nla) 1319 return -EMSGSIZE; 1320 1321 if (output->tun_flags & TUNNEL_KEY && 1322 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 1323 return -EMSGSIZE; 1324 if (output->ipv4_src && 1325 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 1326 return -EMSGSIZE; 1327 if (output->ipv4_dst && 1328 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 1329 return -EMSGSIZE; 1330 if (output->ipv4_tos && 1331 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 1332 return -EMSGSIZE; 1333 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 1334 return -EMSGSIZE; 1335 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 1336 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 1337 return -EMSGSIZE; 1338 if ((output->tun_flags & TUNNEL_CSUM) && 1339 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 1340 return -EMSGSIZE; 1341 1342 nla_nest_end(skb, nla); 1343 return 0; 1344 } 1345 1346 static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 1347 const struct nlattr **a, bool is_mask) 1348 { 1349 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1350 SW_FLOW_KEY_PUT(match, phy.priority, 1351 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 1352 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1353 } 1354 1355 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1356 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1357 1358 if (is_mask) 1359 in_port = 0xffffffff; /* Always exact match in_port. */ 1360 else if (in_port >= DP_MAX_PORTS) 1361 return -EINVAL; 1362 1363 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 1364 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1365 } else if (!is_mask) { 1366 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 1367 } 1368 1369 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1370 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1371 1372 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 1373 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1374 } 1375 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1376 if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 1377 is_mask)) 1378 return -EINVAL; 1379 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1380 } 1381 return 0; 1382 } 1383 1384 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 1385 const struct nlattr **a, bool is_mask) 1386 { 1387 int err; 1388 u64 orig_attrs = attrs; 1389 1390 err = metadata_from_nlattrs(match, &attrs, a, is_mask); 1391 if (err) 1392 return err; 1393 1394 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 1395 const struct ovs_key_ethernet *eth_key; 1396 1397 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1398 SW_FLOW_KEY_MEMCPY(match, eth.src, 1399 eth_key->eth_src, ETH_ALEN, is_mask); 1400 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1401 eth_key->eth_dst, ETH_ALEN, is_mask); 1402 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1403 } 1404 1405 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1406 __be16 tci; 1407 1408 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1409 if (!(tci & htons(VLAN_TAG_PRESENT))) { 1410 if (is_mask) 1411 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); 1412 else 1413 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); 1414 1415 return -EINVAL; 1416 } 1417 1418 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 1419 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 1420 } else if (!is_mask) 1421 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 1422 1423 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1424 __be16 eth_type; 1425 1426 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1427 if (is_mask) { 1428 /* Always exact match EtherType. */ 1429 eth_type = htons(0xffff); 1430 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 1431 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", 1432 ntohs(eth_type), ETH_P_802_3_MIN); 1433 return -EINVAL; 1434 } 1435 1436 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 1437 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1438 } else if (!is_mask) { 1439 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1440 } 1441 1442 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1443 const struct ovs_key_ipv4 *ipv4_key; 1444 1445 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1446 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 1447 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", 1448 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 1449 return -EINVAL; 1450 } 1451 SW_FLOW_KEY_PUT(match, ip.proto, 1452 ipv4_key->ipv4_proto, is_mask); 1453 SW_FLOW_KEY_PUT(match, ip.tos, 1454 ipv4_key->ipv4_tos, is_mask); 1455 SW_FLOW_KEY_PUT(match, ip.ttl, 1456 ipv4_key->ipv4_ttl, is_mask); 1457 SW_FLOW_KEY_PUT(match, ip.frag, 1458 ipv4_key->ipv4_frag, is_mask); 1459 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1460 ipv4_key->ipv4_src, is_mask); 1461 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1462 ipv4_key->ipv4_dst, is_mask); 1463 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 1464 } 1465 1466 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 1467 const struct ovs_key_ipv6 *ipv6_key; 1468 1469 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1470 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 1471 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", 1472 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 1473 return -EINVAL; 1474 } 1475 SW_FLOW_KEY_PUT(match, ipv6.label, 1476 ipv6_key->ipv6_label, is_mask); 1477 SW_FLOW_KEY_PUT(match, ip.proto, 1478 ipv6_key->ipv6_proto, is_mask); 1479 SW_FLOW_KEY_PUT(match, ip.tos, 1480 ipv6_key->ipv6_tclass, is_mask); 1481 SW_FLOW_KEY_PUT(match, ip.ttl, 1482 ipv6_key->ipv6_hlimit, is_mask); 1483 SW_FLOW_KEY_PUT(match, ip.frag, 1484 ipv6_key->ipv6_frag, is_mask); 1485 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 1486 ipv6_key->ipv6_src, 1487 sizeof(match->key->ipv6.addr.src), 1488 is_mask); 1489 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 1490 ipv6_key->ipv6_dst, 1491 sizeof(match->key->ipv6.addr.dst), 1492 is_mask); 1493 1494 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 1495 } 1496 1497 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 1498 const struct ovs_key_arp *arp_key; 1499 1500 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1501 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 1502 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", 1503 arp_key->arp_op); 1504 return -EINVAL; 1505 } 1506 1507 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1508 arp_key->arp_sip, is_mask); 1509 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1510 arp_key->arp_tip, is_mask); 1511 SW_FLOW_KEY_PUT(match, ip.proto, 1512 ntohs(arp_key->arp_op), is_mask); 1513 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 1514 arp_key->arp_sha, ETH_ALEN, is_mask); 1515 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 1516 arp_key->arp_tha, ETH_ALEN, is_mask); 1517 1518 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1519 } 1520 1521 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 1522 const struct ovs_key_tcp *tcp_key; 1523 1524 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 1525 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1526 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1527 tcp_key->tcp_src, is_mask); 1528 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1529 tcp_key->tcp_dst, is_mask); 1530 } else { 1531 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1532 tcp_key->tcp_src, is_mask); 1533 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1534 tcp_key->tcp_dst, is_mask); 1535 } 1536 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 1537 } 1538 1539 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 1540 const struct ovs_key_udp *udp_key; 1541 1542 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 1543 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1544 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1545 udp_key->udp_src, is_mask); 1546 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1547 udp_key->udp_dst, is_mask); 1548 } else { 1549 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1550 udp_key->udp_src, is_mask); 1551 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1552 udp_key->udp_dst, is_mask); 1553 } 1554 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 1555 } 1556 1557 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 1558 const struct ovs_key_sctp *sctp_key; 1559 1560 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 1561 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1562 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1563 sctp_key->sctp_src, is_mask); 1564 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1565 sctp_key->sctp_dst, is_mask); 1566 } else { 1567 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1568 sctp_key->sctp_src, is_mask); 1569 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1570 sctp_key->sctp_dst, is_mask); 1571 } 1572 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 1573 } 1574 1575 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 1576 const struct ovs_key_icmp *icmp_key; 1577 1578 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 1579 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1580 htons(icmp_key->icmp_type), is_mask); 1581 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1582 htons(icmp_key->icmp_code), is_mask); 1583 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 1584 } 1585 1586 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 1587 const struct ovs_key_icmpv6 *icmpv6_key; 1588 1589 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 1590 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1591 htons(icmpv6_key->icmpv6_type), is_mask); 1592 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1593 htons(icmpv6_key->icmpv6_code), is_mask); 1594 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 1595 } 1596 1597 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 1598 const struct ovs_key_nd *nd_key; 1599 1600 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 1601 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 1602 nd_key->nd_target, 1603 sizeof(match->key->ipv6.nd.target), 1604 is_mask); 1605 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 1606 nd_key->nd_sll, ETH_ALEN, is_mask); 1607 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 1608 nd_key->nd_tll, ETH_ALEN, is_mask); 1609 attrs &= ~(1 << OVS_KEY_ATTR_ND); 1610 } 1611 1612 if (attrs != 0) 1613 return -EINVAL; 1614 1615 return 0; 1616 } 1617 1618 /** 1619 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and 1620 * mask. In case the 'mask' is NULL, the flow is treated as exact match 1621 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 1622 * does not include any don't care bit. 1623 * @match: receives the extracted flow match information. 1624 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1625 * sequence. The fields should of the packet that triggered the creation 1626 * of this flow. 1627 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 1628 * attribute specifies the mask field of the wildcarded flow. 1629 */ 1630 int ovs_match_from_nlattrs(struct sw_flow_match *match, 1631 const struct nlattr *key, 1632 const struct nlattr *mask) 1633 { 1634 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1635 const struct nlattr *encap; 1636 u64 key_attrs = 0; 1637 u64 mask_attrs = 0; 1638 bool encap_valid = false; 1639 int err; 1640 1641 err = parse_flow_nlattrs(key, a, &key_attrs); 1642 if (err) 1643 return err; 1644 1645 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 1646 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 1647 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { 1648 __be16 tci; 1649 1650 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 1651 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 1652 OVS_NLERR("Invalid Vlan frame.\n"); 1653 return -EINVAL; 1654 } 1655 1656 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1657 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1658 encap = a[OVS_KEY_ATTR_ENCAP]; 1659 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1660 encap_valid = true; 1661 1662 if (tci & htons(VLAN_TAG_PRESENT)) { 1663 err = parse_flow_nlattrs(encap, a, &key_attrs); 1664 if (err) 1665 return err; 1666 } else if (!tci) { 1667 /* Corner case for truncated 802.1Q header. */ 1668 if (nla_len(encap)) { 1669 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); 1670 return -EINVAL; 1671 } 1672 } else { 1673 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); 1674 return -EINVAL; 1675 } 1676 } 1677 1678 err = ovs_key_from_nlattrs(match, key_attrs, a, false); 1679 if (err) 1680 return err; 1681 1682 if (mask) { 1683 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); 1684 if (err) 1685 return err; 1686 1687 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { 1688 __be16 eth_type = 0; 1689 __be16 tci = 0; 1690 1691 if (!encap_valid) { 1692 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); 1693 return -EINVAL; 1694 } 1695 1696 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1697 if (a[OVS_KEY_ATTR_ETHERTYPE]) 1698 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1699 1700 if (eth_type == htons(0xffff)) { 1701 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1702 encap = a[OVS_KEY_ATTR_ENCAP]; 1703 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); 1704 } else { 1705 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", 1706 ntohs(eth_type)); 1707 return -EINVAL; 1708 } 1709 1710 if (a[OVS_KEY_ATTR_VLAN]) 1711 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1712 1713 if (!(tci & htons(VLAN_TAG_PRESENT))) { 1714 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); 1715 return -EINVAL; 1716 } 1717 } 1718 1719 err = ovs_key_from_nlattrs(match, mask_attrs, a, true); 1720 if (err) 1721 return err; 1722 } else { 1723 /* Populate exact match flow's key mask. */ 1724 if (match->mask) 1725 ovs_sw_flow_mask_set(match->mask, &match->range, 0xff); 1726 } 1727 1728 if (!ovs_match_validate(match, key_attrs, mask_attrs)) 1729 return -EINVAL; 1730 1731 return 0; 1732 } 1733 1734 /** 1735 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1736 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1737 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1738 * sequence. 1739 * 1740 * This parses a series of Netlink attributes that form a flow key, which must 1741 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1742 * get the metadata, that is, the parts of the flow key that cannot be 1743 * extracted from the packet itself. 1744 */ 1745 1746 int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, 1747 const struct nlattr *attr) 1748 { 1749 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 1750 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1751 u64 attrs = 0; 1752 int err; 1753 struct sw_flow_match match; 1754 1755 flow->key.phy.in_port = DP_MAX_PORTS; 1756 flow->key.phy.priority = 0; 1757 flow->key.phy.skb_mark = 0; 1758 memset(tun_key, 0, sizeof(flow->key.tun_key)); 1759 1760 err = parse_flow_nlattrs(attr, a, &attrs); 1761 if (err) 1762 return -EINVAL; 1763 1764 memset(&match, 0, sizeof(match)); 1765 match.key = &flow->key; 1766 1767 err = metadata_from_nlattrs(&match, &attrs, a, false); 1768 if (err) 1769 return err; 1770 1771 return 0; 1772 } 1773 1774 int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, 1775 const struct sw_flow_key *output, struct sk_buff *skb) 1776 { 1777 struct ovs_key_ethernet *eth_key; 1778 struct nlattr *nla, *encap; 1779 bool is_mask = (swkey != output); 1780 1781 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1782 goto nla_put_failure; 1783 1784 if ((swkey->tun_key.ipv4_dst || is_mask) && 1785 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) 1786 goto nla_put_failure; 1787 1788 if (swkey->phy.in_port == DP_MAX_PORTS) { 1789 if (is_mask && (output->phy.in_port == 0xffff)) 1790 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1791 goto nla_put_failure; 1792 } else { 1793 u16 upper_u16; 1794 upper_u16 = !is_mask ? 0 : 0xffff; 1795 1796 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1797 (upper_u16 << 16) | output->phy.in_port)) 1798 goto nla_put_failure; 1799 } 1800 1801 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1802 goto nla_put_failure; 1803 1804 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1805 if (!nla) 1806 goto nla_put_failure; 1807 1808 eth_key = nla_data(nla); 1809 memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); 1810 memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); 1811 1812 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1813 __be16 eth_type; 1814 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 1815 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1816 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 1817 goto nla_put_failure; 1818 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1819 if (!swkey->eth.tci) 1820 goto unencap; 1821 } else 1822 encap = NULL; 1823 1824 if (swkey->eth.type == htons(ETH_P_802_2)) { 1825 /* 1826 * Ethertype 802.2 is represented in the netlink with omitted 1827 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1828 * 0xffff in the mask attribute. Ethertype can also 1829 * be wildcarded. 1830 */ 1831 if (is_mask && output->eth.type) 1832 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1833 output->eth.type)) 1834 goto nla_put_failure; 1835 goto unencap; 1836 } 1837 1838 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1839 goto nla_put_failure; 1840 1841 if (swkey->eth.type == htons(ETH_P_IP)) { 1842 struct ovs_key_ipv4 *ipv4_key; 1843 1844 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1845 if (!nla) 1846 goto nla_put_failure; 1847 ipv4_key = nla_data(nla); 1848 ipv4_key->ipv4_src = output->ipv4.addr.src; 1849 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1850 ipv4_key->ipv4_proto = output->ip.proto; 1851 ipv4_key->ipv4_tos = output->ip.tos; 1852 ipv4_key->ipv4_ttl = output->ip.ttl; 1853 ipv4_key->ipv4_frag = output->ip.frag; 1854 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1855 struct ovs_key_ipv6 *ipv6_key; 1856 1857 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1858 if (!nla) 1859 goto nla_put_failure; 1860 ipv6_key = nla_data(nla); 1861 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1862 sizeof(ipv6_key->ipv6_src)); 1863 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1864 sizeof(ipv6_key->ipv6_dst)); 1865 ipv6_key->ipv6_label = output->ipv6.label; 1866 ipv6_key->ipv6_proto = output->ip.proto; 1867 ipv6_key->ipv6_tclass = output->ip.tos; 1868 ipv6_key->ipv6_hlimit = output->ip.ttl; 1869 ipv6_key->ipv6_frag = output->ip.frag; 1870 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1871 swkey->eth.type == htons(ETH_P_RARP)) { 1872 struct ovs_key_arp *arp_key; 1873 1874 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1875 if (!nla) 1876 goto nla_put_failure; 1877 arp_key = nla_data(nla); 1878 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1879 arp_key->arp_sip = output->ipv4.addr.src; 1880 arp_key->arp_tip = output->ipv4.addr.dst; 1881 arp_key->arp_op = htons(output->ip.proto); 1882 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); 1883 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); 1884 } 1885 1886 if ((swkey->eth.type == htons(ETH_P_IP) || 1887 swkey->eth.type == htons(ETH_P_IPV6)) && 1888 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1889 1890 if (swkey->ip.proto == IPPROTO_TCP) { 1891 struct ovs_key_tcp *tcp_key; 1892 1893 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1894 if (!nla) 1895 goto nla_put_failure; 1896 tcp_key = nla_data(nla); 1897 if (swkey->eth.type == htons(ETH_P_IP)) { 1898 tcp_key->tcp_src = output->ipv4.tp.src; 1899 tcp_key->tcp_dst = output->ipv4.tp.dst; 1900 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1901 tcp_key->tcp_src = output->ipv6.tp.src; 1902 tcp_key->tcp_dst = output->ipv6.tp.dst; 1903 } 1904 } else if (swkey->ip.proto == IPPROTO_UDP) { 1905 struct ovs_key_udp *udp_key; 1906 1907 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1908 if (!nla) 1909 goto nla_put_failure; 1910 udp_key = nla_data(nla); 1911 if (swkey->eth.type == htons(ETH_P_IP)) { 1912 udp_key->udp_src = output->ipv4.tp.src; 1913 udp_key->udp_dst = output->ipv4.tp.dst; 1914 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1915 udp_key->udp_src = output->ipv6.tp.src; 1916 udp_key->udp_dst = output->ipv6.tp.dst; 1917 } 1918 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1919 struct ovs_key_sctp *sctp_key; 1920 1921 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1922 if (!nla) 1923 goto nla_put_failure; 1924 sctp_key = nla_data(nla); 1925 if (swkey->eth.type == htons(ETH_P_IP)) { 1926 sctp_key->sctp_src = swkey->ipv4.tp.src; 1927 sctp_key->sctp_dst = swkey->ipv4.tp.dst; 1928 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1929 sctp_key->sctp_src = swkey->ipv6.tp.src; 1930 sctp_key->sctp_dst = swkey->ipv6.tp.dst; 1931 } 1932 } else if (swkey->eth.type == htons(ETH_P_IP) && 1933 swkey->ip.proto == IPPROTO_ICMP) { 1934 struct ovs_key_icmp *icmp_key; 1935 1936 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1937 if (!nla) 1938 goto nla_put_failure; 1939 icmp_key = nla_data(nla); 1940 icmp_key->icmp_type = ntohs(output->ipv4.tp.src); 1941 icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); 1942 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1943 swkey->ip.proto == IPPROTO_ICMPV6) { 1944 struct ovs_key_icmpv6 *icmpv6_key; 1945 1946 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1947 sizeof(*icmpv6_key)); 1948 if (!nla) 1949 goto nla_put_failure; 1950 icmpv6_key = nla_data(nla); 1951 icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); 1952 icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); 1953 1954 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1955 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1956 struct ovs_key_nd *nd_key; 1957 1958 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1959 if (!nla) 1960 goto nla_put_failure; 1961 nd_key = nla_data(nla); 1962 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1963 sizeof(nd_key->nd_target)); 1964 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); 1965 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); 1966 } 1967 } 1968 } 1969 1970 unencap: 1971 if (encap) 1972 nla_nest_end(skb, encap); 1973 1974 return 0; 1975 1976 nla_put_failure: 1977 return -EMSGSIZE; 1978 } 1979 1980 /* Initializes the flow module. 1981 * Returns zero if successful or a negative error code. */ 1982 int ovs_flow_init(void) 1983 { 1984 BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); 1985 BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); 1986 1987 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 1988 0, NULL); 1989 if (flow_cache == NULL) 1990 return -ENOMEM; 1991 1992 return 0; 1993 } 1994 1995 /* Uninitializes the flow module. */ 1996 void ovs_flow_exit(void) 1997 { 1998 kmem_cache_destroy(flow_cache); 1999 } 2000 2001 struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) 2002 { 2003 struct sw_flow_mask *mask; 2004 2005 mask = kmalloc(sizeof(*mask), GFP_KERNEL); 2006 if (mask) 2007 mask->ref_count = 0; 2008 2009 return mask; 2010 } 2011 2012 void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) 2013 { 2014 mask->ref_count++; 2015 } 2016 2017 void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) 2018 { 2019 if (!mask) 2020 return; 2021 2022 BUG_ON(!mask->ref_count); 2023 mask->ref_count--; 2024 2025 if (!mask->ref_count) { 2026 list_del_rcu(&mask->list); 2027 if (deferred) 2028 kfree_rcu(mask, rcu); 2029 else 2030 kfree(mask); 2031 } 2032 } 2033 2034 static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, 2035 const struct sw_flow_mask *b) 2036 { 2037 u8 *a_ = (u8 *)&a->key + a->range.start; 2038 u8 *b_ = (u8 *)&b->key + b->range.start; 2039 2040 return (a->range.end == b->range.end) 2041 && (a->range.start == b->range.start) 2042 && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); 2043 } 2044 2045 struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, 2046 const struct sw_flow_mask *mask) 2047 { 2048 struct list_head *ml; 2049 2050 list_for_each(ml, tbl->mask_list) { 2051 struct sw_flow_mask *m; 2052 m = container_of(ml, struct sw_flow_mask, list); 2053 if (ovs_sw_flow_mask_equal(mask, m)) 2054 return m; 2055 } 2056 2057 return NULL; 2058 } 2059 2060 /** 2061 * add a new mask into the mask list. 2062 * The caller needs to make sure that 'mask' is not the same 2063 * as any masks that are already on the list. 2064 */ 2065 void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) 2066 { 2067 list_add_rcu(&mask->list, tbl->mask_list); 2068 } 2069 2070 /** 2071 * Set 'range' fields in the mask to the value of 'val'. 2072 */ 2073 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, 2074 struct sw_flow_key_range *range, u8 val) 2075 { 2076 u8 *m = (u8 *)&mask->key + range->start; 2077 2078 mask->range = *range; 2079 memset(m, val, range_n_bytes(range)); 2080 } 2081