/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ndisc.h>

static struct kmem_cache *flow_cache;

static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
		struct sw_flow_key_range *range, u8 val);

static void update_range__(struct sw_flow_match *match,
			   size_t offset, size_t size, bool is_mask)
{
	struct sw_flow_key_range *range = NULL;
	size_t start = rounddown(offset, sizeof(long));
	size_t end = roundup(offset + size, sizeof(long));

	if (!is_mask)
		range = &match->range;
	else if (match->mask)
		range = &match->mask->range;

	if (!range)
		return;

	if (range->start == range->end) {
		range->start = start;
		range->end = end;
		return;
	}

	if (range->start > start)
		range->start = start;

	if (range->end < end)
		range->end = end;
}

#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field),  \
				     sizeof((match)->key->field), is_mask); \
		if (is_mask) {						    \
			if ((match)->mask)				    \
				(match)->mask->key.field = value;	    \
		} else {						    \
			(match)->key->field = value;			    \
		}							    \
	} while (0)

#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field),  \
				len, is_mask);				    \
		if (is_mask) {						    \
			if ((match)->mask)				    \
				memcpy(&(match)->mask->key.field, value_p, len);\
		} else {						    \
			memcpy(&(match)->key->field, value_p, len);	    \
		}							    \
	} while (0)

static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	return range->end - range->start;
}

void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	match->key = key;
	match->mask = mask;

	memset(key, 0, sizeof(*key));

	if (mask) {
		memset(&mask->key, 0, sizeof(mask->key));
		mask->range.start = mask->range.end = 0;
	}
}
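/* For illustration: both macros above update a flow match and widen the
 * long-aligned byte range that is later hashed and compared.  Roughly,
 *
 *	SW_FLOW_KEY_PUT(match, ip.proto, IPPROTO_TCP, false);
 *
 * stores IPPROTO_TCP in match->key->ip.proto and grows match->range so that
 * it covers the long-sized span containing ip.proto.  With is_mask == true
 * the value goes into match->mask->key instead (if a mask is attached) and
 * match->mask->range is grown.
 */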
static bool ovs_match_validate(const struct sw_flow_match *match,
			       u64 key_attrs, u64 mask_attrs)
{
	u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */

	/* The following mask attributes allowed only if they
	 * pass the validation tests. */
	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
			| (1 << OVS_KEY_ATTR_IPV6)
			| (1 << OVS_KEY_ATTR_TCP)
			| (1 << OVS_KEY_ATTR_UDP)
			| (1 << OVS_KEY_ATTR_SCTP)
			| (1 << OVS_KEY_ATTR_ICMP)
			| (1 << OVS_KEY_ATTR_ICMPV6)
			| (1 << OVS_KEY_ATTR_ARP)
			| (1 << OVS_KEY_ATTR_ND));

	/* Always allowed mask fields. */
	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
		       | (1 << OVS_KEY_ATTR_IN_PORT)
		       | (1 << OVS_KEY_ATTR_ETHERTYPE));

	/* Check key attributes. */
	if (match->key->eth.type == htons(ETH_P_ARP)
			|| match->key->eth.type == htons(ETH_P_RARP)) {
		key_expected |= 1 << OVS_KEY_ATTR_ARP;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
	}

	if (match->key->eth.type == htons(ETH_P_IP)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
			}

			if (match->key->ip.proto == IPPROTO_ICMP) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
			}
		}
	}

	if (match->key->eth.type == htons(ETH_P_IPV6)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
			}

			if (match->key->ip.proto == IPPROTO_ICMPV6) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;

				if (match->key->ipv6.tp.src ==
						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
				    match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
					key_expected |= 1 << OVS_KEY_ATTR_ND;
					if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
				}
			}
		}
	}
	if ((key_attrs & key_expected) != key_expected) {
		/* Key attributes check failed. */
		OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
				key_attrs, key_expected);
		return false;
	}

	if ((mask_attrs & mask_allowed) != mask_attrs) {
		/* Mask attributes check failed. */
		OVS_NLERR("Unexpected mask attributes (mask_attrs=%llx, mask_allowed=%llx).\n",
				mask_attrs, mask_allowed);
		return false;
	}

	return true;
}

static int check_header(struct sk_buff *skb, int len)
{
	if (unlikely(skb->len < len))
		return -EINVAL;
	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;
	return 0;
}

static bool arphdr_ok(struct sk_buff *skb)
{
	return pskb_may_pull(skb, skb_network_offset(skb) +
				  sizeof(struct arp_eth_header));
}

static int check_iphdr(struct sk_buff *skb)
{
	unsigned int nh_ofs = skb_network_offset(skb);
	unsigned int ip_len;
	int err;

	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	ip_len = ip_hdrlen(skb);
	if (unlikely(ip_len < sizeof(struct iphdr) ||
		     skb->len < nh_ofs + ip_len))
		return -EINVAL;

	skb_set_transport_header(skb, nh_ofs + ip_len);
	return 0;
}

static bool tcphdr_ok(struct sk_buff *skb)
{
	int th_ofs = skb_transport_offset(skb);
	int tcp_len;

	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
		return false;

	tcp_len = tcp_hdrlen(skb);
	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
		     skb->len < th_ofs + tcp_len))
		return false;

	return true;
}

static bool udphdr_ok(struct sk_buff *skb)
{
	return pskb_may_pull(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
}

static bool sctphdr_ok(struct sk_buff *skb)
{
	return pskb_may_pull(skb, skb_transport_offset(skb) +
				  sizeof(struct sctphdr));
}

static bool icmphdr_ok(struct sk_buff *skb)
{
	return pskb_may_pull(skb, skb_transport_offset(skb) +
				  sizeof(struct icmphdr));
}

u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
	struct timespec cur_ts;
	u64 cur_ms, idle_ms;

	ktime_get_ts(&cur_ts);
	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
		 cur_ts.tv_nsec / NSEC_PER_MSEC;

	return cur_ms - idle_ms;
}

static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
	unsigned int nh_ofs = skb_network_offset(skb);
	unsigned int nh_len;
	int payload_ofs;
	struct ipv6hdr *nh;
	uint8_t nexthdr;
	__be16 frag_off;
	int err;

	err = check_header(skb, nh_ofs + sizeof(*nh));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);
	nexthdr = nh->nexthdr;
	payload_ofs = (u8 *)(nh + 1) - skb->data;

	key->ip.proto = NEXTHDR_NONE;
	key->ip.tos = ipv6_get_dsfield(nh);
	key->ip.ttl = nh->hop_limit;
	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	key->ipv6.addr.src = nh->saddr;
	key->ipv6.addr.dst = nh->daddr;

	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
	if (unlikely(payload_ofs < 0))
		return -EINVAL;

	if (frag_off) {
		if (frag_off & htons(~0x7))
			key->ip.frag = OVS_FRAG_TYPE_LATER;
		else
			key->ip.frag = OVS_FRAG_TYPE_FIRST;
	}

	nh_len = payload_ofs - nh_ofs;
	skb_set_transport_header(skb, nh_ofs + nh_len);
	key->ip.proto = nexthdr;
	return nh_len;
}
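/* For illustration: parse_ipv6hdr() returns the combined length of the IPv6
 * header and any extension headers, or a negative errno.  A fragment whose
 * 13-bit offset field is non-zero is classified as OVS_FRAG_TYPE_LATER;
 * a fragment with offset zero (the first fragment) as OVS_FRAG_TYPE_FIRST;
 * a non-fragment leaves key->ip.frag untouched.
 */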
static bool icmp6hdr_ok(struct sk_buff *skb)
{
	return pskb_may_pull(skb, skb_transport_offset(skb) +
				  sizeof(struct icmp6hdr));
}

void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       const struct sw_flow_mask *mask)
{
	const long *m = (long *)((u8 *)&mask->key + mask->range.start);
	const long *s = (long *)((u8 *)src + mask->range.start);
	long *d = (long *)((u8 *)dst + mask->range.start);
	int i;

	/* The memory outside of the 'mask->range' is not set since
	 * further operations on 'dst' only use contents within
	 * 'mask->range'.
	 */
	for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
		*d++ = *s++ & *m++;
}

#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f

void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
	u8 tcp_flags = 0;

	if ((flow->key.eth.type == htons(ETH_P_IP) ||
	     flow->key.eth.type == htons(ETH_P_IPV6)) &&
	    flow->key.ip.proto == IPPROTO_TCP &&
	    likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
		u8 *tcp = (u8 *)tcp_hdr(skb);
		tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
	}

	spin_lock(&flow->lock);
	flow->used = jiffies;
	flow->packet_count++;
	flow->byte_count += skb->len;
	flow->tcp_flags |= tcp_flags;
	spin_unlock(&flow->lock);
}

struct sw_flow_actions *ovs_flow_actions_alloc(int size)
{
	struct sw_flow_actions *sfa;

	if (size > MAX_ACTIONS_BUFSIZE)
		return ERR_PTR(-EINVAL);

	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
	if (!sfa)
		return ERR_PTR(-ENOMEM);

	sfa->actions_len = 0;
	return sfa;
}

struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *flow;

	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&flow->lock);
	flow->sf_acts = NULL;
	flow->mask = NULL;

	return flow;
}

static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
{
	hash = jhash_1word(hash, table->hash_seed);
	return flex_array_get(table->buckets,
				(hash & (table->n_buckets - 1)));
}

static struct flex_array *alloc_buckets(unsigned int n_buckets)
{
	struct flex_array *buckets;
	int i, err;

	buckets = flex_array_alloc(sizeof(struct hlist_head),
				   n_buckets, GFP_KERNEL);
	if (!buckets)
		return NULL;

	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
	if (err) {
		flex_array_free(buckets);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++)
		INIT_HLIST_HEAD((struct hlist_head *)
					flex_array_get(buckets, i));

	return buckets;
}

static void free_buckets(struct flex_array *buckets)
{
	flex_array_free(buckets);
}

static struct flow_table *__flow_tbl_alloc(int new_size)
{
	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->buckets = alloc_buckets(new_size);

	if (!table->buckets) {
		kfree(table);
		return NULL;
	}
	table->n_buckets = new_size;
	table->count = 0;
	table->node_ver = 0;
	table->keep_flows = false;
	get_random_bytes(&table->hash_seed, sizeof(u32));
	table->mask_list = NULL;

	return table;
}
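/* For illustration: the flow table is a flex_array of hlist heads.
 * find_bucket() mixes a flow's hash with the per-table random hash_seed via
 * jhash_1word() and masks it down to the table size, so n_buckets must stay
 * a power of two (expansion below doubles it).
 */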
static void __flow_tbl_destroy(struct flow_table *table)
{
	int i;

	if (table->keep_flows)
		goto skip_flows;

	for (i = 0; i < table->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = flex_array_get(table->buckets, i);
		struct hlist_node *n;
		int ver = table->node_ver;

		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
			hlist_del(&flow->hash_node[ver]);
			ovs_flow_free(flow, false);
		}
	}

	BUG_ON(!list_empty(table->mask_list));
	kfree(table->mask_list);

skip_flows:
	free_buckets(table->buckets);
	kfree(table);
}

struct flow_table *ovs_flow_tbl_alloc(int new_size)
{
	struct flow_table *table = __flow_tbl_alloc(new_size);

	if (!table)
		return NULL;

	table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
	if (!table->mask_list) {
		table->keep_flows = true;
		__flow_tbl_destroy(table);
		return NULL;
	}
	INIT_LIST_HEAD(table->mask_list);

	return table;
}

static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct flow_table *table = container_of(rcu, struct flow_table, rcu);

	__flow_tbl_destroy(table);
}

void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
	if (!table)
		return;

	if (deferred)
		call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
	else
		__flow_tbl_destroy(table);
}

struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver;
	int i;

	ver = table->node_ver;
	while (*bucket < table->n_buckets) {
		i = 0;
		head = flex_array_get(table->buckets, *bucket);
		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
			if (i < *last) {
				i++;
				continue;
			}
			*last = i + 1;
			return flow;
		}
		(*bucket)++;
		*last = 0;
	}

	return NULL;
}

static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(table, flow->hash);
	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);

	table->count++;
}
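/* For illustration: each sw_flow carries two hlist_node slots,
 * hash_node[0] and hash_node[1].  When a table is rehashed or expanded, the
 * flows are linked into the new table under the opposite node_ver, so the
 * same flow can sit in both the old and the new table while readers are
 * still traversing the old one under RCU.  The old table is then destroyed
 * with keep_flows set, which leaves the flows themselves alive.
 */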
static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table. */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head;

		head = flex_array_get(old->buckets, i);

		hlist_for_each_entry(flow, head, hash_node[old_ver])
			__tbl_insert(new, flow);
	}

	new->mask_list = old->mask_list;
	old->keep_flows = true;
}

static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
{
	struct flow_table *new_table;

	new_table = __flow_tbl_alloc(n_buckets);
	if (!new_table)
		return ERR_PTR(-ENOMEM);

	flow_table_copy_flows(table, new_table);

	return new_table;
}

struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
{
	return __flow_tbl_rehash(table, table->n_buckets);
}

struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
{
	return __flow_tbl_rehash(table, table->n_buckets * 2);
}

static void __flow_free(struct sw_flow *flow)
{
	kfree((struct sw_flow_actions __force *)flow->sf_acts);
	kmem_cache_free(flow_cache, flow);
}

static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	__flow_free(flow);
}

void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	ovs_sw_flow_mask_del_ref(flow->mask, deferred);

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		__flow_free(flow);
}

/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	kfree_rcu(sf_acts, rcu);
}

static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	struct qtag_prefix {
		__be16 eth_type; /* ETH_P_8021Q */
		__be16 tci;
	};
	struct qtag_prefix *qp;

	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
		return 0;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
					 sizeof(__be16))))
		return -ENOMEM;

	qp = (struct qtag_prefix *) skb->data;
	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
	__skb_pull(skb, sizeof(struct qtag_prefix));

	return 0;
}

static __be16 parse_ethertype(struct sk_buff *skb)
{
	struct llc_snap_hdr {
		u8  dsap;  /* Always 0xAA */
		u8  ssap;  /* Always 0xAA */
		u8  ctrl;
		u8  oui[3];
		__be16 ethertype;
	};
	struct llc_snap_hdr *llc;
	__be16 proto;

	proto = *(__be16 *) skb->data;
	__skb_pull(skb, sizeof(__be16));

	if (ntohs(proto) >= ETH_P_802_3_MIN)
		return proto;

	if (skb->len < sizeof(struct llc_snap_hdr))
		return htons(ETH_P_802_2);

	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
		return htons(0);

	llc = (struct llc_snap_hdr *) skb->data;
	if (llc->dsap != LLC_SAP_SNAP ||
	    llc->ssap != LLC_SAP_SNAP ||
	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
		return htons(ETH_P_802_2);

	__skb_pull(skb, sizeof(struct llc_snap_hdr));

	if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
		return llc->ethertype;

	return htons(ETH_P_802_2);
}
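/* For illustration: parse_ethertype() distinguishes three cases.  A
 * length/type field of at least ETH_P_802_3_MIN (0x600) is an Ethernet II
 * EtherType and is returned as-is.  A smaller value is an 802.3 length, so
 * the frame is classified as ETH_P_802_2 unless a valid LLC/SNAP header
 * (DSAP/SSAP 0xAA, OUI 00:00:00) follows, in which case the
 * SNAP-encapsulated EtherType is used instead.  htons(0) is returned only
 * when the header cannot be pulled into the linear area; the caller treats
 * that as -ENOMEM.
 */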
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
			int nh_len)
{
	struct icmp6hdr *icmp = icmp6_hdr(skb);

	/* The ICMPv6 type and code fields use the 16-bit transport port
	 * fields, so we need to store them in 16-bit network byte order.
	 */
	key->ipv6.tp.src = htons(icmp->icmp6_type);
	key->ipv6.tp.dst = htons(icmp->icmp6_code);

	if (icmp->icmp6_code == 0 &&
	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
	     icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
		int icmp_len = skb->len - skb_transport_offset(skb);
		struct nd_msg *nd;
		int offset;

		/* In order to process neighbor discovery options, we need the
		 * entire packet.
		 */
		if (unlikely(icmp_len < sizeof(*nd)))
			return 0;

		if (unlikely(skb_linearize(skb)))
			return -ENOMEM;

		nd = (struct nd_msg *)skb_transport_header(skb);
		key->ipv6.nd.target = nd->target;

		icmp_len -= sizeof(*nd);
		offset = 0;
		while (icmp_len >= 8) {
			struct nd_opt_hdr *nd_opt =
				 (struct nd_opt_hdr *)(nd->opt + offset);
			int opt_len = nd_opt->nd_opt_len * 8;

			if (unlikely(!opt_len || opt_len > icmp_len))
				return 0;

			/* Store the link layer address if the appropriate
			 * option is provided.  It is considered an error if
			 * the same link layer option is specified twice.
			 */
			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
			    && opt_len == 8) {
				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
					goto invalid;
				memcpy(key->ipv6.nd.sll,
				       &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
				   && opt_len == 8) {
				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
					goto invalid;
				memcpy(key->ipv6.nd.tll,
				       &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
			}

			icmp_len -= opt_len;
			offset += opt_len;
		}
	}

	return 0;

invalid:
	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));

	return 0;
}

/**
 * ovs_flow_extract - extracts a flow key from an Ethernet frame.
 * @skb: sk_buff that contains the frame, with skb->data pointing to the
 * Ethernet header
 * @in_port: port number on which @skb was received.
 * @key: output flow key
 *
 * The caller must ensure that skb->len >= ETH_HLEN.
 *
 * Returns 0 if successful, otherwise a negative errno value.
 *
 * Initializes @skb header pointers as follows:
 *
 *    - skb->mac_header: the Ethernet header.
 *
 *    - skb->network_header: just past the Ethernet header, or just past the
 *      VLAN header, to the first byte of the Ethernet payload.
 *
 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 *      on output, then just past the IP header, if one is present and
 *      of a correct length, otherwise the same as skb->network_header.
 *      For other key->eth.type values it is left untouched.
 */
int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
{
	int error;
	struct ethhdr *eth;

	memset(key, 0, sizeof(*key));

	key->phy.priority = skb->priority;
	if (OVS_CB(skb)->tun_key)
		memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
	key->phy.in_port = in_port;
	key->phy.skb_mark = skb->mark;

	skb_reset_mac_header(skb);
	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
	 * header in the linear data area.
	 */
	eth = eth_hdr(skb);
	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);

	__skb_pull(skb, 2 * ETH_ALEN);
	/* We are going to push all headers that we pull, so no need to
	 * update skb->csum here.
	 */

	if (vlan_tx_tag_present(skb))
		key->eth.tci = htons(skb->vlan_tci);
	else if (eth->h_proto == htons(ETH_P_8021Q))
		if (unlikely(parse_vlan(skb, key)))
			return -ENOMEM;

	key->eth.type = parse_ethertype(skb);
	if (unlikely(key->eth.type == htons(0)))
		return -ENOMEM;

	skb_reset_network_header(skb);
	__skb_push(skb, skb->data - skb_mac_header(skb));

	/* Network layer. */
	if (key->eth.type == htons(ETH_P_IP)) {
		struct iphdr *nh;
		__be16 offset;

		error = check_iphdr(skb);
		if (unlikely(error)) {
			if (error == -EINVAL) {
				skb->transport_header = skb->network_header;
				error = 0;
			}
			return error;
		}

		nh = ip_hdr(skb);
		key->ipv4.addr.src = nh->saddr;
		key->ipv4.addr.dst = nh->daddr;

		key->ip.proto = nh->protocol;
		key->ip.tos = nh->tos;
		key->ip.ttl = nh->ttl;

		offset = nh->frag_off & htons(IP_OFFSET);
		if (offset) {
			key->ip.frag = OVS_FRAG_TYPE_LATER;
			return 0;
		}
		if (nh->frag_off & htons(IP_MF) ||
			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;

		/* Transport layer. */
		if (key->ip.proto == IPPROTO_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->ipv4.tp.src = tcp->source;
				key->ipv4.tp.dst = tcp->dest;
			}
		} else if (key->ip.proto == IPPROTO_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->ipv4.tp.src = udp->source;
				key->ipv4.tp.dst = udp->dest;
			}
		} else if (key->ip.proto == IPPROTO_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->ipv4.tp.src = sctp->source;
				key->ipv4.tp.dst = sctp->dest;
			}
		} else if (key->ip.proto == IPPROTO_ICMP) {
			if (icmphdr_ok(skb)) {
				struct icmphdr *icmp = icmp_hdr(skb);
				/* The ICMP type and code fields use the 16-bit
				 * transport port fields, so we need to store
				 * them in 16-bit network byte order. */
				key->ipv4.tp.src = htons(icmp->type);
				key->ipv4.tp.dst = htons(icmp->code);
			}
		}

	} else if ((key->eth.type == htons(ETH_P_ARP) ||
		   key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
		struct arp_eth_header *arp;

		arp = (struct arp_eth_header *)skb_network_header(skb);

		if (arp->ar_hrd == htons(ARPHRD_ETHER)
				&& arp->ar_pro == htons(ETH_P_IP)
				&& arp->ar_hln == ETH_ALEN
				&& arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff)
				key->ip.proto = ntohs(arp->ar_op);
			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
			memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
			memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
		}
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		int nh_len;	/* IPv6 Header + Extensions */

		nh_len = parse_ipv6hdr(skb, key);
		if (unlikely(nh_len < 0)) {
			if (nh_len == -EINVAL) {
				skb->transport_header = skb->network_header;
				error = 0;
			} else {
				error = nh_len;
			}
			return error;
		}

		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
			return 0;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;

		/* Transport layer. */
		if (key->ip.proto == NEXTHDR_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->ipv6.tp.src = tcp->source;
				key->ipv6.tp.dst = tcp->dest;
			}
		} else if (key->ip.proto == NEXTHDR_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->ipv6.tp.src = udp->source;
				key->ipv6.tp.dst = udp->dest;
			}
		} else if (key->ip.proto == NEXTHDR_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->ipv6.tp.src = sctp->source;
				key->ipv6.tp.dst = sctp->dest;
			}
		} else if (key->ip.proto == NEXTHDR_ICMP) {
			if (icmp6hdr_ok(skb)) {
				error = parse_icmpv6(skb, key, nh_len);
				if (error)
					return error;
			}
		}
	}

	return 0;
}
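/* For illustration: only the bytes covered by a mask's
 * [range.start, range.end) are hashed and compared.  The range is
 * long-aligned by update_range__(), and flow_key_start() below lets flows
 * without a tunnel key skip the leading tun_key bytes entirely.
 */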
static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
			 int key_end)
{
	u32 *hash_key = (u32 *)((u8 *)key + key_start);
	int hash_u32s = (key_end - key_start) >> 2;

	/* Make sure the number of hash bytes is a multiple of u32. */
	BUILD_BUG_ON(sizeof(long) % sizeof(u32));

	return jhash2(hash_key, hash_u32s, 0);
}

static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_key.ipv4_dst)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
					  sizeof(long));
}

static bool __cmp_key(const struct sw_flow_key *key1,
		const struct sw_flow_key *key2, int key_start, int key_end)
{
	const long *cp1 = (long *)((u8 *)key1 + key_start);
	const long *cp2 = (long *)((u8 *)key2 + key_start);
	long diffs = 0;
	int i;

	for (i = key_start; i < key_end; i += sizeof(long))
		diffs |= *cp1++ ^ *cp2++;

	return diffs == 0;
}

static bool __flow_cmp_masked_key(const struct sw_flow *flow,
		const struct sw_flow_key *key, int key_start, int key_end)
{
	return __cmp_key(&flow->key, key, key_start, key_end);
}

static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
		const struct sw_flow_key *key, int key_start, int key_end)
{
	return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
}

bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
		const struct sw_flow_key *key, int key_end)
{
	int key_start;
	key_start = flow_key_start(key);

	return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
}

struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
				       struct sw_flow_match *match)
{
	struct sw_flow_key *unmasked = match->key;
	int key_end = match->range.end;
	struct sw_flow *flow;

	flow = ovs_flow_lookup(table, unmasked);
	if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
		flow = NULL;

	return flow;
}

static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
				    const struct sw_flow_key *unmasked,
				    struct sw_flow_mask *mask)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int key_start = mask->range.start;
	int key_end = mask->range.end;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_key_mask(&masked_key, unmasked, mask);
	hash = ovs_flow_hash(&masked_key, key_start, key_end);
	head = find_bucket(table, hash);
	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
		if (flow->mask == mask &&
		    __flow_cmp_masked_key(flow, &masked_key,
					  key_start, key_end))
			return flow;
	}
	return NULL;
}

struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
				const struct sw_flow_key *key)
{
	struct sw_flow *flow = NULL;
	struct sw_flow_mask *mask;

	list_for_each_entry_rcu(mask, tbl->mask_list, list) {
		flow = ovs_masked_flow_lookup(tbl, key, mask);
		if (flow)	/* Found */
			break;
	}

	return flow;
}

void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
{
	flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
			flow->mask->range.end);
	__tbl_insert(table, flow);
}

void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
{
	BUG_ON(table->count == 0);
	hlist_del_rcu(&flow->hash_node[table->node_ver]);
	table->count--;
}
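/* For illustration: a lookup walks the table's mask list in order.  For
 * each mask it computes
 *
 *	ovs_flow_key_mask(&masked, key, mask);
 *	hash = ovs_flow_hash(&masked, mask->range.start, mask->range.end);
 *
 * and then searches the matching bucket for a flow installed with that same
 * mask whose masked key bytes compare equal.  The cost therefore grows with
 * the number of distinct masks rather than with the number of flows.
 */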
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP] = -1,
	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
	[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
	[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
	[OVS_KEY_ATTR_TUNNEL] = -1,
};

static bool is_all_zero(const u8 *fp, size_t size)
{
	int i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}

static int __parse_flow_nlattrs(const struct nlattr *attr,
				const struct nlattr *a[],
				u64 *attrsp, bool nz)
{
	const struct nlattr *nla;
	u32 attrs;
	int rem;

	attrs = *attrsp;
	nla_for_each_nested(nla, attr, rem) {
		u16 type = nla_type(nla);
		int expected_len;

		if (type > OVS_KEY_ATTR_MAX) {
			OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
				  type, OVS_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (attrs & (1 << type)) {
			OVS_NLERR("Duplicate key attribute (type %d).\n", type);
			return -EINVAL;
		}

		expected_len = ovs_key_lens[type];
		if (nla_len(nla) != expected_len && expected_len != -1) {
			OVS_NLERR("Key attribute has unexpected length (type=%d"
				  ", length=%d, expected=%d).\n", type,
				  nla_len(nla), expected_len);
			return -EINVAL;
		}

		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
			attrs |= 1 << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR("Message has %d unknown bytes.\n", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}

static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp)
{
	return __parse_flow_nlattrs(attr, a, attrsp, true);
}

static int parse_flow_nlattrs(const struct nlattr *attr,
			      const struct nlattr *a[], u64 *attrsp)
{
	return __parse_flow_nlattrs(attr, a, attrsp, false);
}
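/* For illustration: the only difference between parse_flow_nlattrs() and
 * parse_flow_mask_nlattrs() is the 'nz' flag.  When parsing a mask, an
 * attribute whose payload is all zeroes is simply not recorded in 'a[]' or
 * 'attrs', i.e. an all-zero mask attribute is equivalent to omitting it
 * (the whole field is treated as don't-care).
 */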
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
			     struct sw_flow_match *match, bool is_mask)
{
	struct nlattr *a;
	int rem;
	bool ttl = false;
	__be16 tun_flags = 0;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
		};

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
				  "length (type=%d, length=%d, expected=%d).\n",
				  type, nla_len(a), ovs_tunnel_key_lens[type]);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		default:
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);

	if (rem > 0) {
		OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
		return -EINVAL;
	}

	if (!is_mask) {
		if (!match->key->tun_key.ipv4_dst) {
			OVS_NLERR("IPv4 tunnel destination address is zero.\n");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR("IPv4 tunnel TTL not specified.\n");
			return -EINVAL;
		}
	}

	return 0;
}

int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
			   const struct ovs_key_ipv4_tunnel *tun_key,
			   const struct ovs_key_ipv4_tunnel *output)
{
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
		return -EMSGSIZE;
	if (output->ipv4_src &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
		return -EMSGSIZE;
	if (output->ipv4_dst &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
		return -EMSGSIZE;
	if (output->ipv4_tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
				 const struct nlattr **a, bool is_mask)
{
	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask)
			in_port = 0xffffffff; /* Always exact match in_port. */
		else if (in_port >= DP_MAX_PORTS)
			return -EINVAL;

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
					     is_mask))
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}
	return 0;
}

static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
				const struct nlattr **a, bool is_mask)
{
	int err;
	u64 orig_attrs = attrs;

	err = metadata_from_nlattrs(match, &attrs, a, is_mask);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
	}

	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
		__be16 tci;

		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		if (!(tci & htons(VLAN_TAG_PRESENT))) {
			if (is_mask)
				OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
			else
				OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");

			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	} else if (!is_mask)
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
		__be16 eth_type;

		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (is_mask) {
			/* Always exact match EtherType. */
			eth_type = htons(0xffff);
		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
			OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
					ntohs(eth_type), ETH_P_802_3_MIN);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
				ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
				ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				ipv6_key->ipv6_src,
				sizeof(match->key->ipv6.addr.src),
				is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				ipv6_key->ipv6_dst,
				sizeof(match->key->ipv6.addr.dst),
				is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
				  arp_key->arp_op);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					tcp_key->tcp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					tcp_key->tcp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}
	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					udp_key->udp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					udp_key->udp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					sctp_key->sctp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					sctp_key->sctp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, ipv6.tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	if (attrs != 0)
		return -EINVAL;

	return 0;
}
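/* For illustration: ovs_key_from_nlattrs() is invoked twice per flow setup,
 * once with is_mask == false for the key and once with is_mask == true for
 * the mask.  In the key pass, a missing OVS_KEY_ATTR_VLAN forces an
 * exact-match TCI mask of 0xffff and a missing OVS_KEY_ATTR_ETHERTYPE
 * defaults the key's EtherType to ETH_P_802_2; in the mask pass, missing
 * attributes are simply left zero, i.e. wildcarded.
 */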
/**
 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except when the mask
 * does not include any don't-care bits.
 * @match: receives the extracted flow match information.
 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should be those of the packet that triggered the
 * creation of this flow.
 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute specifies the mask field of the wildcarded flow.
 */
int ovs_match_from_nlattrs(struct sw_flow_match *match,
			   const struct nlattr *key,
			   const struct nlattr *mask)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(key, a, &key_attrs);
	if (err)
		return err;

	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR("Invalid VLAN frame.\n");
			return -EINVAL;
		}

		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
				return -EINVAL;
			}
		} else {
			OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
	if (err)
		return err;

	if (mask) {
		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
		if (err)
			return err;

		if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
				return -EINVAL;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
			} else {
				OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
						ntohs(eth_type));
				return -EINVAL;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
				return -EINVAL;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
		if (err)
			return err;
	} else {
		/* Populate exact match flow's key mask. */
		if (match->mask)
			ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
	}

	if (!ovs_match_validate(match, key_attrs, mask_attrs))
		return -EINVAL;

	return 0;
}

/**
 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 */
int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
				   const struct nlattr *attr)
{
	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	u64 attrs = 0;
	int err;
	struct sw_flow_match match;

	flow->key.phy.in_port = DP_MAX_PORTS;
	flow->key.phy.priority = 0;
	flow->key.phy.skb_mark = 0;
	memset(tun_key, 0, sizeof(flow->key.tun_key));

	err = parse_flow_nlattrs(attr, a, &attrs);
	if (err)
		return -EINVAL;

	memset(&match, 0, sizeof(match));
	match.key = &flow->key;

	err = metadata_from_nlattrs(&match, &attrs, a, false);
	if (err)
		return err;

	return 0;
}

int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
			const struct sw_flow_key *output, struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla, *encap;
	bool is_mask = (swkey != output);

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	if ((swkey->tun_key.ipv4_dst || is_mask) &&
	    ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
		goto nla_put_failure;

	if (swkey->phy.in_port == DP_MAX_PORTS) {
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
	if (!nla)
		goto nla_put_failure;

	eth_key = nla_data(nla);
	memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
	memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);

	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
		__be16 eth_type;
		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
			goto nla_put_failure;
		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
		if (!swkey->eth.tci)
			goto unencap;
	} else
		encap = NULL;

	if (swkey->eth.type == htons(ETH_P_802_2)) {
		/*
		 * Ethertype 802.2 is represented in the netlink with omitted
		 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
		 * 0xffff in the mask attribute.  Ethertype can also
		 * be wildcarded.
		 */
		if (is_mask && output->eth.type)
			if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
						output->eth.type))
				goto nla_put_failure;
		goto unencap;
	}

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
				sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
				sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		arp_key->arp_op = htons(output->ip.proto);
		memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
		memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
	}

	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			if (swkey->eth.type == htons(ETH_P_IP)) {
				tcp_key->tcp_src = output->ipv4.tp.src;
				tcp_key->tcp_dst = output->ipv4.tp.dst;
			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
				tcp_key->tcp_src = output->ipv6.tp.src;
				tcp_key->tcp_dst = output->ipv6.tp.dst;
			}
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			if (swkey->eth.type == htons(ETH_P_IP)) {
				udp_key->udp_src = output->ipv4.tp.src;
				udp_key->udp_dst = output->ipv4.tp.dst;
			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
				udp_key->udp_src = output->ipv6.tp.src;
				udp_key->udp_dst = output->ipv6.tp.dst;
			}
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			/* Use 'output' like the other protocols so that mask
			 * dumps report the masked ports rather than the key's. */
			if (swkey->eth.type == htons(ETH_P_IP)) {
				sctp_key->sctp_src = output->ipv4.tp.src;
				sctp_key->sctp_dst = output->ipv4.tp.dst;
			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
				sctp_key->sctp_src = output->ipv6.tp.src;
				sctp_key->sctp_dst = output->ipv6.tp.dst;
			}
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
			icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
						sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);

			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
							sizeof(nd_key->nd_target));
				memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
				memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
			}
		}
	}

unencap:
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
					0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	return 0;
}
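/* For illustration: the two BUILD_BUG_ONs in ovs_flow_init() are what make
 * the long-at-a-time loops in ovs_flow_key_mask() and __cmp_key() safe:
 * struct sw_flow_key must be sized and aligned to a multiple of
 * sizeof(long), so a mask range rounded to long boundaries never reads past
 * the structure.
 */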
/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}

struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 0;

	return mask;
}

void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
{
	mask->ref_count++;
}

void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
{
	if (!mask)
		return;

	BUG_ON(!mask->ref_count);
	mask->ref_count--;

	if (!mask->ref_count) {
		list_del_rcu(&mask->list);
		if (deferred)
			kfree_rcu(mask, rcu);
		else
			kfree(mask);
	}
}

static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
		const struct sw_flow_mask *b)
{
	u8 *a_ = (u8 *)&a->key + a->range.start;
	u8 *b_ = (u8 *)&b->key + b->range.start;

	return  (a->range.end == b->range.end)
		&& (a->range.start == b->range.start)
		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct list_head *ml;

	list_for_each(ml, tbl->mask_list) {
		struct sw_flow_mask *m;
		m = container_of(ml, struct sw_flow_mask, list);
		if (ovs_sw_flow_mask_equal(mask, m))
			return m;
	}

	return NULL;
}

/**
 * add a new mask into the mask list.
 * The caller needs to make sure that 'mask' is not the same
 * as any masks that are already on the list.
 */
void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
{
	list_add_rcu(&mask->list, tbl->mask_list);
}

/**
 * Set 'range' fields in the mask to the value of 'val'.
 */
static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
		struct sw_flow_key_range *range, u8 val)
{
	u8 *m = (u8 *)&mask->key + range->start;

	mask->range = *range;
	memset(m, val, range_n_bytes(range));
}