1 /* 2 * Copyright (c) 2007-2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #include "flow.h" 20 #include "datapath.h" 21 #include <linux/uaccess.h> 22 #include <linux/netdevice.h> 23 #include <linux/etherdevice.h> 24 #include <linux/if_ether.h> 25 #include <linux/if_vlan.h> 26 #include <net/llc_pdu.h> 27 #include <linux/kernel.h> 28 #include <linux/jhash.h> 29 #include <linux/jiffies.h> 30 #include <linux/llc.h> 31 #include <linux/module.h> 32 #include <linux/in.h> 33 #include <linux/rcupdate.h> 34 #include <linux/if_arp.h> 35 #include <linux/ip.h> 36 #include <linux/ipv6.h> 37 #include <linux/sctp.h> 38 #include <linux/tcp.h> 39 #include <linux/udp.h> 40 #include <linux/icmp.h> 41 #include <linux/icmpv6.h> 42 #include <linux/rculist.h> 43 #include <net/ip.h> 44 #include <net/ipv6.h> 45 #include <net/ndisc.h> 46 47 #include "flow_netlink.h" 48 49 static void update_range__(struct sw_flow_match *match, 50 size_t offset, size_t size, bool is_mask) 51 { 52 struct sw_flow_key_range *range = NULL; 53 size_t start = rounddown(offset, sizeof(long)); 54 size_t end = roundup(offset + size, sizeof(long)); 55 56 if (!is_mask) 57 range = &match->range; 58 else if (match->mask) 59 range = &match->mask->range; 60 61 if (!range) 62 return; 63 64 if (range->start == range->end) { 65 range->start = start; 66 range->end = end; 67 
return; 68 } 69 70 if (range->start > start) 71 range->start = start; 72 73 if (range->end < end) 74 range->end = end; 75 } 76 77 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 78 do { \ 79 update_range__(match, offsetof(struct sw_flow_key, field), \ 80 sizeof((match)->key->field), is_mask); \ 81 if (is_mask) { \ 82 if ((match)->mask) \ 83 (match)->mask->key.field = value; \ 84 } else { \ 85 (match)->key->field = value; \ 86 } \ 87 } while (0) 88 89 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 90 do { \ 91 update_range__(match, offsetof(struct sw_flow_key, field), \ 92 len, is_mask); \ 93 if (is_mask) { \ 94 if ((match)->mask) \ 95 memcpy(&(match)->mask->key.field, value_p, len);\ 96 } else { \ 97 memcpy(&(match)->key->field, value_p, len); \ 98 } \ 99 } while (0) 100 101 static u16 range_n_bytes(const struct sw_flow_key_range *range) 102 { 103 return range->end - range->start; 104 } 105 106 static bool match_validate(const struct sw_flow_match *match, 107 u64 key_attrs, u64 mask_attrs) 108 { 109 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 110 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 111 112 /* The following mask attributes allowed only if they 113 * pass the validation tests. */ 114 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 115 | (1 << OVS_KEY_ATTR_IPV6) 116 | (1 << OVS_KEY_ATTR_TCP) 117 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 118 | (1 << OVS_KEY_ATTR_UDP) 119 | (1 << OVS_KEY_ATTR_SCTP) 120 | (1 << OVS_KEY_ATTR_ICMP) 121 | (1 << OVS_KEY_ATTR_ICMPV6) 122 | (1 << OVS_KEY_ATTR_ARP) 123 | (1 << OVS_KEY_ATTR_ND)); 124 125 /* Always allowed mask fields. */ 126 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 127 | (1 << OVS_KEY_ATTR_IN_PORT) 128 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 129 130 /* Check key attributes. 
*/ 131 if (match->key->eth.type == htons(ETH_P_ARP) 132 || match->key->eth.type == htons(ETH_P_RARP)) { 133 key_expected |= 1 << OVS_KEY_ATTR_ARP; 134 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 135 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 136 } 137 138 if (match->key->eth.type == htons(ETH_P_IP)) { 139 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 140 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 141 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 142 143 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 144 if (match->key->ip.proto == IPPROTO_UDP) { 145 key_expected |= 1 << OVS_KEY_ATTR_UDP; 146 if (match->mask && (match->mask->key.ip.proto == 0xff)) 147 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 148 } 149 150 if (match->key->ip.proto == IPPROTO_SCTP) { 151 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 152 if (match->mask && (match->mask->key.ip.proto == 0xff)) 153 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 154 } 155 156 if (match->key->ip.proto == IPPROTO_TCP) { 157 key_expected |= 1 << OVS_KEY_ATTR_TCP; 158 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 159 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 160 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 161 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 162 } 163 } 164 165 if (match->key->ip.proto == IPPROTO_ICMP) { 166 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 167 if (match->mask && (match->mask->key.ip.proto == 0xff)) 168 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 169 } 170 } 171 } 172 173 if (match->key->eth.type == htons(ETH_P_IPV6)) { 174 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 175 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 176 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 177 178 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 179 if (match->key->ip.proto == IPPROTO_UDP) { 180 key_expected |= 1 << OVS_KEY_ATTR_UDP; 181 if (match->mask && (match->mask->key.ip.proto == 0xff)) 182 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 183 } 184 185 if (match->key->ip.proto == IPPROTO_SCTP) { 186 
key_expected |= 1 << OVS_KEY_ATTR_SCTP; 187 if (match->mask && (match->mask->key.ip.proto == 0xff)) 188 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 189 } 190 191 if (match->key->ip.proto == IPPROTO_TCP) { 192 key_expected |= 1 << OVS_KEY_ATTR_TCP; 193 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 194 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 195 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 196 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 197 } 198 } 199 200 if (match->key->ip.proto == IPPROTO_ICMPV6) { 201 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 202 if (match->mask && (match->mask->key.ip.proto == 0xff)) 203 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 204 205 if (match->key->ipv6.tp.src == 206 htons(NDISC_NEIGHBOUR_SOLICITATION) || 207 match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 208 key_expected |= 1 << OVS_KEY_ATTR_ND; 209 if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) 210 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 211 } 212 } 213 } 214 } 215 216 if ((key_attrs & key_expected) != key_expected) { 217 /* Key attributes check failed. */ 218 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 219 key_attrs, key_expected); 220 return false; 221 } 222 223 if ((mask_attrs & mask_allowed) != mask_attrs) { 224 /* Mask attributes check failed. */ 225 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 226 mask_attrs, mask_allowed); 227 return false; 228 } 229 230 return true; 231 } 232 233 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ 234 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 235 [OVS_KEY_ATTR_ENCAP] = -1, 236 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 237 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 238 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 239 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 240 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 241 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 242 [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 243 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 244 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 245 [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), 246 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 247 [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 248 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 249 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 250 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 251 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 252 [OVS_KEY_ATTR_TUNNEL] = -1, 253 }; 254 255 static bool is_all_zero(const u8 *fp, size_t size) 256 { 257 int i; 258 259 if (!fp) 260 return false; 261 262 for (i = 0; i < size; i++) 263 if (fp[i]) 264 return false; 265 266 return true; 267 } 268 269 static bool is_all_set(const u8 *fp, size_t size) 270 { 271 int i; 272 273 if (!fp) 274 return false; 275 276 for (i = 0; i < size; i++) 277 if (fp[i] != 0xff) 278 return false; 279 280 return true; 281 } 282 283 static int __parse_flow_nlattrs(const struct nlattr *attr, 284 const struct nlattr *a[], 285 u64 *attrsp, bool nz) 286 { 287 const struct nlattr *nla; 288 u64 attrs; 289 int rem; 290 291 attrs = *attrsp; 292 nla_for_each_nested(nla, attr, rem) { 293 u16 type = nla_type(nla); 294 int expected_len; 295 296 if (type > OVS_KEY_ATTR_MAX) { 297 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 298 type, OVS_KEY_ATTR_MAX); 299 return -EINVAL; 300 } 301 302 if (attrs & (1 << type)) { 303 OVS_NLERR("Duplicate key attribute (type %d).\n", type); 304 return -EINVAL; 305 } 306 307 expected_len = ovs_key_lens[type]; 308 if 
(nla_len(nla) != expected_len && expected_len != -1) { 309 OVS_NLERR("Key attribute has unexpected length (type=%d" 310 ", length=%d, expected=%d).\n", type, 311 nla_len(nla), expected_len); 312 return -EINVAL; 313 } 314 315 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 316 attrs |= 1 << type; 317 a[type] = nla; 318 } 319 } 320 if (rem) { 321 OVS_NLERR("Message has %d unknown bytes.\n", rem); 322 return -EINVAL; 323 } 324 325 *attrsp = attrs; 326 return 0; 327 } 328 329 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 330 const struct nlattr *a[], u64 *attrsp) 331 { 332 return __parse_flow_nlattrs(attr, a, attrsp, true); 333 } 334 335 static int parse_flow_nlattrs(const struct nlattr *attr, 336 const struct nlattr *a[], u64 *attrsp) 337 { 338 return __parse_flow_nlattrs(attr, a, attrsp, false); 339 } 340 341 static int ipv4_tun_from_nlattr(const struct nlattr *attr, 342 struct sw_flow_match *match, bool is_mask) 343 { 344 struct nlattr *a; 345 int rem; 346 bool ttl = false; 347 __be16 tun_flags = 0; 348 349 nla_for_each_nested(a, attr, rem) { 350 int type = nla_type(a); 351 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 352 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 353 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 354 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 355 [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 356 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 357 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 358 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 359 }; 360 361 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 362 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 363 type, OVS_TUNNEL_KEY_ATTR_MAX); 364 return -EINVAL; 365 } 366 367 if (ovs_tunnel_key_lens[type] != nla_len(a)) { 368 OVS_NLERR("IPv4 tunnel attribute type has unexpected " 369 " length (type=%d, length=%d, expected=%d).\n", 370 type, nla_len(a), ovs_tunnel_key_lens[type]); 371 return -EINVAL; 372 } 373 374 switch (type) { 375 case OVS_TUNNEL_KEY_ATTR_ID: 376 SW_FLOW_KEY_PUT(match, 
tun_key.tun_id, 377 nla_get_be64(a), is_mask); 378 tun_flags |= TUNNEL_KEY; 379 break; 380 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 381 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 382 nla_get_be32(a), is_mask); 383 break; 384 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 385 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 386 nla_get_be32(a), is_mask); 387 break; 388 case OVS_TUNNEL_KEY_ATTR_TOS: 389 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 390 nla_get_u8(a), is_mask); 391 break; 392 case OVS_TUNNEL_KEY_ATTR_TTL: 393 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 394 nla_get_u8(a), is_mask); 395 ttl = true; 396 break; 397 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 398 tun_flags |= TUNNEL_DONT_FRAGMENT; 399 break; 400 case OVS_TUNNEL_KEY_ATTR_CSUM: 401 tun_flags |= TUNNEL_CSUM; 402 break; 403 default: 404 return -EINVAL; 405 } 406 } 407 408 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 409 410 if (rem > 0) { 411 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); 412 return -EINVAL; 413 } 414 415 if (!is_mask) { 416 if (!match->key->tun_key.ipv4_dst) { 417 OVS_NLERR("IPv4 tunnel destination address is zero.\n"); 418 return -EINVAL; 419 } 420 421 if (!ttl) { 422 OVS_NLERR("IPv4 tunnel TTL not specified.\n"); 423 return -EINVAL; 424 } 425 } 426 427 return 0; 428 } 429 430 static int ipv4_tun_to_nlattr(struct sk_buff *skb, 431 const struct ovs_key_ipv4_tunnel *tun_key, 432 const struct ovs_key_ipv4_tunnel *output) 433 { 434 struct nlattr *nla; 435 436 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 437 if (!nla) 438 return -EMSGSIZE; 439 440 if (output->tun_flags & TUNNEL_KEY && 441 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 442 return -EMSGSIZE; 443 if (output->ipv4_src && 444 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 445 return -EMSGSIZE; 446 if (output->ipv4_dst && 447 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 448 return -EMSGSIZE; 449 if (output->ipv4_tos && 450 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, 
output->ipv4_tos)) 451 return -EMSGSIZE; 452 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 453 return -EMSGSIZE; 454 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 455 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 456 return -EMSGSIZE; 457 if ((output->tun_flags & TUNNEL_CSUM) && 458 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 459 return -EMSGSIZE; 460 461 nla_nest_end(skb, nla); 462 return 0; 463 } 464 465 466 static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 467 const struct nlattr **a, bool is_mask) 468 { 469 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 470 SW_FLOW_KEY_PUT(match, phy.priority, 471 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 472 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 473 } 474 475 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 476 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 477 478 if (is_mask) 479 in_port = 0xffffffff; /* Always exact match in_port. */ 480 else if (in_port >= DP_MAX_PORTS) 481 return -EINVAL; 482 483 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 484 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 485 } else if (!is_mask) { 486 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 487 } 488 489 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 490 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 491 492 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 493 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 494 } 495 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 496 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 497 is_mask)) 498 return -EINVAL; 499 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 500 } 501 return 0; 502 } 503 504 static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, 505 u64 attrs, const struct nlattr **a, 506 bool is_mask) 507 { 508 int err; 509 u64 orig_attrs = attrs; 510 511 err = metadata_from_nlattrs(match, &attrs, a, is_mask); 512 if (err) 513 return err; 514 515 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 516 const 
struct ovs_key_ethernet *eth_key; 517 518 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 519 SW_FLOW_KEY_MEMCPY(match, eth.src, 520 eth_key->eth_src, ETH_ALEN, is_mask); 521 SW_FLOW_KEY_MEMCPY(match, eth.dst, 522 eth_key->eth_dst, ETH_ALEN, is_mask); 523 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 524 } 525 526 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 527 __be16 tci; 528 529 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 530 if (!(tci & htons(VLAN_TAG_PRESENT))) { 531 if (is_mask) 532 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); 533 else 534 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); 535 536 return -EINVAL; 537 } 538 539 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 540 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 541 } else if (!is_mask) 542 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 543 544 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 545 __be16 eth_type; 546 547 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 548 if (is_mask) { 549 /* Always exact match EtherType. 
*/ 550 eth_type = htons(0xffff); 551 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 552 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", 553 ntohs(eth_type), ETH_P_802_3_MIN); 554 return -EINVAL; 555 } 556 557 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 558 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 559 } else if (!is_mask) { 560 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 561 } 562 563 if (is_mask && exact_5tuple) { 564 if (match->mask->key.eth.type != htons(0xffff)) 565 *exact_5tuple = false; 566 } 567 568 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 569 const struct ovs_key_ipv4 *ipv4_key; 570 571 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 572 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 573 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", 574 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 575 return -EINVAL; 576 } 577 SW_FLOW_KEY_PUT(match, ip.proto, 578 ipv4_key->ipv4_proto, is_mask); 579 SW_FLOW_KEY_PUT(match, ip.tos, 580 ipv4_key->ipv4_tos, is_mask); 581 SW_FLOW_KEY_PUT(match, ip.ttl, 582 ipv4_key->ipv4_ttl, is_mask); 583 SW_FLOW_KEY_PUT(match, ip.frag, 584 ipv4_key->ipv4_frag, is_mask); 585 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 586 ipv4_key->ipv4_src, is_mask); 587 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 588 ipv4_key->ipv4_dst, is_mask); 589 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 590 591 if (is_mask && exact_5tuple && *exact_5tuple) { 592 if (ipv4_key->ipv4_proto != 0xff || 593 ipv4_key->ipv4_src != htonl(0xffffffff) || 594 ipv4_key->ipv4_dst != htonl(0xffffffff)) 595 *exact_5tuple = false; 596 } 597 } 598 599 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 600 const struct ovs_key_ipv6 *ipv6_key; 601 602 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 603 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 604 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", 605 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 606 return -EINVAL; 607 } 608 SW_FLOW_KEY_PUT(match, ipv6.label, 609 ipv6_key->ipv6_label, 
is_mask); 610 SW_FLOW_KEY_PUT(match, ip.proto, 611 ipv6_key->ipv6_proto, is_mask); 612 SW_FLOW_KEY_PUT(match, ip.tos, 613 ipv6_key->ipv6_tclass, is_mask); 614 SW_FLOW_KEY_PUT(match, ip.ttl, 615 ipv6_key->ipv6_hlimit, is_mask); 616 SW_FLOW_KEY_PUT(match, ip.frag, 617 ipv6_key->ipv6_frag, is_mask); 618 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 619 ipv6_key->ipv6_src, 620 sizeof(match->key->ipv6.addr.src), 621 is_mask); 622 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 623 ipv6_key->ipv6_dst, 624 sizeof(match->key->ipv6.addr.dst), 625 is_mask); 626 627 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 628 629 if (is_mask && exact_5tuple && *exact_5tuple) { 630 if (ipv6_key->ipv6_proto != 0xff || 631 !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || 632 !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) 633 *exact_5tuple = false; 634 } 635 } 636 637 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 638 const struct ovs_key_arp *arp_key; 639 640 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 641 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 642 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", 643 arp_key->arp_op); 644 return -EINVAL; 645 } 646 647 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 648 arp_key->arp_sip, is_mask); 649 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 650 arp_key->arp_tip, is_mask); 651 SW_FLOW_KEY_PUT(match, ip.proto, 652 ntohs(arp_key->arp_op), is_mask); 653 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 654 arp_key->arp_sha, ETH_ALEN, is_mask); 655 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 656 arp_key->arp_tha, ETH_ALEN, is_mask); 657 658 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 659 } 660 661 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 662 const struct ovs_key_tcp *tcp_key; 663 664 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 665 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 666 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 667 tcp_key->tcp_src, is_mask); 668 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 669 tcp_key->tcp_dst, is_mask); 670 } else { 671 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 
672 tcp_key->tcp_src, is_mask); 673 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 674 tcp_key->tcp_dst, is_mask); 675 } 676 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 677 678 if (is_mask && exact_5tuple && *exact_5tuple && 679 (tcp_key->tcp_src != htons(0xffff) || 680 tcp_key->tcp_dst != htons(0xffff))) 681 *exact_5tuple = false; 682 } 683 684 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 685 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 686 SW_FLOW_KEY_PUT(match, ipv4.tp.flags, 687 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 688 is_mask); 689 } else { 690 SW_FLOW_KEY_PUT(match, ipv6.tp.flags, 691 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 692 is_mask); 693 } 694 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 695 } 696 697 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 698 const struct ovs_key_udp *udp_key; 699 700 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 701 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 702 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 703 udp_key->udp_src, is_mask); 704 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 705 udp_key->udp_dst, is_mask); 706 } else { 707 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 708 udp_key->udp_src, is_mask); 709 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 710 udp_key->udp_dst, is_mask); 711 } 712 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 713 714 if (is_mask && exact_5tuple && *exact_5tuple && 715 (udp_key->udp_src != htons(0xffff) || 716 udp_key->udp_dst != htons(0xffff))) 717 *exact_5tuple = false; 718 } 719 720 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 721 const struct ovs_key_sctp *sctp_key; 722 723 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 724 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 725 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 726 sctp_key->sctp_src, is_mask); 727 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 728 sctp_key->sctp_dst, is_mask); 729 } else { 730 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 731 sctp_key->sctp_src, is_mask); 732 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 733 sctp_key->sctp_dst, is_mask); 734 } 735 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 736 } 737 738 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 739 
const struct ovs_key_icmp *icmp_key; 740 741 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 742 SW_FLOW_KEY_PUT(match, ipv4.tp.src, 743 htons(icmp_key->icmp_type), is_mask); 744 SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 745 htons(icmp_key->icmp_code), is_mask); 746 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 747 } 748 749 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 750 const struct ovs_key_icmpv6 *icmpv6_key; 751 752 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 753 SW_FLOW_KEY_PUT(match, ipv6.tp.src, 754 htons(icmpv6_key->icmpv6_type), is_mask); 755 SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 756 htons(icmpv6_key->icmpv6_code), is_mask); 757 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 758 } 759 760 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 761 const struct ovs_key_nd *nd_key; 762 763 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 764 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 765 nd_key->nd_target, 766 sizeof(match->key->ipv6.nd.target), 767 is_mask); 768 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 769 nd_key->nd_sll, ETH_ALEN, is_mask); 770 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 771 nd_key->nd_tll, ETH_ALEN, is_mask); 772 attrs &= ~(1 << OVS_KEY_ATTR_ND); 773 } 774 775 if (attrs != 0) 776 return -EINVAL; 777 778 return 0; 779 } 780 781 static void sw_flow_mask_set(struct sw_flow_mask *mask, 782 struct sw_flow_key_range *range, u8 val) 783 { 784 u8 *m = (u8 *)&mask->key + range->start; 785 786 mask->range = *range; 787 memset(m, val, range_n_bytes(range)); 788 } 789 790 /** 791 * ovs_nla_get_match - parses Netlink attributes into a flow key and 792 * mask. In case the 'mask' is NULL, the flow is treated as exact match 793 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 794 * does not include any don't care bit. 795 * @match: receives the extracted flow match information. 796 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 797 * sequence. The fields should of the packet that triggered the creation 798 * of this flow. 799 * @mask: Optional. 
Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute that specifies the mask field of the wildcarded flow.
 *
 * If 'exact_5tuple' is non-NULL it is set to true once the key has been
 * parsed, and may then be cleared while the mask is parsed (see
 * ovs_key_from_nlattrs()). It is left untouched if key parsing fails.
 *
 * Returns 0 on success or a negative errno value (-EINVAL for malformed
 * or inconsistent attributes).
 */
int ovs_nla_get_match(struct sw_flow_match *match,
		      bool *exact_5tuple,
		      const struct nlattr *key,
		      const struct nlattr *mask)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(key, a, &key_attrs);
	if (err)
		return err;

	/* 802.1Q frame: the real key is nested inside OVS_KEY_ATTR_ENCAP. */
	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR("Invalid Vlan frame.\n");
			return -EINVAL;
		}

		/*
		 * Drop the outer EtherType and ENCAP bits so the nested
		 * attributes can supply the inner EtherType without being
		 * reported as duplicates.
		 */
		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
				return -EINVAL;
			}
		} else {
			OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);
	if (err)
		return err;

	if (exact_5tuple)
		*exact_5tuple = true;

	if (mask) {
		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
		if (err)
			return err;

		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
				return -EINVAL;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			/*
			 * NOTE(review): 'a' is shared with key parsing and the
			 * mask parser skips all-zero attributes, so the entries
			 * tested below may still point at key attributes when
			 * the mask omits them -- confirm this is intended.
			 */
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
				/* A malformed nested mask must not be accepted. */
				if (err)
					return err;
			} else {
				OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
						ntohs(eth_type));
				return -EINVAL;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
				return -EINVAL;
			}
		}

		err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);
		if (err)
			return err;
	} else {
		/* Populate exact match flow's key mask. */
		if (match->mask)
			sw_flow_mask_set(match->mask, &match->range, 0xff);
	}

	if (!match_validate(match, key_attrs, mask_attrs))
		return -EINVAL;

	return 0;
}

/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
913 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 914 * sequence. 915 * 916 * This parses a series of Netlink attributes that form a flow key, which must 917 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 918 * get the metadata, that is, the parts of the flow key that cannot be 919 * extracted from the packet itself. 920 */ 921 922 int ovs_nla_get_flow_metadata(struct sw_flow *flow, 923 const struct nlattr *attr) 924 { 925 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 926 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 927 u64 attrs = 0; 928 int err; 929 struct sw_flow_match match; 930 931 flow->key.phy.in_port = DP_MAX_PORTS; 932 flow->key.phy.priority = 0; 933 flow->key.phy.skb_mark = 0; 934 memset(tun_key, 0, sizeof(flow->key.tun_key)); 935 936 err = parse_flow_nlattrs(attr, a, &attrs); 937 if (err) 938 return -EINVAL; 939 940 memset(&match, 0, sizeof(match)); 941 match.key = &flow->key; 942 943 err = metadata_from_nlattrs(&match, &attrs, a, false); 944 if (err) 945 return err; 946 947 return 0; 948 } 949 950 int ovs_nla_put_flow(const struct sw_flow_key *swkey, 951 const struct sw_flow_key *output, struct sk_buff *skb) 952 { 953 struct ovs_key_ethernet *eth_key; 954 struct nlattr *nla, *encap; 955 bool is_mask = (swkey != output); 956 957 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 958 goto nla_put_failure; 959 960 if ((swkey->tun_key.ipv4_dst || is_mask) && 961 ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) 962 goto nla_put_failure; 963 964 if (swkey->phy.in_port == DP_MAX_PORTS) { 965 if (is_mask && (output->phy.in_port == 0xffff)) 966 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 967 goto nla_put_failure; 968 } else { 969 u16 upper_u16; 970 upper_u16 = !is_mask ? 
0 : 0xffff; 971 972 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 973 (upper_u16 << 16) | output->phy.in_port)) 974 goto nla_put_failure; 975 } 976 977 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 978 goto nla_put_failure; 979 980 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 981 if (!nla) 982 goto nla_put_failure; 983 984 eth_key = nla_data(nla); 985 memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); 986 memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); 987 988 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 989 __be16 eth_type; 990 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 991 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 992 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 993 goto nla_put_failure; 994 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 995 if (!swkey->eth.tci) 996 goto unencap; 997 } else 998 encap = NULL; 999 1000 if (swkey->eth.type == htons(ETH_P_802_2)) { 1001 /* 1002 * Ethertype 802.2 is represented in the netlink with omitted 1003 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1004 * 0xffff in the mask attribute. Ethertype can also 1005 * be wildcarded. 
1006 */ 1007 if (is_mask && output->eth.type) 1008 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1009 output->eth.type)) 1010 goto nla_put_failure; 1011 goto unencap; 1012 } 1013 1014 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1015 goto nla_put_failure; 1016 1017 if (swkey->eth.type == htons(ETH_P_IP)) { 1018 struct ovs_key_ipv4 *ipv4_key; 1019 1020 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1021 if (!nla) 1022 goto nla_put_failure; 1023 ipv4_key = nla_data(nla); 1024 ipv4_key->ipv4_src = output->ipv4.addr.src; 1025 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1026 ipv4_key->ipv4_proto = output->ip.proto; 1027 ipv4_key->ipv4_tos = output->ip.tos; 1028 ipv4_key->ipv4_ttl = output->ip.ttl; 1029 ipv4_key->ipv4_frag = output->ip.frag; 1030 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1031 struct ovs_key_ipv6 *ipv6_key; 1032 1033 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1034 if (!nla) 1035 goto nla_put_failure; 1036 ipv6_key = nla_data(nla); 1037 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1038 sizeof(ipv6_key->ipv6_src)); 1039 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1040 sizeof(ipv6_key->ipv6_dst)); 1041 ipv6_key->ipv6_label = output->ipv6.label; 1042 ipv6_key->ipv6_proto = output->ip.proto; 1043 ipv6_key->ipv6_tclass = output->ip.tos; 1044 ipv6_key->ipv6_hlimit = output->ip.ttl; 1045 ipv6_key->ipv6_frag = output->ip.frag; 1046 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1047 swkey->eth.type == htons(ETH_P_RARP)) { 1048 struct ovs_key_arp *arp_key; 1049 1050 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1051 if (!nla) 1052 goto nla_put_failure; 1053 arp_key = nla_data(nla); 1054 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1055 arp_key->arp_sip = output->ipv4.addr.src; 1056 arp_key->arp_tip = output->ipv4.addr.dst; 1057 arp_key->arp_op = htons(output->ip.proto); 1058 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); 1059 memcpy(arp_key->arp_tha, 
output->ipv4.arp.tha, ETH_ALEN); 1060 } 1061 1062 if ((swkey->eth.type == htons(ETH_P_IP) || 1063 swkey->eth.type == htons(ETH_P_IPV6)) && 1064 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1065 1066 if (swkey->ip.proto == IPPROTO_TCP) { 1067 struct ovs_key_tcp *tcp_key; 1068 1069 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1070 if (!nla) 1071 goto nla_put_failure; 1072 tcp_key = nla_data(nla); 1073 if (swkey->eth.type == htons(ETH_P_IP)) { 1074 tcp_key->tcp_src = output->ipv4.tp.src; 1075 tcp_key->tcp_dst = output->ipv4.tp.dst; 1076 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1077 output->ipv4.tp.flags)) 1078 goto nla_put_failure; 1079 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1080 tcp_key->tcp_src = output->ipv6.tp.src; 1081 tcp_key->tcp_dst = output->ipv6.tp.dst; 1082 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1083 output->ipv6.tp.flags)) 1084 goto nla_put_failure; 1085 } 1086 } else if (swkey->ip.proto == IPPROTO_UDP) { 1087 struct ovs_key_udp *udp_key; 1088 1089 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1090 if (!nla) 1091 goto nla_put_failure; 1092 udp_key = nla_data(nla); 1093 if (swkey->eth.type == htons(ETH_P_IP)) { 1094 udp_key->udp_src = output->ipv4.tp.src; 1095 udp_key->udp_dst = output->ipv4.tp.dst; 1096 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1097 udp_key->udp_src = output->ipv6.tp.src; 1098 udp_key->udp_dst = output->ipv6.tp.dst; 1099 } 1100 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1101 struct ovs_key_sctp *sctp_key; 1102 1103 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1104 if (!nla) 1105 goto nla_put_failure; 1106 sctp_key = nla_data(nla); 1107 if (swkey->eth.type == htons(ETH_P_IP)) { 1108 sctp_key->sctp_src = swkey->ipv4.tp.src; 1109 sctp_key->sctp_dst = swkey->ipv4.tp.dst; 1110 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1111 sctp_key->sctp_src = swkey->ipv6.tp.src; 1112 sctp_key->sctp_dst = swkey->ipv6.tp.dst; 1113 } 1114 } else if (swkey->eth.type == 
htons(ETH_P_IP) && 1115 swkey->ip.proto == IPPROTO_ICMP) { 1116 struct ovs_key_icmp *icmp_key; 1117 1118 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1119 if (!nla) 1120 goto nla_put_failure; 1121 icmp_key = nla_data(nla); 1122 icmp_key->icmp_type = ntohs(output->ipv4.tp.src); 1123 icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); 1124 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1125 swkey->ip.proto == IPPROTO_ICMPV6) { 1126 struct ovs_key_icmpv6 *icmpv6_key; 1127 1128 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1129 sizeof(*icmpv6_key)); 1130 if (!nla) 1131 goto nla_put_failure; 1132 icmpv6_key = nla_data(nla); 1133 icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); 1134 icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); 1135 1136 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1137 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1138 struct ovs_key_nd *nd_key; 1139 1140 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1141 if (!nla) 1142 goto nla_put_failure; 1143 nd_key = nla_data(nla); 1144 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1145 sizeof(nd_key->nd_target)); 1146 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); 1147 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); 1148 } 1149 } 1150 } 1151 1152 unencap: 1153 if (encap) 1154 nla_nest_end(skb, encap); 1155 1156 return 0; 1157 1158 nla_put_failure: 1159 return -EMSGSIZE; 1160 } 1161 1162 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1163 1164 struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) 1165 { 1166 struct sw_flow_actions *sfa; 1167 1168 if (size > MAX_ACTIONS_BUFSIZE) 1169 return ERR_PTR(-EINVAL); 1170 1171 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1172 if (!sfa) 1173 return ERR_PTR(-ENOMEM); 1174 1175 sfa->actions_len = 0; 1176 return sfa; 1177 } 1178 1179 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1180 * The caller must hold rcu_read_lock for this to be sensible. 
*/ 1181 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1182 { 1183 kfree_rcu(sf_acts, rcu); 1184 } 1185 1186 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1187 int attr_len) 1188 { 1189 1190 struct sw_flow_actions *acts; 1191 int new_acts_size; 1192 int req_size = NLA_ALIGN(attr_len); 1193 int next_offset = offsetof(struct sw_flow_actions, actions) + 1194 (*sfa)->actions_len; 1195 1196 if (req_size <= (ksize(*sfa) - next_offset)) 1197 goto out; 1198 1199 new_acts_size = ksize(*sfa) * 2; 1200 1201 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1202 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1203 return ERR_PTR(-EMSGSIZE); 1204 new_acts_size = MAX_ACTIONS_BUFSIZE; 1205 } 1206 1207 acts = ovs_nla_alloc_flow_actions(new_acts_size); 1208 if (IS_ERR(acts)) 1209 return (void *)acts; 1210 1211 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1212 acts->actions_len = (*sfa)->actions_len; 1213 kfree(*sfa); 1214 *sfa = acts; 1215 1216 out: 1217 (*sfa)->actions_len += req_size; 1218 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1219 } 1220 1221 static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) 1222 { 1223 struct nlattr *a; 1224 1225 a = reserve_sfa_size(sfa, nla_attr_size(len)); 1226 if (IS_ERR(a)) 1227 return PTR_ERR(a); 1228 1229 a->nla_type = attrtype; 1230 a->nla_len = nla_attr_size(len); 1231 1232 if (data) 1233 memcpy(nla_data(a), data, len); 1234 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1235 1236 return 0; 1237 } 1238 1239 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1240 int attrtype) 1241 { 1242 int used = (*sfa)->actions_len; 1243 int err; 1244 1245 err = add_action(sfa, attrtype, NULL, 0); 1246 if (err) 1247 return err; 1248 1249 return used; 1250 } 1251 1252 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1253 int st_offset) 1254 { 1255 struct nlattr *a = (struct nlattr *) ((unsigned 
char *)sfa->actions + 1256 st_offset); 1257 1258 a->nla_len = sfa->actions_len - st_offset; 1259 } 1260 1261 static int validate_and_copy_sample(const struct nlattr *attr, 1262 const struct sw_flow_key *key, int depth, 1263 struct sw_flow_actions **sfa) 1264 { 1265 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1266 const struct nlattr *probability, *actions; 1267 const struct nlattr *a; 1268 int rem, start, err, st_acts; 1269 1270 memset(attrs, 0, sizeof(attrs)); 1271 nla_for_each_nested(a, attr, rem) { 1272 int type = nla_type(a); 1273 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1274 return -EINVAL; 1275 attrs[type] = a; 1276 } 1277 if (rem) 1278 return -EINVAL; 1279 1280 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 1281 if (!probability || nla_len(probability) != sizeof(u32)) 1282 return -EINVAL; 1283 1284 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 1285 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 1286 return -EINVAL; 1287 1288 /* validation done, copy sample action. 
*/ 1289 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); 1290 if (start < 0) 1291 return start; 1292 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1293 nla_data(probability), sizeof(u32)); 1294 if (err) 1295 return err; 1296 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); 1297 if (st_acts < 0) 1298 return st_acts; 1299 1300 err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); 1301 if (err) 1302 return err; 1303 1304 add_nested_action_end(*sfa, st_acts); 1305 add_nested_action_end(*sfa, start); 1306 1307 return 0; 1308 } 1309 1310 static int validate_tp_port(const struct sw_flow_key *flow_key) 1311 { 1312 if (flow_key->eth.type == htons(ETH_P_IP)) { 1313 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) 1314 return 0; 1315 } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { 1316 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) 1317 return 0; 1318 } 1319 1320 return -EINVAL; 1321 } 1322 1323 void ovs_match_init(struct sw_flow_match *match, 1324 struct sw_flow_key *key, 1325 struct sw_flow_mask *mask) 1326 { 1327 memset(match, 0, sizeof(*match)); 1328 match->key = key; 1329 match->mask = mask; 1330 1331 memset(key, 0, sizeof(*key)); 1332 1333 if (mask) { 1334 memset(&mask->key, 0, sizeof(mask->key)); 1335 mask->range.start = mask->range.end = 0; 1336 } 1337 } 1338 1339 static int validate_and_copy_set_tun(const struct nlattr *attr, 1340 struct sw_flow_actions **sfa) 1341 { 1342 struct sw_flow_match match; 1343 struct sw_flow_key key; 1344 int err, start; 1345 1346 ovs_match_init(&match, &key, NULL); 1347 err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); 1348 if (err) 1349 return err; 1350 1351 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 1352 if (start < 0) 1353 return start; 1354 1355 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, 1356 sizeof(match.key->tun_key)); 1357 add_nested_action_end(*sfa, start); 1358 1359 return err; 1360 } 1361 1362 static int validate_set(const 
struct nlattr *a, 1363 const struct sw_flow_key *flow_key, 1364 struct sw_flow_actions **sfa, 1365 bool *set_tun) 1366 { 1367 const struct nlattr *ovs_key = nla_data(a); 1368 int key_type = nla_type(ovs_key); 1369 1370 /* There can be only one key in a action */ 1371 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1372 return -EINVAL; 1373 1374 if (key_type > OVS_KEY_ATTR_MAX || 1375 (ovs_key_lens[key_type] != nla_len(ovs_key) && 1376 ovs_key_lens[key_type] != -1)) 1377 return -EINVAL; 1378 1379 switch (key_type) { 1380 const struct ovs_key_ipv4 *ipv4_key; 1381 const struct ovs_key_ipv6 *ipv6_key; 1382 int err; 1383 1384 case OVS_KEY_ATTR_PRIORITY: 1385 case OVS_KEY_ATTR_SKB_MARK: 1386 case OVS_KEY_ATTR_ETHERNET: 1387 break; 1388 1389 case OVS_KEY_ATTR_TUNNEL: 1390 *set_tun = true; 1391 err = validate_and_copy_set_tun(a, sfa); 1392 if (err) 1393 return err; 1394 break; 1395 1396 case OVS_KEY_ATTR_IPV4: 1397 if (flow_key->eth.type != htons(ETH_P_IP)) 1398 return -EINVAL; 1399 1400 if (!flow_key->ip.proto) 1401 return -EINVAL; 1402 1403 ipv4_key = nla_data(ovs_key); 1404 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1405 return -EINVAL; 1406 1407 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1408 return -EINVAL; 1409 1410 break; 1411 1412 case OVS_KEY_ATTR_IPV6: 1413 if (flow_key->eth.type != htons(ETH_P_IPV6)) 1414 return -EINVAL; 1415 1416 if (!flow_key->ip.proto) 1417 return -EINVAL; 1418 1419 ipv6_key = nla_data(ovs_key); 1420 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1421 return -EINVAL; 1422 1423 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1424 return -EINVAL; 1425 1426 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1427 return -EINVAL; 1428 1429 break; 1430 1431 case OVS_KEY_ATTR_TCP: 1432 if (flow_key->ip.proto != IPPROTO_TCP) 1433 return -EINVAL; 1434 1435 return validate_tp_port(flow_key); 1436 1437 case OVS_KEY_ATTR_UDP: 1438 if (flow_key->ip.proto != IPPROTO_UDP) 1439 return -EINVAL; 1440 1441 return validate_tp_port(flow_key); 1442 1443 case 
OVS_KEY_ATTR_SCTP: 1444 if (flow_key->ip.proto != IPPROTO_SCTP) 1445 return -EINVAL; 1446 1447 return validate_tp_port(flow_key); 1448 1449 default: 1450 return -EINVAL; 1451 } 1452 1453 return 0; 1454 } 1455 1456 static int validate_userspace(const struct nlattr *attr) 1457 { 1458 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1459 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1460 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1461 }; 1462 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1463 int error; 1464 1465 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1466 attr, userspace_policy); 1467 if (error) 1468 return error; 1469 1470 if (!a[OVS_USERSPACE_ATTR_PID] || 1471 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1472 return -EINVAL; 1473 1474 return 0; 1475 } 1476 1477 static int copy_action(const struct nlattr *from, 1478 struct sw_flow_actions **sfa) 1479 { 1480 int totlen = NLA_ALIGN(from->nla_len); 1481 struct nlattr *to; 1482 1483 to = reserve_sfa_size(sfa, from->nla_len); 1484 if (IS_ERR(to)) 1485 return PTR_ERR(to); 1486 1487 memcpy(to, from, totlen); 1488 return 0; 1489 } 1490 1491 int ovs_nla_copy_actions(const struct nlattr *attr, 1492 const struct sw_flow_key *key, 1493 int depth, 1494 struct sw_flow_actions **sfa) 1495 { 1496 const struct nlattr *a; 1497 int rem, err; 1498 1499 if (depth >= SAMPLE_ACTION_DEPTH) 1500 return -EOVERFLOW; 1501 1502 nla_for_each_nested(a, attr, rem) { 1503 /* Expected argument lengths, (u32)-1 for variable length. 
*/ 1504 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 1505 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1506 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1507 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1508 [OVS_ACTION_ATTR_POP_VLAN] = 0, 1509 [OVS_ACTION_ATTR_SET] = (u32)-1, 1510 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 1511 }; 1512 const struct ovs_action_push_vlan *vlan; 1513 int type = nla_type(a); 1514 bool skip_copy; 1515 1516 if (type > OVS_ACTION_ATTR_MAX || 1517 (action_lens[type] != nla_len(a) && 1518 action_lens[type] != (u32)-1)) 1519 return -EINVAL; 1520 1521 skip_copy = false; 1522 switch (type) { 1523 case OVS_ACTION_ATTR_UNSPEC: 1524 return -EINVAL; 1525 1526 case OVS_ACTION_ATTR_USERSPACE: 1527 err = validate_userspace(a); 1528 if (err) 1529 return err; 1530 break; 1531 1532 case OVS_ACTION_ATTR_OUTPUT: 1533 if (nla_get_u32(a) >= DP_MAX_PORTS) 1534 return -EINVAL; 1535 break; 1536 1537 1538 case OVS_ACTION_ATTR_POP_VLAN: 1539 break; 1540 1541 case OVS_ACTION_ATTR_PUSH_VLAN: 1542 vlan = nla_data(a); 1543 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 1544 return -EINVAL; 1545 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1546 return -EINVAL; 1547 break; 1548 1549 case OVS_ACTION_ATTR_SET: 1550 err = validate_set(a, key, sfa, &skip_copy); 1551 if (err) 1552 return err; 1553 break; 1554 1555 case OVS_ACTION_ATTR_SAMPLE: 1556 err = validate_and_copy_sample(a, key, depth, sfa); 1557 if (err) 1558 return err; 1559 skip_copy = true; 1560 break; 1561 1562 default: 1563 return -EINVAL; 1564 } 1565 if (!skip_copy) { 1566 err = copy_action(a, sfa); 1567 if (err) 1568 return err; 1569 } 1570 } 1571 1572 if (rem > 0) 1573 return -EINVAL; 1574 1575 return 0; 1576 } 1577 1578 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1579 { 1580 const struct nlattr *a; 1581 struct nlattr *start; 1582 int err = 0, rem; 1583 1584 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 1585 if (!start) 1586 return -EMSGSIZE; 1587 
1588 nla_for_each_nested(a, attr, rem) { 1589 int type = nla_type(a); 1590 struct nlattr *st_sample; 1591 1592 switch (type) { 1593 case OVS_SAMPLE_ATTR_PROBABILITY: 1594 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 1595 sizeof(u32), nla_data(a))) 1596 return -EMSGSIZE; 1597 break; 1598 case OVS_SAMPLE_ATTR_ACTIONS: 1599 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 1600 if (!st_sample) 1601 return -EMSGSIZE; 1602 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 1603 if (err) 1604 return err; 1605 nla_nest_end(skb, st_sample); 1606 break; 1607 } 1608 } 1609 1610 nla_nest_end(skb, start); 1611 return err; 1612 } 1613 1614 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 1615 { 1616 const struct nlattr *ovs_key = nla_data(a); 1617 int key_type = nla_type(ovs_key); 1618 struct nlattr *start; 1619 int err; 1620 1621 switch (key_type) { 1622 case OVS_KEY_ATTR_IPV4_TUNNEL: 1623 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 1624 if (!start) 1625 return -EMSGSIZE; 1626 1627 err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), 1628 nla_data(ovs_key)); 1629 if (err) 1630 return err; 1631 nla_nest_end(skb, start); 1632 break; 1633 default: 1634 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 1635 return -EMSGSIZE; 1636 break; 1637 } 1638 1639 return 0; 1640 } 1641 1642 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 1643 { 1644 const struct nlattr *a; 1645 int rem, err; 1646 1647 nla_for_each_attr(a, attr, len, rem) { 1648 int type = nla_type(a); 1649 1650 switch (type) { 1651 case OVS_ACTION_ATTR_SET: 1652 err = set_action_to_attr(a, skb); 1653 if (err) 1654 return err; 1655 break; 1656 1657 case OVS_ACTION_ATTR_SAMPLE: 1658 err = sample_action_to_attr(a, skb); 1659 if (err) 1660 return err; 1661 break; 1662 default: 1663 if (nla_put(skb, type, nla_len(a), nla_data(a))) 1664 return -EMSGSIZE; 1665 break; 1666 } 1667 } 1668 1669 return 0; 1670 } 1671