1 /* 2 * Copyright (c) 2007-2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/ip.h> 46 #include <net/ipv6.h> 47 #include <net/ndisc.h> 48 49 #include "flow_netlink.h" 50 51 static void update_range__(struct sw_flow_match *match, 52 size_t offset, size_t size, bool is_mask) 53 { 54 struct sw_flow_key_range *range = NULL; 55 size_t start = rounddown(offset, sizeof(long)); 56 size_t end = roundup(offset + size, sizeof(long)); 57 58 if (!is_mask) 59 range = &match->range; 60 else if (match->mask) 61 range = &match->mask->range; 62 63 if (!range) 64 return; 65 66 if (range->start == range->end) { 67 range->start = start; 68 range->end = end; 69 return; 70 } 71 72 if (range->start > start) 73 range->start = start; 74 75 if (range->end < end) 76 range->end = end; 77 } 78 79 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 80 do { \ 81 update_range__(match, offsetof(struct sw_flow_key, field), \ 82 sizeof((match)->key->field), is_mask); \ 83 if (is_mask) { \ 84 if ((match)->mask) \ 85 (match)->mask->key.field = value; \ 86 } else { \ 87 (match)->key->field = value; \ 88 } \ 89 } while (0) 90 91 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 92 do { \ 93 update_range__(match, offsetof(struct sw_flow_key, field), \ 94 len, is_mask); \ 95 if (is_mask) { \ 96 if ((match)->mask) \ 97 memcpy(&(match)->mask->key.field, value_p, len);\ 98 } else { \ 99 memcpy(&(match)->key->field, value_p, len); \ 100 } \ 101 } while (0) 102 103 static u16 range_n_bytes(const struct sw_flow_key_range *range) 104 { 105 return range->end - range->start; 106 } 107 108 static bool match_validate(const struct sw_flow_match *match, 109 u64 key_attrs, u64 mask_attrs) 110 { 111 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 112 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 113 114 /* The following mask attributes allowed only if they 115 * pass the validation tests. */ 116 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 117 | (1 << OVS_KEY_ATTR_IPV6) 118 | (1 << OVS_KEY_ATTR_TCP) 119 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 120 | (1 << OVS_KEY_ATTR_UDP) 121 | (1 << OVS_KEY_ATTR_SCTP) 122 | (1 << OVS_KEY_ATTR_ICMP) 123 | (1 << OVS_KEY_ATTR_ICMPV6) 124 | (1 << OVS_KEY_ATTR_ARP) 125 | (1 << OVS_KEY_ATTR_ND)); 126 127 /* Always allowed mask fields. */ 128 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 129 | (1 << OVS_KEY_ATTR_IN_PORT) 130 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 131 132 /* Check key attributes. */ 133 if (match->key->eth.type == htons(ETH_P_ARP) 134 || match->key->eth.type == htons(ETH_P_RARP)) { 135 key_expected |= 1 << OVS_KEY_ATTR_ARP; 136 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 137 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 138 } 139 140 if (match->key->eth.type == htons(ETH_P_IP)) { 141 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 142 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 143 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 144 145 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 146 if (match->key->ip.proto == IPPROTO_UDP) { 147 key_expected |= 1 << OVS_KEY_ATTR_UDP; 148 if (match->mask && (match->mask->key.ip.proto == 0xff)) 149 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 150 } 151 152 if (match->key->ip.proto == IPPROTO_SCTP) { 153 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 154 if (match->mask && (match->mask->key.ip.proto == 0xff)) 155 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 156 } 157 158 if (match->key->ip.proto == IPPROTO_TCP) { 159 key_expected |= 1 << OVS_KEY_ATTR_TCP; 160 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 161 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 162 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 163 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 164 } 165 } 166 167 if (match->key->ip.proto == IPPROTO_ICMP) { 168 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 169 if (match->mask && (match->mask->key.ip.proto == 0xff)) 170 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 171 } 172 } 173 } 174 175 if (match->key->eth.type == htons(ETH_P_IPV6)) { 176 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 177 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 178 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 179 180 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 181 if (match->key->ip.proto == IPPROTO_UDP) { 182 key_expected |= 1 << OVS_KEY_ATTR_UDP; 183 if (match->mask && (match->mask->key.ip.proto == 0xff)) 184 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 185 } 186 187 if (match->key->ip.proto == IPPROTO_SCTP) { 188 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 189 if (match->mask && (match->mask->key.ip.proto == 0xff)) 190 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 191 } 192 193 if (match->key->ip.proto == IPPROTO_TCP) { 194 key_expected |= 1 << OVS_KEY_ATTR_TCP; 195 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 196 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 197 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 198 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 199 } 200 } 201 202 if (match->key->ip.proto == IPPROTO_ICMPV6) { 203 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 204 if (match->mask && (match->mask->key.ip.proto == 0xff)) 205 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 206 207 if (match->key->tp.src == 208 htons(NDISC_NEIGHBOUR_SOLICITATION) || 209 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 210 key_expected |= 1 << OVS_KEY_ATTR_ND; 211 if (match->mask && (match->mask->key.tp.src == htons(0xffff))) 212 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 213 } 214 } 215 } 216 } 217 218 if ((key_attrs & key_expected) != key_expected) { 219 /* Key attributes check failed. */ 220 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 221 (unsigned long long)key_attrs, (unsigned long long)key_expected); 222 return false; 223 } 224 225 if ((mask_attrs & mask_allowed) != mask_attrs) { 226 /* Mask attributes check failed. */ 227 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 228 (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); 229 return false; 230 } 231 232 return true; 233 } 234 235 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 236 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 237 [OVS_KEY_ATTR_ENCAP] = -1, 238 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 239 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 240 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 241 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 242 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 243 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 244 [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 245 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 246 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 247 [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), 248 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 249 [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 250 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 251 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 252 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 253 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 254 [OVS_KEY_ATTR_TUNNEL] = -1, 255 }; 256 257 static bool is_all_zero(const u8 *fp, size_t size) 258 { 259 int i; 260 261 if (!fp) 262 return false; 263 264 for (i = 0; i < size; i++) 265 if (fp[i]) 266 return false; 267 268 return true; 269 } 270 271 static int __parse_flow_nlattrs(const struct nlattr *attr, 272 const struct nlattr *a[], 273 u64 *attrsp, bool nz) 274 { 275 const struct nlattr *nla; 276 u64 attrs; 277 int rem; 278 279 attrs = *attrsp; 280 nla_for_each_nested(nla, attr, rem) { 281 u16 type = nla_type(nla); 282 int expected_len; 283 284 if (type > OVS_KEY_ATTR_MAX) { 285 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 286 type, OVS_KEY_ATTR_MAX); 287 return -EINVAL; 288 } 289 290 if (attrs & (1 << type)) { 291 OVS_NLERR("Duplicate key attribute (type %d).\n", type); 292 return -EINVAL; 293 } 294 295 expected_len = ovs_key_lens[type]; 296 if (nla_len(nla) != expected_len && expected_len != -1) { 297 OVS_NLERR("Key attribute has unexpected length (type=%d" 298 ", length=%d, expected=%d).\n", type, 299 nla_len(nla), expected_len); 300 return -EINVAL; 301 } 302 303 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 304 attrs |= 1 << type; 305 a[type] = nla; 306 } 307 } 308 if (rem) { 309 OVS_NLERR("Message has %d unknown bytes.\n", rem); 310 return -EINVAL; 311 } 312 313 *attrsp = attrs; 314 return 0; 315 } 316 317 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 318 const struct nlattr *a[], u64 *attrsp) 319 { 320 return __parse_flow_nlattrs(attr, a, attrsp, true); 321 } 322 323 static int parse_flow_nlattrs(const struct nlattr *attr, 324 const struct nlattr *a[], u64 *attrsp) 325 { 326 return __parse_flow_nlattrs(attr, a, attrsp, false); 327 } 328 329 static int ipv4_tun_from_nlattr(const struct nlattr *attr, 330 struct sw_flow_match *match, bool is_mask) 331 { 332 struct nlattr *a; 333 int rem; 334 bool ttl = false; 335 __be16 tun_flags = 0; 336 337 nla_for_each_nested(a, attr, rem) { 338 int type = nla_type(a); 339 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 340 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 341 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 342 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 343 [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 344 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 345 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 346 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 347 }; 348 349 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 350 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 351 type, OVS_TUNNEL_KEY_ATTR_MAX); 352 return -EINVAL; 353 } 354 355 if (ovs_tunnel_key_lens[type] != nla_len(a)) { 356 OVS_NLERR("IPv4 tunnel attribute type has unexpected " 357 " length (type=%d, length=%d, expected=%d).\n", 358 type, nla_len(a), ovs_tunnel_key_lens[type]); 359 return -EINVAL; 360 } 361 362 switch (type) { 363 case OVS_TUNNEL_KEY_ATTR_ID: 364 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 365 nla_get_be64(a), is_mask); 366 tun_flags |= TUNNEL_KEY; 367 break; 368 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 369 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 370 nla_get_be32(a), is_mask); 371 break; 372 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 373 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 374 nla_get_be32(a), is_mask); 375 break; 376 case OVS_TUNNEL_KEY_ATTR_TOS: 377 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 378 nla_get_u8(a), is_mask); 379 break; 380 case OVS_TUNNEL_KEY_ATTR_TTL: 381 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 382 nla_get_u8(a), is_mask); 383 ttl = true; 384 break; 385 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 386 tun_flags |= TUNNEL_DONT_FRAGMENT; 387 break; 388 case OVS_TUNNEL_KEY_ATTR_CSUM: 389 tun_flags |= TUNNEL_CSUM; 390 break; 391 default: 392 return -EINVAL; 393 } 394 } 395 396 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 397 398 if (rem > 0) { 399 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); 400 return -EINVAL; 401 } 402 403 if (!is_mask) { 404 if (!match->key->tun_key.ipv4_dst) { 405 OVS_NLERR("IPv4 tunnel destination address is zero.\n"); 406 return -EINVAL; 407 } 408 409 if (!ttl) { 410 OVS_NLERR("IPv4 tunnel TTL not specified.\n"); 411 return -EINVAL; 412 } 413 } 414 415 return 0; 416 } 417 418 static int ipv4_tun_to_nlattr(struct sk_buff *skb, 419 const struct ovs_key_ipv4_tunnel *tun_key, 420 const struct ovs_key_ipv4_tunnel *output) 421 { 422 struct nlattr *nla; 423 424 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 425 if (!nla) 426 return -EMSGSIZE; 427 428 if (output->tun_flags & TUNNEL_KEY && 429 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 430 return -EMSGSIZE; 431 if (output->ipv4_src && 432 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 433 return -EMSGSIZE; 434 if (output->ipv4_dst && 435 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 436 return -EMSGSIZE; 437 if (output->ipv4_tos && 438 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 439 return -EMSGSIZE; 440 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 441 return -EMSGSIZE; 442 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 443 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 444 return -EMSGSIZE; 445 if ((output->tun_flags & TUNNEL_CSUM) && 446 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 447 return -EMSGSIZE; 448 449 nla_nest_end(skb, nla); 450 return 0; 451 } 452 453 454 static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 455 const struct nlattr **a, bool is_mask) 456 { 457 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 458 SW_FLOW_KEY_PUT(match, phy.priority, 459 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 460 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 461 } 462 463 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 464 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 465 466 if (is_mask) 467 in_port = 0xffffffff; /* Always exact match in_port. */ 468 else if (in_port >= DP_MAX_PORTS) 469 return -EINVAL; 470 471 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 472 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 473 } else if (!is_mask) { 474 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 475 } 476 477 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 478 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 479 480 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 481 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 482 } 483 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 484 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 485 is_mask)) 486 return -EINVAL; 487 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 488 } 489 return 0; 490 } 491 492 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 493 const struct nlattr **a, bool is_mask) 494 { 495 int err; 496 u64 orig_attrs = attrs; 497 498 err = metadata_from_nlattrs(match, &attrs, a, is_mask); 499 if (err) 500 return err; 501 502 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 503 const struct ovs_key_ethernet *eth_key; 504 505 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 506 SW_FLOW_KEY_MEMCPY(match, eth.src, 507 eth_key->eth_src, ETH_ALEN, is_mask); 508 SW_FLOW_KEY_MEMCPY(match, eth.dst, 509 eth_key->eth_dst, ETH_ALEN, is_mask); 510 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 511 } 512 513 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 514 __be16 tci; 515 516 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 517 if (!(tci & htons(VLAN_TAG_PRESENT))) { 518 if (is_mask) 519 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); 520 else 521 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); 522 523 return -EINVAL; 524 } 525 526 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 527 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 528 } else if (!is_mask) 529 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 530 531 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 532 __be16 eth_type; 533 534 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 535 if (is_mask) { 536 /* Always exact match EtherType. */ 537 eth_type = htons(0xffff); 538 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 539 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", 540 ntohs(eth_type), ETH_P_802_3_MIN); 541 return -EINVAL; 542 } 543 544 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 545 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 546 } else if (!is_mask) { 547 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 548 } 549 550 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 551 const struct ovs_key_ipv4 *ipv4_key; 552 553 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 554 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 555 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", 556 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 557 return -EINVAL; 558 } 559 SW_FLOW_KEY_PUT(match, ip.proto, 560 ipv4_key->ipv4_proto, is_mask); 561 SW_FLOW_KEY_PUT(match, ip.tos, 562 ipv4_key->ipv4_tos, is_mask); 563 SW_FLOW_KEY_PUT(match, ip.ttl, 564 ipv4_key->ipv4_ttl, is_mask); 565 SW_FLOW_KEY_PUT(match, ip.frag, 566 ipv4_key->ipv4_frag, is_mask); 567 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 568 ipv4_key->ipv4_src, is_mask); 569 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 570 ipv4_key->ipv4_dst, is_mask); 571 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 572 } 573 574 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 575 const struct ovs_key_ipv6 *ipv6_key; 576 577 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 578 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 579 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", 580 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 581 return -EINVAL; 582 } 583 SW_FLOW_KEY_PUT(match, ipv6.label, 584 ipv6_key->ipv6_label, is_mask); 585 SW_FLOW_KEY_PUT(match, ip.proto, 586 ipv6_key->ipv6_proto, is_mask); 587 SW_FLOW_KEY_PUT(match, ip.tos, 588 ipv6_key->ipv6_tclass, is_mask); 589 SW_FLOW_KEY_PUT(match, ip.ttl, 590 ipv6_key->ipv6_hlimit, is_mask); 591 SW_FLOW_KEY_PUT(match, ip.frag, 592 ipv6_key->ipv6_frag, is_mask); 593 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 594 ipv6_key->ipv6_src, 595 sizeof(match->key->ipv6.addr.src), 596 is_mask); 597 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 598 ipv6_key->ipv6_dst, 599 sizeof(match->key->ipv6.addr.dst), 600 is_mask); 601 602 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 603 } 604 605 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 606 const struct ovs_key_arp *arp_key; 607 608 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 609 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 610 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", 611 arp_key->arp_op); 612 return -EINVAL; 613 } 614 615 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 616 arp_key->arp_sip, is_mask); 617 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 618 arp_key->arp_tip, is_mask); 619 SW_FLOW_KEY_PUT(match, ip.proto, 620 ntohs(arp_key->arp_op), is_mask); 621 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 622 arp_key->arp_sha, ETH_ALEN, is_mask); 623 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 624 arp_key->arp_tha, ETH_ALEN, is_mask); 625 626 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 627 } 628 629 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 630 const struct ovs_key_tcp *tcp_key; 631 632 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 633 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 634 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 635 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 636 } 637 638 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 639 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 640 SW_FLOW_KEY_PUT(match, tp.flags, 641 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 642 is_mask); 643 } else { 644 SW_FLOW_KEY_PUT(match, tp.flags, 645 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 646 is_mask); 647 } 648 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 649 } 650 651 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 652 const struct ovs_key_udp *udp_key; 653 654 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 655 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 656 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 657 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 658 } 659 660 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 661 const struct ovs_key_sctp *sctp_key; 662 663 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 664 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 665 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 666 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 667 } 668 669 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 670 const struct ovs_key_icmp *icmp_key; 671 672 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 673 SW_FLOW_KEY_PUT(match, tp.src, 674 htons(icmp_key->icmp_type), is_mask); 675 SW_FLOW_KEY_PUT(match, tp.dst, 676 htons(icmp_key->icmp_code), is_mask); 677 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 678 } 679 680 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 681 const struct ovs_key_icmpv6 *icmpv6_key; 682 683 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 684 SW_FLOW_KEY_PUT(match, tp.src, 685 htons(icmpv6_key->icmpv6_type), is_mask); 686 SW_FLOW_KEY_PUT(match, tp.dst, 687 htons(icmpv6_key->icmpv6_code), is_mask); 688 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 689 } 690 691 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 692 const struct ovs_key_nd *nd_key; 693 694 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 695 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 696 nd_key->nd_target, 697 sizeof(match->key->ipv6.nd.target), 698 is_mask); 699 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 700 nd_key->nd_sll, ETH_ALEN, is_mask); 701 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 702 nd_key->nd_tll, ETH_ALEN, is_mask); 703 attrs &= ~(1 << OVS_KEY_ATTR_ND); 704 } 705 706 if (attrs != 0) 707 return -EINVAL; 708 709 return 0; 710 } 711 712 static void sw_flow_mask_set(struct sw_flow_mask *mask, 713 struct sw_flow_key_range *range, u8 val) 714 { 715 u8 *m = (u8 *)&mask->key + range->start; 716 717 mask->range = *range; 718 memset(m, val, range_n_bytes(range)); 719 } 720 721 /** 722 * ovs_nla_get_match - parses Netlink attributes into a flow key and 723 * mask. In case the 'mask' is NULL, the flow is treated as exact match 724 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 725 * does not include any don't care bit. 726 * @match: receives the extracted flow match information. 727 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 728 * sequence. The fields should of the packet that triggered the creation 729 * of this flow. 730 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 731 * attribute specifies the mask field of the wildcarded flow. 732 */ 733 int ovs_nla_get_match(struct sw_flow_match *match, 734 const struct nlattr *key, 735 const struct nlattr *mask) 736 { 737 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 738 const struct nlattr *encap; 739 u64 key_attrs = 0; 740 u64 mask_attrs = 0; 741 bool encap_valid = false; 742 int err; 743 744 err = parse_flow_nlattrs(key, a, &key_attrs); 745 if (err) 746 return err; 747 748 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 749 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 750 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { 751 __be16 tci; 752 753 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 754 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 755 OVS_NLERR("Invalid Vlan frame.\n"); 756 return -EINVAL; 757 } 758 759 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 760 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 761 encap = a[OVS_KEY_ATTR_ENCAP]; 762 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 763 encap_valid = true; 764 765 if (tci & htons(VLAN_TAG_PRESENT)) { 766 err = parse_flow_nlattrs(encap, a, &key_attrs); 767 if (err) 768 return err; 769 } else if (!tci) { 770 /* Corner case for truncated 802.1Q header. */ 771 if (nla_len(encap)) { 772 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); 773 return -EINVAL; 774 } 775 } else { 776 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); 777 return -EINVAL; 778 } 779 } 780 781 err = ovs_key_from_nlattrs(match, key_attrs, a, false); 782 if (err) 783 return err; 784 785 if (mask) { 786 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); 787 if (err) 788 return err; 789 790 if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { 791 __be16 eth_type = 0; 792 __be16 tci = 0; 793 794 if (!encap_valid) { 795 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); 796 return -EINVAL; 797 } 798 799 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 800 if (a[OVS_KEY_ATTR_ETHERTYPE]) 801 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 802 803 if (eth_type == htons(0xffff)) { 804 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 805 encap = a[OVS_KEY_ATTR_ENCAP]; 806 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); 807 } else { 808 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", 809 ntohs(eth_type)); 810 return -EINVAL; 811 } 812 813 if (a[OVS_KEY_ATTR_VLAN]) 814 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 815 816 if (!(tci & htons(VLAN_TAG_PRESENT))) { 817 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); 818 return -EINVAL; 819 } 820 } 821 822 err = ovs_key_from_nlattrs(match, mask_attrs, a, true); 823 if (err) 824 return err; 825 } else { 826 /* Populate exact match flow's key mask. */ 827 if (match->mask) 828 sw_flow_mask_set(match->mask, &match->range, 0xff); 829 } 830 831 if (!match_validate(match, key_attrs, mask_attrs)) 832 return -EINVAL; 833 834 return 0; 835 } 836 837 /** 838 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 839 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 840 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 841 * sequence. 842 * 843 * This parses a series of Netlink attributes that form a flow key, which must 844 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 845 * get the metadata, that is, the parts of the flow key that cannot be 846 * extracted from the packet itself. 847 */ 848 849 int ovs_nla_get_flow_metadata(struct sw_flow *flow, 850 const struct nlattr *attr) 851 { 852 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 853 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 854 u64 attrs = 0; 855 int err; 856 struct sw_flow_match match; 857 858 flow->key.phy.in_port = DP_MAX_PORTS; 859 flow->key.phy.priority = 0; 860 flow->key.phy.skb_mark = 0; 861 memset(tun_key, 0, sizeof(flow->key.tun_key)); 862 863 err = parse_flow_nlattrs(attr, a, &attrs); 864 if (err) 865 return -EINVAL; 866 867 memset(&match, 0, sizeof(match)); 868 match.key = &flow->key; 869 870 err = metadata_from_nlattrs(&match, &attrs, a, false); 871 if (err) 872 return err; 873 874 return 0; 875 } 876 877 int ovs_nla_put_flow(const struct sw_flow_key *swkey, 878 const struct sw_flow_key *output, struct sk_buff *skb) 879 { 880 struct ovs_key_ethernet *eth_key; 881 struct nlattr *nla, *encap; 882 bool is_mask = (swkey != output); 883 884 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 885 goto nla_put_failure; 886 887 if ((swkey->tun_key.ipv4_dst || is_mask) && 888 ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) 889 goto nla_put_failure; 890 891 if (swkey->phy.in_port == DP_MAX_PORTS) { 892 if (is_mask && (output->phy.in_port == 0xffff)) 893 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 894 goto nla_put_failure; 895 } else { 896 u16 upper_u16; 897 upper_u16 = !is_mask ? 0 : 0xffff; 898 899 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 900 (upper_u16 << 16) | output->phy.in_port)) 901 goto nla_put_failure; 902 } 903 904 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 905 goto nla_put_failure; 906 907 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 908 if (!nla) 909 goto nla_put_failure; 910 911 eth_key = nla_data(nla); 912 ether_addr_copy(eth_key->eth_src, output->eth.src); 913 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 914 915 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 916 __be16 eth_type; 917 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 918 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 919 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 920 goto nla_put_failure; 921 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 922 if (!swkey->eth.tci) 923 goto unencap; 924 } else 925 encap = NULL; 926 927 if (swkey->eth.type == htons(ETH_P_802_2)) { 928 /* 929 * Ethertype 802.2 is represented in the netlink with omitted 930 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 931 * 0xffff in the mask attribute. Ethertype can also 932 * be wildcarded. 933 */ 934 if (is_mask && output->eth.type) 935 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 936 output->eth.type)) 937 goto nla_put_failure; 938 goto unencap; 939 } 940 941 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 942 goto nla_put_failure; 943 944 if (swkey->eth.type == htons(ETH_P_IP)) { 945 struct ovs_key_ipv4 *ipv4_key; 946 947 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 948 if (!nla) 949 goto nla_put_failure; 950 ipv4_key = nla_data(nla); 951 ipv4_key->ipv4_src = output->ipv4.addr.src; 952 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 953 ipv4_key->ipv4_proto = output->ip.proto; 954 ipv4_key->ipv4_tos = output->ip.tos; 955 ipv4_key->ipv4_ttl = output->ip.ttl; 956 ipv4_key->ipv4_frag = output->ip.frag; 957 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 958 struct ovs_key_ipv6 *ipv6_key; 959 960 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 961 if (!nla) 962 goto nla_put_failure; 963 ipv6_key = nla_data(nla); 964 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 965 sizeof(ipv6_key->ipv6_src)); 966 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 967 sizeof(ipv6_key->ipv6_dst)); 968 ipv6_key->ipv6_label = output->ipv6.label; 969 ipv6_key->ipv6_proto = output->ip.proto; 970 ipv6_key->ipv6_tclass = output->ip.tos; 971 ipv6_key->ipv6_hlimit = output->ip.ttl; 972 ipv6_key->ipv6_frag = output->ip.frag; 973 } else if (swkey->eth.type == htons(ETH_P_ARP) || 974 swkey->eth.type == htons(ETH_P_RARP)) { 975 struct ovs_key_arp *arp_key; 976 977 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 978 if (!nla) 979 goto nla_put_failure; 980 arp_key = nla_data(nla); 981 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 982 arp_key->arp_sip = output->ipv4.addr.src; 983 arp_key->arp_tip = output->ipv4.addr.dst; 984 arp_key->arp_op = htons(output->ip.proto); 985 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 986 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 987 } 988 989 if ((swkey->eth.type == htons(ETH_P_IP) || 990 swkey->eth.type == htons(ETH_P_IPV6)) && 991 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 992 993 if (swkey->ip.proto == IPPROTO_TCP) { 994 struct ovs_key_tcp *tcp_key; 995 996 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 997 if (!nla) 998 goto nla_put_failure; 999 tcp_key = nla_data(nla); 1000 tcp_key->tcp_src = output->tp.src; 1001 tcp_key->tcp_dst = output->tp.dst; 1002 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1003 output->tp.flags)) 1004 goto nla_put_failure; 1005 } else if (swkey->ip.proto == IPPROTO_UDP) { 1006 struct ovs_key_udp *udp_key; 1007 1008 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1009 if (!nla) 1010 goto nla_put_failure; 1011 udp_key = nla_data(nla); 1012 udp_key->udp_src = output->tp.src; 1013 udp_key->udp_dst = output->tp.dst; 1014 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1015 struct ovs_key_sctp *sctp_key; 1016 1017 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1018 if (!nla) 1019 goto nla_put_failure; 1020 sctp_key = nla_data(nla); 1021 sctp_key->sctp_src = output->tp.src; 1022 sctp_key->sctp_dst = output->tp.dst; 1023 } else if (swkey->eth.type == htons(ETH_P_IP) && 1024 swkey->ip.proto == IPPROTO_ICMP) { 1025 struct ovs_key_icmp *icmp_key; 1026 1027 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1028 if (!nla) 1029 goto nla_put_failure; 1030 icmp_key = nla_data(nla); 1031 icmp_key->icmp_type = ntohs(output->tp.src); 1032 icmp_key->icmp_code = ntohs(output->tp.dst); 1033 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1034 swkey->ip.proto == IPPROTO_ICMPV6) { 1035 struct ovs_key_icmpv6 *icmpv6_key; 1036 1037 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1038 sizeof(*icmpv6_key)); 1039 if (!nla) 1040 goto nla_put_failure; 1041 icmpv6_key = nla_data(nla); 1042 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1043 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1044 1045 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1046 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1047 struct ovs_key_nd *nd_key; 1048 1049 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1050 if (!nla) 1051 goto nla_put_failure; 1052 nd_key = nla_data(nla); 1053 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1054 sizeof(nd_key->nd_target)); 1055 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1056 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1057 } 1058 } 1059 } 1060 1061 unencap: 1062 if (encap) 1063 nla_nest_end(skb, encap); 1064 1065 return 0; 1066 1067 nla_put_failure: 1068 return -EMSGSIZE; 1069 } 1070 1071 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1072 1073 struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) 1074 { 1075 struct sw_flow_actions *sfa; 1076 1077 if (size > MAX_ACTIONS_BUFSIZE) 1078 return ERR_PTR(-EINVAL); 1079 1080 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1081 if (!sfa) 1082 return ERR_PTR(-ENOMEM); 1083 1084 sfa->actions_len = 0; 1085 return sfa; 1086 } 1087 1088 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1089 * The caller must hold rcu_read_lock for this to be sensible. */ 1090 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1091 { 1092 kfree_rcu(sf_acts, rcu); 1093 } 1094 1095 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1096 int attr_len) 1097 { 1098 1099 struct sw_flow_actions *acts; 1100 int new_acts_size; 1101 int req_size = NLA_ALIGN(attr_len); 1102 int next_offset = offsetof(struct sw_flow_actions, actions) + 1103 (*sfa)->actions_len; 1104 1105 if (req_size <= (ksize(*sfa) - next_offset)) 1106 goto out; 1107 1108 new_acts_size = ksize(*sfa) * 2; 1109 1110 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1111 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1112 return ERR_PTR(-EMSGSIZE); 1113 new_acts_size = MAX_ACTIONS_BUFSIZE; 1114 } 1115 1116 acts = ovs_nla_alloc_flow_actions(new_acts_size); 1117 if (IS_ERR(acts)) 1118 return (void *)acts; 1119 1120 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1121 acts->actions_len = (*sfa)->actions_len; 1122 kfree(*sfa); 1123 *sfa = acts; 1124 1125 out: 1126 (*sfa)->actions_len += req_size; 1127 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1128 } 1129 1130 static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) 1131 { 1132 struct nlattr *a; 1133 1134 a = reserve_sfa_size(sfa, nla_attr_size(len)); 1135 if (IS_ERR(a)) 1136 return PTR_ERR(a); 1137 1138 a->nla_type = attrtype; 1139 a->nla_len = nla_attr_size(len); 1140 1141 if (data) 1142 memcpy(nla_data(a), data, len); 1143 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1144 1145 return 0; 1146 } 1147 1148 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1149 int attrtype) 1150 { 1151 int used = (*sfa)->actions_len; 1152 int err; 1153 1154 err = add_action(sfa, attrtype, NULL, 0); 1155 if (err) 1156 return err; 1157 1158 return used; 1159 } 1160 1161 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1162 int st_offset) 1163 { 1164 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 1165 st_offset); 1166 1167 a->nla_len = sfa->actions_len - st_offset; 1168 } 1169 1170 static int validate_and_copy_sample(const struct nlattr *attr, 1171 const struct sw_flow_key *key, int depth, 1172 struct sw_flow_actions **sfa) 1173 { 1174 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1175 const struct nlattr *probability, *actions; 1176 const struct nlattr *a; 1177 int rem, start, err, st_acts; 1178 1179 memset(attrs, 0, sizeof(attrs)); 1180 nla_for_each_nested(a, attr, rem) { 1181 int type = nla_type(a); 1182 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1183 return -EINVAL; 1184 attrs[type] = a; 1185 } 1186 if (rem) 1187 return -EINVAL; 1188 1189 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 1190 if (!probability || nla_len(probability) != sizeof(u32)) 1191 return -EINVAL; 1192 1193 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 1194 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 1195 return -EINVAL; 1196 1197 /* validation done, copy sample action. */ 1198 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); 1199 if (start < 0) 1200 return start; 1201 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1202 nla_data(probability), sizeof(u32)); 1203 if (err) 1204 return err; 1205 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); 1206 if (st_acts < 0) 1207 return st_acts; 1208 1209 err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); 1210 if (err) 1211 return err; 1212 1213 add_nested_action_end(*sfa, st_acts); 1214 add_nested_action_end(*sfa, start); 1215 1216 return 0; 1217 } 1218 1219 static int validate_tp_port(const struct sw_flow_key *flow_key) 1220 { 1221 if ((flow_key->eth.type == htons(ETH_P_IP) || 1222 flow_key->eth.type == htons(ETH_P_IPV6)) && 1223 (flow_key->tp.src || flow_key->tp.dst)) 1224 return 0; 1225 1226 return -EINVAL; 1227 } 1228 1229 void ovs_match_init(struct sw_flow_match *match, 1230 struct sw_flow_key *key, 1231 struct sw_flow_mask *mask) 1232 { 1233 memset(match, 0, sizeof(*match)); 1234 match->key = key; 1235 match->mask = mask; 1236 1237 memset(key, 0, sizeof(*key)); 1238 1239 if (mask) { 1240 memset(&mask->key, 0, sizeof(mask->key)); 1241 mask->range.start = mask->range.end = 0; 1242 } 1243 } 1244 1245 static int validate_and_copy_set_tun(const struct nlattr *attr, 1246 struct sw_flow_actions **sfa) 1247 { 1248 struct sw_flow_match match; 1249 struct sw_flow_key key; 1250 int err, start; 1251 1252 ovs_match_init(&match, &key, NULL); 1253 err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); 1254 if (err) 1255 return err; 1256 1257 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 1258 if (start < 0) 1259 return start; 1260 1261 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, 1262 sizeof(match.key->tun_key)); 1263 add_nested_action_end(*sfa, start); 1264 1265 return err; 1266 } 1267 1268 static int validate_set(const struct nlattr *a, 1269 const struct sw_flow_key *flow_key, 1270 struct sw_flow_actions **sfa, 1271 bool *set_tun) 1272 { 1273 const struct nlattr *ovs_key = nla_data(a); 1274 int key_type = nla_type(ovs_key); 1275 1276 /* There can be only one key in a action */ 1277 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1278 return -EINVAL; 1279 1280 if (key_type > OVS_KEY_ATTR_MAX || 1281 (ovs_key_lens[key_type] != nla_len(ovs_key) && 1282 ovs_key_lens[key_type] != -1)) 1283 return -EINVAL; 1284 1285 switch (key_type) { 1286 const struct ovs_key_ipv4 *ipv4_key; 1287 const struct ovs_key_ipv6 *ipv6_key; 1288 int err; 1289 1290 case OVS_KEY_ATTR_PRIORITY: 1291 case OVS_KEY_ATTR_SKB_MARK: 1292 case OVS_KEY_ATTR_ETHERNET: 1293 break; 1294 1295 case OVS_KEY_ATTR_TUNNEL: 1296 *set_tun = true; 1297 err = validate_and_copy_set_tun(a, sfa); 1298 if (err) 1299 return err; 1300 break; 1301 1302 case OVS_KEY_ATTR_IPV4: 1303 if (flow_key->eth.type != htons(ETH_P_IP)) 1304 return -EINVAL; 1305 1306 if (!flow_key->ip.proto) 1307 return -EINVAL; 1308 1309 ipv4_key = nla_data(ovs_key); 1310 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1311 return -EINVAL; 1312 1313 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1314 return -EINVAL; 1315 1316 break; 1317 1318 case OVS_KEY_ATTR_IPV6: 1319 if (flow_key->eth.type != htons(ETH_P_IPV6)) 1320 return -EINVAL; 1321 1322 if (!flow_key->ip.proto) 1323 return -EINVAL; 1324 1325 ipv6_key = nla_data(ovs_key); 1326 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1327 return -EINVAL; 1328 1329 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1330 return -EINVAL; 1331 1332 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1333 return -EINVAL; 1334 1335 break; 1336 1337 case OVS_KEY_ATTR_TCP: 1338 if (flow_key->ip.proto != IPPROTO_TCP) 1339 return -EINVAL; 1340 1341 return validate_tp_port(flow_key); 1342 1343 case OVS_KEY_ATTR_UDP: 1344 if (flow_key->ip.proto != IPPROTO_UDP) 1345 return -EINVAL; 1346 1347 return validate_tp_port(flow_key); 1348 1349 case OVS_KEY_ATTR_SCTP: 1350 if (flow_key->ip.proto != IPPROTO_SCTP) 1351 return -EINVAL; 1352 1353 return validate_tp_port(flow_key); 1354 1355 default: 1356 return -EINVAL; 1357 } 1358 1359 return 0; 1360 } 1361 1362 static int validate_userspace(const struct nlattr *attr) 1363 { 1364 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1365 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1366 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1367 }; 1368 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1369 int error; 1370 1371 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1372 attr, userspace_policy); 1373 if (error) 1374 return error; 1375 1376 if (!a[OVS_USERSPACE_ATTR_PID] || 1377 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1378 return -EINVAL; 1379 1380 return 0; 1381 } 1382 1383 static int copy_action(const struct nlattr *from, 1384 struct sw_flow_actions **sfa) 1385 { 1386 int totlen = NLA_ALIGN(from->nla_len); 1387 struct nlattr *to; 1388 1389 to = reserve_sfa_size(sfa, from->nla_len); 1390 if (IS_ERR(to)) 1391 return PTR_ERR(to); 1392 1393 memcpy(to, from, totlen); 1394 return 0; 1395 } 1396 1397 int ovs_nla_copy_actions(const struct nlattr *attr, 1398 const struct sw_flow_key *key, 1399 int depth, 1400 struct sw_flow_actions **sfa) 1401 { 1402 const struct nlattr *a; 1403 int rem, err; 1404 1405 if (depth >= SAMPLE_ACTION_DEPTH) 1406 return -EOVERFLOW; 1407 1408 nla_for_each_nested(a, attr, rem) { 1409 /* Expected argument lengths, (u32)-1 for variable length. */ 1410 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 1411 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1412 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1413 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1414 [OVS_ACTION_ATTR_POP_VLAN] = 0, 1415 [OVS_ACTION_ATTR_SET] = (u32)-1, 1416 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 1417 }; 1418 const struct ovs_action_push_vlan *vlan; 1419 int type = nla_type(a); 1420 bool skip_copy; 1421 1422 if (type > OVS_ACTION_ATTR_MAX || 1423 (action_lens[type] != nla_len(a) && 1424 action_lens[type] != (u32)-1)) 1425 return -EINVAL; 1426 1427 skip_copy = false; 1428 switch (type) { 1429 case OVS_ACTION_ATTR_UNSPEC: 1430 return -EINVAL; 1431 1432 case OVS_ACTION_ATTR_USERSPACE: 1433 err = validate_userspace(a); 1434 if (err) 1435 return err; 1436 break; 1437 1438 case OVS_ACTION_ATTR_OUTPUT: 1439 if (nla_get_u32(a) >= DP_MAX_PORTS) 1440 return -EINVAL; 1441 break; 1442 1443 1444 case OVS_ACTION_ATTR_POP_VLAN: 1445 break; 1446 1447 case OVS_ACTION_ATTR_PUSH_VLAN: 1448 vlan = nla_data(a); 1449 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 1450 return -EINVAL; 1451 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1452 return -EINVAL; 1453 break; 1454 1455 case OVS_ACTION_ATTR_SET: 1456 err = validate_set(a, key, sfa, &skip_copy); 1457 if (err) 1458 return err; 1459 break; 1460 1461 case OVS_ACTION_ATTR_SAMPLE: 1462 err = validate_and_copy_sample(a, key, depth, sfa); 1463 if (err) 1464 return err; 1465 skip_copy = true; 1466 break; 1467 1468 default: 1469 return -EINVAL; 1470 } 1471 if (!skip_copy) { 1472 err = copy_action(a, sfa); 1473 if (err) 1474 return err; 1475 } 1476 } 1477 1478 if (rem > 0) 1479 return -EINVAL; 1480 1481 return 0; 1482 } 1483 1484 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1485 { 1486 const struct nlattr *a; 1487 struct nlattr *start; 1488 int err = 0, rem; 1489 1490 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 1491 if (!start) 1492 return -EMSGSIZE; 1493 1494 nla_for_each_nested(a, attr, rem) { 1495 int type = nla_type(a); 1496 struct nlattr *st_sample; 1497 1498 switch (type) { 1499 case OVS_SAMPLE_ATTR_PROBABILITY: 1500 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 1501 sizeof(u32), nla_data(a))) 1502 return -EMSGSIZE; 1503 break; 1504 case OVS_SAMPLE_ATTR_ACTIONS: 1505 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 1506 if (!st_sample) 1507 return -EMSGSIZE; 1508 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 1509 if (err) 1510 return err; 1511 nla_nest_end(skb, st_sample); 1512 break; 1513 } 1514 } 1515 1516 nla_nest_end(skb, start); 1517 return err; 1518 } 1519 1520 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 1521 { 1522 const struct nlattr *ovs_key = nla_data(a); 1523 int key_type = nla_type(ovs_key); 1524 struct nlattr *start; 1525 int err; 1526 1527 switch (key_type) { 1528 case OVS_KEY_ATTR_IPV4_TUNNEL: 1529 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 1530 if (!start) 1531 return -EMSGSIZE; 1532 1533 err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), 1534 nla_data(ovs_key)); 1535 if (err) 1536 return err; 1537 nla_nest_end(skb, start); 1538 break; 1539 default: 1540 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 1541 return -EMSGSIZE; 1542 break; 1543 } 1544 1545 return 0; 1546 } 1547 1548 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 1549 { 1550 const struct nlattr *a; 1551 int rem, err; 1552 1553 nla_for_each_attr(a, attr, len, rem) { 1554 int type = nla_type(a); 1555 1556 switch (type) { 1557 case OVS_ACTION_ATTR_SET: 1558 err = set_action_to_attr(a, skb); 1559 if (err) 1560 return err; 1561 break; 1562 1563 case OVS_ACTION_ATTR_SAMPLE: 1564 err = sample_action_to_attr(a, skb); 1565 if (err) 1566 return err; 1567 break; 1568 default: 1569 if (nla_put(skb, type, nla_len(a), nla_data(a))) 1570 return -EMSGSIZE; 1571 break; 1572 } 1573 } 1574 1575 return 0; 1576 } 1577