1 /* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/geneve.h> 46 #include <net/ip.h> 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/mpls.h> 50 51 #include "flow_netlink.h" 52 53 static void update_range(struct sw_flow_match *match, 54 size_t offset, size_t size, bool is_mask) 55 { 56 struct sw_flow_key_range *range; 57 size_t start = rounddown(offset, sizeof(long)); 58 size_t end = roundup(offset + size, sizeof(long)); 59 60 if (!is_mask) 61 range = &match->range; 62 else 63 range = &match->mask->range; 64 65 if (range->start == range->end) { 66 range->start = start; 67 range->end = end; 68 return; 69 } 70 71 if (range->start > start) 72 range->start = start; 73 74 if (range->end < end) 75 range->end = end; 76 } 77 78 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 79 do { \ 80 update_range(match, offsetof(struct sw_flow_key, field), \ 81 sizeof((match)->key->field), is_mask); \ 82 if (is_mask) \ 83 (match)->mask->key.field = value; \ 84 else \ 85 (match)->key->field = value; \ 86 } while (0) 87 88 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 89 do { \ 90 update_range(match, offset, len, is_mask); \ 91 if (is_mask) \ 92 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 93 len); \ 94 else \ 95 memcpy((u8 *)(match)->key + offset, value_p, len); \ 96 } while (0) 97 98 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 99 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 100 value_p, len, is_mask) 101 102 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 103 do { \ 104 update_range(match, offsetof(struct sw_flow_key, field), \ 105 sizeof((match)->key->field), is_mask); \ 106 if (is_mask) \ 107 memset((u8 *)&(match)->mask->key.field, value, \ 108 sizeof((match)->mask->key.field)); \ 109 else \ 110 memset((u8 *)&(match)->key->field, value, \ 111 sizeof((match)->key->field)); \ 112 } while (0) 113 114 static bool match_validate(const struct sw_flow_match *match, 115 u64 key_attrs, u64 mask_attrs, bool log) 116 { 117 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 118 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 119 120 /* The following mask attributes allowed only if they 121 * pass the validation tests. */ 122 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 123 | (1 << OVS_KEY_ATTR_IPV6) 124 | (1 << OVS_KEY_ATTR_TCP) 125 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 126 | (1 << OVS_KEY_ATTR_UDP) 127 | (1 << OVS_KEY_ATTR_SCTP) 128 | (1 << OVS_KEY_ATTR_ICMP) 129 | (1 << OVS_KEY_ATTR_ICMPV6) 130 | (1 << OVS_KEY_ATTR_ARP) 131 | (1 << OVS_KEY_ATTR_ND) 132 | (1 << OVS_KEY_ATTR_MPLS)); 133 134 /* Always allowed mask fields. */ 135 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 136 | (1 << OVS_KEY_ATTR_IN_PORT) 137 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 138 139 /* Check key attributes. */ 140 if (match->key->eth.type == htons(ETH_P_ARP) 141 || match->key->eth.type == htons(ETH_P_RARP)) { 142 key_expected |= 1 << OVS_KEY_ATTR_ARP; 143 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 144 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 145 } 146 147 if (eth_p_mpls(match->key->eth.type)) { 148 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 149 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 150 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 151 } 152 153 if (match->key->eth.type == htons(ETH_P_IP)) { 154 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 155 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 156 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 157 158 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 159 if (match->key->ip.proto == IPPROTO_UDP) { 160 key_expected |= 1 << OVS_KEY_ATTR_UDP; 161 if (match->mask && (match->mask->key.ip.proto == 0xff)) 162 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 163 } 164 165 if (match->key->ip.proto == IPPROTO_SCTP) { 166 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 167 if (match->mask && (match->mask->key.ip.proto == 0xff)) 168 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 169 } 170 171 if (match->key->ip.proto == IPPROTO_TCP) { 172 key_expected |= 1 << OVS_KEY_ATTR_TCP; 173 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 174 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 175 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 176 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 177 } 178 } 179 180 if (match->key->ip.proto == IPPROTO_ICMP) { 181 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 182 if (match->mask && (match->mask->key.ip.proto == 0xff)) 183 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 184 } 185 } 186 } 187 188 if (match->key->eth.type == htons(ETH_P_IPV6)) { 189 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 190 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 191 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 192 193 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 194 if (match->key->ip.proto == IPPROTO_UDP) { 195 key_expected |= 1 << OVS_KEY_ATTR_UDP; 196 if (match->mask && (match->mask->key.ip.proto == 0xff)) 197 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 198 } 199 200 if (match->key->ip.proto == IPPROTO_SCTP) { 201 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 202 if (match->mask && (match->mask->key.ip.proto == 0xff)) 203 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 204 } 205 206 if (match->key->ip.proto == IPPROTO_TCP) { 207 key_expected |= 1 << OVS_KEY_ATTR_TCP; 208 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 209 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 210 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 211 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 212 } 213 } 214 215 if (match->key->ip.proto == IPPROTO_ICMPV6) { 216 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 217 if (match->mask && (match->mask->key.ip.proto == 0xff)) 218 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 219 220 if (match->key->tp.src == 221 htons(NDISC_NEIGHBOUR_SOLICITATION) || 222 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 223 key_expected |= 1 << OVS_KEY_ATTR_ND; 224 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 225 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 226 } 227 } 228 } 229 } 230 231 if ((key_attrs & key_expected) != key_expected) { 232 /* Key attributes check failed. */ 233 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 234 (unsigned long long)key_attrs, 235 (unsigned long long)key_expected); 236 return false; 237 } 238 239 if ((mask_attrs & mask_allowed) != mask_attrs) { 240 /* Mask attributes check failed. */ 241 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 242 (unsigned long long)mask_attrs, 243 (unsigned long long)mask_allowed); 244 return false; 245 } 246 247 return true; 248 } 249 250 size_t ovs_tun_key_attr_size(void) 251 { 252 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 253 * updating this function. 254 */ 255 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 256 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ 257 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ 258 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 259 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 260 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 261 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 262 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 263 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 264 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 265 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 266 } 267 268 size_t ovs_key_attr_size(void) 269 { 270 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 271 * updating this function. 272 */ 273 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); 274 275 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 276 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 277 + ovs_tun_key_attr_size() 278 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 279 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 280 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 281 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 282 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 283 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 284 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 285 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 286 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 287 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 288 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 289 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 290 } 291 292 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 293 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 294 [OVS_KEY_ATTR_ENCAP] = -1, 295 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 296 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 297 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 298 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 299 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 300 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 301 [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 302 [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 303 [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 304 [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), 305 [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 306 [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 307 [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 308 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 309 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 310 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 311 [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), 312 [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), 313 [OVS_KEY_ATTR_TUNNEL] = -1, 314 [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), 315 }; 316 317 static bool is_all_zero(const u8 *fp, size_t size) 318 { 319 int i; 320 321 if (!fp) 322 return false; 323 324 for (i = 0; i < size; i++) 325 if (fp[i]) 326 return false; 327 328 return true; 329 } 330 331 static int __parse_flow_nlattrs(const struct nlattr *attr, 332 const struct nlattr *a[], 333 u64 *attrsp, bool log, bool nz) 334 { 335 const struct nlattr *nla; 336 u64 attrs; 337 int rem; 338 339 attrs = *attrsp; 340 nla_for_each_nested(nla, attr, rem) { 341 u16 type = nla_type(nla); 342 int expected_len; 343 344 if (type > OVS_KEY_ATTR_MAX) { 345 OVS_NLERR(log, "Key type %d is out of range max %d", 346 type, OVS_KEY_ATTR_MAX); 347 return -EINVAL; 348 } 349 350 if (attrs & (1 << type)) { 351 OVS_NLERR(log, "Duplicate key (type %d).", type); 352 return -EINVAL; 353 } 354 355 expected_len = ovs_key_lens[type]; 356 if (nla_len(nla) != expected_len && expected_len != -1) { 357 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 358 type, nla_len(nla), expected_len); 359 return -EINVAL; 360 } 361 362 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 363 attrs |= 1 << type; 364 a[type] = nla; 365 } 366 } 367 if (rem) { 368 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 369 return -EINVAL; 370 } 371 372 *attrsp = attrs; 373 return 0; 374 } 375 376 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 377 const struct nlattr *a[], u64 *attrsp, 378 bool log) 379 { 380 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 381 } 382 383 static int parse_flow_nlattrs(const struct nlattr *attr, 384 const struct nlattr *a[], u64 *attrsp, 385 bool log) 386 { 387 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 388 } 389 390 static int genev_tun_opt_from_nlattr(const struct nlattr *a, 391 struct sw_flow_match *match, bool is_mask, 392 bool log) 393 { 394 unsigned long opt_key_offset; 395 396 if (nla_len(a) > sizeof(match->key->tun_opts)) { 397 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", 398 nla_len(a), sizeof(match->key->tun_opts)); 399 return -EINVAL; 400 } 401 402 if (nla_len(a) % 4 != 0) { 403 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", 404 nla_len(a)); 405 return -EINVAL; 406 } 407 408 /* We need to record the length of the options passed 409 * down, otherwise packets with the same format but 410 * additional options will be silently matched. 411 */ 412 if (!is_mask) { 413 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), 414 false); 415 } else { 416 /* This is somewhat unusual because it looks at 417 * both the key and mask while parsing the 418 * attributes (and by extension assumes the key 419 * is parsed first). Normally, we would verify 420 * that each is the correct length and that the 421 * attributes line up in the validate function. 422 * However, that is difficult because this is 423 * variable length and we won't have the 424 * information later. 425 */ 426 if (match->key->tun_opts_len != nla_len(a)) { 427 OVS_NLERR(log, "Geneve option len %d != mask len %d", 428 match->key->tun_opts_len, nla_len(a)); 429 return -EINVAL; 430 } 431 432 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 433 } 434 435 opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, 436 nla_len(a)); 437 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 438 nla_len(a), is_mask); 439 return 0; 440 } 441 442 static int ipv4_tun_from_nlattr(const struct nlattr *attr, 443 struct sw_flow_match *match, bool is_mask, 444 bool log) 445 { 446 struct nlattr *a; 447 int rem; 448 bool ttl = false; 449 __be16 tun_flags = 0; 450 451 nla_for_each_nested(a, attr, rem) { 452 int type = nla_type(a); 453 int err; 454 455 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 456 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 457 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 458 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 459 [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 460 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 461 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 462 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 463 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), 464 [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), 465 [OVS_TUNNEL_KEY_ATTR_OAM] = 0, 466 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, 467 }; 468 469 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 470 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 471 type, OVS_TUNNEL_KEY_ATTR_MAX); 472 return -EINVAL; 473 } 474 475 if (ovs_tunnel_key_lens[type] != nla_len(a) && 476 ovs_tunnel_key_lens[type] != -1) { 477 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 478 type, nla_len(a), ovs_tunnel_key_lens[type]); 479 return -EINVAL; 480 } 481 482 switch (type) { 483 case OVS_TUNNEL_KEY_ATTR_ID: 484 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 485 nla_get_be64(a), is_mask); 486 tun_flags |= TUNNEL_KEY; 487 break; 488 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 489 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 490 nla_get_be32(a), is_mask); 491 break; 492 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 493 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 494 nla_get_be32(a), is_mask); 495 break; 496 case OVS_TUNNEL_KEY_ATTR_TOS: 497 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 498 nla_get_u8(a), is_mask); 499 break; 500 case OVS_TUNNEL_KEY_ATTR_TTL: 501 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 502 nla_get_u8(a), is_mask); 503 ttl = true; 504 break; 505 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 506 tun_flags |= TUNNEL_DONT_FRAGMENT; 507 break; 508 case OVS_TUNNEL_KEY_ATTR_CSUM: 509 tun_flags |= TUNNEL_CSUM; 510 break; 511 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 512 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 513 nla_get_be16(a), is_mask); 514 break; 515 case OVS_TUNNEL_KEY_ATTR_TP_DST: 516 SW_FLOW_KEY_PUT(match, tun_key.tp_dst, 517 nla_get_be16(a), is_mask); 518 break; 519 case OVS_TUNNEL_KEY_ATTR_OAM: 520 tun_flags |= TUNNEL_OAM; 521 break; 522 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 523 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 524 if (err) 525 return err; 526 527 tun_flags |= TUNNEL_OPTIONS_PRESENT; 528 break; 529 default: 530 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", 531 type); 532 return -EINVAL; 533 } 534 } 535 536 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 537 538 if (rem > 0) { 539 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", 540 rem); 541 return -EINVAL; 542 } 543 544 if (!is_mask) { 545 if (!match->key->tun_key.ipv4_dst) { 546 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 547 return -EINVAL; 548 } 549 550 if (!ttl) { 551 OVS_NLERR(log, "IPv4 tunnel TTL not specified."); 552 return -EINVAL; 553 } 554 } 555 556 return 0; 557 } 558 559 static int __ipv4_tun_to_nlattr(struct sk_buff *skb, 560 const struct ovs_key_ipv4_tunnel *output, 561 const struct geneve_opt *tun_opts, 562 int swkey_tun_opts_len) 563 { 564 if (output->tun_flags & TUNNEL_KEY && 565 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 566 return -EMSGSIZE; 567 if (output->ipv4_src && 568 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 569 return -EMSGSIZE; 570 if (output->ipv4_dst && 571 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 572 return -EMSGSIZE; 573 if (output->ipv4_tos && 574 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 575 return -EMSGSIZE; 576 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 577 return -EMSGSIZE; 578 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 579 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 580 return -EMSGSIZE; 581 if ((output->tun_flags & TUNNEL_CSUM) && 582 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 583 return -EMSGSIZE; 584 if (output->tp_src && 585 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 586 return -EMSGSIZE; 587 if (output->tp_dst && 588 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 589 return -EMSGSIZE; 590 if ((output->tun_flags & TUNNEL_OAM) && 591 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 592 return -EMSGSIZE; 593 if (tun_opts && 594 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 595 swkey_tun_opts_len, tun_opts)) 596 return -EMSGSIZE; 597 598 return 0; 599 } 600 601 static int ipv4_tun_to_nlattr(struct sk_buff *skb, 602 const struct ovs_key_ipv4_tunnel *output, 603 const struct geneve_opt *tun_opts, 604 int swkey_tun_opts_len) 605 { 606 struct nlattr *nla; 607 int err; 608 609 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 610 if (!nla) 611 return -EMSGSIZE; 612 613 err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); 614 if (err) 615 return err; 616 617 nla_nest_end(skb, nla); 618 return 0; 619 } 620 621 int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, 622 const struct ovs_tunnel_info *egress_tun_info) 623 { 624 return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, 625 egress_tun_info->options, 626 egress_tun_info->options_len); 627 } 628 629 static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 630 const struct nlattr **a, bool is_mask, 631 bool log) 632 { 633 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 634 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 635 636 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 637 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 638 } 639 640 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 641 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 642 643 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 644 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 645 } 646 647 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 648 SW_FLOW_KEY_PUT(match, phy.priority, 649 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 650 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 651 } 652 653 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 654 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 655 656 if (is_mask) { 657 in_port = 0xffffffff; /* Always exact match in_port. */ 658 } else if (in_port >= DP_MAX_PORTS) { 659 OVS_NLERR(log, "Port %d exceeds max allowable %d", 660 in_port, DP_MAX_PORTS); 661 return -EINVAL; 662 } 663 664 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 665 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 666 } else if (!is_mask) { 667 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 668 } 669 670 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 671 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 672 673 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 674 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 675 } 676 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 677 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 678 is_mask, log)) 679 return -EINVAL; 680 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 681 } 682 return 0; 683 } 684 685 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 686 const struct nlattr **a, bool is_mask, 687 bool log) 688 { 689 int err; 690 691 err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); 692 if (err) 693 return err; 694 695 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 696 const struct ovs_key_ethernet *eth_key; 697 698 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 699 SW_FLOW_KEY_MEMCPY(match, eth.src, 700 eth_key->eth_src, ETH_ALEN, is_mask); 701 SW_FLOW_KEY_MEMCPY(match, eth.dst, 702 eth_key->eth_dst, ETH_ALEN, is_mask); 703 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 704 } 705 706 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 707 __be16 tci; 708 709 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 710 if (!(tci & htons(VLAN_TAG_PRESENT))) { 711 if (is_mask) 712 OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); 713 else 714 OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); 715 716 return -EINVAL; 717 } 718 719 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 720 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 721 } 722 723 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 724 __be16 eth_type; 725 726 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 727 if (is_mask) { 728 /* Always exact match EtherType. */ 729 eth_type = htons(0xffff); 730 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 731 OVS_NLERR(log, "EtherType %x is less than min %x", 732 ntohs(eth_type), ETH_P_802_3_MIN); 733 return -EINVAL; 734 } 735 736 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 737 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 738 } else if (!is_mask) { 739 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 740 } 741 742 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 743 const struct ovs_key_ipv4 *ipv4_key; 744 745 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 746 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 747 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 748 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 749 return -EINVAL; 750 } 751 SW_FLOW_KEY_PUT(match, ip.proto, 752 ipv4_key->ipv4_proto, is_mask); 753 SW_FLOW_KEY_PUT(match, ip.tos, 754 ipv4_key->ipv4_tos, is_mask); 755 SW_FLOW_KEY_PUT(match, ip.ttl, 756 ipv4_key->ipv4_ttl, is_mask); 757 SW_FLOW_KEY_PUT(match, ip.frag, 758 ipv4_key->ipv4_frag, is_mask); 759 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 760 ipv4_key->ipv4_src, is_mask); 761 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 762 ipv4_key->ipv4_dst, is_mask); 763 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 764 } 765 766 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 767 const struct ovs_key_ipv6 *ipv6_key; 768 769 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 770 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 771 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 772 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 773 return -EINVAL; 774 } 775 776 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 777 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 778 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 779 return -EINVAL; 780 } 781 782 SW_FLOW_KEY_PUT(match, ipv6.label, 783 ipv6_key->ipv6_label, is_mask); 784 SW_FLOW_KEY_PUT(match, ip.proto, 785 ipv6_key->ipv6_proto, is_mask); 786 SW_FLOW_KEY_PUT(match, ip.tos, 787 ipv6_key->ipv6_tclass, is_mask); 788 SW_FLOW_KEY_PUT(match, ip.ttl, 789 ipv6_key->ipv6_hlimit, is_mask); 790 SW_FLOW_KEY_PUT(match, ip.frag, 791 ipv6_key->ipv6_frag, is_mask); 792 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 793 ipv6_key->ipv6_src, 794 sizeof(match->key->ipv6.addr.src), 795 is_mask); 796 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 797 ipv6_key->ipv6_dst, 798 sizeof(match->key->ipv6.addr.dst), 799 is_mask); 800 801 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 802 } 803 804 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 805 const struct ovs_key_arp *arp_key; 806 807 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 808 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 809 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 810 arp_key->arp_op); 811 return -EINVAL; 812 } 813 814 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 815 arp_key->arp_sip, is_mask); 816 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 817 arp_key->arp_tip, is_mask); 818 SW_FLOW_KEY_PUT(match, ip.proto, 819 ntohs(arp_key->arp_op), is_mask); 820 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 821 arp_key->arp_sha, ETH_ALEN, is_mask); 822 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 823 arp_key->arp_tha, ETH_ALEN, is_mask); 824 825 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 826 } 827 828 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 829 const struct ovs_key_mpls *mpls_key; 830 831 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 832 SW_FLOW_KEY_PUT(match, mpls.top_lse, 833 mpls_key->mpls_lse, is_mask); 834 835 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 836 } 837 838 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 839 const struct ovs_key_tcp *tcp_key; 840 841 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 842 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 843 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 844 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 845 } 846 847 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 848 SW_FLOW_KEY_PUT(match, tp.flags, 849 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 850 is_mask); 851 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 852 } 853 854 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 855 const struct ovs_key_udp *udp_key; 856 857 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 858 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 859 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 860 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 861 } 862 863 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 864 const struct ovs_key_sctp *sctp_key; 865 866 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 867 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 868 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 869 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 870 } 871 872 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 873 const struct ovs_key_icmp *icmp_key; 874 875 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 876 SW_FLOW_KEY_PUT(match, tp.src, 877 htons(icmp_key->icmp_type), is_mask); 878 SW_FLOW_KEY_PUT(match, tp.dst, 879 htons(icmp_key->icmp_code), is_mask); 880 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 881 } 882 883 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 884 const struct ovs_key_icmpv6 *icmpv6_key; 885 886 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 887 SW_FLOW_KEY_PUT(match, tp.src, 888 htons(icmpv6_key->icmpv6_type), is_mask); 889 SW_FLOW_KEY_PUT(match, tp.dst, 890 htons(icmpv6_key->icmpv6_code), is_mask); 891 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 892 } 893 894 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 895 const struct ovs_key_nd *nd_key; 896 897 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 898 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 899 nd_key->nd_target, 900 sizeof(match->key->ipv6.nd.target), 901 is_mask); 902 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 903 nd_key->nd_sll, ETH_ALEN, is_mask); 904 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 905 nd_key->nd_tll, ETH_ALEN, is_mask); 906 attrs &= ~(1 << OVS_KEY_ATTR_ND); 907 } 908 909 if (attrs != 0) { 910 OVS_NLERR(log, "Unknown key attributes %llx", 911 (unsigned long long)attrs); 912 return -EINVAL; 913 } 914 915 return 0; 916 } 917 918 static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) 919 { 920 struct nlattr *nla; 921 int rem; 922 923 /* The nlattr stream should already have been validated */ 924 nla_for_each_nested(nla, attr, rem) { 925 /* We assume that ovs_key_lens[type] == -1 means that type is a 926 * nested attribute 927 */ 928 if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) 929 nlattr_set(nla, val, false); 930 else 931 memset(nla_data(nla), val, nla_len(nla)); 932 } 933 } 934 935 static void mask_set_nlattr(struct nlattr *attr, u8 val) 936 { 937 nlattr_set(attr, val, true); 938 } 939 940 /** 941 * ovs_nla_get_match - parses Netlink attributes into a flow key and 942 * mask. In case the 'mask' is NULL, the flow is treated as exact match 943 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 944 * does not include any don't care bit. 945 * @match: receives the extracted flow match information. 946 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 947 * sequence. The fields should of the packet that triggered the creation 948 * of this flow. 949 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 950 * attribute specifies the mask field of the wildcarded flow. 951 * @log: Boolean to allow kernel error logging. Normally true, but when 952 * probing for feature compatibility this should be passed in as false to 953 * suppress unnecessary error logging. 954 */ 955 int ovs_nla_get_match(struct sw_flow_match *match, 956 const struct nlattr *nla_key, 957 const struct nlattr *nla_mask, 958 bool log) 959 { 960 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 961 const struct nlattr *encap; 962 struct nlattr *newmask = NULL; 963 u64 key_attrs = 0; 964 u64 mask_attrs = 0; 965 bool encap_valid = false; 966 int err; 967 968 err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); 969 if (err) 970 return err; 971 972 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 973 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 974 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { 975 __be16 tci; 976 977 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 978 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 979 OVS_NLERR(log, "Invalid Vlan frame."); 980 return -EINVAL; 981 } 982 983 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 984 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 985 encap = a[OVS_KEY_ATTR_ENCAP]; 986 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 987 encap_valid = true; 988 989 if (tci & htons(VLAN_TAG_PRESENT)) { 990 err = parse_flow_nlattrs(encap, a, &key_attrs, log); 991 if (err) 992 return err; 993 } else if (!tci) { 994 /* Corner case for truncated 802.1Q header. */ 995 if (nla_len(encap)) { 996 OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); 997 return -EINVAL; 998 } 999 } else { 1000 OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); 1001 return -EINVAL; 1002 } 1003 } 1004 1005 err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); 1006 if (err) 1007 return err; 1008 1009 if (match->mask) { 1010 if (!nla_mask) { 1011 /* Create an exact match mask. We need to set to 0xff 1012 * all the 'match->mask' fields that have been touched 1013 * in 'match->key'. We cannot simply memset 1014 * 'match->mask', because padding bytes and fields not 1015 * specified in 'match->key' should be left to 0. 1016 * Instead, we use a stream of netlink attributes, 1017 * copied from 'key' and set to 0xff. 1018 * ovs_key_from_nlattrs() will take care of filling 1019 * 'match->mask' appropriately. 1020 */ 1021 newmask = kmemdup(nla_key, 1022 nla_total_size(nla_len(nla_key)), 1023 GFP_KERNEL); 1024 if (!newmask) 1025 return -ENOMEM; 1026 1027 mask_set_nlattr(newmask, 0xff); 1028 1029 /* The userspace does not send tunnel attributes that 1030 * are 0, but we should not wildcard them nonetheless. 1031 */ 1032 if (match->key->tun_key.ipv4_dst) 1033 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 1034 0xff, true); 1035 1036 nla_mask = newmask; 1037 } 1038 1039 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); 1040 if (err) 1041 goto free_newmask; 1042 1043 /* Always match on tci. */ 1044 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 1045 1046 if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { 1047 __be16 eth_type = 0; 1048 __be16 tci = 0; 1049 1050 if (!encap_valid) { 1051 OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); 1052 err = -EINVAL; 1053 goto free_newmask; 1054 } 1055 1056 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1057 if (a[OVS_KEY_ATTR_ETHERTYPE]) 1058 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1059 1060 if (eth_type == htons(0xffff)) { 1061 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1062 encap = a[OVS_KEY_ATTR_ENCAP]; 1063 err = parse_flow_mask_nlattrs(encap, a, 1064 &mask_attrs, log); 1065 if (err) 1066 goto free_newmask; 1067 } else { 1068 OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", 1069 ntohs(eth_type)); 1070 err = -EINVAL; 1071 goto free_newmask; 1072 } 1073 1074 if (a[OVS_KEY_ATTR_VLAN]) 1075 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1076 1077 if (!(tci & htons(VLAN_TAG_PRESENT))) { 1078 OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", 1079 ntohs(tci)); 1080 err = -EINVAL; 1081 goto free_newmask; 1082 } 1083 } 1084 1085 err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); 1086 if (err) 1087 goto free_newmask; 1088 } 1089 1090 if (!match_validate(match, key_attrs, mask_attrs, log)) 1091 err = -EINVAL; 1092 1093 free_newmask: 1094 kfree(newmask); 1095 return err; 1096 } 1097 1098 /** 1099 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 1100 * @key: Receives extracted in_port, priority, tun_key and skb_mark. 1101 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1102 * sequence. 1103 * @log: Boolean to allow kernel error logging. Normally true, but when 1104 * probing for feature compatibility this should be passed in as false to 1105 * suppress unnecessary error logging. 1106 * 1107 * This parses a series of Netlink attributes that form a flow key, which must 1108 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1109 * get the metadata, that is, the parts of the flow key that cannot be 1110 * extracted from the packet itself. 1111 */ 1112 1113 int ovs_nla_get_flow_metadata(const struct nlattr *attr, 1114 struct sw_flow_key *key, 1115 bool log) 1116 { 1117 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1118 struct sw_flow_match match; 1119 u64 attrs = 0; 1120 int err; 1121 1122 err = parse_flow_nlattrs(attr, a, &attrs, log); 1123 if (err) 1124 return -EINVAL; 1125 1126 memset(&match, 0, sizeof(match)); 1127 match.key = key; 1128 1129 key->phy.in_port = DP_MAX_PORTS; 1130 1131 return metadata_from_nlattrs(&match, &attrs, a, false, log); 1132 } 1133 1134 int ovs_nla_put_flow(const struct sw_flow_key *swkey, 1135 const struct sw_flow_key *output, struct sk_buff *skb) 1136 { 1137 struct ovs_key_ethernet *eth_key; 1138 struct nlattr *nla, *encap; 1139 bool is_mask = (swkey != output); 1140 1141 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1142 goto nla_put_failure; 1143 1144 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1145 goto nla_put_failure; 1146 1147 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1148 goto nla_put_failure; 1149 1150 if ((swkey->tun_key.ipv4_dst || is_mask)) { 1151 const struct geneve_opt *opts = NULL; 1152 1153 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1154 opts = GENEVE_OPTS(output, swkey->tun_opts_len); 1155 1156 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, 1157 swkey->tun_opts_len)) 1158 goto nla_put_failure; 1159 } 1160 1161 if (swkey->phy.in_port == DP_MAX_PORTS) { 1162 if (is_mask && (output->phy.in_port == 0xffff)) 1163 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1164 goto nla_put_failure; 1165 } else { 1166 u16 upper_u16; 1167 upper_u16 = !is_mask ? 0 : 0xffff; 1168 1169 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1170 (upper_u16 << 16) | output->phy.in_port)) 1171 goto nla_put_failure; 1172 } 1173 1174 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1175 goto nla_put_failure; 1176 1177 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1178 if (!nla) 1179 goto nla_put_failure; 1180 1181 eth_key = nla_data(nla); 1182 ether_addr_copy(eth_key->eth_src, output->eth.src); 1183 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1184 1185 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1186 __be16 eth_type; 1187 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 1188 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1189 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 1190 goto nla_put_failure; 1191 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1192 if (!swkey->eth.tci) 1193 goto unencap; 1194 } else 1195 encap = NULL; 1196 1197 if (swkey->eth.type == htons(ETH_P_802_2)) { 1198 /* 1199 * Ethertype 802.2 is represented in the netlink with omitted 1200 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1201 * 0xffff in the mask attribute. Ethertype can also 1202 * be wildcarded. 1203 */ 1204 if (is_mask && output->eth.type) 1205 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1206 output->eth.type)) 1207 goto nla_put_failure; 1208 goto unencap; 1209 } 1210 1211 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1212 goto nla_put_failure; 1213 1214 if (swkey->eth.type == htons(ETH_P_IP)) { 1215 struct ovs_key_ipv4 *ipv4_key; 1216 1217 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1218 if (!nla) 1219 goto nla_put_failure; 1220 ipv4_key = nla_data(nla); 1221 ipv4_key->ipv4_src = output->ipv4.addr.src; 1222 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1223 ipv4_key->ipv4_proto = output->ip.proto; 1224 ipv4_key->ipv4_tos = output->ip.tos; 1225 ipv4_key->ipv4_ttl = output->ip.ttl; 1226 ipv4_key->ipv4_frag = output->ip.frag; 1227 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1228 struct ovs_key_ipv6 *ipv6_key; 1229 1230 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1231 if (!nla) 1232 goto nla_put_failure; 1233 ipv6_key = nla_data(nla); 1234 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1235 sizeof(ipv6_key->ipv6_src)); 1236 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1237 sizeof(ipv6_key->ipv6_dst)); 1238 ipv6_key->ipv6_label = output->ipv6.label; 1239 ipv6_key->ipv6_proto = output->ip.proto; 1240 ipv6_key->ipv6_tclass = output->ip.tos; 1241 ipv6_key->ipv6_hlimit = output->ip.ttl; 1242 ipv6_key->ipv6_frag = output->ip.frag; 1243 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1244 swkey->eth.type == htons(ETH_P_RARP)) { 1245 struct ovs_key_arp *arp_key; 1246 1247 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1248 if (!nla) 1249 goto nla_put_failure; 1250 arp_key = nla_data(nla); 1251 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1252 arp_key->arp_sip = output->ipv4.addr.src; 1253 arp_key->arp_tip = output->ipv4.addr.dst; 1254 arp_key->arp_op = htons(output->ip.proto); 1255 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1256 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1257 } else if (eth_p_mpls(swkey->eth.type)) { 1258 struct ovs_key_mpls *mpls_key; 1259 1260 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1261 if (!nla) 1262 goto nla_put_failure; 1263 mpls_key = nla_data(nla); 1264 mpls_key->mpls_lse = output->mpls.top_lse; 1265 } 1266 1267 if ((swkey->eth.type == htons(ETH_P_IP) || 1268 swkey->eth.type == htons(ETH_P_IPV6)) && 1269 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1270 1271 if (swkey->ip.proto == IPPROTO_TCP) { 1272 struct ovs_key_tcp *tcp_key; 1273 1274 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1275 if (!nla) 1276 goto nla_put_failure; 1277 tcp_key = nla_data(nla); 1278 tcp_key->tcp_src = output->tp.src; 1279 tcp_key->tcp_dst = output->tp.dst; 1280 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1281 output->tp.flags)) 1282 goto nla_put_failure; 1283 } else if (swkey->ip.proto == IPPROTO_UDP) { 1284 struct ovs_key_udp *udp_key; 1285 1286 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1287 if (!nla) 1288 goto nla_put_failure; 1289 udp_key = nla_data(nla); 1290 udp_key->udp_src = output->tp.src; 1291 udp_key->udp_dst = output->tp.dst; 1292 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1293 struct ovs_key_sctp *sctp_key; 1294 1295 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1296 if (!nla) 1297 goto nla_put_failure; 1298 sctp_key = nla_data(nla); 1299 sctp_key->sctp_src = output->tp.src; 1300 sctp_key->sctp_dst = output->tp.dst; 1301 } else if (swkey->eth.type == htons(ETH_P_IP) && 1302 swkey->ip.proto == IPPROTO_ICMP) { 1303 struct ovs_key_icmp *icmp_key; 1304 1305 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1306 if (!nla) 1307 goto nla_put_failure; 1308 icmp_key = nla_data(nla); 1309 icmp_key->icmp_type = ntohs(output->tp.src); 1310 icmp_key->icmp_code = ntohs(output->tp.dst); 1311 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1312 swkey->ip.proto == IPPROTO_ICMPV6) { 1313 struct ovs_key_icmpv6 *icmpv6_key; 1314 1315 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1316 sizeof(*icmpv6_key)); 1317 if (!nla) 1318 goto nla_put_failure; 1319 icmpv6_key = nla_data(nla); 1320 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1321 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1322 1323 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1324 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1325 struct ovs_key_nd *nd_key; 1326 1327 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1328 if (!nla) 1329 goto nla_put_failure; 1330 nd_key = nla_data(nla); 1331 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1332 sizeof(nd_key->nd_target)); 1333 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1334 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1335 } 1336 } 1337 } 1338 1339 unencap: 1340 if (encap) 1341 nla_nest_end(skb, encap); 1342 1343 return 0; 1344 1345 nla_put_failure: 1346 return -EMSGSIZE; 1347 } 1348 1349 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1350 1351 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1352 { 1353 struct sw_flow_actions *sfa; 1354 1355 if (size > MAX_ACTIONS_BUFSIZE) { 1356 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1357 return ERR_PTR(-EINVAL); 1358 } 1359 1360 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1361 if (!sfa) 1362 return ERR_PTR(-ENOMEM); 1363 1364 sfa->actions_len = 0; 1365 return sfa; 1366 } 1367 1368 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1369 * The caller must hold rcu_read_lock for this to be sensible. */ 1370 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1371 { 1372 kfree_rcu(sf_acts, rcu); 1373 } 1374 1375 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1376 int attr_len, bool log) 1377 { 1378 1379 struct sw_flow_actions *acts; 1380 int new_acts_size; 1381 int req_size = NLA_ALIGN(attr_len); 1382 int next_offset = offsetof(struct sw_flow_actions, actions) + 1383 (*sfa)->actions_len; 1384 1385 if (req_size <= (ksize(*sfa) - next_offset)) 1386 goto out; 1387 1388 new_acts_size = ksize(*sfa) * 2; 1389 1390 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1391 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1392 return ERR_PTR(-EMSGSIZE); 1393 new_acts_size = MAX_ACTIONS_BUFSIZE; 1394 } 1395 1396 acts = nla_alloc_flow_actions(new_acts_size, log); 1397 if (IS_ERR(acts)) 1398 return (void *)acts; 1399 1400 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1401 acts->actions_len = (*sfa)->actions_len; 1402 kfree(*sfa); 1403 *sfa = acts; 1404 1405 out: 1406 (*sfa)->actions_len += req_size; 1407 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1408 } 1409 1410 static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1411 int attrtype, void *data, int len, bool log) 1412 { 1413 struct nlattr *a; 1414 1415 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 1416 if (IS_ERR(a)) 1417 return a; 1418 1419 a->nla_type = attrtype; 1420 a->nla_len = nla_attr_size(len); 1421 1422 if (data) 1423 memcpy(nla_data(a), data, len); 1424 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1425 1426 return a; 1427 } 1428 1429 static int add_action(struct sw_flow_actions **sfa, int attrtype, 1430 void *data, int len, bool log) 1431 { 1432 struct nlattr *a; 1433 1434 a = __add_action(sfa, attrtype, data, len, log); 1435 1436 return PTR_ERR_OR_ZERO(a); 1437 } 1438 1439 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1440 int attrtype, bool log) 1441 { 1442 int used = (*sfa)->actions_len; 1443 int err; 1444 1445 err = add_action(sfa, attrtype, NULL, 0, log); 1446 if (err) 1447 return err; 1448 1449 return used; 1450 } 1451 1452 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1453 int st_offset) 1454 { 1455 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 1456 st_offset); 1457 1458 a->nla_len = sfa->actions_len - st_offset; 1459 } 1460 1461 static int __ovs_nla_copy_actions(const struct nlattr *attr, 1462 const struct sw_flow_key *key, 1463 int depth, struct sw_flow_actions **sfa, 1464 __be16 eth_type, __be16 vlan_tci, bool log); 1465 1466 static int validate_and_copy_sample(const struct nlattr *attr, 1467 const struct sw_flow_key *key, int depth, 1468 struct sw_flow_actions **sfa, 1469 __be16 eth_type, __be16 vlan_tci, bool log) 1470 { 1471 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1472 const struct nlattr *probability, *actions; 1473 const struct nlattr *a; 1474 int rem, start, err, st_acts; 1475 1476 memset(attrs, 0, sizeof(attrs)); 1477 nla_for_each_nested(a, attr, rem) { 1478 int type = nla_type(a); 1479 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1480 return -EINVAL; 1481 attrs[type] = a; 1482 } 1483 if (rem) 1484 return -EINVAL; 1485 1486 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 1487 if (!probability || nla_len(probability) != sizeof(u32)) 1488 return -EINVAL; 1489 1490 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 1491 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 1492 return -EINVAL; 1493 1494 /* validation done, copy sample action. */ 1495 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 1496 if (start < 0) 1497 return start; 1498 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1499 nla_data(probability), sizeof(u32), log); 1500 if (err) 1501 return err; 1502 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 1503 if (st_acts < 0) 1504 return st_acts; 1505 1506 err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, 1507 eth_type, vlan_tci, log); 1508 if (err) 1509 return err; 1510 1511 add_nested_action_end(*sfa, st_acts); 1512 add_nested_action_end(*sfa, start); 1513 1514 return 0; 1515 } 1516 1517 static int validate_tp_port(const struct sw_flow_key *flow_key, 1518 __be16 eth_type) 1519 { 1520 if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && 1521 (flow_key->tp.src || flow_key->tp.dst)) 1522 return 0; 1523 1524 return -EINVAL; 1525 } 1526 1527 void ovs_match_init(struct sw_flow_match *match, 1528 struct sw_flow_key *key, 1529 struct sw_flow_mask *mask) 1530 { 1531 memset(match, 0, sizeof(*match)); 1532 match->key = key; 1533 match->mask = mask; 1534 1535 memset(key, 0, sizeof(*key)); 1536 1537 if (mask) { 1538 memset(&mask->key, 0, sizeof(mask->key)); 1539 mask->range.start = mask->range.end = 0; 1540 } 1541 } 1542 1543 static int validate_and_copy_set_tun(const struct nlattr *attr, 1544 struct sw_flow_actions **sfa, bool log) 1545 { 1546 struct sw_flow_match match; 1547 struct sw_flow_key key; 1548 struct ovs_tunnel_info *tun_info; 1549 struct nlattr *a; 1550 int err, start; 1551 1552 ovs_match_init(&match, &key, NULL); 1553 err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1554 if (err) 1555 return err; 1556 1557 if (key.tun_opts_len) { 1558 struct geneve_opt *option = GENEVE_OPTS(&key, 1559 key.tun_opts_len); 1560 int opts_len = key.tun_opts_len; 1561 bool crit_opt = false; 1562 1563 while (opts_len > 0) { 1564 int len; 1565 1566 if (opts_len < sizeof(*option)) 1567 return -EINVAL; 1568 1569 len = sizeof(*option) + option->length * 4; 1570 if (len > opts_len) 1571 return -EINVAL; 1572 1573 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 1574 1575 option = (struct geneve_opt *)((u8 *)option + len); 1576 opts_len -= len; 1577 }; 1578 1579 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 1580 }; 1581 1582 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 1583 if (start < 0) 1584 return start; 1585 1586 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1587 sizeof(*tun_info) + key.tun_opts_len, log); 1588 if (IS_ERR(a)) 1589 return PTR_ERR(a); 1590 1591 tun_info = nla_data(a); 1592 tun_info->tunnel = key.tun_key; 1593 tun_info->options_len = key.tun_opts_len; 1594 1595 if (tun_info->options_len) { 1596 /* We need to store the options in the action itself since 1597 * everything else will go away after flow setup. We can append 1598 * it to tun_info and then point there. 1599 */ 1600 memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), 1601 key.tun_opts_len); 1602 tun_info->options = (struct geneve_opt *)(tun_info + 1); 1603 } else { 1604 tun_info->options = NULL; 1605 } 1606 1607 add_nested_action_end(*sfa, start); 1608 1609 return err; 1610 } 1611 1612 static int validate_set(const struct nlattr *a, 1613 const struct sw_flow_key *flow_key, 1614 struct sw_flow_actions **sfa, 1615 bool *set_tun, __be16 eth_type, bool log) 1616 { 1617 const struct nlattr *ovs_key = nla_data(a); 1618 int key_type = nla_type(ovs_key); 1619 1620 /* There can be only one key in a action */ 1621 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1622 return -EINVAL; 1623 1624 if (key_type > OVS_KEY_ATTR_MAX || 1625 (ovs_key_lens[key_type] != nla_len(ovs_key) && 1626 ovs_key_lens[key_type] != -1)) 1627 return -EINVAL; 1628 1629 switch (key_type) { 1630 const struct ovs_key_ipv4 *ipv4_key; 1631 const struct ovs_key_ipv6 *ipv6_key; 1632 int err; 1633 1634 case OVS_KEY_ATTR_PRIORITY: 1635 case OVS_KEY_ATTR_SKB_MARK: 1636 case OVS_KEY_ATTR_ETHERNET: 1637 break; 1638 1639 case OVS_KEY_ATTR_TUNNEL: 1640 if (eth_p_mpls(eth_type)) 1641 return -EINVAL; 1642 1643 *set_tun = true; 1644 err = validate_and_copy_set_tun(a, sfa, log); 1645 if (err) 1646 return err; 1647 break; 1648 1649 case OVS_KEY_ATTR_IPV4: 1650 if (eth_type != htons(ETH_P_IP)) 1651 return -EINVAL; 1652 1653 if (!flow_key->ip.proto) 1654 return -EINVAL; 1655 1656 ipv4_key = nla_data(ovs_key); 1657 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1658 return -EINVAL; 1659 1660 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1661 return -EINVAL; 1662 1663 break; 1664 1665 case OVS_KEY_ATTR_IPV6: 1666 if (eth_type != htons(ETH_P_IPV6)) 1667 return -EINVAL; 1668 1669 if (!flow_key->ip.proto) 1670 return -EINVAL; 1671 1672 ipv6_key = nla_data(ovs_key); 1673 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1674 return -EINVAL; 1675 1676 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1677 return -EINVAL; 1678 1679 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1680 return -EINVAL; 1681 1682 break; 1683 1684 case OVS_KEY_ATTR_TCP: 1685 if (flow_key->ip.proto != IPPROTO_TCP) 1686 return -EINVAL; 1687 1688 return validate_tp_port(flow_key, eth_type); 1689 1690 case OVS_KEY_ATTR_UDP: 1691 if (flow_key->ip.proto != IPPROTO_UDP) 1692 return -EINVAL; 1693 1694 return validate_tp_port(flow_key, eth_type); 1695 1696 case OVS_KEY_ATTR_MPLS: 1697 if (!eth_p_mpls(eth_type)) 1698 return -EINVAL; 1699 break; 1700 1701 case OVS_KEY_ATTR_SCTP: 1702 if (flow_key->ip.proto != IPPROTO_SCTP) 1703 return -EINVAL; 1704 1705 return validate_tp_port(flow_key, eth_type); 1706 1707 default: 1708 return -EINVAL; 1709 } 1710 1711 return 0; 1712 } 1713 1714 static int validate_userspace(const struct nlattr *attr) 1715 { 1716 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1717 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1718 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1719 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 1720 }; 1721 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1722 int error; 1723 1724 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1725 attr, userspace_policy); 1726 if (error) 1727 return error; 1728 1729 if (!a[OVS_USERSPACE_ATTR_PID] || 1730 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1731 return -EINVAL; 1732 1733 return 0; 1734 } 1735 1736 static int copy_action(const struct nlattr *from, 1737 struct sw_flow_actions **sfa, bool log) 1738 { 1739 int totlen = NLA_ALIGN(from->nla_len); 1740 struct nlattr *to; 1741 1742 to = reserve_sfa_size(sfa, from->nla_len, log); 1743 if (IS_ERR(to)) 1744 return PTR_ERR(to); 1745 1746 memcpy(to, from, totlen); 1747 return 0; 1748 } 1749 1750 static int __ovs_nla_copy_actions(const struct nlattr *attr, 1751 const struct sw_flow_key *key, 1752 int depth, struct sw_flow_actions **sfa, 1753 __be16 eth_type, __be16 vlan_tci, bool log) 1754 { 1755 const struct nlattr *a; 1756 int rem, err; 1757 1758 if (depth >= SAMPLE_ACTION_DEPTH) 1759 return -EOVERFLOW; 1760 1761 nla_for_each_nested(a, attr, rem) { 1762 /* Expected argument lengths, (u32)-1 for variable length. */ 1763 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 1764 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1765 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 1766 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1767 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 1768 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 1769 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1770 [OVS_ACTION_ATTR_POP_VLAN] = 0, 1771 [OVS_ACTION_ATTR_SET] = (u32)-1, 1772 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 1773 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) 1774 }; 1775 const struct ovs_action_push_vlan *vlan; 1776 int type = nla_type(a); 1777 bool skip_copy; 1778 1779 if (type > OVS_ACTION_ATTR_MAX || 1780 (action_lens[type] != nla_len(a) && 1781 action_lens[type] != (u32)-1)) 1782 return -EINVAL; 1783 1784 skip_copy = false; 1785 switch (type) { 1786 case OVS_ACTION_ATTR_UNSPEC: 1787 return -EINVAL; 1788 1789 case OVS_ACTION_ATTR_USERSPACE: 1790 err = validate_userspace(a); 1791 if (err) 1792 return err; 1793 break; 1794 1795 case OVS_ACTION_ATTR_OUTPUT: 1796 if (nla_get_u32(a) >= DP_MAX_PORTS) 1797 return -EINVAL; 1798 break; 1799 1800 case OVS_ACTION_ATTR_HASH: { 1801 const struct ovs_action_hash *act_hash = nla_data(a); 1802 1803 switch (act_hash->hash_alg) { 1804 case OVS_HASH_ALG_L4: 1805 break; 1806 default: 1807 return -EINVAL; 1808 } 1809 1810 break; 1811 } 1812 1813 case OVS_ACTION_ATTR_POP_VLAN: 1814 vlan_tci = htons(0); 1815 break; 1816 1817 case OVS_ACTION_ATTR_PUSH_VLAN: 1818 vlan = nla_data(a); 1819 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 1820 return -EINVAL; 1821 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1822 return -EINVAL; 1823 vlan_tci = vlan->vlan_tci; 1824 break; 1825 1826 case OVS_ACTION_ATTR_RECIRC: 1827 break; 1828 1829 case OVS_ACTION_ATTR_PUSH_MPLS: { 1830 const struct ovs_action_push_mpls *mpls = nla_data(a); 1831 1832 if (!eth_p_mpls(mpls->mpls_ethertype)) 1833 return -EINVAL; 1834 /* Prohibit push MPLS other than to a white list 1835 * for packets that have a known tag order. 1836 */ 1837 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 1838 (eth_type != htons(ETH_P_IP) && 1839 eth_type != htons(ETH_P_IPV6) && 1840 eth_type != htons(ETH_P_ARP) && 1841 eth_type != htons(ETH_P_RARP) && 1842 !eth_p_mpls(eth_type))) 1843 return -EINVAL; 1844 eth_type = mpls->mpls_ethertype; 1845 break; 1846 } 1847 1848 case OVS_ACTION_ATTR_POP_MPLS: 1849 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 1850 !eth_p_mpls(eth_type)) 1851 return -EINVAL; 1852 1853 /* Disallow subsequent L2.5+ set and mpls_pop actions 1854 * as there is no check here to ensure that the new 1855 * eth_type is valid and thus set actions could 1856 * write off the end of the packet or otherwise 1857 * corrupt it. 1858 * 1859 * Support for these actions is planned using packet 1860 * recirculation. 1861 */ 1862 eth_type = htons(0); 1863 break; 1864 1865 case OVS_ACTION_ATTR_SET: 1866 err = validate_set(a, key, sfa, 1867 &skip_copy, eth_type, log); 1868 if (err) 1869 return err; 1870 break; 1871 1872 case OVS_ACTION_ATTR_SAMPLE: 1873 err = validate_and_copy_sample(a, key, depth, sfa, 1874 eth_type, vlan_tci, log); 1875 if (err) 1876 return err; 1877 skip_copy = true; 1878 break; 1879 1880 default: 1881 OVS_NLERR(log, "Unknown Action type %d", type); 1882 return -EINVAL; 1883 } 1884 if (!skip_copy) { 1885 err = copy_action(a, sfa, log); 1886 if (err) 1887 return err; 1888 } 1889 } 1890 1891 if (rem > 0) 1892 return -EINVAL; 1893 1894 return 0; 1895 } 1896 1897 int ovs_nla_copy_actions(const struct nlattr *attr, 1898 const struct sw_flow_key *key, 1899 struct sw_flow_actions **sfa, bool log) 1900 { 1901 int err; 1902 1903 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 1904 if (IS_ERR(*sfa)) 1905 return PTR_ERR(*sfa); 1906 1907 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, 1908 key->eth.tci, log); 1909 if (err) 1910 kfree(*sfa); 1911 1912 return err; 1913 } 1914 1915 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1916 { 1917 const struct nlattr *a; 1918 struct nlattr *start; 1919 int err = 0, rem; 1920 1921 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 1922 if (!start) 1923 return -EMSGSIZE; 1924 1925 nla_for_each_nested(a, attr, rem) { 1926 int type = nla_type(a); 1927 struct nlattr *st_sample; 1928 1929 switch (type) { 1930 case OVS_SAMPLE_ATTR_PROBABILITY: 1931 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 1932 sizeof(u32), nla_data(a))) 1933 return -EMSGSIZE; 1934 break; 1935 case OVS_SAMPLE_ATTR_ACTIONS: 1936 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 1937 if (!st_sample) 1938 return -EMSGSIZE; 1939 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 1940 if (err) 1941 return err; 1942 nla_nest_end(skb, st_sample); 1943 break; 1944 } 1945 } 1946 1947 nla_nest_end(skb, start); 1948 return err; 1949 } 1950 1951 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 1952 { 1953 const struct nlattr *ovs_key = nla_data(a); 1954 int key_type = nla_type(ovs_key); 1955 struct nlattr *start; 1956 int err; 1957 1958 switch (key_type) { 1959 case OVS_KEY_ATTR_TUNNEL_INFO: { 1960 struct ovs_tunnel_info *tun_info = nla_data(ovs_key); 1961 1962 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 1963 if (!start) 1964 return -EMSGSIZE; 1965 1966 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, 1967 tun_info->options_len ? 1968 tun_info->options : NULL, 1969 tun_info->options_len); 1970 if (err) 1971 return err; 1972 nla_nest_end(skb, start); 1973 break; 1974 } 1975 default: 1976 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 1977 return -EMSGSIZE; 1978 break; 1979 } 1980 1981 return 0; 1982 } 1983 1984 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 1985 { 1986 const struct nlattr *a; 1987 int rem, err; 1988 1989 nla_for_each_attr(a, attr, len, rem) { 1990 int type = nla_type(a); 1991 1992 switch (type) { 1993 case OVS_ACTION_ATTR_SET: 1994 err = set_action_to_attr(a, skb); 1995 if (err) 1996 return err; 1997 break; 1998 1999 case OVS_ACTION_ATTR_SAMPLE: 2000 err = sample_action_to_attr(a, skb); 2001 if (err) 2002 return err; 2003 break; 2004 default: 2005 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2006 return -EMSGSIZE; 2007 break; 2008 } 2009 } 2010 2011 return 0; 2012 } 2013