1 /* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/geneve.h> 46 #include <net/ip.h> 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/mpls.h> 50 51 #include "flow_netlink.h" 52 #include "vport-vxlan.h" 53 54 struct ovs_len_tbl { 55 int len; 56 const struct ovs_len_tbl *next; 57 }; 58 59 #define OVS_ATTR_NESTED -1 60 61 static void update_range(struct sw_flow_match *match, 62 size_t offset, size_t size, bool is_mask) 63 { 64 struct sw_flow_key_range *range; 65 size_t start = rounddown(offset, sizeof(long)); 66 size_t end = roundup(offset + size, 
sizeof(long)); 67 68 if (!is_mask) 69 range = &match->range; 70 else 71 range = &match->mask->range; 72 73 if (range->start == range->end) { 74 range->start = start; 75 range->end = end; 76 return; 77 } 78 79 if (range->start > start) 80 range->start = start; 81 82 if (range->end < end) 83 range->end = end; 84 } 85 86 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 87 do { \ 88 update_range(match, offsetof(struct sw_flow_key, field), \ 89 sizeof((match)->key->field), is_mask); \ 90 if (is_mask) \ 91 (match)->mask->key.field = value; \ 92 else \ 93 (match)->key->field = value; \ 94 } while (0) 95 96 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 97 do { \ 98 update_range(match, offset, len, is_mask); \ 99 if (is_mask) \ 100 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 101 len); \ 102 else \ 103 memcpy((u8 *)(match)->key + offset, value_p, len); \ 104 } while (0) 105 106 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 107 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 108 value_p, len, is_mask) 109 110 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 111 do { \ 112 update_range(match, offsetof(struct sw_flow_key, field), \ 113 sizeof((match)->key->field), is_mask); \ 114 if (is_mask) \ 115 memset((u8 *)&(match)->mask->key.field, value, \ 116 sizeof((match)->mask->key.field)); \ 117 else \ 118 memset((u8 *)&(match)->key->field, value, \ 119 sizeof((match)->key->field)); \ 120 } while (0) 121 122 static bool match_validate(const struct sw_flow_match *match, 123 u64 key_attrs, u64 mask_attrs, bool log) 124 { 125 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 126 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 127 128 /* The following mask attributes allowed only if they 129 * pass the validation tests. 
*/ 130 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 131 | (1 << OVS_KEY_ATTR_IPV6) 132 | (1 << OVS_KEY_ATTR_TCP) 133 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 134 | (1 << OVS_KEY_ATTR_UDP) 135 | (1 << OVS_KEY_ATTR_SCTP) 136 | (1 << OVS_KEY_ATTR_ICMP) 137 | (1 << OVS_KEY_ATTR_ICMPV6) 138 | (1 << OVS_KEY_ATTR_ARP) 139 | (1 << OVS_KEY_ATTR_ND) 140 | (1 << OVS_KEY_ATTR_MPLS)); 141 142 /* Always allowed mask fields. */ 143 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 144 | (1 << OVS_KEY_ATTR_IN_PORT) 145 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 146 147 /* Check key attributes. */ 148 if (match->key->eth.type == htons(ETH_P_ARP) 149 || match->key->eth.type == htons(ETH_P_RARP)) { 150 key_expected |= 1 << OVS_KEY_ATTR_ARP; 151 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 152 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 153 } 154 155 if (eth_p_mpls(match->key->eth.type)) { 156 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 157 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 158 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 159 } 160 161 if (match->key->eth.type == htons(ETH_P_IP)) { 162 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 163 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 164 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 165 166 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 167 if (match->key->ip.proto == IPPROTO_UDP) { 168 key_expected |= 1 << OVS_KEY_ATTR_UDP; 169 if (match->mask && (match->mask->key.ip.proto == 0xff)) 170 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 171 } 172 173 if (match->key->ip.proto == IPPROTO_SCTP) { 174 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 175 if (match->mask && (match->mask->key.ip.proto == 0xff)) 176 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 177 } 178 179 if (match->key->ip.proto == IPPROTO_TCP) { 180 key_expected |= 1 << OVS_KEY_ATTR_TCP; 181 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 182 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 183 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 184 mask_allowed |= 1 << 
OVS_KEY_ATTR_TCP_FLAGS; 185 } 186 } 187 188 if (match->key->ip.proto == IPPROTO_ICMP) { 189 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 190 if (match->mask && (match->mask->key.ip.proto == 0xff)) 191 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 192 } 193 } 194 } 195 196 if (match->key->eth.type == htons(ETH_P_IPV6)) { 197 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 198 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 199 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 200 201 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 202 if (match->key->ip.proto == IPPROTO_UDP) { 203 key_expected |= 1 << OVS_KEY_ATTR_UDP; 204 if (match->mask && (match->mask->key.ip.proto == 0xff)) 205 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 206 } 207 208 if (match->key->ip.proto == IPPROTO_SCTP) { 209 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 210 if (match->mask && (match->mask->key.ip.proto == 0xff)) 211 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 212 } 213 214 if (match->key->ip.proto == IPPROTO_TCP) { 215 key_expected |= 1 << OVS_KEY_ATTR_TCP; 216 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 217 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 218 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 219 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 220 } 221 } 222 223 if (match->key->ip.proto == IPPROTO_ICMPV6) { 224 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 225 if (match->mask && (match->mask->key.ip.proto == 0xff)) 226 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 227 228 if (match->key->tp.src == 229 htons(NDISC_NEIGHBOUR_SOLICITATION) || 230 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 231 key_expected |= 1 << OVS_KEY_ATTR_ND; 232 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 233 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 234 } 235 } 236 } 237 } 238 239 if ((key_attrs & key_expected) != key_expected) { 240 /* Key attributes check failed. 
*/ 241 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 242 (unsigned long long)key_attrs, 243 (unsigned long long)key_expected); 244 return false; 245 } 246 247 if ((mask_attrs & mask_allowed) != mask_attrs) { 248 /* Mask attributes check failed. */ 249 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 250 (unsigned long long)mask_attrs, 251 (unsigned long long)mask_allowed); 252 return false; 253 } 254 255 return true; 256 } 257 258 size_t ovs_tun_key_attr_size(void) 259 { 260 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 261 * updating this function. 262 */ 263 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 264 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ 265 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ 266 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 267 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 268 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 269 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 270 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 271 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 272 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 273 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 274 */ 275 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 276 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 277 } 278 279 size_t ovs_key_attr_size(void) 280 { 281 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 282 * updating this function. 
283 */ 284 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); 285 286 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 287 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 288 + ovs_tun_key_attr_size() 289 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 290 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 291 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 292 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 293 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 294 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 295 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 296 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 297 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 298 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 299 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 300 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 301 } 302 303 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 304 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 305 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 306 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 307 [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 308 [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 309 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 310 [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 311 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 312 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 313 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 314 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, 315 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, 316 }; 317 318 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ 319 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 320 [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 321 [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 322 [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 323 [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 324 [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 325 [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 326 [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 327 [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 328 [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 329 [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 330 [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 331 [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 332 [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, 333 [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 334 [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 335 [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 336 [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 337 [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 338 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 339 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 340 .next = ovs_tunnel_key_lens, }, 341 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 342 }; 343 344 static bool is_all_zero(const u8 *fp, size_t size) 345 { 346 int i; 347 348 if (!fp) 349 return false; 350 351 for (i = 0; i < size; i++) 352 if (fp[i]) 353 return false; 354 355 return true; 356 } 357 358 static int __parse_flow_nlattrs(const struct nlattr *attr, 359 const struct nlattr *a[], 360 u64 *attrsp, bool log, bool nz) 361 { 362 const struct nlattr *nla; 363 u64 attrs; 364 int rem; 365 366 attrs = *attrsp; 367 nla_for_each_nested(nla, attr, rem) { 368 u16 type = nla_type(nla); 369 int expected_len; 370 371 if (type > OVS_KEY_ATTR_MAX) { 372 OVS_NLERR(log, "Key type %d is out of 
range max %d", 373 type, OVS_KEY_ATTR_MAX); 374 return -EINVAL; 375 } 376 377 if (attrs & (1 << type)) { 378 OVS_NLERR(log, "Duplicate key (type %d).", type); 379 return -EINVAL; 380 } 381 382 expected_len = ovs_key_lens[type].len; 383 if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { 384 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 385 type, nla_len(nla), expected_len); 386 return -EINVAL; 387 } 388 389 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 390 attrs |= 1 << type; 391 a[type] = nla; 392 } 393 } 394 if (rem) { 395 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 396 return -EINVAL; 397 } 398 399 *attrsp = attrs; 400 return 0; 401 } 402 403 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 404 const struct nlattr *a[], u64 *attrsp, 405 bool log) 406 { 407 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 408 } 409 410 static int parse_flow_nlattrs(const struct nlattr *attr, 411 const struct nlattr *a[], u64 *attrsp, 412 bool log) 413 { 414 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 415 } 416 417 static int genev_tun_opt_from_nlattr(const struct nlattr *a, 418 struct sw_flow_match *match, bool is_mask, 419 bool log) 420 { 421 unsigned long opt_key_offset; 422 423 if (nla_len(a) > sizeof(match->key->tun_opts)) { 424 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", 425 nla_len(a), sizeof(match->key->tun_opts)); 426 return -EINVAL; 427 } 428 429 if (nla_len(a) % 4 != 0) { 430 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", 431 nla_len(a)); 432 return -EINVAL; 433 } 434 435 /* We need to record the length of the options passed 436 * down, otherwise packets with the same format but 437 * additional options will be silently matched. 
438 */ 439 if (!is_mask) { 440 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), 441 false); 442 } else { 443 /* This is somewhat unusual because it looks at 444 * both the key and mask while parsing the 445 * attributes (and by extension assumes the key 446 * is parsed first). Normally, we would verify 447 * that each is the correct length and that the 448 * attributes line up in the validate function. 449 * However, that is difficult because this is 450 * variable length and we won't have the 451 * information later. 452 */ 453 if (match->key->tun_opts_len != nla_len(a)) { 454 OVS_NLERR(log, "Geneve option len %d != mask len %d", 455 match->key->tun_opts_len, nla_len(a)); 456 return -EINVAL; 457 } 458 459 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 460 } 461 462 opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); 463 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 464 nla_len(a), is_mask); 465 return 0; 466 } 467 468 static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { 469 [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, 470 }; 471 472 static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, 473 struct sw_flow_match *match, bool is_mask, 474 bool log) 475 { 476 struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; 477 unsigned long opt_key_offset; 478 struct ovs_vxlan_opts opts; 479 int err; 480 481 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 482 483 err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); 484 if (err < 0) 485 return err; 486 487 memset(&opts, 0, sizeof(opts)); 488 489 if (tb[OVS_VXLAN_EXT_GBP]) 490 opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); 491 492 if (!is_mask) 493 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 494 else 495 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 496 497 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); 498 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), 499 is_mask); 500 return 0; 501 } 502 503 static int 
ipv4_tun_from_nlattr(const struct nlattr *attr, 504 struct sw_flow_match *match, bool is_mask, 505 bool log) 506 { 507 struct nlattr *a; 508 int rem; 509 bool ttl = false; 510 __be16 tun_flags = 0; 511 int opts_type = 0; 512 513 nla_for_each_nested(a, attr, rem) { 514 int type = nla_type(a); 515 int err; 516 517 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 518 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 519 type, OVS_TUNNEL_KEY_ATTR_MAX); 520 return -EINVAL; 521 } 522 523 if (ovs_tunnel_key_lens[type].len != nla_len(a) && 524 ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { 525 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 526 type, nla_len(a), ovs_tunnel_key_lens[type].len); 527 return -EINVAL; 528 } 529 530 switch (type) { 531 case OVS_TUNNEL_KEY_ATTR_ID: 532 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 533 nla_get_be64(a), is_mask); 534 tun_flags |= TUNNEL_KEY; 535 break; 536 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 537 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 538 nla_get_be32(a), is_mask); 539 break; 540 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 541 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 542 nla_get_be32(a), is_mask); 543 break; 544 case OVS_TUNNEL_KEY_ATTR_TOS: 545 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 546 nla_get_u8(a), is_mask); 547 break; 548 case OVS_TUNNEL_KEY_ATTR_TTL: 549 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 550 nla_get_u8(a), is_mask); 551 ttl = true; 552 break; 553 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 554 tun_flags |= TUNNEL_DONT_FRAGMENT; 555 break; 556 case OVS_TUNNEL_KEY_ATTR_CSUM: 557 tun_flags |= TUNNEL_CSUM; 558 break; 559 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 560 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 561 nla_get_be16(a), is_mask); 562 break; 563 case OVS_TUNNEL_KEY_ATTR_TP_DST: 564 SW_FLOW_KEY_PUT(match, tun_key.tp_dst, 565 nla_get_be16(a), is_mask); 566 break; 567 case OVS_TUNNEL_KEY_ATTR_OAM: 568 tun_flags |= TUNNEL_OAM; 569 break; 570 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 571 if (opts_type) { 572 OVS_NLERR(log, "Multiple 
metadata blocks provided"); 573 return -EINVAL; 574 } 575 576 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 577 if (err) 578 return err; 579 580 tun_flags |= TUNNEL_GENEVE_OPT; 581 opts_type = type; 582 break; 583 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 584 if (opts_type) { 585 OVS_NLERR(log, "Multiple metadata blocks provided"); 586 return -EINVAL; 587 } 588 589 err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 590 if (err) 591 return err; 592 593 tun_flags |= TUNNEL_VXLAN_OPT; 594 opts_type = type; 595 break; 596 default: 597 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", 598 type); 599 return -EINVAL; 600 } 601 } 602 603 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 604 605 if (rem > 0) { 606 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", 607 rem); 608 return -EINVAL; 609 } 610 611 if (!is_mask) { 612 if (!match->key->tun_key.ipv4_dst) { 613 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 614 return -EINVAL; 615 } 616 617 if (!ttl) { 618 OVS_NLERR(log, "IPv4 tunnel TTL not specified."); 619 return -EINVAL; 620 } 621 } 622 623 return opts_type; 624 } 625 626 static int vxlan_opt_to_nlattr(struct sk_buff *skb, 627 const void *tun_opts, int swkey_tun_opts_len) 628 { 629 const struct ovs_vxlan_opts *opts = tun_opts; 630 struct nlattr *nla; 631 632 nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 633 if (!nla) 634 return -EMSGSIZE; 635 636 if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 637 return -EMSGSIZE; 638 639 nla_nest_end(skb, nla); 640 return 0; 641 } 642 643 static int __ipv4_tun_to_nlattr(struct sk_buff *skb, 644 const struct ovs_key_ipv4_tunnel *output, 645 const void *tun_opts, int swkey_tun_opts_len) 646 { 647 if (output->tun_flags & TUNNEL_KEY && 648 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 649 return -EMSGSIZE; 650 if (output->ipv4_src && 651 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 652 return -EMSGSIZE; 653 if (output->ipv4_dst && 654 
nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 655 return -EMSGSIZE; 656 if (output->ipv4_tos && 657 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 658 return -EMSGSIZE; 659 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 660 return -EMSGSIZE; 661 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 662 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 663 return -EMSGSIZE; 664 if ((output->tun_flags & TUNNEL_CSUM) && 665 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 666 return -EMSGSIZE; 667 if (output->tp_src && 668 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 669 return -EMSGSIZE; 670 if (output->tp_dst && 671 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 672 return -EMSGSIZE; 673 if ((output->tun_flags & TUNNEL_OAM) && 674 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 675 return -EMSGSIZE; 676 if (tun_opts) { 677 if (output->tun_flags & TUNNEL_GENEVE_OPT && 678 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 679 swkey_tun_opts_len, tun_opts)) 680 return -EMSGSIZE; 681 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 682 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 683 return -EMSGSIZE; 684 } 685 686 return 0; 687 } 688 689 static int ipv4_tun_to_nlattr(struct sk_buff *skb, 690 const struct ovs_key_ipv4_tunnel *output, 691 const void *tun_opts, int swkey_tun_opts_len) 692 { 693 struct nlattr *nla; 694 int err; 695 696 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 697 if (!nla) 698 return -EMSGSIZE; 699 700 err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); 701 if (err) 702 return err; 703 704 nla_nest_end(skb, nla); 705 return 0; 706 } 707 708 int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, 709 const struct ovs_tunnel_info *egress_tun_info) 710 { 711 return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, 712 egress_tun_info->options, 713 egress_tun_info->options_len); 714 } 715 716 static int metadata_from_nlattrs(struct sw_flow_match 
*match, u64 *attrs, 717 const struct nlattr **a, bool is_mask, 718 bool log) 719 { 720 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 721 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 722 723 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 724 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 725 } 726 727 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 728 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 729 730 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 731 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 732 } 733 734 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 735 SW_FLOW_KEY_PUT(match, phy.priority, 736 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 737 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 738 } 739 740 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 741 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 742 743 if (is_mask) { 744 in_port = 0xffffffff; /* Always exact match in_port. */ 745 } else if (in_port >= DP_MAX_PORTS) { 746 OVS_NLERR(log, "Port %d exceeds max allowable %d", 747 in_port, DP_MAX_PORTS); 748 return -EINVAL; 749 } 750 751 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 752 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 753 } else if (!is_mask) { 754 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 755 } 756 757 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 758 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 759 760 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 761 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 762 } 763 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 764 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 765 is_mask, log) < 0) 766 return -EINVAL; 767 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 768 } 769 return 0; 770 } 771 772 static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 773 const struct nlattr **a, bool is_mask, 774 bool log) 775 { 776 int err; 777 778 err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); 779 if (err) 780 return err; 781 782 if (attrs & (1 << 
OVS_KEY_ATTR_ETHERNET)) { 783 const struct ovs_key_ethernet *eth_key; 784 785 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 786 SW_FLOW_KEY_MEMCPY(match, eth.src, 787 eth_key->eth_src, ETH_ALEN, is_mask); 788 SW_FLOW_KEY_MEMCPY(match, eth.dst, 789 eth_key->eth_dst, ETH_ALEN, is_mask); 790 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 791 } 792 793 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 794 __be16 tci; 795 796 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 797 if (!(tci & htons(VLAN_TAG_PRESENT))) { 798 if (is_mask) 799 OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); 800 else 801 OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); 802 803 return -EINVAL; 804 } 805 806 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 807 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 808 } 809 810 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 811 __be16 eth_type; 812 813 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 814 if (is_mask) { 815 /* Always exact match EtherType. 
*/ 816 eth_type = htons(0xffff); 817 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 818 OVS_NLERR(log, "EtherType %x is less than min %x", 819 ntohs(eth_type), ETH_P_802_3_MIN); 820 return -EINVAL; 821 } 822 823 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 824 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 825 } else if (!is_mask) { 826 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 827 } 828 829 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 830 const struct ovs_key_ipv4 *ipv4_key; 831 832 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 833 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 834 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 835 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 836 return -EINVAL; 837 } 838 SW_FLOW_KEY_PUT(match, ip.proto, 839 ipv4_key->ipv4_proto, is_mask); 840 SW_FLOW_KEY_PUT(match, ip.tos, 841 ipv4_key->ipv4_tos, is_mask); 842 SW_FLOW_KEY_PUT(match, ip.ttl, 843 ipv4_key->ipv4_ttl, is_mask); 844 SW_FLOW_KEY_PUT(match, ip.frag, 845 ipv4_key->ipv4_frag, is_mask); 846 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 847 ipv4_key->ipv4_src, is_mask); 848 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 849 ipv4_key->ipv4_dst, is_mask); 850 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 851 } 852 853 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 854 const struct ovs_key_ipv6 *ipv6_key; 855 856 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 857 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 858 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 859 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 860 return -EINVAL; 861 } 862 863 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 864 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 865 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 866 return -EINVAL; 867 } 868 869 SW_FLOW_KEY_PUT(match, ipv6.label, 870 ipv6_key->ipv6_label, is_mask); 871 SW_FLOW_KEY_PUT(match, ip.proto, 872 ipv6_key->ipv6_proto, is_mask); 873 SW_FLOW_KEY_PUT(match, ip.tos, 874 ipv6_key->ipv6_tclass, 
is_mask); 875 SW_FLOW_KEY_PUT(match, ip.ttl, 876 ipv6_key->ipv6_hlimit, is_mask); 877 SW_FLOW_KEY_PUT(match, ip.frag, 878 ipv6_key->ipv6_frag, is_mask); 879 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 880 ipv6_key->ipv6_src, 881 sizeof(match->key->ipv6.addr.src), 882 is_mask); 883 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 884 ipv6_key->ipv6_dst, 885 sizeof(match->key->ipv6.addr.dst), 886 is_mask); 887 888 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 889 } 890 891 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 892 const struct ovs_key_arp *arp_key; 893 894 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 895 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 896 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 897 arp_key->arp_op); 898 return -EINVAL; 899 } 900 901 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 902 arp_key->arp_sip, is_mask); 903 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 904 arp_key->arp_tip, is_mask); 905 SW_FLOW_KEY_PUT(match, ip.proto, 906 ntohs(arp_key->arp_op), is_mask); 907 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 908 arp_key->arp_sha, ETH_ALEN, is_mask); 909 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 910 arp_key->arp_tha, ETH_ALEN, is_mask); 911 912 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 913 } 914 915 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 916 const struct ovs_key_mpls *mpls_key; 917 918 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 919 SW_FLOW_KEY_PUT(match, mpls.top_lse, 920 mpls_key->mpls_lse, is_mask); 921 922 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 923 } 924 925 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 926 const struct ovs_key_tcp *tcp_key; 927 928 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 929 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 930 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 931 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 932 } 933 934 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 935 SW_FLOW_KEY_PUT(match, tp.flags, 936 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 937 is_mask); 938 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 939 } 940 941 if (attrs & (1 << 
OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
			nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}

/* Overwrites the payload of every attribute nested inside 'attr' with the
 * byte 'val'.
 *
 * 'tbl' is indexed by attribute type.  When an entry's length is
 * OVS_ATTR_NESTED the attribute is itself a container, so instead of
 * clobbering the headers of its children we recurse into it using the
 * entry's 'next' table.  With a NULL 'tbl' every attribute is treated as a
 * leaf and filled directly.
 */
static void nlattr_set(struct nlattr *attr, u8 val,
		       const struct ovs_len_tbl *tbl)
{
	struct nlattr *nla;
	int rem;

	/* The nlattr stream should already have been validated */
	nla_for_each_nested(nla, attr, rem) {
		if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
			nlattr_set(nla, val, tbl[nla_type(nla)].next);
		else
			memset(nla_data(nla), val, nla_len(nla));
	}
}

/* Fills every leaf payload of the flow-key attribute stream 'attr' with
 * 'val', using ovs_key_lens to tell nested attributes from leaves.
 */
static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}

/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute sequence. The fields should be those of the packet that
 * triggered the creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
 * Netlink attribute sequence that specifies the mask field of the
 * wildcarded flow.
 * @log: Boolean to allow kernel error logging.  Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 */
int ovs_nla_get_match(struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	/* An 802.1Q ethertype means the remaining key attributes are carried
	 * inside OVS_KEY_ATTR_ENCAP and must be parsed from there.
	 */
	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR(log, "Invalid Vlan frame.");
			return -EINVAL;
		}

		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			/* Re-parse into 'a': inner attributes are merged with
			 * the outer ones already collected.
			 */
			err = parse_flow_nlattrs(encap, a, &key_attrs, log);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
				return -EINVAL;
			}
		} else {
			OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_key.ipv4_dst)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		/* From here on 'a' holds the mask attributes, not the key. */
		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match on tci. */
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
				err = -EINVAL;
				goto free_newmask;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a,
							      &mask_attrs, log);
				if (err)
					goto free_newmask;
			} else {
				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
					  ntohs(eth_type));
				err = -EINVAL;
				goto free_newmask;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
					  ntohs(tci));
				err = -EINVAL;
				goto free_newmask;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
		if (err)
			goto free_newmask;
	}

	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

	/* Reached on success as well: 'newmask' is only a scratch copy and
	 * kfree(NULL) is a no-op when no copy was made.
	 */
free_newmask:
	kfree(newmask);
	return err;
}

/* Returns the payload length of the UFID attribute 'attr', or 0 when 'attr'
 * is NULL or its length is outside 1..MAX_UFID_LENGTH (the latter is
 * reported through OVS_NLERR when 'log' is set).
 */
static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t len;

	if (!attr)
		return 0;

	len = nla_len(attr);
	if (len < 1 || len > MAX_UFID_LENGTH) {
		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
			  nla_len(attr), MAX_UFID_LENGTH);
		return 0;
	}

	return len;
}

/* Initializes 'sfid->ufid' and 'sfid->ufid_len' from 'attr', returning true
 * if 'attr' contains a valid UFID, or false otherwise (in which case
 * 'sfid->ufid_len' is 0 and 'sfid->ufid' is left untouched).
 */
bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
		      bool log)
{
	sfid->ufid_len = get_ufid_len(attr, log);
	if (sfid->ufid_len)
		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);

	return sfid->ufid_len;
}

/* Fills 'sfid' from the UFID attribute 'ufid' when it is valid; otherwise
 * stores a kmalloc()ed copy of 'key' in 'sfid->unmasked_key'.
 *
 * Returns 0 on success or -ENOMEM if the key copy cannot be allocated.
 */
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
			   const struct sw_flow_key *key, bool log)
{
	struct sw_flow_key *new_key;

	if (ovs_nla_get_ufid(sfid, ufid, log))
		return 0;

	/* If UFID was not provided, use unmasked key. */
	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
	if (!new_key)
		return -ENOMEM;
	memcpy(new_key, key, sizeof(*key));
	sfid->unmasked_key = new_key;

	return 0;
}

/* Returns the u32 payload of 'attr', or 0 when 'attr' is NULL. */
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
	return attr ? nla_get_u32(attr) : 0;
}

/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @key: Receives extracted in_port, priority, tun_key and skb_mark.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 * @log: Boolean to allow kernel error logging.
Normally true, but when 1242 * probing for feature compatibility this should be passed in as false to 1243 * suppress unnecessary error logging. 1244 * 1245 * This parses a series of Netlink attributes that form a flow key, which must 1246 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1247 * get the metadata, that is, the parts of the flow key that cannot be 1248 * extracted from the packet itself. 1249 */ 1250 1251 int ovs_nla_get_flow_metadata(const struct nlattr *attr, 1252 struct sw_flow_key *key, 1253 bool log) 1254 { 1255 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1256 struct sw_flow_match match; 1257 u64 attrs = 0; 1258 int err; 1259 1260 err = parse_flow_nlattrs(attr, a, &attrs, log); 1261 if (err) 1262 return -EINVAL; 1263 1264 memset(&match, 0, sizeof(match)); 1265 match.key = key; 1266 1267 key->phy.in_port = DP_MAX_PORTS; 1268 1269 return metadata_from_nlattrs(&match, &attrs, a, false, log); 1270 } 1271 1272 static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1273 const struct sw_flow_key *output, bool is_mask, 1274 struct sk_buff *skb) 1275 { 1276 struct ovs_key_ethernet *eth_key; 1277 struct nlattr *nla, *encap; 1278 1279 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1280 goto nla_put_failure; 1281 1282 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1283 goto nla_put_failure; 1284 1285 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1286 goto nla_put_failure; 1287 1288 if ((swkey->tun_key.ipv4_dst || is_mask)) { 1289 const void *opts = NULL; 1290 1291 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1292 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1293 1294 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, 1295 swkey->tun_opts_len)) 1296 goto nla_put_failure; 1297 } 1298 1299 if (swkey->phy.in_port == DP_MAX_PORTS) { 1300 if (is_mask && (output->phy.in_port == 0xffff)) 1301 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 
1302 goto nla_put_failure; 1303 } else { 1304 u16 upper_u16; 1305 upper_u16 = !is_mask ? 0 : 0xffff; 1306 1307 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1308 (upper_u16 << 16) | output->phy.in_port)) 1309 goto nla_put_failure; 1310 } 1311 1312 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1313 goto nla_put_failure; 1314 1315 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1316 if (!nla) 1317 goto nla_put_failure; 1318 1319 eth_key = nla_data(nla); 1320 ether_addr_copy(eth_key->eth_src, output->eth.src); 1321 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1322 1323 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1324 __be16 eth_type; 1325 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 1326 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1327 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 1328 goto nla_put_failure; 1329 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1330 if (!swkey->eth.tci) 1331 goto unencap; 1332 } else 1333 encap = NULL; 1334 1335 if (swkey->eth.type == htons(ETH_P_802_2)) { 1336 /* 1337 * Ethertype 802.2 is represented in the netlink with omitted 1338 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1339 * 0xffff in the mask attribute. Ethertype can also 1340 * be wildcarded. 
1341 */ 1342 if (is_mask && output->eth.type) 1343 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1344 output->eth.type)) 1345 goto nla_put_failure; 1346 goto unencap; 1347 } 1348 1349 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1350 goto nla_put_failure; 1351 1352 if (swkey->eth.type == htons(ETH_P_IP)) { 1353 struct ovs_key_ipv4 *ipv4_key; 1354 1355 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1356 if (!nla) 1357 goto nla_put_failure; 1358 ipv4_key = nla_data(nla); 1359 ipv4_key->ipv4_src = output->ipv4.addr.src; 1360 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1361 ipv4_key->ipv4_proto = output->ip.proto; 1362 ipv4_key->ipv4_tos = output->ip.tos; 1363 ipv4_key->ipv4_ttl = output->ip.ttl; 1364 ipv4_key->ipv4_frag = output->ip.frag; 1365 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1366 struct ovs_key_ipv6 *ipv6_key; 1367 1368 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1369 if (!nla) 1370 goto nla_put_failure; 1371 ipv6_key = nla_data(nla); 1372 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1373 sizeof(ipv6_key->ipv6_src)); 1374 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1375 sizeof(ipv6_key->ipv6_dst)); 1376 ipv6_key->ipv6_label = output->ipv6.label; 1377 ipv6_key->ipv6_proto = output->ip.proto; 1378 ipv6_key->ipv6_tclass = output->ip.tos; 1379 ipv6_key->ipv6_hlimit = output->ip.ttl; 1380 ipv6_key->ipv6_frag = output->ip.frag; 1381 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1382 swkey->eth.type == htons(ETH_P_RARP)) { 1383 struct ovs_key_arp *arp_key; 1384 1385 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1386 if (!nla) 1387 goto nla_put_failure; 1388 arp_key = nla_data(nla); 1389 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1390 arp_key->arp_sip = output->ipv4.addr.src; 1391 arp_key->arp_tip = output->ipv4.addr.dst; 1392 arp_key->arp_op = htons(output->ip.proto); 1393 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1394 ether_addr_copy(arp_key->arp_tha, 
output->ipv4.arp.tha); 1395 } else if (eth_p_mpls(swkey->eth.type)) { 1396 struct ovs_key_mpls *mpls_key; 1397 1398 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1399 if (!nla) 1400 goto nla_put_failure; 1401 mpls_key = nla_data(nla); 1402 mpls_key->mpls_lse = output->mpls.top_lse; 1403 } 1404 1405 if ((swkey->eth.type == htons(ETH_P_IP) || 1406 swkey->eth.type == htons(ETH_P_IPV6)) && 1407 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1408 1409 if (swkey->ip.proto == IPPROTO_TCP) { 1410 struct ovs_key_tcp *tcp_key; 1411 1412 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1413 if (!nla) 1414 goto nla_put_failure; 1415 tcp_key = nla_data(nla); 1416 tcp_key->tcp_src = output->tp.src; 1417 tcp_key->tcp_dst = output->tp.dst; 1418 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1419 output->tp.flags)) 1420 goto nla_put_failure; 1421 } else if (swkey->ip.proto == IPPROTO_UDP) { 1422 struct ovs_key_udp *udp_key; 1423 1424 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1425 if (!nla) 1426 goto nla_put_failure; 1427 udp_key = nla_data(nla); 1428 udp_key->udp_src = output->tp.src; 1429 udp_key->udp_dst = output->tp.dst; 1430 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1431 struct ovs_key_sctp *sctp_key; 1432 1433 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1434 if (!nla) 1435 goto nla_put_failure; 1436 sctp_key = nla_data(nla); 1437 sctp_key->sctp_src = output->tp.src; 1438 sctp_key->sctp_dst = output->tp.dst; 1439 } else if (swkey->eth.type == htons(ETH_P_IP) && 1440 swkey->ip.proto == IPPROTO_ICMP) { 1441 struct ovs_key_icmp *icmp_key; 1442 1443 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1444 if (!nla) 1445 goto nla_put_failure; 1446 icmp_key = nla_data(nla); 1447 icmp_key->icmp_type = ntohs(output->tp.src); 1448 icmp_key->icmp_code = ntohs(output->tp.dst); 1449 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1450 swkey->ip.proto == IPPROTO_ICMPV6) { 1451 struct ovs_key_icmpv6 *icmpv6_key; 1452 
1453 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1454 sizeof(*icmpv6_key)); 1455 if (!nla) 1456 goto nla_put_failure; 1457 icmpv6_key = nla_data(nla); 1458 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1459 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1460 1461 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1462 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1463 struct ovs_key_nd *nd_key; 1464 1465 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1466 if (!nla) 1467 goto nla_put_failure; 1468 nd_key = nla_data(nla); 1469 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1470 sizeof(nd_key->nd_target)); 1471 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1472 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1473 } 1474 } 1475 } 1476 1477 unencap: 1478 if (encap) 1479 nla_nest_end(skb, encap); 1480 1481 return 0; 1482 1483 nla_put_failure: 1484 return -EMSGSIZE; 1485 } 1486 1487 int ovs_nla_put_key(const struct sw_flow_key *swkey, 1488 const struct sw_flow_key *output, int attr, bool is_mask, 1489 struct sk_buff *skb) 1490 { 1491 int err; 1492 struct nlattr *nla; 1493 1494 nla = nla_nest_start(skb, attr); 1495 if (!nla) 1496 return -EMSGSIZE; 1497 err = __ovs_nla_put_key(swkey, output, is_mask, skb); 1498 if (err) 1499 return err; 1500 nla_nest_end(skb, nla); 1501 1502 return 0; 1503 } 1504 1505 /* Called with ovs_mutex or RCU read lock. */ 1506 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) 1507 { 1508 if (ovs_identifier_is_ufid(&flow->id)) 1509 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, 1510 flow->id.ufid); 1511 1512 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, 1513 OVS_FLOW_ATTR_KEY, false, skb); 1514 } 1515 1516 /* Called with ovs_mutex or RCU read lock. 
*/ 1517 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) 1518 { 1519 return ovs_nla_put_key(&flow->key, &flow->key, 1520 OVS_FLOW_ATTR_KEY, false, skb); 1521 } 1522 1523 /* Called with ovs_mutex or RCU read lock. */ 1524 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) 1525 { 1526 return ovs_nla_put_key(&flow->key, &flow->mask->key, 1527 OVS_FLOW_ATTR_MASK, true, skb); 1528 } 1529 1530 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1531 1532 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1533 { 1534 struct sw_flow_actions *sfa; 1535 1536 if (size > MAX_ACTIONS_BUFSIZE) { 1537 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1538 return ERR_PTR(-EINVAL); 1539 } 1540 1541 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1542 if (!sfa) 1543 return ERR_PTR(-ENOMEM); 1544 1545 sfa->actions_len = 0; 1546 return sfa; 1547 } 1548 1549 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1550 * The caller must hold rcu_read_lock for this to be sensible. 
*/ 1551 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1552 { 1553 kfree_rcu(sf_acts, rcu); 1554 } 1555 1556 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1557 int attr_len, bool log) 1558 { 1559 1560 struct sw_flow_actions *acts; 1561 int new_acts_size; 1562 int req_size = NLA_ALIGN(attr_len); 1563 int next_offset = offsetof(struct sw_flow_actions, actions) + 1564 (*sfa)->actions_len; 1565 1566 if (req_size <= (ksize(*sfa) - next_offset)) 1567 goto out; 1568 1569 new_acts_size = ksize(*sfa) * 2; 1570 1571 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1572 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1573 return ERR_PTR(-EMSGSIZE); 1574 new_acts_size = MAX_ACTIONS_BUFSIZE; 1575 } 1576 1577 acts = nla_alloc_flow_actions(new_acts_size, log); 1578 if (IS_ERR(acts)) 1579 return (void *)acts; 1580 1581 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1582 acts->actions_len = (*sfa)->actions_len; 1583 kfree(*sfa); 1584 *sfa = acts; 1585 1586 out: 1587 (*sfa)->actions_len += req_size; 1588 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1589 } 1590 1591 static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1592 int attrtype, void *data, int len, bool log) 1593 { 1594 struct nlattr *a; 1595 1596 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 1597 if (IS_ERR(a)) 1598 return a; 1599 1600 a->nla_type = attrtype; 1601 a->nla_len = nla_attr_size(len); 1602 1603 if (data) 1604 memcpy(nla_data(a), data, len); 1605 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1606 1607 return a; 1608 } 1609 1610 static int add_action(struct sw_flow_actions **sfa, int attrtype, 1611 void *data, int len, bool log) 1612 { 1613 struct nlattr *a; 1614 1615 a = __add_action(sfa, attrtype, data, len, log); 1616 1617 return PTR_ERR_OR_ZERO(a); 1618 } 1619 1620 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1621 int attrtype, bool log) 1622 { 1623 int used = 
(*sfa)->actions_len; 1624 int err; 1625 1626 err = add_action(sfa, attrtype, NULL, 0, log); 1627 if (err) 1628 return err; 1629 1630 return used; 1631 } 1632 1633 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1634 int st_offset) 1635 { 1636 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 1637 st_offset); 1638 1639 a->nla_len = sfa->actions_len - st_offset; 1640 } 1641 1642 static int __ovs_nla_copy_actions(const struct nlattr *attr, 1643 const struct sw_flow_key *key, 1644 int depth, struct sw_flow_actions **sfa, 1645 __be16 eth_type, __be16 vlan_tci, bool log); 1646 1647 static int validate_and_copy_sample(const struct nlattr *attr, 1648 const struct sw_flow_key *key, int depth, 1649 struct sw_flow_actions **sfa, 1650 __be16 eth_type, __be16 vlan_tci, bool log) 1651 { 1652 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1653 const struct nlattr *probability, *actions; 1654 const struct nlattr *a; 1655 int rem, start, err, st_acts; 1656 1657 memset(attrs, 0, sizeof(attrs)); 1658 nla_for_each_nested(a, attr, rem) { 1659 int type = nla_type(a); 1660 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1661 return -EINVAL; 1662 attrs[type] = a; 1663 } 1664 if (rem) 1665 return -EINVAL; 1666 1667 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 1668 if (!probability || nla_len(probability) != sizeof(u32)) 1669 return -EINVAL; 1670 1671 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 1672 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 1673 return -EINVAL; 1674 1675 /* validation done, copy sample action. 
*/ 1676 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 1677 if (start < 0) 1678 return start; 1679 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1680 nla_data(probability), sizeof(u32), log); 1681 if (err) 1682 return err; 1683 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 1684 if (st_acts < 0) 1685 return st_acts; 1686 1687 err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, 1688 eth_type, vlan_tci, log); 1689 if (err) 1690 return err; 1691 1692 add_nested_action_end(*sfa, st_acts); 1693 add_nested_action_end(*sfa, start); 1694 1695 return 0; 1696 } 1697 1698 void ovs_match_init(struct sw_flow_match *match, 1699 struct sw_flow_key *key, 1700 struct sw_flow_mask *mask) 1701 { 1702 memset(match, 0, sizeof(*match)); 1703 match->key = key; 1704 match->mask = mask; 1705 1706 memset(key, 0, sizeof(*key)); 1707 1708 if (mask) { 1709 memset(&mask->key, 0, sizeof(mask->key)); 1710 mask->range.start = mask->range.end = 0; 1711 } 1712 } 1713 1714 static int validate_geneve_opts(struct sw_flow_key *key) 1715 { 1716 struct geneve_opt *option; 1717 int opts_len = key->tun_opts_len; 1718 bool crit_opt = false; 1719 1720 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 1721 while (opts_len > 0) { 1722 int len; 1723 1724 if (opts_len < sizeof(*option)) 1725 return -EINVAL; 1726 1727 len = sizeof(*option) + option->length * 4; 1728 if (len > opts_len) 1729 return -EINVAL; 1730 1731 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 1732 1733 option = (struct geneve_opt *)((u8 *)option + len); 1734 opts_len -= len; 1735 }; 1736 1737 key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; 1738 1739 return 0; 1740 } 1741 1742 static int validate_and_copy_set_tun(const struct nlattr *attr, 1743 struct sw_flow_actions **sfa, bool log) 1744 { 1745 struct sw_flow_match match; 1746 struct sw_flow_key key; 1747 struct ovs_tunnel_info *tun_info; 1748 struct nlattr *a; 1749 int err = 0, start, opts_type; 1750 1751 ovs_match_init(&match, &key, NULL); 1752 opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1753 if (opts_type < 0) 1754 return opts_type; 1755 1756 if (key.tun_opts_len) { 1757 switch (opts_type) { 1758 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 1759 err = validate_geneve_opts(&key); 1760 if (err < 0) 1761 return err; 1762 break; 1763 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 1764 break; 1765 } 1766 }; 1767 1768 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 1769 if (start < 0) 1770 return start; 1771 1772 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1773 sizeof(*tun_info) + key.tun_opts_len, log); 1774 if (IS_ERR(a)) 1775 return PTR_ERR(a); 1776 1777 tun_info = nla_data(a); 1778 tun_info->tunnel = key.tun_key; 1779 tun_info->options_len = key.tun_opts_len; 1780 1781 if (tun_info->options_len) { 1782 /* We need to store the options in the action itself since 1783 * everything else will go away after flow setup. We can append 1784 * it to tun_info and then point there. 1785 */ 1786 memcpy((tun_info + 1), 1787 TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); 1788 tun_info->options = (tun_info + 1); 1789 } else { 1790 tun_info->options = NULL; 1791 } 1792 1793 add_nested_action_end(*sfa, start); 1794 1795 return err; 1796 } 1797 1798 /* Return false if there are any non-masked bits set. 1799 * Mask follows data immediately, before any netlink padding. 
1800 */ 1801 static bool validate_masked(u8 *data, int len) 1802 { 1803 u8 *mask = data + len; 1804 1805 while (len--) 1806 if (*data++ & ~*mask++) 1807 return false; 1808 1809 return true; 1810 } 1811 1812 static int validate_set(const struct nlattr *a, 1813 const struct sw_flow_key *flow_key, 1814 struct sw_flow_actions **sfa, 1815 bool *skip_copy, __be16 eth_type, bool masked, bool log) 1816 { 1817 const struct nlattr *ovs_key = nla_data(a); 1818 int key_type = nla_type(ovs_key); 1819 size_t key_len; 1820 1821 /* There can be only one key in a action */ 1822 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1823 return -EINVAL; 1824 1825 key_len = nla_len(ovs_key); 1826 if (masked) 1827 key_len /= 2; 1828 1829 if (key_type > OVS_KEY_ATTR_MAX || 1830 (ovs_key_lens[key_type].len != key_len && 1831 ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) 1832 return -EINVAL; 1833 1834 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 1835 return -EINVAL; 1836 1837 switch (key_type) { 1838 const struct ovs_key_ipv4 *ipv4_key; 1839 const struct ovs_key_ipv6 *ipv6_key; 1840 int err; 1841 1842 case OVS_KEY_ATTR_PRIORITY: 1843 case OVS_KEY_ATTR_SKB_MARK: 1844 case OVS_KEY_ATTR_ETHERNET: 1845 break; 1846 1847 case OVS_KEY_ATTR_TUNNEL: 1848 if (eth_p_mpls(eth_type)) 1849 return -EINVAL; 1850 1851 if (masked) 1852 return -EINVAL; /* Masked tunnel set not supported. */ 1853 1854 *skip_copy = true; 1855 err = validate_and_copy_set_tun(a, sfa, log); 1856 if (err) 1857 return err; 1858 break; 1859 1860 case OVS_KEY_ATTR_IPV4: 1861 if (eth_type != htons(ETH_P_IP)) 1862 return -EINVAL; 1863 1864 ipv4_key = nla_data(ovs_key); 1865 1866 if (masked) { 1867 const struct ovs_key_ipv4 *mask = ipv4_key + 1; 1868 1869 /* Non-writeable fields. 
*/ 1870 if (mask->ipv4_proto || mask->ipv4_frag) 1871 return -EINVAL; 1872 } else { 1873 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1874 return -EINVAL; 1875 1876 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1877 return -EINVAL; 1878 } 1879 break; 1880 1881 case OVS_KEY_ATTR_IPV6: 1882 if (eth_type != htons(ETH_P_IPV6)) 1883 return -EINVAL; 1884 1885 ipv6_key = nla_data(ovs_key); 1886 1887 if (masked) { 1888 const struct ovs_key_ipv6 *mask = ipv6_key + 1; 1889 1890 /* Non-writeable fields. */ 1891 if (mask->ipv6_proto || mask->ipv6_frag) 1892 return -EINVAL; 1893 1894 /* Invalid bits in the flow label mask? */ 1895 if (ntohl(mask->ipv6_label) & 0xFFF00000) 1896 return -EINVAL; 1897 } else { 1898 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1899 return -EINVAL; 1900 1901 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1902 return -EINVAL; 1903 } 1904 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1905 return -EINVAL; 1906 1907 break; 1908 1909 case OVS_KEY_ATTR_TCP: 1910 if ((eth_type != htons(ETH_P_IP) && 1911 eth_type != htons(ETH_P_IPV6)) || 1912 flow_key->ip.proto != IPPROTO_TCP) 1913 return -EINVAL; 1914 1915 break; 1916 1917 case OVS_KEY_ATTR_UDP: 1918 if ((eth_type != htons(ETH_P_IP) && 1919 eth_type != htons(ETH_P_IPV6)) || 1920 flow_key->ip.proto != IPPROTO_UDP) 1921 return -EINVAL; 1922 1923 break; 1924 1925 case OVS_KEY_ATTR_MPLS: 1926 if (!eth_p_mpls(eth_type)) 1927 return -EINVAL; 1928 break; 1929 1930 case OVS_KEY_ATTR_SCTP: 1931 if ((eth_type != htons(ETH_P_IP) && 1932 eth_type != htons(ETH_P_IPV6)) || 1933 flow_key->ip.proto != IPPROTO_SCTP) 1934 return -EINVAL; 1935 1936 break; 1937 1938 default: 1939 return -EINVAL; 1940 } 1941 1942 /* Convert non-masked non-tunnel set actions to masked set actions. 
*/ 1943 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 1944 int start, len = key_len * 2; 1945 struct nlattr *at; 1946 1947 *skip_copy = true; 1948 1949 start = add_nested_action_start(sfa, 1950 OVS_ACTION_ATTR_SET_TO_MASKED, 1951 log); 1952 if (start < 0) 1953 return start; 1954 1955 at = __add_action(sfa, key_type, NULL, len, log); 1956 if (IS_ERR(at)) 1957 return PTR_ERR(at); 1958 1959 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 1960 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 1961 /* Clear non-writeable bits from otherwise writeable fields. */ 1962 if (key_type == OVS_KEY_ATTR_IPV6) { 1963 struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 1964 1965 mask->ipv6_label &= htonl(0x000FFFFF); 1966 } 1967 add_nested_action_end(*sfa, start); 1968 } 1969 1970 return 0; 1971 } 1972 1973 static int validate_userspace(const struct nlattr *attr) 1974 { 1975 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1976 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1977 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1978 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 1979 }; 1980 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1981 int error; 1982 1983 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1984 attr, userspace_policy); 1985 if (error) 1986 return error; 1987 1988 if (!a[OVS_USERSPACE_ATTR_PID] || 1989 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1990 return -EINVAL; 1991 1992 return 0; 1993 } 1994 1995 static int copy_action(const struct nlattr *from, 1996 struct sw_flow_actions **sfa, bool log) 1997 { 1998 int totlen = NLA_ALIGN(from->nla_len); 1999 struct nlattr *to; 2000 2001 to = reserve_sfa_size(sfa, from->nla_len, log); 2002 if (IS_ERR(to)) 2003 return PTR_ERR(to); 2004 2005 memcpy(to, from, totlen); 2006 return 0; 2007 } 2008 2009 static int __ovs_nla_copy_actions(const struct nlattr *attr, 2010 const struct sw_flow_key *key, 2011 int depth, struct sw_flow_actions **sfa, 2012 
__be16 eth_type, __be16 vlan_tci, bool log) 2013 { 2014 const struct nlattr *a; 2015 int rem, err; 2016 2017 if (depth >= SAMPLE_ACTION_DEPTH) 2018 return -EOVERFLOW; 2019 2020 nla_for_each_nested(a, attr, rem) { 2021 /* Expected argument lengths, (u32)-1 for variable length. */ 2022 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 2023 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 2024 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 2025 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 2026 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 2027 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 2028 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2029 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2030 [OVS_ACTION_ATTR_SET] = (u32)-1, 2031 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2032 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2033 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) 2034 }; 2035 const struct ovs_action_push_vlan *vlan; 2036 int type = nla_type(a); 2037 bool skip_copy; 2038 2039 if (type > OVS_ACTION_ATTR_MAX || 2040 (action_lens[type] != nla_len(a) && 2041 action_lens[type] != (u32)-1)) 2042 return -EINVAL; 2043 2044 skip_copy = false; 2045 switch (type) { 2046 case OVS_ACTION_ATTR_UNSPEC: 2047 return -EINVAL; 2048 2049 case OVS_ACTION_ATTR_USERSPACE: 2050 err = validate_userspace(a); 2051 if (err) 2052 return err; 2053 break; 2054 2055 case OVS_ACTION_ATTR_OUTPUT: 2056 if (nla_get_u32(a) >= DP_MAX_PORTS) 2057 return -EINVAL; 2058 break; 2059 2060 case OVS_ACTION_ATTR_HASH: { 2061 const struct ovs_action_hash *act_hash = nla_data(a); 2062 2063 switch (act_hash->hash_alg) { 2064 case OVS_HASH_ALG_L4: 2065 break; 2066 default: 2067 return -EINVAL; 2068 } 2069 2070 break; 2071 } 2072 2073 case OVS_ACTION_ATTR_POP_VLAN: 2074 vlan_tci = htons(0); 2075 break; 2076 2077 case OVS_ACTION_ATTR_PUSH_VLAN: 2078 vlan = nla_data(a); 2079 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 2080 return -EINVAL; 2081 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 2082 return 
-EINVAL; 2083 vlan_tci = vlan->vlan_tci; 2084 break; 2085 2086 case OVS_ACTION_ATTR_RECIRC: 2087 break; 2088 2089 case OVS_ACTION_ATTR_PUSH_MPLS: { 2090 const struct ovs_action_push_mpls *mpls = nla_data(a); 2091 2092 if (!eth_p_mpls(mpls->mpls_ethertype)) 2093 return -EINVAL; 2094 /* Prohibit push MPLS other than to a white list 2095 * for packets that have a known tag order. 2096 */ 2097 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2098 (eth_type != htons(ETH_P_IP) && 2099 eth_type != htons(ETH_P_IPV6) && 2100 eth_type != htons(ETH_P_ARP) && 2101 eth_type != htons(ETH_P_RARP) && 2102 !eth_p_mpls(eth_type))) 2103 return -EINVAL; 2104 eth_type = mpls->mpls_ethertype; 2105 break; 2106 } 2107 2108 case OVS_ACTION_ATTR_POP_MPLS: 2109 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2110 !eth_p_mpls(eth_type)) 2111 return -EINVAL; 2112 2113 /* Disallow subsequent L2.5+ set and mpls_pop actions 2114 * as there is no check here to ensure that the new 2115 * eth_type is valid and thus set actions could 2116 * write off the end of the packet or otherwise 2117 * corrupt it. 2118 * 2119 * Support for these actions is planned using packet 2120 * recirculation. 
2121 */ 2122 eth_type = htons(0); 2123 break; 2124 2125 case OVS_ACTION_ATTR_SET: 2126 err = validate_set(a, key, sfa, 2127 &skip_copy, eth_type, false, log); 2128 if (err) 2129 return err; 2130 break; 2131 2132 case OVS_ACTION_ATTR_SET_MASKED: 2133 err = validate_set(a, key, sfa, 2134 &skip_copy, eth_type, true, log); 2135 if (err) 2136 return err; 2137 break; 2138 2139 case OVS_ACTION_ATTR_SAMPLE: 2140 err = validate_and_copy_sample(a, key, depth, sfa, 2141 eth_type, vlan_tci, log); 2142 if (err) 2143 return err; 2144 skip_copy = true; 2145 break; 2146 2147 default: 2148 OVS_NLERR(log, "Unknown Action type %d", type); 2149 return -EINVAL; 2150 } 2151 if (!skip_copy) { 2152 err = copy_action(a, sfa, log); 2153 if (err) 2154 return err; 2155 } 2156 } 2157 2158 if (rem > 0) 2159 return -EINVAL; 2160 2161 return 0; 2162 } 2163 2164 /* 'key' must be the masked key. */ 2165 int ovs_nla_copy_actions(const struct nlattr *attr, 2166 const struct sw_flow_key *key, 2167 struct sw_flow_actions **sfa, bool log) 2168 { 2169 int err; 2170 2171 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 2172 if (IS_ERR(*sfa)) 2173 return PTR_ERR(*sfa); 2174 2175 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, 2176 key->eth.tci, log); 2177 if (err) 2178 kfree(*sfa); 2179 2180 return err; 2181 } 2182 2183 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 2184 { 2185 const struct nlattr *a; 2186 struct nlattr *start; 2187 int err = 0, rem; 2188 2189 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 2190 if (!start) 2191 return -EMSGSIZE; 2192 2193 nla_for_each_nested(a, attr, rem) { 2194 int type = nla_type(a); 2195 struct nlattr *st_sample; 2196 2197 switch (type) { 2198 case OVS_SAMPLE_ATTR_PROBABILITY: 2199 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 2200 sizeof(u32), nla_data(a))) 2201 return -EMSGSIZE; 2202 break; 2203 case OVS_SAMPLE_ATTR_ACTIONS: 2204 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 2205 if (!st_sample) 2206 
return -EMSGSIZE; 2207 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 2208 if (err) 2209 return err; 2210 nla_nest_end(skb, st_sample); 2211 break; 2212 } 2213 } 2214 2215 nla_nest_end(skb, start); 2216 return err; 2217 } 2218 2219 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 2220 { 2221 const struct nlattr *ovs_key = nla_data(a); 2222 int key_type = nla_type(ovs_key); 2223 struct nlattr *start; 2224 int err; 2225 2226 switch (key_type) { 2227 case OVS_KEY_ATTR_TUNNEL_INFO: { 2228 struct ovs_tunnel_info *tun_info = nla_data(ovs_key); 2229 2230 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2231 if (!start) 2232 return -EMSGSIZE; 2233 2234 err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, 2235 tun_info->options_len ? 2236 tun_info->options : NULL, 2237 tun_info->options_len); 2238 if (err) 2239 return err; 2240 nla_nest_end(skb, start); 2241 break; 2242 } 2243 default: 2244 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 2245 return -EMSGSIZE; 2246 break; 2247 } 2248 2249 return 0; 2250 } 2251 2252 static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2253 struct sk_buff *skb) 2254 { 2255 const struct nlattr *ovs_key = nla_data(a); 2256 struct nlattr *nla; 2257 size_t key_len = nla_len(ovs_key) / 2; 2258 2259 /* Revert the conversion we did from a non-masked set action to 2260 * masked set action. 
2261 */ 2262 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2263 if (!nla) 2264 return -EMSGSIZE; 2265 2266 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) 2267 return -EMSGSIZE; 2268 2269 nla_nest_end(skb, nla); 2270 return 0; 2271 } 2272 2273 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2274 { 2275 const struct nlattr *a; 2276 int rem, err; 2277 2278 nla_for_each_attr(a, attr, len, rem) { 2279 int type = nla_type(a); 2280 2281 switch (type) { 2282 case OVS_ACTION_ATTR_SET: 2283 err = set_action_to_attr(a, skb); 2284 if (err) 2285 return err; 2286 break; 2287 2288 case OVS_ACTION_ATTR_SET_TO_MASKED: 2289 err = masked_set_action_to_set_action_attr(a, skb); 2290 if (err) 2291 return err; 2292 break; 2293 2294 case OVS_ACTION_ATTR_SAMPLE: 2295 err = sample_action_to_attr(a, skb); 2296 if (err) 2297 return err; 2298 break; 2299 default: 2300 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2301 return -EMSGSIZE; 2302 break; 2303 } 2304 } 2305 2306 return 0; 2307 } 2308