/*
 * Copyright (c) 2007-2017 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>

#include "flow_netlink.h"

struct ovs_len_tbl {
	int len;
	const struct ovs_len_tbl *next;
};

#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2

static bool actions_may_change_flow(const struct nlattr *actions)
{
	struct nlattr *nla;
	int rem;

	nla_for_each_nested(nla, actions, rem) {
		u16 action = nla_type(nla);

		switch (action) {
		case OVS_ACTION_ATTR_OUTPUT:
		case OVS_ACTION_ATTR_RECIRC:
		case OVS_ACTION_ATTR_TRUNC:
		case OVS_ACTION_ATTR_USERSPACE:
			break;

		case OVS_ACTION_ATTR_CT:
		case OVS_ACTION_ATTR_HASH:
		case OVS_ACTION_ATTR_POP_ETH:
		case OVS_ACTION_ATTR_POP_MPLS:
		case OVS_ACTION_ATTR_POP_VLAN:
		case OVS_ACTION_ATTR_PUSH_ETH:
		case OVS_ACTION_ATTR_PUSH_MPLS:
		case OVS_ACTION_ATTR_PUSH_VLAN:
		case OVS_ACTION_ATTR_SAMPLE:
		case OVS_ACTION_ATTR_SET:
		case OVS_ACTION_ATTR_SET_MASKED:
		default:
			return true;
		}
	}
	return false;
}

static void update_range(struct sw_flow_match *match,
			 size_t offset, size_t size, bool is_mask)
{
	struct sw_flow_key_range *range;
	size_t start = rounddown(offset, sizeof(long));
	size_t end = roundup(offset + size, sizeof(long));

	if (!is_mask)
		range = &match->range;
	else
		range = &match->mask->range;

	if (range->start == range->end) {
		range->start = start;
		range->end = end;
		return;
	}

	if (range->start > start)
		range->start = start;

	if (range->end < end)
		range->end = end;
}

#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field), \
			     sizeof((match)->key->field), is_mask); \
		if (is_mask) \
			(match)->mask->key.field = value; \
		else \
			(match)->key->field = value; \
	} while (0)
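/* These SW_FLOW_KEY_* helpers all funnel through update_range(): a call
 * such as, for example,
 *
 *	SW_FLOW_KEY_PUT(match, ip.proto, IPPROTO_TCP, is_mask);
 *
 * first widens the relevant range (match->range or match->mask->range)
 * to cover 'ip.proto', rounded out to long-sized boundaries, and then
 * stores the value into either the key or the mask.
 */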
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
	do { \
		update_range(match, offset, len, is_mask); \
		if (is_mask) \
			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
			       len); \
		else \
			memcpy((u8 *)(match)->key + offset, value_p, len); \
	} while (0)

#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
				  value_p, len, is_mask)

#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
	do { \
		update_range(match, offsetof(struct sw_flow_key, field), \
			     sizeof((match)->key->field), is_mask); \
		if (is_mask) \
			memset((u8 *)&(match)->mask->key.field, value, \
			       sizeof((match)->mask->key.field)); \
		else \
			memset((u8 *)&(match)->key->field, value, \
			       sizeof((match)->key->field)); \
	} while (0)

static bool match_validate(const struct sw_flow_match *match,
			   u64 key_attrs, u64 mask_attrs, bool log)
{
	u64 key_expected = 0;
	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */

	/* The following mask attributes are allowed only if they
	 * pass the validation tests.
	 */
	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
			| (1 << OVS_KEY_ATTR_IPV6)
			| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
			| (1 << OVS_KEY_ATTR_TCP)
			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
			| (1 << OVS_KEY_ATTR_UDP)
			| (1 << OVS_KEY_ATTR_SCTP)
			| (1 << OVS_KEY_ATTR_ICMP)
			| (1 << OVS_KEY_ATTR_ICMPV6)
			| (1 << OVS_KEY_ATTR_ARP)
			| (1 << OVS_KEY_ATTR_ND)
			| (1 << OVS_KEY_ATTR_MPLS));

	/* Always allowed mask fields. */
	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
		       | (1 << OVS_KEY_ATTR_IN_PORT)
		       | (1 << OVS_KEY_ATTR_ETHERTYPE));

	/* Check key attributes. */
	if (match->key->eth.type == htons(ETH_P_ARP)
			|| match->key->eth.type == htons(ETH_P_RARP)) {
		key_expected |= 1 << OVS_KEY_ATTR_ARP;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
	}

	if (eth_p_mpls(match->key->eth.type)) {
		key_expected |= 1 << OVS_KEY_ATTR_MPLS;
		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
			mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
	}

	if (match->key->eth.type == htons(ETH_P_IP)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
		}

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMP) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
			}
		}
	}
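	/* The IPv6 checks below mirror the IPv4 block above: an L4
	 * attribute is expected only when the unmasked key selects that
	 * protocol, and its mask is honoured only under an exact-match
	 * ip.proto mask.
	 */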
	if (match->key->eth.type == htons(ETH_P_IPV6)) {
		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
		if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
			mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
		}

		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (match->key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1 << OVS_KEY_ATTR_UDP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
			}

			if (match->key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
			}

			if (match->key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1 << OVS_KEY_ATTR_TCP;
				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
				}
			}

			if (match->key->ip.proto == IPPROTO_ICMPV6) {
				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
				if (match->mask && (match->mask->key.ip.proto == 0xff))
					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;

				if (match->key->tp.src ==
						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
					key_expected |= 1 << OVS_KEY_ATTR_ND;
					/* Original direction conntrack tuple
					 * uses the same space as the ND fields
					 * in the key, so both are not allowed
					 * at the same time.
					 */
					mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
					if (match->mask && (match->mask->key.tp.src == htons(0xff)))
						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
				}
			}
		}
	}

	if ((key_attrs & key_expected) != key_expected) {
		/* Key attributes check failed. */
		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
			  (unsigned long long)key_attrs,
			  (unsigned long long)key_expected);
		return false;
	}

	if ((mask_attrs & mask_allowed) != mask_attrs) {
		/* Mask attributes check failed. */
		OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
			  (unsigned long long)mask_attrs,
			  (unsigned long long)mask_allowed);
		return false;
	}

	return true;
}

size_t ovs_tun_key_attr_size(void)
{
	/* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
		+ nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
		+ nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
		 */
		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}
size_t ovs_key_attr_size(void)
{
	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
	 * updating this function.
	 */
	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);

	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
		  + ovs_tun_key_attr_size()
		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
	[OVS_VXLAN_EXT_GBP]	= { .len = sizeof(u32) },
};

static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
	[OVS_TUNNEL_KEY_ATTR_ID]	    = { .len = sizeof(u64) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_IPV4_DST]	    = { .len = sizeof(u32) },
	[OVS_TUNNEL_KEY_ATTR_TOS]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_TTL]	    = { .len = 1 },
	[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_CSUM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_TP_SRC]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
						.next = ovs_vxlan_ext_key_lens },
	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]	    = { .len = sizeof(struct in6_addr) },
	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]	    = { .len = sizeof(struct in6_addr) },
};
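/* In these tables a .len of OVS_ATTR_NESTED (-1) marks an attribute that
 * carries further nested attributes, described by .next, and
 * OVS_ATTR_VARIABLE (-2) marks one whose payload length is checked by its
 * own parser; check_attr_len() below accepts any length for both.
 */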
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
	[OVS_KEY_ATTR_PRIORITY]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_IN_PORT]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_SKB_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_ETHERNET]	 = { .len = sizeof(struct ovs_key_ethernet) },
	[OVS_KEY_ATTR_VLAN]	 = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_IPV4]	 = { .len = sizeof(struct ovs_key_ipv4) },
	[OVS_KEY_ATTR_IPV6]	 = { .len = sizeof(struct ovs_key_ipv6) },
	[OVS_KEY_ATTR_TCP]	 = { .len = sizeof(struct ovs_key_tcp) },
	[OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
	[OVS_KEY_ATTR_UDP]	 = { .len = sizeof(struct ovs_key_udp) },
	[OVS_KEY_ATTR_SCTP]	 = { .len = sizeof(struct ovs_key_sctp) },
	[OVS_KEY_ATTR_ICMP]	 = { .len = sizeof(struct ovs_key_icmp) },
	[OVS_KEY_ATTR_ICMPV6]	 = { .len = sizeof(struct ovs_key_icmpv6) },
	[OVS_KEY_ATTR_ARP]	 = { .len = sizeof(struct ovs_key_arp) },
	[OVS_KEY_ATTR_ND]	 = { .len = sizeof(struct ovs_key_nd) },
	[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
				     .next = ovs_tunnel_key_lens, },
	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
	[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
};

static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
{
	return expected_len == attr_len ||
	       expected_len == OVS_ATTR_NESTED ||
	       expected_len == OVS_ATTR_VARIABLE;
}

static bool is_all_zero(const u8 *fp, size_t size)
{
	int i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++)
		if (fp[i])
			return false;

	return true;
}

static int __parse_flow_nlattrs(const struct nlattr *attr,
				const struct nlattr *a[],
				u64 *attrsp, bool log, bool nz)
{
	const struct nlattr *nla;
	u64 attrs;
	int rem;

	attrs = *attrsp;
	nla_for_each_nested(nla, attr, rem) {
		u16 type = nla_type(nla);
		int expected_len;

		if (type > OVS_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Key type %d is out of range max %d",
				  type, OVS_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (attrs & (1 << type)) {
			OVS_NLERR(log, "Duplicate key (type %d).", type);
			return -EINVAL;
		}

		expected_len = ovs_key_lens[type].len;
		if (!check_attr_len(nla_len(nla), expected_len)) {
			OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
				  type, nla_len(nla), expected_len);
			return -EINVAL;
		}

		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
			attrs |= 1 << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}

static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp,
				   bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
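/* Key and mask parsing differ only in the 'nz' flag above:
 * parse_flow_mask_nlattrs() passes nz == true so that all-zero mask
 * attributes are dropped (a fully wildcarded field needs no entry in
 * a[]), while parse_flow_nlattrs() below keeps every attribute.
 */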
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
		       u64 *attrsp, bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}

static int genev_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	unsigned long opt_key_offset;

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if (nla_len(a) % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  nla_len(a));
		return -EINVAL;
	}

	/* We need to record the length of the options passed
	 * down, otherwise packets with the same format but
	 * additional options will be silently matched.
	 */
	if (!is_mask) {
		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
				false);
	} else {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != nla_len(a)) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, nla_len(a));
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	}

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}

static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct nlattr *a;
	int rem;
	unsigned long opt_key_offset;
	struct vxlan_metadata opts;

	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

	memset(&opts, 0, sizeof(opts));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		if (type > OVS_VXLAN_EXT_MAX) {
			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
				  type, OVS_VXLAN_EXT_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_vxlan_ext_key_lens[type].len)) {
			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
				  type, nla_len(a),
				  ovs_vxlan_ext_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_VXLAN_EXT_GBP:
			opts.gbp = nla_get_u32(a);
			break;
		default:
			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
				  type);
			return -EINVAL;
		}
	}
	if (rem) {
		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
				  is_mask);
	return 0;
}
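/* Both option parsers above store their bytes through
 * TUN_METADATA_OFFSET() (defined in flow.h), which places the 'len'
 * option bytes at the tail of key->tun_opts; tun_opts_len records how
 * much of that buffer is valid.
 */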
static int ip_tun_from_nlattr(const struct nlattr *attr,
			      struct sw_flow_match *match, bool is_mask,
			      bool log)
{
	bool ttl = false, ipv4 = false, ipv6 = false;
	__be16 tun_flags = 0;
	int opts_type = 0;
	struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_tunnel_key_lens[type].len)) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			tun_flags |= TUNNEL_OAM;
			break;
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_GENEVE_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			tun_flags |= TUNNEL_VXLAN_OPT;
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_PAD:
			break;
		default:
			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}
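	/* Past this point the loop results are committed: first the
	 * accumulated flags and address family, then the leftover-byte
	 * and sanity checks (a destination address and a TTL are
	 * mandatory for an unmasked key).
	 */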
	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
	if (is_mask)
		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
	else
		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
				false);

	if (rem > 0) {
		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (ipv4 && ipv6) {
		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
		return -EINVAL;
	}

	if (!is_mask) {
		if (!ipv4 && !ipv6) {
			OVS_NLERR(log, "IP tunnel dst address not specified");
			return -EINVAL;
		}
		if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
			return -EINVAL;
		}
		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR(log, "IP tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}

static int vxlan_opt_to_nlattr(struct sk_buff *skb,
			       const void *tun_opts, int swkey_tun_opts_len)
{
	const struct vxlan_metadata *opts = tun_opts;
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

static int __ip_tun_to_nlattr(struct sk_buff *skb,
			      const struct ip_tunnel_key *output,
			      const void *tun_opts, int swkey_tun_opts_len,
			      unsigned short tun_proto)
{
	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
			 OVS_TUNNEL_KEY_ATTR_PAD))
		return -EMSGSIZE;
	switch (tun_proto) {
	case AF_INET:
		if (output->u.ipv4.src &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
				    output->u.ipv4.src))
			return -EMSGSIZE;
		if (output->u.ipv4.dst &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
				    output->u.ipv4.dst))
			return -EMSGSIZE;
		break;
	case AF_INET6:
		if (!ipv6_addr_any(&output->u.ipv6.src) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
				     &output->u.ipv6.src))
			return -EMSGSIZE;
		if (!ipv6_addr_any(&output->u.ipv6.dst) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
				     &output->u.ipv6.dst))
			return -EMSGSIZE;
		break;
	}
	if (output->tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;
	if (output->tp_src &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
		return -EMSGSIZE;
	if (output->tp_dst &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_OAM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
		return -EMSGSIZE;
	if (swkey_tun_opts_len) {
		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
			    swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
			return -EMSGSIZE;
	}

	return 0;
}
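/* __ip_tun_to_nlattr() above is shared by two callers with different
 * framing: ip_tun_to_nlattr() below wraps the attributes in an
 * OVS_KEY_ATTR_TUNNEL nest of its own, while ovs_nla_put_tunnel_info()
 * emits them into whatever attribute its caller has already opened.
 */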
static int ip_tun_to_nlattr(struct sk_buff *skb,
			    const struct ip_tunnel_key *output,
			    const void *tun_opts, int swkey_tun_opts_len,
			    unsigned short tun_proto)
{
	struct nlattr *nla;
	int err;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
				 tun_proto);
	if (err)
		return err;

	nla_nest_end(skb, nla);
	return 0;
}

int ovs_nla_put_tunnel_info(struct sk_buff *skb,
			    struct ip_tunnel_info *tun_info)
{
	return __ip_tun_to_nlattr(skb, &tun_info->key,
				  ip_tunnel_info_opts(tun_info),
				  tun_info->options_len,
				  ip_tunnel_info_af(tun_info));
}

static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
				    const struct nlattr *a[],
				    bool is_mask, bool inner)
{
	__be16 tci = 0;
	__be16 tpid = 0;

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (a[OVS_KEY_ATTR_ETHERTYPE])
		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

	if (likely(!inner)) {
		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
	} else {
		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
	}
	return 0;
}

static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
				      u64 key_attrs, bool inner,
				      const struct nlattr **a, bool log)
{
	__be16 tci = 0;

	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	      eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
		/* Not a VLAN. */
		return 0;
	}

	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (!(tci & htons(VLAN_TAG_PRESENT))) {
		if (tci) {
			OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
				  (inner) ? "C-VLAN" : "VLAN");
			return -EINVAL;
		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
			/* Corner case for truncated VLAN header. */
			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
				  (inner) ? "C-VLAN" : "VLAN");
			return -EINVAL;
		}
	}

	return 1;
}

static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
					   u64 key_attrs, bool inner,
					   const struct nlattr **a, bool log)
{
	__be16 tci = 0;
	__be16 tpid = 0;
	bool encap_valid = !!(match->key->eth.vlan.tci &
			      htons(VLAN_TAG_PRESENT));
	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
				htons(VLAN_TAG_PRESENT));

	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
		/* Not a VLAN. */
		return 0;
	}

	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
			  (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (a[OVS_KEY_ATTR_ETHERTYPE])
		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

	if (tpid != htons(0xffff)) {
		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
		return -EINVAL;
	}
	if (!(tci & htons(VLAN_TAG_PRESENT))) {
		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
			  (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	return 1;
}

static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
				     u64 *key_attrs, bool inner,
				     const struct nlattr **a, bool is_mask,
				     bool log)
{
	int err;
	const struct nlattr *encap;

	if (!is_mask)
		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
						 a, log);
	else
		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
						      a, log);
	if (err <= 0)
		return err;

	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
	if (err)
		return err;

	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);

	encap = a[OVS_KEY_ATTR_ENCAP];

	if (!is_mask)
		err = parse_flow_nlattrs(encap, a, key_attrs, log);
	else
		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);

	return err;
}

static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
				   u64 *key_attrs, const struct nlattr **a,
				   bool is_mask, bool log)
{
	int err;
	bool encap_valid = false;

	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
					is_mask, log);
	if (err)
		return err;

	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
	if (encap_valid) {
		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
						is_mask, log);
		if (err)
			return err;
	}

	return 0;
}

static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
				       u64 *attrs, const struct nlattr **a,
				       bool is_mask, bool log)
{
	__be16 eth_type;

	eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
	if (is_mask) {
		/* Always exact match EtherType. */
		eth_type = htons(0xffff);
	} else if (!eth_proto_is_802_3(eth_type)) {
		OVS_NLERR(log, "EtherType %x is less than min %x",
			  ntohs(eth_type), ETH_P_802_3_MIN);
		return -EINVAL;
	}

	SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
	*attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	return 0;
}

static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
				 u64 *attrs, const struct nlattr **a,
				 bool is_mask, bool log)
{
	u8 mac_proto = MAC_PROTO_ETHERNET;

	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);

		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);

		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
				nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask) {
			in_port = 0xffffffff; /* Always exact match in_port. */
		} else if (in_port >= DP_MAX_PORTS) {
			OVS_NLERR(log, "Port %d exceeds max allowable %d",
				  in_port, DP_MAX_PORTS);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
				       is_mask, log) < 0)
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
		u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);

		if (ct_state & ~CT_SUPPORTED_MASK) {
			OVS_NLERR(log, "ct_state flags %08x unsupported",
				  ct_state);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
		u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);

		SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
		u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);

		SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
	    ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
		const struct ovs_key_ct_labels *cl;

		cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
		SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
				   sizeof(*cl), is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
	}
	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
		const struct ovs_key_ct_tuple_ipv4 *ct;

		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);

		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
	}
	if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
		const struct ovs_key_ct_tuple_ipv6 *ct;

		ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);

		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
				   sizeof(match->key->ipv6.ct_orig.src),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
				   sizeof(match->key->ipv6.ct_orig.dst),
				   is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
		SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
	}
	/* For layer 3 packets the Ethernet type is provided
	 * and treated as metadata but no MAC addresses are provided.
	 */
	if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
	    (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
		mac_proto = MAC_PROTO_NONE;

	/* Always exact match mac_proto */
	SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);

	if (mac_proto == MAC_PROTO_NONE)
		return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
						   log);

	return 0;
}

static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
				u64 attrs, const struct nlattr **a,
				bool is_mask, bool log)
{
	int err;

	err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				   eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				   eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);

		if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
			/* VLAN attribute is always parsed before getting here since it
			 * may occur multiple times.
			 */
			OVS_NLERR(log, "VLAN attribute unexpected.");
			return -EINVAL;
		}

		if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
			err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
							  log);
			if (err)
				return err;
		} else if (!is_mask) {
			SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
		}
	} else if (!match->key->eth.type) {
		OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
		return -EINVAL;
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}
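	/* Each block below consumes its attribute by clearing the bit in
	 * 'attrs'; anything still set when the function ends is reported
	 * as an unknown key attribute.
	 */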
	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}

		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				   ipv6_key->ipv6_src,
				   sizeof(match->key->ipv6.addr.src),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				   ipv6_key->ipv6_dst,
				   sizeof(match->key->ipv6.addr.dst),
				   is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
				  arp_key->arp_op);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				   arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				   arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
		const struct ovs_key_mpls *mpls_key;

		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
		SW_FLOW_KEY_PUT(match, mpls.top_lse,
				mpls_key->mpls_lse, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		SW_FLOW_KEY_PUT(match, tp.flags,
				nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
				is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}
	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
				   nd_key->nd_target,
				   sizeof(match->key->ipv6.nd.target),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
				   nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				   nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	if (attrs != 0) {
		OVS_NLERR(log, "Unknown key attributes %llx",
			  (unsigned long long)attrs);
		return -EINVAL;
	}

	return 0;
}

static void nlattr_set(struct nlattr *attr, u8 val,
		       const struct ovs_len_tbl *tbl)
{
	struct nlattr *nla;
	int rem;

	/* The nlattr stream should already have been validated */
	nla_for_each_nested(nla, attr, rem) {
		if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
			if (tbl[nla_type(nla)].next)
				tbl = tbl[nla_type(nla)].next;
			nlattr_set(nla, val, tbl);
		} else {
			memset(nla_data(nla), val, nla_len(nla));
		}

		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
	}
}

static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}
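/* mask_set_nlattr() supports the exact-match case in ovs_nla_get_match()
 * below: a copy of the key attribute stream is overwritten with 0xff
 * payloads and then parsed as if userspace had supplied that mask.
 */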
/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except when the
 * mask does not include any don't-care bits (which again yields an
 * exact-match flow).
 * @net: Used to determine per-namespace field support.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should be those of the packet that triggered the
 * creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
 * Netlink attribute that specifies the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 */
int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	int err;

	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
	if (err)
		return err;

	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_proto)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match on tci. */
		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);

		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
		if (err)
			goto free_newmask;

		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
					   log);
		if (err)
			goto free_newmask;
	}

	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

free_newmask:
	kfree(newmask);
	return err;
}

static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t len;

	if (!attr)
		return 0;

	len = nla_len(attr);
	if (len < 1 || len > MAX_UFID_LENGTH) {
		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
			  nla_len(attr), MAX_UFID_LENGTH);
		return 0;
	}

	return len;
}

/* Initializes 'sfid->ufid', returning true if 'attr' contains a valid UFID,
 * or false otherwise.
 */
bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
		      bool log)
{
	sfid->ufid_len = get_ufid_len(attr, log);
	if (sfid->ufid_len)
		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);

	return sfid->ufid_len;
}

int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
			   const struct sw_flow_key *key, bool log)
{
	struct sw_flow_key *new_key;

	if (ovs_nla_get_ufid(sfid, ufid, log))
		return 0;

	/* If UFID was not provided, use unmasked key. */
	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
	if (!new_key)
		return -ENOMEM;
	memcpy(new_key, key, sizeof(*key));
	sfid->unmasked_key = new_key;

	return 0;
}

u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
	return attr ? nla_get_u32(attr) : 0;
}
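/* A UFID lets userspace identify a flow without re-serializing the whole
 * unmasked key; when none is given, ovs_nla_get_identifier() above falls
 * back to storing a kmalloc'ed copy of the unmasked key instead.
 */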
/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @net: Network namespace.
 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
 * metadata.
 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
 * attributes.
 * @attrs: Bit mask for the netlink attributes included in @a.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 *
 * This must be called before the packet key fields are filled in 'key'.
 */
int ovs_nla_get_flow_metadata(struct net *net,
			      const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
			      u64 attrs, struct sw_flow_key *key, bool log)
{
	struct sw_flow_match match;

	memset(&match, 0, sizeof(match));
	match.key = key;

	key->ct_state = 0;
	key->ct_zone = 0;
	key->ct_orig_proto = 0;
	memset(&key->ct, 0, sizeof(key->ct));
	memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
	memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));

	key->phy.in_port = DP_MAX_PORTS;

	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
}

static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
			    bool is_mask)
{
	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
		return -EMSGSIZE;
	return 0;
}
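/* __ovs_nla_put_key() below is the single serializer for both keys and
 * masks: 'swkey' steers what gets emitted while 'output' supplies the
 * values, so passing the mask key as 'output' (with is_mask == true)
 * reuses the same walk for OVS_FLOW_ATTR_MASK. VLAN tags are emitted
 * outermost-first, each inner layer wrapped in an OVS_KEY_ATTR_ENCAP
 * nest.
 */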
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
			     const struct sw_flow_key *output, bool is_mask,
			     struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla;
	struct nlattr *encap = NULL;
	struct nlattr *in_encap = NULL;

	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	if ((swkey->tun_proto || is_mask)) {
		const void *opts = NULL;

		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
				     swkey->tun_opts_len, swkey->tun_proto))
			goto nla_put_failure;
	}

	if (swkey->phy.in_port == DP_MAX_PORTS) {
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	if (ovs_ct_put_key(swkey, output, skb))
		goto nla_put_failure;

	if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
		if (!nla)
			goto nla_put_failure;

		eth_key = nla_data(nla);
		ether_addr_copy(eth_key->eth_src, output->eth.src);
		ether_addr_copy(eth_key->eth_dst, output->eth.dst);

		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
				goto nla_put_failure;
			encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
			if (!swkey->eth.vlan.tci)
				goto unencap;

			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
					goto nla_put_failure;
				in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
				if (!swkey->eth.cvlan.tci)
					goto unencap;
			}
		}

		if (swkey->eth.type == htons(ETH_P_802_2)) {
			/*
			 * Ethertype 802.2 is represented in the netlink with omitted
			 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
			 * 0xffff in the mask attribute. Ethertype can also
			 * be wildcarded.
			 */
			if (is_mask && output->eth.type)
				if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
						 output->eth.type))
					goto nla_put_failure;
			goto unencap;
		}
	}
	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (eth_type_vlan(swkey->eth.type)) {
		/* There are 3 VLAN tags, we don't know anything about the rest
		 * of the packet, so truncate here.
		 */
		WARN_ON_ONCE(!(encap && in_encap));
		goto unencap;
	}

	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
		       sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
		       sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		arp_key->arp_op = htons(output->ip.proto);
		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
	} else if (eth_p_mpls(swkey->eth.type)) {
		struct ovs_key_mpls *mpls_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
		if (!nla)
			goto nla_put_failure;
		mpls_key = nla_data(nla);
		mpls_key->mpls_lse = output->mpls.top_lse;
	}

	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			tcp_key->tcp_src = output->tp.src;
			tcp_key->tcp_dst = output->tp.dst;
			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
					 output->tp.flags))
				goto nla_put_failure;
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			udp_key->udp_src = output->tp.src;
			udp_key->udp_dst = output->tp.dst;
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			sctp_key->sctp_src = output->tp.src;
			sctp_key->sctp_dst = output->tp.dst;
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			icmp_key->icmp_type = ntohs(output->tp.src);
			icmp_key->icmp_code = ntohs(output->tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
					  sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
				       sizeof(nd_key->nd_target));
				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
			}
		}
	}

unencap:
	if (in_encap)
		nla_nest_end(skb, in_encap);
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

int ovs_nla_put_key(const struct sw_flow_key *swkey,
		    const struct sw_flow_key *output, int attr, bool is_mask,
		    struct sk_buff *skb)
{
	int err;
	struct nlattr *nla;

	nla = nla_nest_start(skb, attr);
	if (!nla)
		return -EMSGSIZE;
	err = __ovs_nla_put_key(swkey, output, is_mask, skb);
	if (err)
		return err;
	nla_nest_end(skb, nla);

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
			       flow->id.ufid);

	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}

/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}
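
/* Editor's sketch (hypothetical helper, not part of the original file):
 * how a flow dump might compose the helpers above and below.  The real
 * code lives in datapath.c; this only illustrates the swkey/output split
 * of ovs_nla_put_key(): 'swkey' decides which attributes are emitted,
 * while 'output' supplies the bytes written into them.
 */
static int __maybe_unused example_fill_flow(const struct sw_flow *flow,
					    struct sk_buff *skb)
{
	int err;

	err = ovs_nla_put_identifier(flow, skb);	/* UFID or unmasked key */
	if (err)
		return err;

	err = ovs_nla_put_masked_key(flow, skb);	/* swkey == output */
	if (err)
		return err;

	/* Declared in flow_netlink.h; defined just below. */
	return ovs_nla_put_mask(flow, skb);		/* output = mask bits */
}
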
/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->mask->key,
			       OVS_FLOW_ATTR_MASK, true, skb);
}

#define MAX_ACTIONS_BUFSIZE	(32 * 1024)

static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
{
	struct sw_flow_actions *sfa;

	if (size > MAX_ACTIONS_BUFSIZE) {
		OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
		return ERR_PTR(-EINVAL);
	}

	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
	if (!sfa)
		return ERR_PTR(-ENOMEM);

	sfa->actions_len = 0;
	return sfa;
}

static void ovs_nla_free_set_action(const struct nlattr *a)
{
	const struct nlattr *ovs_key = nla_data(a);
	struct ovs_tunnel_info *ovs_tun;

	switch (nla_type(ovs_key)) {
	case OVS_KEY_ATTR_TUNNEL_INFO:
		ovs_tun = nla_data(ovs_key);
		dst_release((struct dst_entry *)ovs_tun->tun_dst);
		break;
	}
}

void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
{
	const struct nlattr *a;
	int rem;

	if (!sf_acts)
		return;

	nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_SET:
			ovs_nla_free_set_action(a);
			break;
		case OVS_ACTION_ATTR_CT:
			ovs_ct_free_action(a);
			break;
		}
	}

	kfree(sf_acts);
}

static void __ovs_nla_free_flow_actions(struct rcu_head *head)
{
	ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
}
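
/* Editor's sketch (hypothetical helper, not part of the original file):
 * the intended use of the RCU-deferred free below.  A writer holding
 * ovs_mutex swaps in a new action list and lets in-flight readers drain
 * before the old buffer is reclaimed, roughly as datapath.c does when a
 * flow's actions are replaced.
 */
static void __maybe_unused example_swap_actions(struct sw_flow *flow,
						struct sw_flow_actions *new_acts)
{
	struct sw_flow_actions *old_acts;

	old_acts = ovsl_dereference(flow->sf_acts);	/* writer side */
	rcu_assign_pointer(flow->sf_acts, new_acts);
	ovs_nla_free_flow_actions_rcu(old_acts);	/* freed after grace period */
}
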
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible.
 */
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}

static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{
	struct sw_flow_actions *acts;
	int new_acts_size;
	int req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
			  (*sfa)->actions_len;

	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	new_acts_size = ksize(*sfa) * 2;

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
			return ERR_PTR(-EMSGSIZE);
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size, log);
	if (IS_ERR(acts))
		return (void *)acts;

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	acts->orig_len = (*sfa)->orig_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

static struct nlattr *__add_action(struct sw_flow_actions **sfa,
				   int attrtype, void *data, int len, bool log)
{
	struct nlattr *a;

	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
	if (IS_ERR(a))
		return a;

	a->nla_type = attrtype;
	a->nla_len = nla_attr_size(len);

	if (data)
		memcpy(nla_data(a), data, len);
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

	return a;
}

int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
		       int len, bool log)
{
	struct nlattr *a;

	a = __add_action(sfa, attrtype, data, len, log);

	return PTR_ERR_OR_ZERO(a);
}

static inline int add_nested_action_start(struct sw_flow_actions **sfa,
					  int attrtype, bool log)
{
	int used = (*sfa)->actions_len;
	int err;

	err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
	if (err)
		return err;

	return used;
}

static inline void add_nested_action_end(struct sw_flow_actions *sfa,
					 int st_offset)
{
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
					      st_offset);

	a->nla_len = sfa->actions_len - st_offset;
}

static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci, bool log);
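
/* Editor's sketch (illustrative only): how the helpers above compose to
 * build one nested action.  add_nested_action_start() reserves an empty
 * attribute header and returns its offset; children are appended with
 * ovs_nla_add_action(); add_nested_action_end() then patches nla_len to
 * cover everything appended since the start.  The attribute and payload
 * here are arbitrary placeholders (the real sample copy below stores a
 * kernel-only OVS_SAMPLE_ATTR_ARG instead).
 */
static int __maybe_unused example_nested_action(struct sw_flow_actions **sfa,
						bool log)
{
	u32 probability = U32_MAX / 2;	/* hypothetical 50% sample rate */
	int start, err;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;

	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
				 &probability, sizeof(probability), log);
	if (err)
		return err;

	add_nested_action_end(*sfa, start);
	return 0;
}
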
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
				    const struct sw_flow_key *key,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci,
				    bool log, bool last)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err;
	struct sample_arg arg;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* Validation done, copy the sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;

	/* When both the skb and the flow may be changed, put the sample
	 * into a deferred FIFO.  On the other hand, if only the skb may
	 * be modified, the actions can be executed in place.
	 *
	 * Do this analysis at flow installation time.
	 * Set 'clone_action->exec' to true if the actions can be
	 * executed without being deferred.
	 *
	 * If the sample is the last action, it can always be executed
	 * rather than deferred.
	 */
	arg.exec = last || !actions_may_change_flow(actions);
	arg.probability = nla_get_u32(probability);

	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
				 log);
	if (err)
		return err;

	err = __ovs_nla_copy_actions(net, actions, key, sfa,
				     eth_type, vlan_tci, log);

	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}

void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    bool reset_key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	match->key = key;
	match->mask = mask;

	if (reset_key)
		memset(key, 0, sizeof(*key));

	if (mask) {
		memset(&mask->key, 0, sizeof(mask->key));
		mask->range.start = mask->range.end = 0;
	}
}
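
/* Editor's sketch (illustrative only): the usual ovs_match_init() pattern,
 * as used by validate_and_copy_set_tun() below.  A stack key is zeroed and
 * bound to a match, and the nlattr parser then fills in both the key and
 * the matched range.  A negative return is an error; otherwise the parser
 * reports which tunnel options attribute (if any) it saw.
 */
static int __maybe_unused example_parse_tun_set(const struct nlattr *attr,
						bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;

	ovs_match_init(&match, &key, true, NULL);
	return ip_tun_from_nlattr(nla_data(attr), &match, false, log);
}
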
static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *option;
	int opts_len = key->tun_opts_len;
	bool crit_opt = false;

	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
	while (opts_len > 0) {
		int len;

		if (opts_len < sizeof(*option))
			return -EINVAL;

		len = sizeof(*option) + option->length * 4;
		if (len > opts_len)
			return -EINVAL;

		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);

		option = (struct geneve_opt *)((u8 *)option + len);
		opts_len -= len;
	}

	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;

	return 0;
}

static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct metadata_dst *tun_dst;
	struct ip_tunnel_info *tun_info;
	struct ovs_tunnel_info *ovs_tun;
	struct nlattr *a;
	int err = 0, start, opts_type;

	ovs_match_init(&match, &key, true, NULL);
	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	if (key.tun_opts_len) {
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
				     GFP_KERNEL);
	if (!tun_dst)
		return -ENOMEM;

	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
	if (err) {
		dst_release((struct dst_entry *)tun_dst);
		return err;
	}

	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*ovs_tun), log);
	if (IS_ERR(a)) {
		dst_release((struct dst_entry *)tun_dst);
		return PTR_ERR(a);
	}

	ovs_tun = nla_data(a);
	ovs_tun->tun_dst = tun_dst;

	tun_info = &tun_dst->u.tun_info;
	tun_info->mode = IP_TUNNEL_INFO_TX;
	if (key.tun_proto == AF_INET6)
		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
	tun_info->key = key.tun_key;

	/* We need to store the options in the action itself since
	 * everything else will go away after flow setup.  We can append
	 * it to tun_info and then point there.
	 */
	ip_tunnel_info_opts_set(tun_info,
				TUN_METADATA_OPTS(&key, key.tun_opts_len),
				key.tun_opts_len);
	add_nested_action_end(*sfa, start);

	return err;
}
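
/* Editor's sketch (illustrative only, hypothetical values): the subset
 * check that validate_masked() below performs on the <data><mask> pair
 * carried by a masked set action - every bit set in the data must also
 * be set in the mask.
 */
static bool __maybe_unused example_subset_check(void)
{
	u8 data = 0x1f;
	u8 mask = 0x0f;		/* bit 0x10 of data is not covered */

	return !(data & ~mask);	/* false: such an action is rejected */
}
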
/* Return false if there are any non-masked bits set.
 * Mask follows data immediately, before any netlink padding.
 */
static bool validate_masked(u8 *data, int len)
{
	u8 *mask = data + len;

	while (len--)
		if (*data++ & ~*mask++)
			return false;

	return true;
}

static int validate_set(const struct nlattr *a,
			const struct sw_flow_key *flow_key,
			struct sw_flow_actions **sfa, bool *skip_copy,
			u8 mac_proto, __be16 eth_type, bool masked, bool log)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	size_t key_len;

	/* There can be only one key in an action */
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
		return -EINVAL;

	key_len = nla_len(ovs_key);
	if (masked)
		key_len /= 2;

	if (key_type > OVS_KEY_ATTR_MAX ||
	    !check_attr_len(key_len, ovs_key_lens[key_type].len))
		return -EINVAL;

	if (masked && !validate_masked(nla_data(ovs_key), key_len))
		return -EINVAL;

	switch (key_type) {
	const struct ovs_key_ipv4 *ipv4_key;
	const struct ovs_key_ipv6 *ipv6_key;
	int err;

	case OVS_KEY_ATTR_PRIORITY:
	case OVS_KEY_ATTR_SKB_MARK:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
		break;

	case OVS_KEY_ATTR_ETHERNET:
		if (mac_proto != MAC_PROTO_ETHERNET)
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_TUNNEL:
		if (masked)
			return -EINVAL; /* Masked tunnel set not supported. */

		*skip_copy = true;
		err = validate_and_copy_set_tun(a, sfa, log);
		if (err)
			return err;
		break;

	case OVS_KEY_ATTR_IPV4:
		if (eth_type != htons(ETH_P_IP))
			return -EINVAL;

		ipv4_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv4 *mask = ipv4_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv4_proto || mask->ipv4_frag)
				return -EINVAL;
		} else {
			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		break;

	case OVS_KEY_ATTR_IPV6:
		if (eth_type != htons(ETH_P_IPV6))
			return -EINVAL;

		ipv6_key = nla_data(ovs_key);

		if (masked) {
			const struct ovs_key_ipv6 *mask = ipv6_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv6_proto || mask->ipv6_frag)
				return -EINVAL;

			/* Invalid bits in the flow label mask? */
			if (ntohl(mask->ipv6_label) & 0xFFF00000)
				return -EINVAL;
		} else {
			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_TCP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_TCP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_UDP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_UDP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_MPLS:
		if (!eth_p_mpls(eth_type))
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_SCTP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_SCTP)
			return -EINVAL;

		break;

	default:
		return -EINVAL;
	}

	/* Convert non-masked non-tunnel set actions to masked set actions. */
	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
		int start, len = key_len * 2;
		struct nlattr *at;

		*skip_copy = true;

		start = add_nested_action_start(sfa,
						OVS_ACTION_ATTR_SET_TO_MASKED,
						log);
		if (start < 0)
			return start;

		at = __add_action(sfa, key_type, NULL, len, log);
		if (IS_ERR(at))
			return PTR_ERR(at);

		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
		/* Clear non-writeable bits from otherwise writeable fields. */
		if (key_type == OVS_KEY_ATTR_IPV6) {
			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;

			mask->ipv6_label &= htonl(0x000FFFFF);
		}
		add_nested_action_end(*sfa, start);
	}

	return 0;
}

static int validate_userspace(const struct nlattr *attr)
{
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
	};
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
	int error;

	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
				 userspace_policy, NULL);
	if (error)
		return error;

	if (!a[OVS_USERSPACE_ATTR_PID] ||
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
		return -EINVAL;

	return 0;
}

static int copy_action(const struct nlattr *from,
		       struct sw_flow_actions **sfa, bool log)
{
	int totlen = NLA_ALIGN(from->nla_len);
	struct nlattr *to;

	to = reserve_sfa_size(sfa, from->nla_len, log);
	if (IS_ERR(to))
		return PTR_ERR(to);

	memcpy(to, from, totlen);
	return 0;
}

static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci, bool log)
{
	u8 mac_proto = ovs_key_mac_proto(key);
	const struct nlattr *a;
	int rem, err;

	nla_for_each_nested(a, attr, rem) {
		/* Expected argument lengths, (u32)-1 for variable length. */
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET] = (u32)-1,
			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
			[OVS_ACTION_ATTR_CT] = (u32)-1,
			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
			[OVS_ACTION_ATTR_POP_ETH] = 0,
		};
		const struct ovs_action_push_vlan *vlan;
		int type = nla_type(a);
		bool skip_copy;

		if (type > OVS_ACTION_ATTR_MAX ||
		    (action_lens[type] != nla_len(a) &&
		     action_lens[type] != (u32)-1))
			return -EINVAL;

		skip_copy = false;
		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
			err = validate_userspace(a);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_TRUNC: {
			const struct ovs_action_trunc *trunc = nla_data(a);

			if (trunc->max_len < ETH_HLEN)
				return -EINVAL;
			break;
		}

		case OVS_ACTION_ATTR_HASH: {
			const struct ovs_action_hash *act_hash = nla_data(a);

			switch (act_hash->hash_alg) {
			case OVS_HASH_ALG_L4:
				break;
			default:
				return -EINVAL;
			}

			break;
		}

		case OVS_ACTION_ATTR_POP_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			vlan_tci = htons(0);
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			vlan = nla_data(a);
			if (!eth_type_vlan(vlan->vlan_tpid))
				return -EINVAL;
			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
				return -EINVAL;
			vlan_tci = vlan->vlan_tci;
			break;

		case OVS_ACTION_ATTR_RECIRC:
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			const struct ovs_action_push_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;
			/* Prohibit pushing MPLS unless the current ethertype
			 * is on a whitelist of types with a known tag order.
			 */
			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
			    (eth_type != htons(ETH_P_IP) &&
			     eth_type != htons(ETH_P_IPV6) &&
			     eth_type != htons(ETH_P_ARP) &&
			     eth_type != htons(ETH_P_RARP) &&
			     !eth_p_mpls(eth_type)))
				return -EINVAL;
			eth_type = mpls->mpls_ethertype;
			break;
		}

		case OVS_ACTION_ATTR_POP_MPLS:
			if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
			    !eth_p_mpls(eth_type))
				return -EINVAL;

			/* Disallow subsequent L2.5+ set and mpls_pop actions
			 * as there is no check here to ensure that the new
			 * eth_type is valid and thus set actions could
			 * write off the end of the packet or otherwise
			 * corrupt it.
			 *
			 * Support for these actions is planned using packet
			 * recirculation.
			 */
			eth_type = htons(0);
			break;

		case OVS_ACTION_ATTR_SET:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   false, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   true, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE: {
			bool last = nla_is_last(a, rem);

			err = validate_and_copy_sample(net, a, key, sfa,
						       eth_type, vlan_tci,
						       log, last);
			if (err)
				return err;
			skip_copy = true;
			break;
		}

		case OVS_ACTION_ATTR_CT:
			err = ovs_ct_copy_action(net, a, key, sfa, log);
			if (err)
				return err;
			skip_copy = true;
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			/* Disallow pushing an Ethernet header if one
			 * is already present.
			 */
			if (mac_proto != MAC_PROTO_NONE)
				return -EINVAL;
			mac_proto = MAC_PROTO_ETHERNET;
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			if (vlan_tci & htons(VLAN_TAG_PRESENT))
				return -EINVAL;
			mac_proto = MAC_PROTO_NONE;
			break;

		default:
			OVS_NLERR(log, "Unknown action type %d", type);
			return -EINVAL;
		}
		if (!skip_copy) {
			err = copy_action(a, sfa, log);
			if (err)
				return err;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}

/* 'key' must be the masked key. */
int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
			 const struct sw_flow_key *key,
			 struct sw_flow_actions **sfa, bool log)
{
	int err;

	*sfa = nla_alloc_flow_actions(nla_len(attr), log);
	if (IS_ERR(*sfa))
		return PTR_ERR(*sfa);

	(*sfa)->orig_len = nla_len(attr);
	err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
				     key->eth.vlan.tci, log);
	if (err)
		ovs_nla_free_flow_actions(*sfa);

	return err;
}

static int sample_action_to_attr(const struct nlattr *attr,
				 struct sk_buff *skb)
{
	struct nlattr *start, *ac_start = NULL, *sample_arg;
	int err = 0, rem = nla_len(attr);
	const struct sample_arg *arg;
	struct nlattr *actions;

	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
	if (!start)
		return -EMSGSIZE;

	sample_arg = nla_data(attr);
	arg = nla_data(sample_arg);
	actions = nla_next(sample_arg, &rem);

	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
		err = -EMSGSIZE;
		goto out;
	}

	ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
	if (!ac_start) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ovs_nla_put_actions(actions, rem, skb);

out:
	if (err) {
		nla_nest_cancel(skb, ac_start);
		nla_nest_cancel(skb, start);
	} else {
		nla_nest_end(skb, ac_start);
		nla_nest_end(skb, start);
	}

	return err;
}

static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	struct nlattr *start;
	int err;

	switch (key_type) {
	case OVS_KEY_ATTR_TUNNEL_INFO: {
		struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
		struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;

		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
		if (!start)
			return -EMSGSIZE;

		err = ip_tun_to_nlattr(skb, &tun_info->key,
				       ip_tunnel_info_opts(tun_info),
				       tun_info->options_len,
				       ip_tunnel_info_af(tun_info));
		if (err)
			return err;
		nla_nest_end(skb, start);
		break;
	}
	default:
		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
			return -EMSGSIZE;
		break;
	}

	return 0;
}

static int masked_set_action_to_set_action_attr(const struct nlattr *a,
						struct sk_buff *skb)
{
	const struct nlattr *ovs_key = nla_data(a);
	struct nlattr *nla;
	size_t key_len = nla_len(ovs_key) / 2;

	/* Revert the conversion we did from a non-masked set action to
	 * masked set action.
	 */
	nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
	const struct nlattr *a;
	int rem, err;

	nla_for_each_attr(a, attr, len, rem) {
		int type = nla_type(a);

		switch (type) {
		case OVS_ACTION_ATTR_SET:
			err = set_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = masked_set_action_to_set_action_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample_action_to_attr(a, skb);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_CT:
			err = ovs_ct_action_to_attr(nla_data(a), skb);
			if (err)
				return err;
			break;

		default:
			if (nla_put(skb, type, nla_len(a), nla_data(a)))
				return -EMSGSIZE;
			break;
		}
	}

	return 0;
}
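
/* Editor's sketch (hypothetical helper, not part of the original file):
 * dumping a stored action list back to userspace with the translators
 * above.  Set actions converted to OVS_ACTION_ATTR_SET_TO_MASKED on the
 * way in are converted back, so userspace sees what it originally wrote.
 */
static int __maybe_unused example_dump_actions(const struct sw_flow_actions *acts,
					       struct sk_buff *skb)
{
	struct nlattr *start;
	int err;

	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (!start)
		return -EMSGSIZE;

	err = ovs_nla_put_actions(acts->actions, acts->actions_len, skb);
	if (err)
		nla_nest_cancel(skb, start);
	else
		nla_nest_end(skb, start);

	return err;
}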