1 /* 2 * Copyright (c) 2007-2017 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/geneve.h> 46 #include <net/ip.h> 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/mpls.h> 50 #include <net/vxlan.h> 51 52 #include "flow_netlink.h" 53 54 struct ovs_len_tbl { 55 int len; 56 const struct ovs_len_tbl *next; 57 }; 58 59 #define OVS_ATTR_NESTED -1 60 #define OVS_ATTR_VARIABLE -2 61 62 static bool actions_may_change_flow(const struct nlattr *actions) 63 { 64 struct nlattr *nla; 65 int rem; 66 67 nla_for_each_nested(nla, actions, rem) { 68 u16 action = nla_type(nla); 69 70 switch 
(action) { 71 case OVS_ACTION_ATTR_OUTPUT: 72 case OVS_ACTION_ATTR_RECIRC: 73 case OVS_ACTION_ATTR_TRUNC: 74 case OVS_ACTION_ATTR_USERSPACE: 75 break; 76 77 case OVS_ACTION_ATTR_CT: 78 case OVS_ACTION_ATTR_HASH: 79 case OVS_ACTION_ATTR_POP_ETH: 80 case OVS_ACTION_ATTR_POP_MPLS: 81 case OVS_ACTION_ATTR_POP_VLAN: 82 case OVS_ACTION_ATTR_PUSH_ETH: 83 case OVS_ACTION_ATTR_PUSH_MPLS: 84 case OVS_ACTION_ATTR_PUSH_VLAN: 85 case OVS_ACTION_ATTR_SAMPLE: 86 case OVS_ACTION_ATTR_SET: 87 case OVS_ACTION_ATTR_SET_MASKED: 88 default: 89 return true; 90 } 91 } 92 return false; 93 } 94 95 static void update_range(struct sw_flow_match *match, 96 size_t offset, size_t size, bool is_mask) 97 { 98 struct sw_flow_key_range *range; 99 size_t start = rounddown(offset, sizeof(long)); 100 size_t end = roundup(offset + size, sizeof(long)); 101 102 if (!is_mask) 103 range = &match->range; 104 else 105 range = &match->mask->range; 106 107 if (range->start == range->end) { 108 range->start = start; 109 range->end = end; 110 return; 111 } 112 113 if (range->start > start) 114 range->start = start; 115 116 if (range->end < end) 117 range->end = end; 118 } 119 120 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 121 do { \ 122 update_range(match, offsetof(struct sw_flow_key, field), \ 123 sizeof((match)->key->field), is_mask); \ 124 if (is_mask) \ 125 (match)->mask->key.field = value; \ 126 else \ 127 (match)->key->field = value; \ 128 } while (0) 129 130 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 131 do { \ 132 update_range(match, offset, len, is_mask); \ 133 if (is_mask) \ 134 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 135 len); \ 136 else \ 137 memcpy((u8 *)(match)->key + offset, value_p, len); \ 138 } while (0) 139 140 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 141 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 142 value_p, len, is_mask) 143 144 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, 
value, is_mask) \ 145 do { \ 146 update_range(match, offsetof(struct sw_flow_key, field), \ 147 sizeof((match)->key->field), is_mask); \ 148 if (is_mask) \ 149 memset((u8 *)&(match)->mask->key.field, value, \ 150 sizeof((match)->mask->key.field)); \ 151 else \ 152 memset((u8 *)&(match)->key->field, value, \ 153 sizeof((match)->key->field)); \ 154 } while (0) 155 156 static bool match_validate(const struct sw_flow_match *match, 157 u64 key_attrs, u64 mask_attrs, bool log) 158 { 159 u64 key_expected = 0; 160 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 161 162 /* The following mask attributes allowed only if they 163 * pass the validation tests. */ 164 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 165 | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) 166 | (1 << OVS_KEY_ATTR_IPV6) 167 | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) 168 | (1 << OVS_KEY_ATTR_TCP) 169 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 170 | (1 << OVS_KEY_ATTR_UDP) 171 | (1 << OVS_KEY_ATTR_SCTP) 172 | (1 << OVS_KEY_ATTR_ICMP) 173 | (1 << OVS_KEY_ATTR_ICMPV6) 174 | (1 << OVS_KEY_ATTR_ARP) 175 | (1 << OVS_KEY_ATTR_ND) 176 | (1 << OVS_KEY_ATTR_MPLS)); 177 178 /* Always allowed mask fields. */ 179 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 180 | (1 << OVS_KEY_ATTR_IN_PORT) 181 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 182 183 /* Check key attributes. 
*/ 184 if (match->key->eth.type == htons(ETH_P_ARP) 185 || match->key->eth.type == htons(ETH_P_RARP)) { 186 key_expected |= 1 << OVS_KEY_ATTR_ARP; 187 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 188 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 189 } 190 191 if (eth_p_mpls(match->key->eth.type)) { 192 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 193 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 194 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 195 } 196 197 if (match->key->eth.type == htons(ETH_P_IP)) { 198 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 199 if (match->mask && match->mask->key.eth.type == htons(0xffff)) { 200 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 201 mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; 202 } 203 204 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 205 if (match->key->ip.proto == IPPROTO_UDP) { 206 key_expected |= 1 << OVS_KEY_ATTR_UDP; 207 if (match->mask && (match->mask->key.ip.proto == 0xff)) 208 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 209 } 210 211 if (match->key->ip.proto == IPPROTO_SCTP) { 212 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 213 if (match->mask && (match->mask->key.ip.proto == 0xff)) 214 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 215 } 216 217 if (match->key->ip.proto == IPPROTO_TCP) { 218 key_expected |= 1 << OVS_KEY_ATTR_TCP; 219 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 220 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 221 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 222 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 223 } 224 } 225 226 if (match->key->ip.proto == IPPROTO_ICMP) { 227 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 228 if (match->mask && (match->mask->key.ip.proto == 0xff)) 229 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 230 } 231 } 232 } 233 234 if (match->key->eth.type == htons(ETH_P_IPV6)) { 235 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 236 if (match->mask && match->mask->key.eth.type == htons(0xffff)) { 237 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 238 mask_allowed |= 1 << 
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; 239 } 240 241 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 242 if (match->key->ip.proto == IPPROTO_UDP) { 243 key_expected |= 1 << OVS_KEY_ATTR_UDP; 244 if (match->mask && (match->mask->key.ip.proto == 0xff)) 245 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 246 } 247 248 if (match->key->ip.proto == IPPROTO_SCTP) { 249 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 250 if (match->mask && (match->mask->key.ip.proto == 0xff)) 251 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 252 } 253 254 if (match->key->ip.proto == IPPROTO_TCP) { 255 key_expected |= 1 << OVS_KEY_ATTR_TCP; 256 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 257 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 258 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 259 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 260 } 261 } 262 263 if (match->key->ip.proto == IPPROTO_ICMPV6) { 264 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 265 if (match->mask && (match->mask->key.ip.proto == 0xff)) 266 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 267 268 if (match->key->tp.src == 269 htons(NDISC_NEIGHBOUR_SOLICITATION) || 270 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 271 key_expected |= 1 << OVS_KEY_ATTR_ND; 272 /* Original direction conntrack tuple 273 * uses the same space as the ND fields 274 * in the key, so both are not allowed 275 * at the same time. 276 */ 277 mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); 278 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 279 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 280 } 281 } 282 } 283 } 284 285 if ((key_attrs & key_expected) != key_expected) { 286 /* Key attributes check failed. */ 287 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 288 (unsigned long long)key_attrs, 289 (unsigned long long)key_expected); 290 return false; 291 } 292 293 if ((mask_attrs & mask_allowed) != mask_attrs) { 294 /* Mask attributes check failed. 
*/ 295 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 296 (unsigned long long)mask_attrs, 297 (unsigned long long)mask_allowed); 298 return false; 299 } 300 301 return true; 302 } 303 304 size_t ovs_tun_key_attr_size(void) 305 { 306 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 307 * updating this function. 308 */ 309 return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 310 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ 311 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ 312 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 313 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 314 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 315 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 316 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 317 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 318 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 319 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 320 */ 321 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 322 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 323 } 324 325 size_t ovs_key_attr_size(void) 326 { 327 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 328 * updating this function. 
329 */ 330 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); 331 332 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 333 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 334 + ovs_tun_key_attr_size() 335 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 336 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 337 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 338 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 339 + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ 340 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ 341 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 342 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 343 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ 344 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 345 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 346 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 347 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 348 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 349 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 350 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 351 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 352 } 353 354 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { 355 [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, 356 }; 357 358 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 359 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 360 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 361 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 362 [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 363 [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 364 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 365 [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 366 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 367 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 368 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 369 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, 370 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, 371 
.next = ovs_vxlan_ext_key_lens }, 372 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 373 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 374 }; 375 376 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 377 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 378 [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 379 [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 380 [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 381 [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 382 [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 383 [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 384 [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 385 [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 386 [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 387 [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 388 [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 389 [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 390 [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, 391 [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 392 [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 393 [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 394 [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 395 [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 396 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 397 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 398 .next = ovs_tunnel_key_lens, }, 399 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 400 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, 401 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, 402 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, 403 [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, 404 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { 405 .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, 406 
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { 407 .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, 408 }; 409 410 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 411 { 412 return expected_len == attr_len || 413 expected_len == OVS_ATTR_NESTED || 414 expected_len == OVS_ATTR_VARIABLE; 415 } 416 417 static bool is_all_zero(const u8 *fp, size_t size) 418 { 419 int i; 420 421 if (!fp) 422 return false; 423 424 for (i = 0; i < size; i++) 425 if (fp[i]) 426 return false; 427 428 return true; 429 } 430 431 static int __parse_flow_nlattrs(const struct nlattr *attr, 432 const struct nlattr *a[], 433 u64 *attrsp, bool log, bool nz) 434 { 435 const struct nlattr *nla; 436 u64 attrs; 437 int rem; 438 439 attrs = *attrsp; 440 nla_for_each_nested(nla, attr, rem) { 441 u16 type = nla_type(nla); 442 int expected_len; 443 444 if (type > OVS_KEY_ATTR_MAX) { 445 OVS_NLERR(log, "Key type %d is out of range max %d", 446 type, OVS_KEY_ATTR_MAX); 447 return -EINVAL; 448 } 449 450 if (attrs & (1 << type)) { 451 OVS_NLERR(log, "Duplicate key (type %d).", type); 452 return -EINVAL; 453 } 454 455 expected_len = ovs_key_lens[type].len; 456 if (!check_attr_len(nla_len(nla), expected_len)) { 457 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 458 type, nla_len(nla), expected_len); 459 return -EINVAL; 460 } 461 462 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 463 attrs |= 1 << type; 464 a[type] = nla; 465 } 466 } 467 if (rem) { 468 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 469 return -EINVAL; 470 } 471 472 *attrsp = attrs; 473 return 0; 474 } 475 476 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 477 const struct nlattr *a[], u64 *attrsp, 478 bool log) 479 { 480 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 481 } 482 483 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], 484 u64 *attrsp, bool log) 485 { 486 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 487 } 488 489 static int 
genev_tun_opt_from_nlattr(const struct nlattr *a,
			  struct sw_flow_match *match, bool is_mask,
			  bool log)
{
	/* Copy the raw Geneve option block carried by @a into the tunnel
	 * option area of the key (or mask).  Returns 0, or -EINVAL when the
	 * block is too large, not a multiple of 4 bytes, or (for a mask)
	 * differs in length from the options already stored in the key.
	 */
	int opt_len = nla_len(a);
	unsigned long dst_offset;

	if (opt_len > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  opt_len, sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if (opt_len % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  opt_len);
		return -EINVAL;
	}

	if (is_mask) {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != opt_len) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, opt_len);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	} else {
		/* We need to record the length of the options passed
		 * down, otherwise packets with the same format but
		 * additional options will be silently matched.
		 */
		SW_FLOW_KEY_PUT(match, tun_opts_len, opt_len, false);
	}

	dst_offset = TUN_METADATA_OFFSET(opt_len);
	SW_FLOW_KEY_MEMCPY_OFFSET(match, dst_offset, nla_data(a),
				  opt_len, is_mask);
	return 0;
}

/* Parse the attributes nested in a VXLAN options attribute into a
 * struct vxlan_metadata and store it in the tunnel option area of the
 * key (or mask).  Only OVS_VXLAN_EXT_GBP is accepted.  Returns 0 on
 * success, -EINVAL on any malformed or unknown extension.
 */
static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct vxlan_metadata md;
	unsigned long dst_offset;
	struct nlattr *ext;
	int left;

	BUILD_BUG_ON(sizeof(md) > sizeof(match->key->tun_opts));

	memset(&md, 0, sizeof(md));
	nla_for_each_nested(ext, attr, left) {
		int type = nla_type(ext);

		if (type > OVS_VXLAN_EXT_MAX) {
			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
				  type, OVS_VXLAN_EXT_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(ext),
				    ovs_vxlan_ext_key_lens[type].len)) {
			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
				  type, nla_len(ext),
				  ovs_vxlan_ext_key_lens[type].len);
			return -EINVAL;
		}

		if (type != OVS_VXLAN_EXT_GBP) {
			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
				  type);
			return -EINVAL;
		}

		md.gbp = nla_get_u32(ext);
	}
	if (left) {
		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
			  left);
		return -EINVAL;
	}

	/* The key records the real option length; the mask matches the
	 * length field exactly.
	 */
	if (is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(md), false);

	dst_offset = TUN_METADATA_OFFSET(sizeof(md));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, dst_offset, &md, sizeof(md),
				  is_mask);
	return 0;
}

static int ip_tun_from_nlattr(const struct nlattr *attr,
			      struct sw_flow_match *match, bool is_mask,
			      bool
log) 599 { 600 bool ttl = false, ipv4 = false, ipv6 = false; 601 __be16 tun_flags = 0; 602 int opts_type = 0; 603 struct nlattr *a; 604 int rem; 605 606 nla_for_each_nested(a, attr, rem) { 607 int type = nla_type(a); 608 int err; 609 610 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 611 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 612 type, OVS_TUNNEL_KEY_ATTR_MAX); 613 return -EINVAL; 614 } 615 616 if (!check_attr_len(nla_len(a), 617 ovs_tunnel_key_lens[type].len)) { 618 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 619 type, nla_len(a), ovs_tunnel_key_lens[type].len); 620 return -EINVAL; 621 } 622 623 switch (type) { 624 case OVS_TUNNEL_KEY_ATTR_ID: 625 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 626 nla_get_be64(a), is_mask); 627 tun_flags |= TUNNEL_KEY; 628 break; 629 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 630 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, 631 nla_get_in_addr(a), is_mask); 632 ipv4 = true; 633 break; 634 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 635 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, 636 nla_get_in_addr(a), is_mask); 637 ipv4 = true; 638 break; 639 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: 640 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, 641 nla_get_in6_addr(a), is_mask); 642 ipv6 = true; 643 break; 644 case OVS_TUNNEL_KEY_ATTR_IPV6_DST: 645 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, 646 nla_get_in6_addr(a), is_mask); 647 ipv6 = true; 648 break; 649 case OVS_TUNNEL_KEY_ATTR_TOS: 650 SW_FLOW_KEY_PUT(match, tun_key.tos, 651 nla_get_u8(a), is_mask); 652 break; 653 case OVS_TUNNEL_KEY_ATTR_TTL: 654 SW_FLOW_KEY_PUT(match, tun_key.ttl, 655 nla_get_u8(a), is_mask); 656 ttl = true; 657 break; 658 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 659 tun_flags |= TUNNEL_DONT_FRAGMENT; 660 break; 661 case OVS_TUNNEL_KEY_ATTR_CSUM: 662 tun_flags |= TUNNEL_CSUM; 663 break; 664 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 665 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 666 nla_get_be16(a), is_mask); 667 break; 668 case OVS_TUNNEL_KEY_ATTR_TP_DST: 669 SW_FLOW_KEY_PUT(match, 
tun_key.tp_dst, 670 nla_get_be16(a), is_mask); 671 break; 672 case OVS_TUNNEL_KEY_ATTR_OAM: 673 tun_flags |= TUNNEL_OAM; 674 break; 675 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 676 if (opts_type) { 677 OVS_NLERR(log, "Multiple metadata blocks provided"); 678 return -EINVAL; 679 } 680 681 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 682 if (err) 683 return err; 684 685 tun_flags |= TUNNEL_GENEVE_OPT; 686 opts_type = type; 687 break; 688 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 689 if (opts_type) { 690 OVS_NLERR(log, "Multiple metadata blocks provided"); 691 return -EINVAL; 692 } 693 694 err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 695 if (err) 696 return err; 697 698 tun_flags |= TUNNEL_VXLAN_OPT; 699 opts_type = type; 700 break; 701 case OVS_TUNNEL_KEY_ATTR_PAD: 702 break; 703 default: 704 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 705 type); 706 return -EINVAL; 707 } 708 } 709 710 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 711 if (is_mask) 712 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); 713 else 714 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? 
AF_INET6 : AF_INET, 715 false); 716 717 if (rem > 0) { 718 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.", 719 rem); 720 return -EINVAL; 721 } 722 723 if (ipv4 && ipv6) { 724 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); 725 return -EINVAL; 726 } 727 728 if (!is_mask) { 729 if (!ipv4 && !ipv6) { 730 OVS_NLERR(log, "IP tunnel dst address not specified"); 731 return -EINVAL; 732 } 733 if (ipv4 && !match->key->tun_key.u.ipv4.dst) { 734 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 735 return -EINVAL; 736 } 737 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { 738 OVS_NLERR(log, "IPv6 tunnel dst address is zero"); 739 return -EINVAL; 740 } 741 742 if (!ttl) { 743 OVS_NLERR(log, "IP tunnel TTL not specified."); 744 return -EINVAL; 745 } 746 } 747 748 return opts_type; 749 } 750 751 static int vxlan_opt_to_nlattr(struct sk_buff *skb, 752 const void *tun_opts, int swkey_tun_opts_len) 753 { 754 const struct vxlan_metadata *opts = tun_opts; 755 struct nlattr *nla; 756 757 nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 758 if (!nla) 759 return -EMSGSIZE; 760 761 if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 762 return -EMSGSIZE; 763 764 nla_nest_end(skb, nla); 765 return 0; 766 } 767 768 static int __ip_tun_to_nlattr(struct sk_buff *skb, 769 const struct ip_tunnel_key *output, 770 const void *tun_opts, int swkey_tun_opts_len, 771 unsigned short tun_proto) 772 { 773 if (output->tun_flags & TUNNEL_KEY && 774 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, 775 OVS_TUNNEL_KEY_ATTR_PAD)) 776 return -EMSGSIZE; 777 switch (tun_proto) { 778 case AF_INET: 779 if (output->u.ipv4.src && 780 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, 781 output->u.ipv4.src)) 782 return -EMSGSIZE; 783 if (output->u.ipv4.dst && 784 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, 785 output->u.ipv4.dst)) 786 return -EMSGSIZE; 787 break; 788 case AF_INET6: 789 if (!ipv6_addr_any(&output->u.ipv6.src) && 790 
nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, 791 &output->u.ipv6.src)) 792 return -EMSGSIZE; 793 if (!ipv6_addr_any(&output->u.ipv6.dst) && 794 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, 795 &output->u.ipv6.dst)) 796 return -EMSGSIZE; 797 break; 798 } 799 if (output->tos && 800 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) 801 return -EMSGSIZE; 802 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) 803 return -EMSGSIZE; 804 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 805 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 806 return -EMSGSIZE; 807 if ((output->tun_flags & TUNNEL_CSUM) && 808 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 809 return -EMSGSIZE; 810 if (output->tp_src && 811 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 812 return -EMSGSIZE; 813 if (output->tp_dst && 814 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 815 return -EMSGSIZE; 816 if ((output->tun_flags & TUNNEL_OAM) && 817 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 818 return -EMSGSIZE; 819 if (swkey_tun_opts_len) { 820 if (output->tun_flags & TUNNEL_GENEVE_OPT && 821 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 822 swkey_tun_opts_len, tun_opts)) 823 return -EMSGSIZE; 824 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 825 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 826 return -EMSGSIZE; 827 } 828 829 return 0; 830 } 831 832 static int ip_tun_to_nlattr(struct sk_buff *skb, 833 const struct ip_tunnel_key *output, 834 const void *tun_opts, int swkey_tun_opts_len, 835 unsigned short tun_proto) 836 { 837 struct nlattr *nla; 838 int err; 839 840 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 841 if (!nla) 842 return -EMSGSIZE; 843 844 err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, 845 tun_proto); 846 if (err) 847 return err; 848 849 nla_nest_end(skb, nla); 850 return 0; 851 } 852 853 int ovs_nla_put_tunnel_info(struct sk_buff *skb, 854 struct ip_tunnel_info *tun_info) 855 { 856 return 
__ip_tun_to_nlattr(skb, &tun_info->key, 857 ip_tunnel_info_opts(tun_info), 858 tun_info->options_len, 859 ip_tunnel_info_af(tun_info)); 860 } 861 862 static int encode_vlan_from_nlattrs(struct sw_flow_match *match, 863 const struct nlattr *a[], 864 bool is_mask, bool inner) 865 { 866 __be16 tci = 0; 867 __be16 tpid = 0; 868 869 if (a[OVS_KEY_ATTR_VLAN]) 870 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 871 872 if (a[OVS_KEY_ATTR_ETHERTYPE]) 873 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 874 875 if (likely(!inner)) { 876 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); 877 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); 878 } else { 879 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); 880 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); 881 } 882 return 0; 883 } 884 885 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, 886 u64 key_attrs, bool inner, 887 const struct nlattr **a, bool log) 888 { 889 __be16 tci = 0; 890 891 if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 892 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 893 eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { 894 /* Not a VLAN. */ 895 return 0; 896 } 897 898 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 899 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 900 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); 901 return -EINVAL; 902 } 903 904 if (a[OVS_KEY_ATTR_VLAN]) 905 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 906 907 if (!(tci & htons(VLAN_TAG_PRESENT))) { 908 if (tci) { 909 OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", 910 (inner) ? "C-VLAN" : "VLAN"); 911 return -EINVAL; 912 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { 913 /* Corner case for truncated VLAN header. */ 914 OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", 915 (inner) ? 
"C-VLAN" : "VLAN"); 916 return -EINVAL; 917 } 918 } 919 920 return 1; 921 } 922 923 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, 924 u64 key_attrs, bool inner, 925 const struct nlattr **a, bool log) 926 { 927 __be16 tci = 0; 928 __be16 tpid = 0; 929 bool encap_valid = !!(match->key->eth.vlan.tci & 930 htons(VLAN_TAG_PRESENT)); 931 bool i_encap_valid = !!(match->key->eth.cvlan.tci & 932 htons(VLAN_TAG_PRESENT)); 933 934 if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { 935 /* Not a VLAN. */ 936 return 0; 937 } 938 939 if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { 940 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", 941 (inner) ? "C-VLAN" : "VLAN"); 942 return -EINVAL; 943 } 944 945 if (a[OVS_KEY_ATTR_VLAN]) 946 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 947 948 if (a[OVS_KEY_ATTR_ETHERTYPE]) 949 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 950 951 if (tpid != htons(0xffff)) { 952 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", 953 (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); 954 return -EINVAL; 955 } 956 if (!(tci & htons(VLAN_TAG_PRESENT))) { 957 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", 958 (inner) ? 
"C-VLAN" : "VLAN"); 959 return -EINVAL; 960 } 961 962 return 1; 963 } 964 965 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, 966 u64 *key_attrs, bool inner, 967 const struct nlattr **a, bool is_mask, 968 bool log) 969 { 970 int err; 971 const struct nlattr *encap; 972 973 if (!is_mask) 974 err = validate_vlan_from_nlattrs(match, *key_attrs, inner, 975 a, log); 976 else 977 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, 978 a, log); 979 if (err <= 0) 980 return err; 981 982 err = encode_vlan_from_nlattrs(match, a, is_mask, inner); 983 if (err) 984 return err; 985 986 *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 987 *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 988 *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 989 990 encap = a[OVS_KEY_ATTR_ENCAP]; 991 992 if (!is_mask) 993 err = parse_flow_nlattrs(encap, a, key_attrs, log); 994 else 995 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); 996 997 return err; 998 } 999 1000 static int parse_vlan_from_nlattrs(struct sw_flow_match *match, 1001 u64 *key_attrs, const struct nlattr **a, 1002 bool is_mask, bool log) 1003 { 1004 int err; 1005 bool encap_valid = false; 1006 1007 err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, 1008 is_mask, log); 1009 if (err) 1010 return err; 1011 1012 encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); 1013 if (encap_valid) { 1014 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, 1015 is_mask, log); 1016 if (err) 1017 return err; 1018 } 1019 1020 return 0; 1021 } 1022 1023 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, 1024 u64 *attrs, const struct nlattr **a, 1025 bool is_mask, bool log) 1026 { 1027 __be16 eth_type; 1028 1029 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1030 if (is_mask) { 1031 /* Always exact match EtherType. 
*/ 1032 eth_type = htons(0xffff); 1033 } else if (!eth_proto_is_802_3(eth_type)) { 1034 OVS_NLERR(log, "EtherType %x is less than min %x", 1035 ntohs(eth_type), ETH_P_802_3_MIN); 1036 return -EINVAL; 1037 } 1038 1039 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 1040 *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1041 return 0; 1042 } 1043 1044 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 1045 u64 *attrs, const struct nlattr **a, 1046 bool is_mask, bool log) 1047 { 1048 u8 mac_proto = MAC_PROTO_ETHERNET; 1049 1050 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 1051 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 1052 1053 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 1054 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 1055 } 1056 1057 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 1058 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 1059 1060 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 1061 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 1062 } 1063 1064 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1065 SW_FLOW_KEY_PUT(match, phy.priority, 1066 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 1067 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1068 } 1069 1070 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1071 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1072 1073 if (is_mask) { 1074 in_port = 0xffffffff; /* Always exact match in_port. 
*/ 1075 } else if (in_port >= DP_MAX_PORTS) { 1076 OVS_NLERR(log, "Port %d exceeds max allowable %d", 1077 in_port, DP_MAX_PORTS); 1078 return -EINVAL; 1079 } 1080 1081 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 1082 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1083 } else if (!is_mask) { 1084 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 1085 } 1086 1087 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1088 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1089 1090 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 1091 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1092 } 1093 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1094 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 1095 is_mask, log) < 0) 1096 return -EINVAL; 1097 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1098 } 1099 1100 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && 1101 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { 1102 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); 1103 1104 if (ct_state & ~CT_SUPPORTED_MASK) { 1105 OVS_NLERR(log, "ct_state flags %08x unsupported", 1106 ct_state); 1107 return -EINVAL; 1108 } 1109 1110 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask); 1111 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); 1112 } 1113 if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && 1114 ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { 1115 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); 1116 1117 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask); 1118 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); 1119 } 1120 if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && 1121 ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { 1122 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); 1123 1124 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); 1125 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); 1126 } 1127 if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && 1128 ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { 1129 const struct ovs_key_ct_labels *cl; 1130 1131 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); 1132 SW_FLOW_KEY_MEMCPY(match, 
ct.labels, cl->ct_labels, 1133 sizeof(*cl), is_mask); 1134 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); 1135 } 1136 if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { 1137 const struct ovs_key_ct_tuple_ipv4 *ct; 1138 1139 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); 1140 1141 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask); 1142 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); 1143 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); 1144 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); 1145 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask); 1146 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); 1147 } 1148 if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { 1149 const struct ovs_key_ct_tuple_ipv6 *ct; 1150 1151 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); 1152 1153 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, 1154 sizeof(match->key->ipv6.ct_orig.src), 1155 is_mask); 1156 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, 1157 sizeof(match->key->ipv6.ct_orig.dst), 1158 is_mask); 1159 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); 1160 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); 1161 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask); 1162 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); 1163 } 1164 1165 /* For layer 3 packets the Ethernet type is provided 1166 * and treated as metadata but no MAC addresses are provided. 1167 */ 1168 if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && 1169 (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) 1170 mac_proto = MAC_PROTO_NONE; 1171 1172 /* Always exact match mac_proto */ 1173 SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 
0xff : mac_proto, is_mask); 1174 1175 if (mac_proto == MAC_PROTO_NONE) 1176 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, 1177 log); 1178 1179 return 0; 1180 } 1181 1182 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1183 u64 attrs, const struct nlattr **a, 1184 bool is_mask, bool log) 1185 { 1186 int err; 1187 1188 err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); 1189 if (err) 1190 return err; 1191 1192 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 1193 const struct ovs_key_ethernet *eth_key; 1194 1195 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1196 SW_FLOW_KEY_MEMCPY(match, eth.src, 1197 eth_key->eth_src, ETH_ALEN, is_mask); 1198 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1199 eth_key->eth_dst, ETH_ALEN, is_mask); 1200 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1201 1202 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1203 /* VLAN attribute is always parsed before getting here since it 1204 * may occur multiple times. 1205 */ 1206 OVS_NLERR(log, "VLAN attribute unexpected."); 1207 return -EINVAL; 1208 } 1209 1210 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1211 err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, 1212 log); 1213 if (err) 1214 return err; 1215 } else if (!is_mask) { 1216 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1217 } 1218 } else if (!match->key->eth.type) { 1219 OVS_NLERR(log, "Either Ethernet header or EtherType is required."); 1220 return -EINVAL; 1221 } 1222 1223 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1224 const struct ovs_key_ipv4 *ipv4_key; 1225 1226 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1227 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 1228 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 1229 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 1230 return -EINVAL; 1231 } 1232 SW_FLOW_KEY_PUT(match, ip.proto, 1233 ipv4_key->ipv4_proto, is_mask); 1234 SW_FLOW_KEY_PUT(match, ip.tos, 1235 ipv4_key->ipv4_tos, is_mask); 1236 SW_FLOW_KEY_PUT(match, 
ip.ttl, 1237 ipv4_key->ipv4_ttl, is_mask); 1238 SW_FLOW_KEY_PUT(match, ip.frag, 1239 ipv4_key->ipv4_frag, is_mask); 1240 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1241 ipv4_key->ipv4_src, is_mask); 1242 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1243 ipv4_key->ipv4_dst, is_mask); 1244 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 1245 } 1246 1247 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 1248 const struct ovs_key_ipv6 *ipv6_key; 1249 1250 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1251 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 1252 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 1253 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 1254 return -EINVAL; 1255 } 1256 1257 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 1258 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 1259 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 1260 return -EINVAL; 1261 } 1262 1263 SW_FLOW_KEY_PUT(match, ipv6.label, 1264 ipv6_key->ipv6_label, is_mask); 1265 SW_FLOW_KEY_PUT(match, ip.proto, 1266 ipv6_key->ipv6_proto, is_mask); 1267 SW_FLOW_KEY_PUT(match, ip.tos, 1268 ipv6_key->ipv6_tclass, is_mask); 1269 SW_FLOW_KEY_PUT(match, ip.ttl, 1270 ipv6_key->ipv6_hlimit, is_mask); 1271 SW_FLOW_KEY_PUT(match, ip.frag, 1272 ipv6_key->ipv6_frag, is_mask); 1273 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 1274 ipv6_key->ipv6_src, 1275 sizeof(match->key->ipv6.addr.src), 1276 is_mask); 1277 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 1278 ipv6_key->ipv6_dst, 1279 sizeof(match->key->ipv6.addr.dst), 1280 is_mask); 1281 1282 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 1283 } 1284 1285 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 1286 const struct ovs_key_arp *arp_key; 1287 1288 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1289 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 1290 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 1291 arp_key->arp_op); 1292 return -EINVAL; 1293 } 1294 1295 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1296 arp_key->arp_sip, is_mask); 1297 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 
1298 arp_key->arp_tip, is_mask); 1299 SW_FLOW_KEY_PUT(match, ip.proto, 1300 ntohs(arp_key->arp_op), is_mask); 1301 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 1302 arp_key->arp_sha, ETH_ALEN, is_mask); 1303 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 1304 arp_key->arp_tha, ETH_ALEN, is_mask); 1305 1306 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1307 } 1308 1309 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1310 const struct ovs_key_mpls *mpls_key; 1311 1312 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 1313 SW_FLOW_KEY_PUT(match, mpls.top_lse, 1314 mpls_key->mpls_lse, is_mask); 1315 1316 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 1317 } 1318 1319 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 1320 const struct ovs_key_tcp *tcp_key; 1321 1322 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 1323 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 1324 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 1325 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 1326 } 1327 1328 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 1329 SW_FLOW_KEY_PUT(match, tp.flags, 1330 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 1331 is_mask); 1332 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 1333 } 1334 1335 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 1336 const struct ovs_key_udp *udp_key; 1337 1338 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 1339 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 1340 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 1341 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 1342 } 1343 1344 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 1345 const struct ovs_key_sctp *sctp_key; 1346 1347 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 1348 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 1349 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 1350 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 1351 } 1352 1353 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 1354 const struct ovs_key_icmp *icmp_key; 1355 1356 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 1357 SW_FLOW_KEY_PUT(match, tp.src, 1358 htons(icmp_key->icmp_type), is_mask); 
1359 SW_FLOW_KEY_PUT(match, tp.dst, 1360 htons(icmp_key->icmp_code), is_mask); 1361 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 1362 } 1363 1364 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 1365 const struct ovs_key_icmpv6 *icmpv6_key; 1366 1367 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 1368 SW_FLOW_KEY_PUT(match, tp.src, 1369 htons(icmpv6_key->icmpv6_type), is_mask); 1370 SW_FLOW_KEY_PUT(match, tp.dst, 1371 htons(icmpv6_key->icmpv6_code), is_mask); 1372 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 1373 } 1374 1375 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 1376 const struct ovs_key_nd *nd_key; 1377 1378 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 1379 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 1380 nd_key->nd_target, 1381 sizeof(match->key->ipv6.nd.target), 1382 is_mask); 1383 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 1384 nd_key->nd_sll, ETH_ALEN, is_mask); 1385 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 1386 nd_key->nd_tll, ETH_ALEN, is_mask); 1387 attrs &= ~(1 << OVS_KEY_ATTR_ND); 1388 } 1389 1390 if (attrs != 0) { 1391 OVS_NLERR(log, "Unknown key attributes %llx", 1392 (unsigned long long)attrs); 1393 return -EINVAL; 1394 } 1395 1396 return 0; 1397 } 1398 1399 static void nlattr_set(struct nlattr *attr, u8 val, 1400 const struct ovs_len_tbl *tbl) 1401 { 1402 struct nlattr *nla; 1403 int rem; 1404 1405 /* The nlattr stream should already have been validated */ 1406 nla_for_each_nested(nla, attr, rem) { 1407 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { 1408 if (tbl[nla_type(nla)].next) 1409 tbl = tbl[nla_type(nla)].next; 1410 nlattr_set(nla, val, tbl); 1411 } else { 1412 memset(nla_data(nla), val, nla_len(nla)); 1413 } 1414 1415 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) 1416 *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; 1417 } 1418 } 1419 1420 static void mask_set_nlattr(struct nlattr *attr, u8 val) 1421 { 1422 nlattr_set(attr, val, ovs_key_lens); 1423 } 1424 1425 /** 1426 * ovs_nla_get_match - parses Netlink attributes into a flow key and 1427 * mask. 
In case the 'mask' is NULL, the flow is treated as exact match 1428 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 1429 * does not include any don't care bit. 1430 * @net: Used to determine per-namespace field support. 1431 * @match: receives the extracted flow match information. 1432 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1433 * sequence. The fields should of the packet that triggered the creation 1434 * of this flow. 1435 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 1436 * attribute specifies the mask field of the wildcarded flow. 1437 * @log: Boolean to allow kernel error logging. Normally true, but when 1438 * probing for feature compatibility this should be passed in as false to 1439 * suppress unnecessary error logging. 1440 */ 1441 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, 1442 const struct nlattr *nla_key, 1443 const struct nlattr *nla_mask, 1444 bool log) 1445 { 1446 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1447 struct nlattr *newmask = NULL; 1448 u64 key_attrs = 0; 1449 u64 mask_attrs = 0; 1450 int err; 1451 1452 err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); 1453 if (err) 1454 return err; 1455 1456 err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); 1457 if (err) 1458 return err; 1459 1460 err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); 1461 if (err) 1462 return err; 1463 1464 if (match->mask) { 1465 if (!nla_mask) { 1466 /* Create an exact match mask. We need to set to 0xff 1467 * all the 'match->mask' fields that have been touched 1468 * in 'match->key'. We cannot simply memset 1469 * 'match->mask', because padding bytes and fields not 1470 * specified in 'match->key' should be left to 0. 1471 * Instead, we use a stream of netlink attributes, 1472 * copied from 'key' and set to 0xff. 1473 * ovs_key_from_nlattrs() will take care of filling 1474 * 'match->mask' appropriately. 
1475 */ 1476 newmask = kmemdup(nla_key, 1477 nla_total_size(nla_len(nla_key)), 1478 GFP_KERNEL); 1479 if (!newmask) 1480 return -ENOMEM; 1481 1482 mask_set_nlattr(newmask, 0xff); 1483 1484 /* The userspace does not send tunnel attributes that 1485 * are 0, but we should not wildcard them nonetheless. 1486 */ 1487 if (match->key->tun_proto) 1488 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 1489 0xff, true); 1490 1491 nla_mask = newmask; 1492 } 1493 1494 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); 1495 if (err) 1496 goto free_newmask; 1497 1498 /* Always match on tci. */ 1499 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); 1500 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); 1501 1502 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); 1503 if (err) 1504 goto free_newmask; 1505 1506 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, 1507 log); 1508 if (err) 1509 goto free_newmask; 1510 } 1511 1512 if (!match_validate(match, key_attrs, mask_attrs, log)) 1513 err = -EINVAL; 1514 1515 free_newmask: 1516 kfree(newmask); 1517 return err; 1518 } 1519 1520 static size_t get_ufid_len(const struct nlattr *attr, bool log) 1521 { 1522 size_t len; 1523 1524 if (!attr) 1525 return 0; 1526 1527 len = nla_len(attr); 1528 if (len < 1 || len > MAX_UFID_LENGTH) { 1529 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", 1530 nla_len(attr), MAX_UFID_LENGTH); 1531 return 0; 1532 } 1533 1534 return len; 1535 } 1536 1537 /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, 1538 * or false otherwise. 
1539 */ 1540 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, 1541 bool log) 1542 { 1543 sfid->ufid_len = get_ufid_len(attr, log); 1544 if (sfid->ufid_len) 1545 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); 1546 1547 return sfid->ufid_len; 1548 } 1549 1550 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 1551 const struct sw_flow_key *key, bool log) 1552 { 1553 struct sw_flow_key *new_key; 1554 1555 if (ovs_nla_get_ufid(sfid, ufid, log)) 1556 return 0; 1557 1558 /* If UFID was not provided, use unmasked key. */ 1559 new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); 1560 if (!new_key) 1561 return -ENOMEM; 1562 memcpy(new_key, key, sizeof(*key)); 1563 sfid->unmasked_key = new_key; 1564 1565 return 0; 1566 } 1567 1568 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) 1569 { 1570 return attr ? nla_get_u32(attr) : 0; 1571 } 1572 1573 /** 1574 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 1575 * @net: Network namespace. 1576 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack 1577 * metadata. 1578 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink 1579 * attributes. 1580 * @attrs: Bit mask for the netlink attributes included in @a. 1581 * @log: Boolean to allow kernel error logging. Normally true, but when 1582 * probing for feature compatibility this should be passed in as false to 1583 * suppress unnecessary error logging. 1584 * 1585 * This parses a series of Netlink attributes that form a flow key, which must 1586 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1587 * get the metadata, that is, the parts of the flow key that cannot be 1588 * extracted from the packet itself. 1589 * 1590 * This must be called before the packet key fields are filled in 'key'. 
1591 */ 1592 1593 int ovs_nla_get_flow_metadata(struct net *net, 1594 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], 1595 u64 attrs, struct sw_flow_key *key, bool log) 1596 { 1597 struct sw_flow_match match; 1598 1599 memset(&match, 0, sizeof(match)); 1600 match.key = key; 1601 1602 key->ct_state = 0; 1603 key->ct_zone = 0; 1604 key->ct_orig_proto = 0; 1605 memset(&key->ct, 0, sizeof(key->ct)); 1606 memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); 1607 memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); 1608 1609 key->phy.in_port = DP_MAX_PORTS; 1610 1611 return metadata_from_nlattrs(net, &match, &attrs, a, false, log); 1612 } 1613 1614 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, 1615 bool is_mask) 1616 { 1617 __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); 1618 1619 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1620 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) 1621 return -EMSGSIZE; 1622 return 0; 1623 } 1624 1625 static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1626 const struct sw_flow_key *output, bool is_mask, 1627 struct sk_buff *skb) 1628 { 1629 struct ovs_key_ethernet *eth_key; 1630 struct nlattr *nla; 1631 struct nlattr *encap = NULL; 1632 struct nlattr *in_encap = NULL; 1633 1634 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1635 goto nla_put_failure; 1636 1637 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1638 goto nla_put_failure; 1639 1640 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1641 goto nla_put_failure; 1642 1643 if ((swkey->tun_proto || is_mask)) { 1644 const void *opts = NULL; 1645 1646 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1647 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1648 1649 if (ip_tun_to_nlattr(skb, &output->tun_key, opts, 1650 swkey->tun_opts_len, swkey->tun_proto)) 1651 goto nla_put_failure; 1652 } 1653 1654 if (swkey->phy.in_port == DP_MAX_PORTS) { 1655 if 
(is_mask && (output->phy.in_port == 0xffff)) 1656 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1657 goto nla_put_failure; 1658 } else { 1659 u16 upper_u16; 1660 upper_u16 = !is_mask ? 0 : 0xffff; 1661 1662 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1663 (upper_u16 << 16) | output->phy.in_port)) 1664 goto nla_put_failure; 1665 } 1666 1667 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1668 goto nla_put_failure; 1669 1670 if (ovs_ct_put_key(swkey, output, skb)) 1671 goto nla_put_failure; 1672 1673 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { 1674 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1675 if (!nla) 1676 goto nla_put_failure; 1677 1678 eth_key = nla_data(nla); 1679 ether_addr_copy(eth_key->eth_src, output->eth.src); 1680 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1681 1682 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { 1683 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) 1684 goto nla_put_failure; 1685 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1686 if (!swkey->eth.vlan.tci) 1687 goto unencap; 1688 1689 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { 1690 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) 1691 goto nla_put_failure; 1692 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1693 if (!swkey->eth.cvlan.tci) 1694 goto unencap; 1695 } 1696 } 1697 1698 if (swkey->eth.type == htons(ETH_P_802_2)) { 1699 /* 1700 * Ethertype 802.2 is represented in the netlink with omitted 1701 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1702 * 0xffff in the mask attribute. Ethertype can also 1703 * be wildcarded. 
1704 */ 1705 if (is_mask && output->eth.type) 1706 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1707 output->eth.type)) 1708 goto nla_put_failure; 1709 goto unencap; 1710 } 1711 } 1712 1713 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1714 goto nla_put_failure; 1715 1716 if (eth_type_vlan(swkey->eth.type)) { 1717 /* There are 3 VLAN tags, we don't know anything about the rest 1718 * of the packet, so truncate here. 1719 */ 1720 WARN_ON_ONCE(!(encap && in_encap)); 1721 goto unencap; 1722 } 1723 1724 if (swkey->eth.type == htons(ETH_P_IP)) { 1725 struct ovs_key_ipv4 *ipv4_key; 1726 1727 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1728 if (!nla) 1729 goto nla_put_failure; 1730 ipv4_key = nla_data(nla); 1731 ipv4_key->ipv4_src = output->ipv4.addr.src; 1732 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1733 ipv4_key->ipv4_proto = output->ip.proto; 1734 ipv4_key->ipv4_tos = output->ip.tos; 1735 ipv4_key->ipv4_ttl = output->ip.ttl; 1736 ipv4_key->ipv4_frag = output->ip.frag; 1737 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1738 struct ovs_key_ipv6 *ipv6_key; 1739 1740 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1741 if (!nla) 1742 goto nla_put_failure; 1743 ipv6_key = nla_data(nla); 1744 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1745 sizeof(ipv6_key->ipv6_src)); 1746 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1747 sizeof(ipv6_key->ipv6_dst)); 1748 ipv6_key->ipv6_label = output->ipv6.label; 1749 ipv6_key->ipv6_proto = output->ip.proto; 1750 ipv6_key->ipv6_tclass = output->ip.tos; 1751 ipv6_key->ipv6_hlimit = output->ip.ttl; 1752 ipv6_key->ipv6_frag = output->ip.frag; 1753 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1754 swkey->eth.type == htons(ETH_P_RARP)) { 1755 struct ovs_key_arp *arp_key; 1756 1757 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1758 if (!nla) 1759 goto nla_put_failure; 1760 arp_key = nla_data(nla); 1761 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1762 
arp_key->arp_sip = output->ipv4.addr.src; 1763 arp_key->arp_tip = output->ipv4.addr.dst; 1764 arp_key->arp_op = htons(output->ip.proto); 1765 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1766 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1767 } else if (eth_p_mpls(swkey->eth.type)) { 1768 struct ovs_key_mpls *mpls_key; 1769 1770 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1771 if (!nla) 1772 goto nla_put_failure; 1773 mpls_key = nla_data(nla); 1774 mpls_key->mpls_lse = output->mpls.top_lse; 1775 } 1776 1777 if ((swkey->eth.type == htons(ETH_P_IP) || 1778 swkey->eth.type == htons(ETH_P_IPV6)) && 1779 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1780 1781 if (swkey->ip.proto == IPPROTO_TCP) { 1782 struct ovs_key_tcp *tcp_key; 1783 1784 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1785 if (!nla) 1786 goto nla_put_failure; 1787 tcp_key = nla_data(nla); 1788 tcp_key->tcp_src = output->tp.src; 1789 tcp_key->tcp_dst = output->tp.dst; 1790 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1791 output->tp.flags)) 1792 goto nla_put_failure; 1793 } else if (swkey->ip.proto == IPPROTO_UDP) { 1794 struct ovs_key_udp *udp_key; 1795 1796 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1797 if (!nla) 1798 goto nla_put_failure; 1799 udp_key = nla_data(nla); 1800 udp_key->udp_src = output->tp.src; 1801 udp_key->udp_dst = output->tp.dst; 1802 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1803 struct ovs_key_sctp *sctp_key; 1804 1805 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1806 if (!nla) 1807 goto nla_put_failure; 1808 sctp_key = nla_data(nla); 1809 sctp_key->sctp_src = output->tp.src; 1810 sctp_key->sctp_dst = output->tp.dst; 1811 } else if (swkey->eth.type == htons(ETH_P_IP) && 1812 swkey->ip.proto == IPPROTO_ICMP) { 1813 struct ovs_key_icmp *icmp_key; 1814 1815 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1816 if (!nla) 1817 goto nla_put_failure; 1818 icmp_key = nla_data(nla); 1819 
icmp_key->icmp_type = ntohs(output->tp.src); 1820 icmp_key->icmp_code = ntohs(output->tp.dst); 1821 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1822 swkey->ip.proto == IPPROTO_ICMPV6) { 1823 struct ovs_key_icmpv6 *icmpv6_key; 1824 1825 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1826 sizeof(*icmpv6_key)); 1827 if (!nla) 1828 goto nla_put_failure; 1829 icmpv6_key = nla_data(nla); 1830 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1831 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1832 1833 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1834 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1835 struct ovs_key_nd *nd_key; 1836 1837 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1838 if (!nla) 1839 goto nla_put_failure; 1840 nd_key = nla_data(nla); 1841 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1842 sizeof(nd_key->nd_target)); 1843 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1844 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1845 } 1846 } 1847 } 1848 1849 unencap: 1850 if (in_encap) 1851 nla_nest_end(skb, in_encap); 1852 if (encap) 1853 nla_nest_end(skb, encap); 1854 1855 return 0; 1856 1857 nla_put_failure: 1858 return -EMSGSIZE; 1859 } 1860 1861 int ovs_nla_put_key(const struct sw_flow_key *swkey, 1862 const struct sw_flow_key *output, int attr, bool is_mask, 1863 struct sk_buff *skb) 1864 { 1865 int err; 1866 struct nlattr *nla; 1867 1868 nla = nla_nest_start(skb, attr); 1869 if (!nla) 1870 return -EMSGSIZE; 1871 err = __ovs_nla_put_key(swkey, output, is_mask, skb); 1872 if (err) 1873 return err; 1874 nla_nest_end(skb, nla); 1875 1876 return 0; 1877 } 1878 1879 /* Called with ovs_mutex or RCU read lock. 
*/ 1880 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) 1881 { 1882 if (ovs_identifier_is_ufid(&flow->id)) 1883 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, 1884 flow->id.ufid); 1885 1886 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, 1887 OVS_FLOW_ATTR_KEY, false, skb); 1888 } 1889 1890 /* Called with ovs_mutex or RCU read lock. */ 1891 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) 1892 { 1893 return ovs_nla_put_key(&flow->key, &flow->key, 1894 OVS_FLOW_ATTR_KEY, false, skb); 1895 } 1896 1897 /* Called with ovs_mutex or RCU read lock. */ 1898 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) 1899 { 1900 return ovs_nla_put_key(&flow->key, &flow->mask->key, 1901 OVS_FLOW_ATTR_MASK, true, skb); 1902 } 1903 1904 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1905 1906 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1907 { 1908 struct sw_flow_actions *sfa; 1909 1910 if (size > MAX_ACTIONS_BUFSIZE) { 1911 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1912 return ERR_PTR(-EINVAL); 1913 } 1914 1915 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1916 if (!sfa) 1917 return ERR_PTR(-ENOMEM); 1918 1919 sfa->actions_len = 0; 1920 return sfa; 1921 } 1922 1923 static void ovs_nla_free_set_action(const struct nlattr *a) 1924 { 1925 const struct nlattr *ovs_key = nla_data(a); 1926 struct ovs_tunnel_info *ovs_tun; 1927 1928 switch (nla_type(ovs_key)) { 1929 case OVS_KEY_ATTR_TUNNEL_INFO: 1930 ovs_tun = nla_data(ovs_key); 1931 dst_release((struct dst_entry *)ovs_tun->tun_dst); 1932 break; 1933 } 1934 } 1935 1936 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1937 { 1938 const struct nlattr *a; 1939 int rem; 1940 1941 if (!sf_acts) 1942 return; 1943 1944 nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { 1945 switch (nla_type(a)) { 1946 case OVS_ACTION_ATTR_SET: 1947 ovs_nla_free_set_action(a); 1948 
break; 1949 case OVS_ACTION_ATTR_CT: 1950 ovs_ct_free_action(a); 1951 break; 1952 } 1953 } 1954 1955 kfree(sf_acts); 1956 } 1957 1958 static void __ovs_nla_free_flow_actions(struct rcu_head *head) 1959 { 1960 ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu)); 1961 } 1962 1963 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1964 * The caller must hold rcu_read_lock for this to be sensible. */ 1965 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts) 1966 { 1967 call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions); 1968 } 1969 1970 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1971 int attr_len, bool log) 1972 { 1973 1974 struct sw_flow_actions *acts; 1975 int new_acts_size; 1976 int req_size = NLA_ALIGN(attr_len); 1977 int next_offset = offsetof(struct sw_flow_actions, actions) + 1978 (*sfa)->actions_len; 1979 1980 if (req_size <= (ksize(*sfa) - next_offset)) 1981 goto out; 1982 1983 new_acts_size = ksize(*sfa) * 2; 1984 1985 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1986 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1987 return ERR_PTR(-EMSGSIZE); 1988 new_acts_size = MAX_ACTIONS_BUFSIZE; 1989 } 1990 1991 acts = nla_alloc_flow_actions(new_acts_size, log); 1992 if (IS_ERR(acts)) 1993 return (void *)acts; 1994 1995 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1996 acts->actions_len = (*sfa)->actions_len; 1997 acts->orig_len = (*sfa)->orig_len; 1998 kfree(*sfa); 1999 *sfa = acts; 2000 2001 out: 2002 (*sfa)->actions_len += req_size; 2003 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 2004 } 2005 2006 static struct nlattr *__add_action(struct sw_flow_actions **sfa, 2007 int attrtype, void *data, int len, bool log) 2008 { 2009 struct nlattr *a; 2010 2011 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 2012 if (IS_ERR(a)) 2013 return a; 2014 2015 a->nla_type = attrtype; 2016 a->nla_len = nla_attr_size(len); 2017 2018 if (data) 2019 
memcpy(nla_data(a), data, len); 2020 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 2021 2022 return a; 2023 } 2024 2025 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data, 2026 int len, bool log) 2027 { 2028 struct nlattr *a; 2029 2030 a = __add_action(sfa, attrtype, data, len, log); 2031 2032 return PTR_ERR_OR_ZERO(a); 2033 } 2034 2035 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 2036 int attrtype, bool log) 2037 { 2038 int used = (*sfa)->actions_len; 2039 int err; 2040 2041 err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log); 2042 if (err) 2043 return err; 2044 2045 return used; 2046 } 2047 2048 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 2049 int st_offset) 2050 { 2051 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 2052 st_offset); 2053 2054 a->nla_len = sfa->actions_len - st_offset; 2055 } 2056 2057 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2058 const struct sw_flow_key *key, 2059 struct sw_flow_actions **sfa, 2060 __be16 eth_type, __be16 vlan_tci, bool log); 2061 2062 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, 2063 const struct sw_flow_key *key, 2064 struct sw_flow_actions **sfa, 2065 __be16 eth_type, __be16 vlan_tci, 2066 bool log, bool last) 2067 { 2068 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 2069 const struct nlattr *probability, *actions; 2070 const struct nlattr *a; 2071 int rem, start, err; 2072 struct sample_arg arg; 2073 2074 memset(attrs, 0, sizeof(attrs)); 2075 nla_for_each_nested(a, attr, rem) { 2076 int type = nla_type(a); 2077 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 2078 return -EINVAL; 2079 attrs[type] = a; 2080 } 2081 if (rem) 2082 return -EINVAL; 2083 2084 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 2085 if (!probability || nla_len(probability) != sizeof(u32)) 2086 return -EINVAL; 2087 2088 actions = 
attrs[OVS_SAMPLE_ATTR_ACTIONS]; 2089 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 2090 return -EINVAL; 2091 2092 /* validation done, copy sample action. */ 2093 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 2094 if (start < 0) 2095 return start; 2096 2097 /* When both skb and flow may be changed, put the sample 2098 * into a deferred fifo. On the other hand, if only skb 2099 * may be modified, the actions can be executed in place. 2100 * 2101 * Do this analysis at the flow installation time. 2102 * Set 'clone_action->exec' to true if the actions can be 2103 * executed without being deferred. 2104 * 2105 * If the sample is the last action, it can always be excuted 2106 * rather than deferred. 2107 */ 2108 arg.exec = last || !actions_may_change_flow(actions); 2109 arg.probability = nla_get_u32(probability); 2110 2111 err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg), 2112 log); 2113 if (err) 2114 return err; 2115 2116 err = __ovs_nla_copy_actions(net, actions, key, sfa, 2117 eth_type, vlan_tci, log); 2118 2119 if (err) 2120 return err; 2121 2122 add_nested_action_end(*sfa, start); 2123 2124 return 0; 2125 } 2126 2127 void ovs_match_init(struct sw_flow_match *match, 2128 struct sw_flow_key *key, 2129 bool reset_key, 2130 struct sw_flow_mask *mask) 2131 { 2132 memset(match, 0, sizeof(*match)); 2133 match->key = key; 2134 match->mask = mask; 2135 2136 if (reset_key) 2137 memset(key, 0, sizeof(*key)); 2138 2139 if (mask) { 2140 memset(&mask->key, 0, sizeof(mask->key)); 2141 mask->range.start = mask->range.end = 0; 2142 } 2143 } 2144 2145 static int validate_geneve_opts(struct sw_flow_key *key) 2146 { 2147 struct geneve_opt *option; 2148 int opts_len = key->tun_opts_len; 2149 bool crit_opt = false; 2150 2151 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 2152 while (opts_len > 0) { 2153 int len; 2154 2155 if (opts_len < sizeof(*option)) 2156 return -EINVAL; 2157 2158 len = 
sizeof(*option) + option->length * 4; 2159 if (len > opts_len) 2160 return -EINVAL; 2161 2162 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 2163 2164 option = (struct geneve_opt *)((u8 *)option + len); 2165 opts_len -= len; 2166 }; 2167 2168 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 2169 2170 return 0; 2171 } 2172 2173 static int validate_and_copy_set_tun(const struct nlattr *attr, 2174 struct sw_flow_actions **sfa, bool log) 2175 { 2176 struct sw_flow_match match; 2177 struct sw_flow_key key; 2178 struct metadata_dst *tun_dst; 2179 struct ip_tunnel_info *tun_info; 2180 struct ovs_tunnel_info *ovs_tun; 2181 struct nlattr *a; 2182 int err = 0, start, opts_type; 2183 2184 ovs_match_init(&match, &key, true, NULL); 2185 opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); 2186 if (opts_type < 0) 2187 return opts_type; 2188 2189 if (key.tun_opts_len) { 2190 switch (opts_type) { 2191 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 2192 err = validate_geneve_opts(&key); 2193 if (err < 0) 2194 return err; 2195 break; 2196 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2197 break; 2198 } 2199 }; 2200 2201 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 2202 if (start < 0) 2203 return start; 2204 2205 tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL); 2206 if (!tun_dst) 2207 return -ENOMEM; 2208 2209 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); 2210 if (err) { 2211 dst_release((struct dst_entry *)tun_dst); 2212 return err; 2213 } 2214 2215 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 2216 sizeof(*ovs_tun), log); 2217 if (IS_ERR(a)) { 2218 dst_release((struct dst_entry *)tun_dst); 2219 return PTR_ERR(a); 2220 } 2221 2222 ovs_tun = nla_data(a); 2223 ovs_tun->tun_dst = tun_dst; 2224 2225 tun_info = &tun_dst->u.tun_info; 2226 tun_info->mode = IP_TUNNEL_INFO_TX; 2227 if (key.tun_proto == AF_INET6) 2228 tun_info->mode |= IP_TUNNEL_INFO_IPV6; 2229 tun_info->key = key.tun_key; 2230 2231 /* We need to store the 
options in the action itself since 2232 * everything else will go away after flow setup. We can append 2233 * it to tun_info and then point there. 2234 */ 2235 ip_tunnel_info_opts_set(tun_info, 2236 TUN_METADATA_OPTS(&key, key.tun_opts_len), 2237 key.tun_opts_len); 2238 add_nested_action_end(*sfa, start); 2239 2240 return err; 2241 } 2242 2243 /* Return false if there are any non-masked bits set. 2244 * Mask follows data immediately, before any netlink padding. 2245 */ 2246 static bool validate_masked(u8 *data, int len) 2247 { 2248 u8 *mask = data + len; 2249 2250 while (len--) 2251 if (*data++ & ~*mask++) 2252 return false; 2253 2254 return true; 2255 } 2256 2257 static int validate_set(const struct nlattr *a, 2258 const struct sw_flow_key *flow_key, 2259 struct sw_flow_actions **sfa, bool *skip_copy, 2260 u8 mac_proto, __be16 eth_type, bool masked, bool log) 2261 { 2262 const struct nlattr *ovs_key = nla_data(a); 2263 int key_type = nla_type(ovs_key); 2264 size_t key_len; 2265 2266 /* There can be only one key in a action */ 2267 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 2268 return -EINVAL; 2269 2270 key_len = nla_len(ovs_key); 2271 if (masked) 2272 key_len /= 2; 2273 2274 if (key_type > OVS_KEY_ATTR_MAX || 2275 !check_attr_len(key_len, ovs_key_lens[key_type].len)) 2276 return -EINVAL; 2277 2278 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 2279 return -EINVAL; 2280 2281 switch (key_type) { 2282 const struct ovs_key_ipv4 *ipv4_key; 2283 const struct ovs_key_ipv6 *ipv6_key; 2284 int err; 2285 2286 case OVS_KEY_ATTR_PRIORITY: 2287 case OVS_KEY_ATTR_SKB_MARK: 2288 case OVS_KEY_ATTR_CT_MARK: 2289 case OVS_KEY_ATTR_CT_LABELS: 2290 break; 2291 2292 case OVS_KEY_ATTR_ETHERNET: 2293 if (mac_proto != MAC_PROTO_ETHERNET) 2294 return -EINVAL; 2295 break; 2296 2297 case OVS_KEY_ATTR_TUNNEL: 2298 if (masked) 2299 return -EINVAL; /* Masked tunnel set not supported. 
*/ 2300 2301 *skip_copy = true; 2302 err = validate_and_copy_set_tun(a, sfa, log); 2303 if (err) 2304 return err; 2305 break; 2306 2307 case OVS_KEY_ATTR_IPV4: 2308 if (eth_type != htons(ETH_P_IP)) 2309 return -EINVAL; 2310 2311 ipv4_key = nla_data(ovs_key); 2312 2313 if (masked) { 2314 const struct ovs_key_ipv4 *mask = ipv4_key + 1; 2315 2316 /* Non-writeable fields. */ 2317 if (mask->ipv4_proto || mask->ipv4_frag) 2318 return -EINVAL; 2319 } else { 2320 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 2321 return -EINVAL; 2322 2323 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 2324 return -EINVAL; 2325 } 2326 break; 2327 2328 case OVS_KEY_ATTR_IPV6: 2329 if (eth_type != htons(ETH_P_IPV6)) 2330 return -EINVAL; 2331 2332 ipv6_key = nla_data(ovs_key); 2333 2334 if (masked) { 2335 const struct ovs_key_ipv6 *mask = ipv6_key + 1; 2336 2337 /* Non-writeable fields. */ 2338 if (mask->ipv6_proto || mask->ipv6_frag) 2339 return -EINVAL; 2340 2341 /* Invalid bits in the flow label mask? */ 2342 if (ntohl(mask->ipv6_label) & 0xFFF00000) 2343 return -EINVAL; 2344 } else { 2345 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 2346 return -EINVAL; 2347 2348 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 2349 return -EINVAL; 2350 } 2351 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 2352 return -EINVAL; 2353 2354 break; 2355 2356 case OVS_KEY_ATTR_TCP: 2357 if ((eth_type != htons(ETH_P_IP) && 2358 eth_type != htons(ETH_P_IPV6)) || 2359 flow_key->ip.proto != IPPROTO_TCP) 2360 return -EINVAL; 2361 2362 break; 2363 2364 case OVS_KEY_ATTR_UDP: 2365 if ((eth_type != htons(ETH_P_IP) && 2366 eth_type != htons(ETH_P_IPV6)) || 2367 flow_key->ip.proto != IPPROTO_UDP) 2368 return -EINVAL; 2369 2370 break; 2371 2372 case OVS_KEY_ATTR_MPLS: 2373 if (!eth_p_mpls(eth_type)) 2374 return -EINVAL; 2375 break; 2376 2377 case OVS_KEY_ATTR_SCTP: 2378 if ((eth_type != htons(ETH_P_IP) && 2379 eth_type != htons(ETH_P_IPV6)) || 2380 flow_key->ip.proto != IPPROTO_SCTP) 2381 return -EINVAL; 2382 2383 break; 
2384 2385 default: 2386 return -EINVAL; 2387 } 2388 2389 /* Convert non-masked non-tunnel set actions to masked set actions. */ 2390 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 2391 int start, len = key_len * 2; 2392 struct nlattr *at; 2393 2394 *skip_copy = true; 2395 2396 start = add_nested_action_start(sfa, 2397 OVS_ACTION_ATTR_SET_TO_MASKED, 2398 log); 2399 if (start < 0) 2400 return start; 2401 2402 at = __add_action(sfa, key_type, NULL, len, log); 2403 if (IS_ERR(at)) 2404 return PTR_ERR(at); 2405 2406 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 2407 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 2408 /* Clear non-writeable bits from otherwise writeable fields. */ 2409 if (key_type == OVS_KEY_ATTR_IPV6) { 2410 struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 2411 2412 mask->ipv6_label &= htonl(0x000FFFFF); 2413 } 2414 add_nested_action_end(*sfa, start); 2415 } 2416 2417 return 0; 2418 } 2419 2420 static int validate_userspace(const struct nlattr *attr) 2421 { 2422 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 2423 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 2424 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 2425 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 2426 }; 2427 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 2428 int error; 2429 2430 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 2431 attr, userspace_policy); 2432 if (error) 2433 return error; 2434 2435 if (!a[OVS_USERSPACE_ATTR_PID] || 2436 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 2437 return -EINVAL; 2438 2439 return 0; 2440 } 2441 2442 static int copy_action(const struct nlattr *from, 2443 struct sw_flow_actions **sfa, bool log) 2444 { 2445 int totlen = NLA_ALIGN(from->nla_len); 2446 struct nlattr *to; 2447 2448 to = reserve_sfa_size(sfa, from->nla_len, log); 2449 if (IS_ERR(to)) 2450 return PTR_ERR(to); 2451 2452 memcpy(to, from, totlen); 2453 return 0; 2454 } 2455 2456 static int 
__ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2457 const struct sw_flow_key *key, 2458 struct sw_flow_actions **sfa, 2459 __be16 eth_type, __be16 vlan_tci, bool log) 2460 { 2461 u8 mac_proto = ovs_key_mac_proto(key); 2462 const struct nlattr *a; 2463 int rem, err; 2464 2465 nla_for_each_nested(a, attr, rem) { 2466 /* Expected argument lengths, (u32)-1 for variable length. */ 2467 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 2468 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 2469 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 2470 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 2471 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 2472 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 2473 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2474 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2475 [OVS_ACTION_ATTR_SET] = (u32)-1, 2476 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2477 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2478 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2479 [OVS_ACTION_ATTR_CT] = (u32)-1, 2480 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2481 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2482 [OVS_ACTION_ATTR_POP_ETH] = 0, 2483 }; 2484 const struct ovs_action_push_vlan *vlan; 2485 int type = nla_type(a); 2486 bool skip_copy; 2487 2488 if (type > OVS_ACTION_ATTR_MAX || 2489 (action_lens[type] != nla_len(a) && 2490 action_lens[type] != (u32)-1)) 2491 return -EINVAL; 2492 2493 skip_copy = false; 2494 switch (type) { 2495 case OVS_ACTION_ATTR_UNSPEC: 2496 return -EINVAL; 2497 2498 case OVS_ACTION_ATTR_USERSPACE: 2499 err = validate_userspace(a); 2500 if (err) 2501 return err; 2502 break; 2503 2504 case OVS_ACTION_ATTR_OUTPUT: 2505 if (nla_get_u32(a) >= DP_MAX_PORTS) 2506 return -EINVAL; 2507 break; 2508 2509 case OVS_ACTION_ATTR_TRUNC: { 2510 const struct ovs_action_trunc *trunc = nla_data(a); 2511 2512 if (trunc->max_len < ETH_HLEN) 2513 return -EINVAL; 2514 break; 2515 } 2516 2517 case 
OVS_ACTION_ATTR_HASH: { 2518 const struct ovs_action_hash *act_hash = nla_data(a); 2519 2520 switch (act_hash->hash_alg) { 2521 case OVS_HASH_ALG_L4: 2522 break; 2523 default: 2524 return -EINVAL; 2525 } 2526 2527 break; 2528 } 2529 2530 case OVS_ACTION_ATTR_POP_VLAN: 2531 if (mac_proto != MAC_PROTO_ETHERNET) 2532 return -EINVAL; 2533 vlan_tci = htons(0); 2534 break; 2535 2536 case OVS_ACTION_ATTR_PUSH_VLAN: 2537 if (mac_proto != MAC_PROTO_ETHERNET) 2538 return -EINVAL; 2539 vlan = nla_data(a); 2540 if (!eth_type_vlan(vlan->vlan_tpid)) 2541 return -EINVAL; 2542 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 2543 return -EINVAL; 2544 vlan_tci = vlan->vlan_tci; 2545 break; 2546 2547 case OVS_ACTION_ATTR_RECIRC: 2548 break; 2549 2550 case OVS_ACTION_ATTR_PUSH_MPLS: { 2551 const struct ovs_action_push_mpls *mpls = nla_data(a); 2552 2553 if (!eth_p_mpls(mpls->mpls_ethertype)) 2554 return -EINVAL; 2555 /* Prohibit push MPLS other than to a white list 2556 * for packets that have a known tag order. 2557 */ 2558 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2559 (eth_type != htons(ETH_P_IP) && 2560 eth_type != htons(ETH_P_IPV6) && 2561 eth_type != htons(ETH_P_ARP) && 2562 eth_type != htons(ETH_P_RARP) && 2563 !eth_p_mpls(eth_type))) 2564 return -EINVAL; 2565 eth_type = mpls->mpls_ethertype; 2566 break; 2567 } 2568 2569 case OVS_ACTION_ATTR_POP_MPLS: 2570 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2571 !eth_p_mpls(eth_type)) 2572 return -EINVAL; 2573 2574 /* Disallow subsequent L2.5+ set and mpls_pop actions 2575 * as there is no check here to ensure that the new 2576 * eth_type is valid and thus set actions could 2577 * write off the end of the packet or otherwise 2578 * corrupt it. 2579 * 2580 * Support for these actions is planned using packet 2581 * recirculation. 
2582 */ 2583 eth_type = htons(0); 2584 break; 2585 2586 case OVS_ACTION_ATTR_SET: 2587 err = validate_set(a, key, sfa, 2588 &skip_copy, mac_proto, eth_type, 2589 false, log); 2590 if (err) 2591 return err; 2592 break; 2593 2594 case OVS_ACTION_ATTR_SET_MASKED: 2595 err = validate_set(a, key, sfa, 2596 &skip_copy, mac_proto, eth_type, 2597 true, log); 2598 if (err) 2599 return err; 2600 break; 2601 2602 case OVS_ACTION_ATTR_SAMPLE: { 2603 bool last = nla_is_last(a, rem); 2604 2605 err = validate_and_copy_sample(net, a, key, sfa, 2606 eth_type, vlan_tci, 2607 log, last); 2608 if (err) 2609 return err; 2610 skip_copy = true; 2611 break; 2612 } 2613 2614 case OVS_ACTION_ATTR_CT: 2615 err = ovs_ct_copy_action(net, a, key, sfa, log); 2616 if (err) 2617 return err; 2618 skip_copy = true; 2619 break; 2620 2621 case OVS_ACTION_ATTR_PUSH_ETH: 2622 /* Disallow pushing an Ethernet header if one 2623 * is already present */ 2624 if (mac_proto != MAC_PROTO_NONE) 2625 return -EINVAL; 2626 mac_proto = MAC_PROTO_NONE; 2627 break; 2628 2629 case OVS_ACTION_ATTR_POP_ETH: 2630 if (mac_proto != MAC_PROTO_ETHERNET) 2631 return -EINVAL; 2632 if (vlan_tci & htons(VLAN_TAG_PRESENT)) 2633 return -EINVAL; 2634 mac_proto = MAC_PROTO_ETHERNET; 2635 break; 2636 2637 default: 2638 OVS_NLERR(log, "Unknown Action type %d", type); 2639 return -EINVAL; 2640 } 2641 if (!skip_copy) { 2642 err = copy_action(a, sfa, log); 2643 if (err) 2644 return err; 2645 } 2646 } 2647 2648 if (rem > 0) 2649 return -EINVAL; 2650 2651 return 0; 2652 } 2653 2654 /* 'key' must be the masked key. 
*/ 2655 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2656 const struct sw_flow_key *key, 2657 struct sw_flow_actions **sfa, bool log) 2658 { 2659 int err; 2660 2661 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 2662 if (IS_ERR(*sfa)) 2663 return PTR_ERR(*sfa); 2664 2665 (*sfa)->orig_len = nla_len(attr); 2666 err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, 2667 key->eth.vlan.tci, log); 2668 if (err) 2669 ovs_nla_free_flow_actions(*sfa); 2670 2671 return err; 2672 } 2673 2674 static int sample_action_to_attr(const struct nlattr *attr, 2675 struct sk_buff *skb) 2676 { 2677 struct nlattr *start, *ac_start = NULL, *sample_arg; 2678 int err = 0, rem = nla_len(attr); 2679 const struct sample_arg *arg; 2680 struct nlattr *actions; 2681 2682 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 2683 if (!start) 2684 return -EMSGSIZE; 2685 2686 sample_arg = nla_data(attr); 2687 arg = nla_data(sample_arg); 2688 actions = nla_next(sample_arg, &rem); 2689 2690 if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { 2691 err = -EMSGSIZE; 2692 goto out; 2693 } 2694 2695 ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 2696 if (!ac_start) { 2697 err = -EMSGSIZE; 2698 goto out; 2699 } 2700 2701 err = ovs_nla_put_actions(actions, rem, skb); 2702 2703 out: 2704 if (err) { 2705 nla_nest_cancel(skb, ac_start); 2706 nla_nest_cancel(skb, start); 2707 } else { 2708 nla_nest_end(skb, ac_start); 2709 nla_nest_end(skb, start); 2710 } 2711 2712 return err; 2713 } 2714 2715 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 2716 { 2717 const struct nlattr *ovs_key = nla_data(a); 2718 int key_type = nla_type(ovs_key); 2719 struct nlattr *start; 2720 int err; 2721 2722 switch (key_type) { 2723 case OVS_KEY_ATTR_TUNNEL_INFO: { 2724 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); 2725 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; 2726 2727 start = nla_nest_start(skb, 
OVS_ACTION_ATTR_SET); 2728 if (!start) 2729 return -EMSGSIZE; 2730 2731 err = ip_tun_to_nlattr(skb, &tun_info->key, 2732 ip_tunnel_info_opts(tun_info), 2733 tun_info->options_len, 2734 ip_tunnel_info_af(tun_info)); 2735 if (err) 2736 return err; 2737 nla_nest_end(skb, start); 2738 break; 2739 } 2740 default: 2741 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 2742 return -EMSGSIZE; 2743 break; 2744 } 2745 2746 return 0; 2747 } 2748 2749 static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2750 struct sk_buff *skb) 2751 { 2752 const struct nlattr *ovs_key = nla_data(a); 2753 struct nlattr *nla; 2754 size_t key_len = nla_len(ovs_key) / 2; 2755 2756 /* Revert the conversion we did from a non-masked set action to 2757 * masked set action. 2758 */ 2759 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2760 if (!nla) 2761 return -EMSGSIZE; 2762 2763 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) 2764 return -EMSGSIZE; 2765 2766 nla_nest_end(skb, nla); 2767 return 0; 2768 } 2769 2770 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2771 { 2772 const struct nlattr *a; 2773 int rem, err; 2774 2775 nla_for_each_attr(a, attr, len, rem) { 2776 int type = nla_type(a); 2777 2778 switch (type) { 2779 case OVS_ACTION_ATTR_SET: 2780 err = set_action_to_attr(a, skb); 2781 if (err) 2782 return err; 2783 break; 2784 2785 case OVS_ACTION_ATTR_SET_TO_MASKED: 2786 err = masked_set_action_to_set_action_attr(a, skb); 2787 if (err) 2788 return err; 2789 break; 2790 2791 case OVS_ACTION_ATTR_SAMPLE: 2792 err = sample_action_to_attr(a, skb); 2793 if (err) 2794 return err; 2795 break; 2796 2797 case OVS_ACTION_ATTR_CT: 2798 err = ovs_ct_action_to_attr(nla_data(a), skb); 2799 if (err) 2800 return err; 2801 break; 2802 2803 default: 2804 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2805 return -EMSGSIZE; 2806 break; 2807 } 2808 } 2809 2810 return 0; 2811 } 2812