1 /* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/geneve.h> 46 #include <net/ip.h> 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/mpls.h> 50 #include <net/vxlan.h> 51 52 #include "flow_netlink.h" 53 54 struct ovs_len_tbl { 55 int len; 56 const struct ovs_len_tbl *next; 57 }; 58 59 #define OVS_ATTR_NESTED -1 60 #define OVS_ATTR_VARIABLE -2 61 62 static void update_range(struct sw_flow_match *match, 63 size_t offset, size_t size, bool is_mask) 64 { 65 struct sw_flow_key_range *range; 66 size_t start = rounddown(offset, sizeof(long)); 67 size_t 
end = roundup(offset + size, sizeof(long)); 68 69 if (!is_mask) 70 range = &match->range; 71 else 72 range = &match->mask->range; 73 74 if (range->start == range->end) { 75 range->start = start; 76 range->end = end; 77 return; 78 } 79 80 if (range->start > start) 81 range->start = start; 82 83 if (range->end < end) 84 range->end = end; 85 } 86 87 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 88 do { \ 89 update_range(match, offsetof(struct sw_flow_key, field), \ 90 sizeof((match)->key->field), is_mask); \ 91 if (is_mask) \ 92 (match)->mask->key.field = value; \ 93 else \ 94 (match)->key->field = value; \ 95 } while (0) 96 97 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 98 do { \ 99 update_range(match, offset, len, is_mask); \ 100 if (is_mask) \ 101 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 102 len); \ 103 else \ 104 memcpy((u8 *)(match)->key + offset, value_p, len); \ 105 } while (0) 106 107 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 108 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 109 value_p, len, is_mask) 110 111 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 112 do { \ 113 update_range(match, offsetof(struct sw_flow_key, field), \ 114 sizeof((match)->key->field), is_mask); \ 115 if (is_mask) \ 116 memset((u8 *)&(match)->mask->key.field, value, \ 117 sizeof((match)->mask->key.field)); \ 118 else \ 119 memset((u8 *)&(match)->key->field, value, \ 120 sizeof((match)->key->field)); \ 121 } while (0) 122 123 static bool match_validate(const struct sw_flow_match *match, 124 u64 key_attrs, u64 mask_attrs, bool log) 125 { 126 u64 key_expected = 0; 127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 128 129 /* The following mask attributes allowed only if they 130 * pass the validation tests. 
*/ 131 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 132 | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) 133 | (1 << OVS_KEY_ATTR_IPV6) 134 | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) 135 | (1 << OVS_KEY_ATTR_TCP) 136 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 137 | (1 << OVS_KEY_ATTR_UDP) 138 | (1 << OVS_KEY_ATTR_SCTP) 139 | (1 << OVS_KEY_ATTR_ICMP) 140 | (1 << OVS_KEY_ATTR_ICMPV6) 141 | (1 << OVS_KEY_ATTR_ARP) 142 | (1 << OVS_KEY_ATTR_ND) 143 | (1 << OVS_KEY_ATTR_MPLS)); 144 145 /* Always allowed mask fields. */ 146 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 147 | (1 << OVS_KEY_ATTR_IN_PORT) 148 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 149 150 /* Check key attributes. */ 151 if (match->key->eth.type == htons(ETH_P_ARP) 152 || match->key->eth.type == htons(ETH_P_RARP)) { 153 key_expected |= 1 << OVS_KEY_ATTR_ARP; 154 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 155 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 156 } 157 158 if (eth_p_mpls(match->key->eth.type)) { 159 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 160 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 161 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 162 } 163 164 if (match->key->eth.type == htons(ETH_P_IP)) { 165 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 166 if (match->mask && match->mask->key.eth.type == htons(0xffff)) { 167 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 168 mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; 169 } 170 171 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 172 if (match->key->ip.proto == IPPROTO_UDP) { 173 key_expected |= 1 << OVS_KEY_ATTR_UDP; 174 if (match->mask && (match->mask->key.ip.proto == 0xff)) 175 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 176 } 177 178 if (match->key->ip.proto == IPPROTO_SCTP) { 179 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 180 if (match->mask && (match->mask->key.ip.proto == 0xff)) 181 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 182 } 183 184 if (match->key->ip.proto == IPPROTO_TCP) { 185 key_expected |= 1 << OVS_KEY_ATTR_TCP; 186 key_expected |= 1 << 
OVS_KEY_ATTR_TCP_FLAGS; 187 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 188 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 189 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 190 } 191 } 192 193 if (match->key->ip.proto == IPPROTO_ICMP) { 194 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 195 if (match->mask && (match->mask->key.ip.proto == 0xff)) 196 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 197 } 198 } 199 } 200 201 if (match->key->eth.type == htons(ETH_P_IPV6)) { 202 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 203 if (match->mask && match->mask->key.eth.type == htons(0xffff)) { 204 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 205 mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; 206 } 207 208 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 209 if (match->key->ip.proto == IPPROTO_UDP) { 210 key_expected |= 1 << OVS_KEY_ATTR_UDP; 211 if (match->mask && (match->mask->key.ip.proto == 0xff)) 212 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 213 } 214 215 if (match->key->ip.proto == IPPROTO_SCTP) { 216 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 217 if (match->mask && (match->mask->key.ip.proto == 0xff)) 218 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 219 } 220 221 if (match->key->ip.proto == IPPROTO_TCP) { 222 key_expected |= 1 << OVS_KEY_ATTR_TCP; 223 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 224 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 225 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 226 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 227 } 228 } 229 230 if (match->key->ip.proto == IPPROTO_ICMPV6) { 231 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 232 if (match->mask && (match->mask->key.ip.proto == 0xff)) 233 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 234 235 if (match->key->tp.src == 236 htons(NDISC_NEIGHBOUR_SOLICITATION) || 237 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 238 key_expected |= 1 << OVS_KEY_ATTR_ND; 239 /* Original direction conntrack tuple 240 * uses the same space as the ND fields 241 * in the key, so both are not allowed 242 * at the same time. 
243 */ 244 mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); 245 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 246 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 247 } 248 } 249 } 250 } 251 252 if ((key_attrs & key_expected) != key_expected) { 253 /* Key attributes check failed. */ 254 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 255 (unsigned long long)key_attrs, 256 (unsigned long long)key_expected); 257 return false; 258 } 259 260 if ((mask_attrs & mask_allowed) != mask_attrs) { 261 /* Mask attributes check failed. */ 262 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 263 (unsigned long long)mask_attrs, 264 (unsigned long long)mask_allowed); 265 return false; 266 } 267 268 return true; 269 } 270 271 size_t ovs_tun_key_attr_size(void) 272 { 273 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 274 * updating this function. 275 */ 276 return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 277 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ 278 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ 279 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 280 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 281 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 282 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 283 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 284 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 285 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 286 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 287 */ 288 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 289 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 290 } 291 292 size_t ovs_key_attr_size(void) 293 { 294 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 295 * updating this function. 
296 */ 297 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); 298 299 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 300 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 301 + ovs_tun_key_attr_size() 302 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 303 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 304 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 305 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 306 + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ 307 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ 308 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 309 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 310 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ 311 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 312 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 313 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 314 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 315 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 316 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 317 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 318 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 319 } 320 321 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { 322 [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, 323 }; 324 325 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 326 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 327 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 328 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 329 [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 330 [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 331 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 332 [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 333 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 334 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 335 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 336 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, 337 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, 338 
.next = ovs_vxlan_ext_key_lens }, 339 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 340 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 341 }; 342 343 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 344 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 345 [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 346 [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 347 [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 348 [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 349 [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 350 [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 351 [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 352 [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 353 [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 354 [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 355 [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 356 [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 357 [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, 358 [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 359 [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 360 [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 361 [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 362 [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 363 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 364 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 365 .next = ovs_tunnel_key_lens, }, 366 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 367 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, 368 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, 369 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, 370 [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, 371 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { 372 .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, 373 
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { 374 .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, 375 }; 376 377 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 378 { 379 return expected_len == attr_len || 380 expected_len == OVS_ATTR_NESTED || 381 expected_len == OVS_ATTR_VARIABLE; 382 } 383 384 static bool is_all_zero(const u8 *fp, size_t size) 385 { 386 int i; 387 388 if (!fp) 389 return false; 390 391 for (i = 0; i < size; i++) 392 if (fp[i]) 393 return false; 394 395 return true; 396 } 397 398 static int __parse_flow_nlattrs(const struct nlattr *attr, 399 const struct nlattr *a[], 400 u64 *attrsp, bool log, bool nz) 401 { 402 const struct nlattr *nla; 403 u64 attrs; 404 int rem; 405 406 attrs = *attrsp; 407 nla_for_each_nested(nla, attr, rem) { 408 u16 type = nla_type(nla); 409 int expected_len; 410 411 if (type > OVS_KEY_ATTR_MAX) { 412 OVS_NLERR(log, "Key type %d is out of range max %d", 413 type, OVS_KEY_ATTR_MAX); 414 return -EINVAL; 415 } 416 417 if (attrs & (1 << type)) { 418 OVS_NLERR(log, "Duplicate key (type %d).", type); 419 return -EINVAL; 420 } 421 422 expected_len = ovs_key_lens[type].len; 423 if (!check_attr_len(nla_len(nla), expected_len)) { 424 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 425 type, nla_len(nla), expected_len); 426 return -EINVAL; 427 } 428 429 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 430 attrs |= 1 << type; 431 a[type] = nla; 432 } 433 } 434 if (rem) { 435 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 436 return -EINVAL; 437 } 438 439 *attrsp = attrs; 440 return 0; 441 } 442 443 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 444 const struct nlattr *a[], u64 *attrsp, 445 bool log) 446 { 447 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 448 } 449 450 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], 451 u64 *attrsp, bool log) 452 { 453 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 454 } 455 456 static int 
genev_tun_opt_from_nlattr(const struct nlattr *a, 457 struct sw_flow_match *match, bool is_mask, 458 bool log) 459 { 460 unsigned long opt_key_offset; 461 462 if (nla_len(a) > sizeof(match->key->tun_opts)) { 463 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", 464 nla_len(a), sizeof(match->key->tun_opts)); 465 return -EINVAL; 466 } 467 468 if (nla_len(a) % 4 != 0) { 469 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", 470 nla_len(a)); 471 return -EINVAL; 472 } 473 474 /* We need to record the length of the options passed 475 * down, otherwise packets with the same format but 476 * additional options will be silently matched. 477 */ 478 if (!is_mask) { 479 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), 480 false); 481 } else { 482 /* This is somewhat unusual because it looks at 483 * both the key and mask while parsing the 484 * attributes (and by extension assumes the key 485 * is parsed first). Normally, we would verify 486 * that each is the correct length and that the 487 * attributes line up in the validate function. 488 * However, that is difficult because this is 489 * variable length and we won't have the 490 * information later. 
491 */ 492 if (match->key->tun_opts_len != nla_len(a)) { 493 OVS_NLERR(log, "Geneve option len %d != mask len %d", 494 match->key->tun_opts_len, nla_len(a)); 495 return -EINVAL; 496 } 497 498 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 499 } 500 501 opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); 502 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 503 nla_len(a), is_mask); 504 return 0; 505 } 506 507 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, 508 struct sw_flow_match *match, bool is_mask, 509 bool log) 510 { 511 struct nlattr *a; 512 int rem; 513 unsigned long opt_key_offset; 514 struct vxlan_metadata opts; 515 516 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 517 518 memset(&opts, 0, sizeof(opts)); 519 nla_for_each_nested(a, attr, rem) { 520 int type = nla_type(a); 521 522 if (type > OVS_VXLAN_EXT_MAX) { 523 OVS_NLERR(log, "VXLAN extension %d out of range max %d", 524 type, OVS_VXLAN_EXT_MAX); 525 return -EINVAL; 526 } 527 528 if (!check_attr_len(nla_len(a), 529 ovs_vxlan_ext_key_lens[type].len)) { 530 OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d", 531 type, nla_len(a), 532 ovs_vxlan_ext_key_lens[type].len); 533 return -EINVAL; 534 } 535 536 switch (type) { 537 case OVS_VXLAN_EXT_GBP: 538 opts.gbp = nla_get_u32(a); 539 break; 540 default: 541 OVS_NLERR(log, "Unknown VXLAN extension attribute %d", 542 type); 543 return -EINVAL; 544 } 545 } 546 if (rem) { 547 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.", 548 rem); 549 return -EINVAL; 550 } 551 552 if (!is_mask) 553 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 554 else 555 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 556 557 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); 558 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), 559 is_mask); 560 return 0; 561 } 562 563 static int ip_tun_from_nlattr(const struct nlattr *attr, 564 struct sw_flow_match *match, bool is_mask, 565 bool 
log) 566 { 567 bool ttl = false, ipv4 = false, ipv6 = false; 568 __be16 tun_flags = 0; 569 int opts_type = 0; 570 struct nlattr *a; 571 int rem; 572 573 nla_for_each_nested(a, attr, rem) { 574 int type = nla_type(a); 575 int err; 576 577 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 578 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 579 type, OVS_TUNNEL_KEY_ATTR_MAX); 580 return -EINVAL; 581 } 582 583 if (!check_attr_len(nla_len(a), 584 ovs_tunnel_key_lens[type].len)) { 585 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 586 type, nla_len(a), ovs_tunnel_key_lens[type].len); 587 return -EINVAL; 588 } 589 590 switch (type) { 591 case OVS_TUNNEL_KEY_ATTR_ID: 592 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 593 nla_get_be64(a), is_mask); 594 tun_flags |= TUNNEL_KEY; 595 break; 596 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 597 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, 598 nla_get_in_addr(a), is_mask); 599 ipv4 = true; 600 break; 601 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 602 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, 603 nla_get_in_addr(a), is_mask); 604 ipv4 = true; 605 break; 606 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: 607 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, 608 nla_get_in6_addr(a), is_mask); 609 ipv6 = true; 610 break; 611 case OVS_TUNNEL_KEY_ATTR_IPV6_DST: 612 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, 613 nla_get_in6_addr(a), is_mask); 614 ipv6 = true; 615 break; 616 case OVS_TUNNEL_KEY_ATTR_TOS: 617 SW_FLOW_KEY_PUT(match, tun_key.tos, 618 nla_get_u8(a), is_mask); 619 break; 620 case OVS_TUNNEL_KEY_ATTR_TTL: 621 SW_FLOW_KEY_PUT(match, tun_key.ttl, 622 nla_get_u8(a), is_mask); 623 ttl = true; 624 break; 625 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 626 tun_flags |= TUNNEL_DONT_FRAGMENT; 627 break; 628 case OVS_TUNNEL_KEY_ATTR_CSUM: 629 tun_flags |= TUNNEL_CSUM; 630 break; 631 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 632 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 633 nla_get_be16(a), is_mask); 634 break; 635 case OVS_TUNNEL_KEY_ATTR_TP_DST: 636 SW_FLOW_KEY_PUT(match, 
tun_key.tp_dst, 637 nla_get_be16(a), is_mask); 638 break; 639 case OVS_TUNNEL_KEY_ATTR_OAM: 640 tun_flags |= TUNNEL_OAM; 641 break; 642 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 643 if (opts_type) { 644 OVS_NLERR(log, "Multiple metadata blocks provided"); 645 return -EINVAL; 646 } 647 648 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 649 if (err) 650 return err; 651 652 tun_flags |= TUNNEL_GENEVE_OPT; 653 opts_type = type; 654 break; 655 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 656 if (opts_type) { 657 OVS_NLERR(log, "Multiple metadata blocks provided"); 658 return -EINVAL; 659 } 660 661 err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 662 if (err) 663 return err; 664 665 tun_flags |= TUNNEL_VXLAN_OPT; 666 opts_type = type; 667 break; 668 case OVS_TUNNEL_KEY_ATTR_PAD: 669 break; 670 default: 671 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 672 type); 673 return -EINVAL; 674 } 675 } 676 677 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 678 if (is_mask) 679 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); 680 else 681 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? 
AF_INET6 : AF_INET, 682 false); 683 684 if (rem > 0) { 685 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.", 686 rem); 687 return -EINVAL; 688 } 689 690 if (ipv4 && ipv6) { 691 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); 692 return -EINVAL; 693 } 694 695 if (!is_mask) { 696 if (!ipv4 && !ipv6) { 697 OVS_NLERR(log, "IP tunnel dst address not specified"); 698 return -EINVAL; 699 } 700 if (ipv4 && !match->key->tun_key.u.ipv4.dst) { 701 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 702 return -EINVAL; 703 } 704 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { 705 OVS_NLERR(log, "IPv6 tunnel dst address is zero"); 706 return -EINVAL; 707 } 708 709 if (!ttl) { 710 OVS_NLERR(log, "IP tunnel TTL not specified."); 711 return -EINVAL; 712 } 713 } 714 715 return opts_type; 716 } 717 718 static int vxlan_opt_to_nlattr(struct sk_buff *skb, 719 const void *tun_opts, int swkey_tun_opts_len) 720 { 721 const struct vxlan_metadata *opts = tun_opts; 722 struct nlattr *nla; 723 724 nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 725 if (!nla) 726 return -EMSGSIZE; 727 728 if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 729 return -EMSGSIZE; 730 731 nla_nest_end(skb, nla); 732 return 0; 733 } 734 735 static int __ip_tun_to_nlattr(struct sk_buff *skb, 736 const struct ip_tunnel_key *output, 737 const void *tun_opts, int swkey_tun_opts_len, 738 unsigned short tun_proto) 739 { 740 if (output->tun_flags & TUNNEL_KEY && 741 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, 742 OVS_TUNNEL_KEY_ATTR_PAD)) 743 return -EMSGSIZE; 744 switch (tun_proto) { 745 case AF_INET: 746 if (output->u.ipv4.src && 747 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, 748 output->u.ipv4.src)) 749 return -EMSGSIZE; 750 if (output->u.ipv4.dst && 751 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, 752 output->u.ipv4.dst)) 753 return -EMSGSIZE; 754 break; 755 case AF_INET6: 756 if (!ipv6_addr_any(&output->u.ipv6.src) && 757 
nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, 758 &output->u.ipv6.src)) 759 return -EMSGSIZE; 760 if (!ipv6_addr_any(&output->u.ipv6.dst) && 761 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, 762 &output->u.ipv6.dst)) 763 return -EMSGSIZE; 764 break; 765 } 766 if (output->tos && 767 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) 768 return -EMSGSIZE; 769 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) 770 return -EMSGSIZE; 771 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 772 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 773 return -EMSGSIZE; 774 if ((output->tun_flags & TUNNEL_CSUM) && 775 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 776 return -EMSGSIZE; 777 if (output->tp_src && 778 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 779 return -EMSGSIZE; 780 if (output->tp_dst && 781 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 782 return -EMSGSIZE; 783 if ((output->tun_flags & TUNNEL_OAM) && 784 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 785 return -EMSGSIZE; 786 if (swkey_tun_opts_len) { 787 if (output->tun_flags & TUNNEL_GENEVE_OPT && 788 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 789 swkey_tun_opts_len, tun_opts)) 790 return -EMSGSIZE; 791 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 792 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 793 return -EMSGSIZE; 794 } 795 796 return 0; 797 } 798 799 static int ip_tun_to_nlattr(struct sk_buff *skb, 800 const struct ip_tunnel_key *output, 801 const void *tun_opts, int swkey_tun_opts_len, 802 unsigned short tun_proto) 803 { 804 struct nlattr *nla; 805 int err; 806 807 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 808 if (!nla) 809 return -EMSGSIZE; 810 811 err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, 812 tun_proto); 813 if (err) 814 return err; 815 816 nla_nest_end(skb, nla); 817 return 0; 818 } 819 820 int ovs_nla_put_tunnel_info(struct sk_buff *skb, 821 struct ip_tunnel_info *tun_info) 822 { 823 return 
__ip_tun_to_nlattr(skb, &tun_info->key, 824 ip_tunnel_info_opts(tun_info), 825 tun_info->options_len, 826 ip_tunnel_info_af(tun_info)); 827 } 828 829 static int encode_vlan_from_nlattrs(struct sw_flow_match *match, 830 const struct nlattr *a[], 831 bool is_mask, bool inner) 832 { 833 __be16 tci = 0; 834 __be16 tpid = 0; 835 836 if (a[OVS_KEY_ATTR_VLAN]) 837 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 838 839 if (a[OVS_KEY_ATTR_ETHERTYPE]) 840 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 841 842 if (likely(!inner)) { 843 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); 844 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); 845 } else { 846 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); 847 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); 848 } 849 return 0; 850 } 851 852 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, 853 u64 key_attrs, bool inner, 854 const struct nlattr **a, bool log) 855 { 856 __be16 tci = 0; 857 858 if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 859 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 860 eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { 861 /* Not a VLAN. */ 862 return 0; 863 } 864 865 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 866 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 867 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); 868 return -EINVAL; 869 } 870 871 if (a[OVS_KEY_ATTR_VLAN]) 872 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 873 874 if (!(tci & htons(VLAN_TAG_PRESENT))) { 875 if (tci) { 876 OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", 877 (inner) ? "C-VLAN" : "VLAN"); 878 return -EINVAL; 879 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { 880 /* Corner case for truncated VLAN header. */ 881 OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", 882 (inner) ? 
"C-VLAN" : "VLAN"); 883 return -EINVAL; 884 } 885 } 886 887 return 1; 888 } 889 890 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, 891 u64 key_attrs, bool inner, 892 const struct nlattr **a, bool log) 893 { 894 __be16 tci = 0; 895 __be16 tpid = 0; 896 bool encap_valid = !!(match->key->eth.vlan.tci & 897 htons(VLAN_TAG_PRESENT)); 898 bool i_encap_valid = !!(match->key->eth.cvlan.tci & 899 htons(VLAN_TAG_PRESENT)); 900 901 if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { 902 /* Not a VLAN. */ 903 return 0; 904 } 905 906 if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { 907 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", 908 (inner) ? "C-VLAN" : "VLAN"); 909 return -EINVAL; 910 } 911 912 if (a[OVS_KEY_ATTR_VLAN]) 913 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 914 915 if (a[OVS_KEY_ATTR_ETHERTYPE]) 916 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 917 918 if (tpid != htons(0xffff)) { 919 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", 920 (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); 921 return -EINVAL; 922 } 923 if (!(tci & htons(VLAN_TAG_PRESENT))) { 924 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", 925 (inner) ? 
"C-VLAN" : "VLAN"); 926 return -EINVAL; 927 } 928 929 return 1; 930 } 931 932 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, 933 u64 *key_attrs, bool inner, 934 const struct nlattr **a, bool is_mask, 935 bool log) 936 { 937 int err; 938 const struct nlattr *encap; 939 940 if (!is_mask) 941 err = validate_vlan_from_nlattrs(match, *key_attrs, inner, 942 a, log); 943 else 944 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, 945 a, log); 946 if (err <= 0) 947 return err; 948 949 err = encode_vlan_from_nlattrs(match, a, is_mask, inner); 950 if (err) 951 return err; 952 953 *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 954 *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 955 *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 956 957 encap = a[OVS_KEY_ATTR_ENCAP]; 958 959 if (!is_mask) 960 err = parse_flow_nlattrs(encap, a, key_attrs, log); 961 else 962 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); 963 964 return err; 965 } 966 967 static int parse_vlan_from_nlattrs(struct sw_flow_match *match, 968 u64 *key_attrs, const struct nlattr **a, 969 bool is_mask, bool log) 970 { 971 int err; 972 bool encap_valid = false; 973 974 err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, 975 is_mask, log); 976 if (err) 977 return err; 978 979 encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); 980 if (encap_valid) { 981 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, 982 is_mask, log); 983 if (err) 984 return err; 985 } 986 987 return 0; 988 } 989 990 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, 991 u64 *attrs, const struct nlattr **a, 992 bool is_mask, bool log) 993 { 994 __be16 eth_type; 995 996 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 997 if (is_mask) { 998 /* Always exact match EtherType. 
*/ 999 eth_type = htons(0xffff); 1000 } else if (!eth_proto_is_802_3(eth_type)) { 1001 OVS_NLERR(log, "EtherType %x is less than min %x", 1002 ntohs(eth_type), ETH_P_802_3_MIN); 1003 return -EINVAL; 1004 } 1005 1006 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 1007 *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1008 return 0; 1009 } 1010 1011 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 1012 u64 *attrs, const struct nlattr **a, 1013 bool is_mask, bool log) 1014 { 1015 u8 mac_proto = MAC_PROTO_ETHERNET; 1016 1017 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 1018 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 1019 1020 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 1021 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 1022 } 1023 1024 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 1025 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 1026 1027 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 1028 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 1029 } 1030 1031 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1032 SW_FLOW_KEY_PUT(match, phy.priority, 1033 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 1034 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1035 } 1036 1037 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1038 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1039 1040 if (is_mask) { 1041 in_port = 0xffffffff; /* Always exact match in_port. 
*/ 1042 } else if (in_port >= DP_MAX_PORTS) { 1043 OVS_NLERR(log, "Port %d exceeds max allowable %d", 1044 in_port, DP_MAX_PORTS); 1045 return -EINVAL; 1046 } 1047 1048 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 1049 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1050 } else if (!is_mask) { 1051 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 1052 } 1053 1054 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1055 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1056 1057 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 1058 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1059 } 1060 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1061 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 1062 is_mask, log) < 0) 1063 return -EINVAL; 1064 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1065 } 1066 1067 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && 1068 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { 1069 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); 1070 1071 if (ct_state & ~CT_SUPPORTED_MASK) { 1072 OVS_NLERR(log, "ct_state flags %08x unsupported", 1073 ct_state); 1074 return -EINVAL; 1075 } 1076 1077 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask); 1078 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); 1079 } 1080 if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && 1081 ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { 1082 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); 1083 1084 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask); 1085 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); 1086 } 1087 if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && 1088 ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { 1089 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); 1090 1091 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); 1092 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); 1093 } 1094 if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && 1095 ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { 1096 const struct ovs_key_ct_labels *cl; 1097 1098 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); 1099 SW_FLOW_KEY_MEMCPY(match, 
ct.labels, cl->ct_labels, 1100 sizeof(*cl), is_mask); 1101 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); 1102 } 1103 if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { 1104 const struct ovs_key_ct_tuple_ipv4 *ct; 1105 1106 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); 1107 1108 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask); 1109 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); 1110 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); 1111 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); 1112 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask); 1113 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); 1114 } 1115 if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { 1116 const struct ovs_key_ct_tuple_ipv6 *ct; 1117 1118 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); 1119 1120 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, 1121 sizeof(match->key->ipv6.ct_orig.src), 1122 is_mask); 1123 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, 1124 sizeof(match->key->ipv6.ct_orig.dst), 1125 is_mask); 1126 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); 1127 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); 1128 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask); 1129 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); 1130 } 1131 1132 /* For layer 3 packets the Ethernet type is provided 1133 * and treated as metadata but no MAC addresses are provided. 1134 */ 1135 if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && 1136 (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) 1137 mac_proto = MAC_PROTO_NONE; 1138 1139 /* Always exact match mac_proto */ 1140 SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 
0xff : mac_proto, is_mask); 1141 1142 if (mac_proto == MAC_PROTO_NONE) 1143 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, 1144 log); 1145 1146 return 0; 1147 } 1148 1149 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1150 u64 attrs, const struct nlattr **a, 1151 bool is_mask, bool log) 1152 { 1153 int err; 1154 1155 err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); 1156 if (err) 1157 return err; 1158 1159 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 1160 const struct ovs_key_ethernet *eth_key; 1161 1162 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1163 SW_FLOW_KEY_MEMCPY(match, eth.src, 1164 eth_key->eth_src, ETH_ALEN, is_mask); 1165 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1166 eth_key->eth_dst, ETH_ALEN, is_mask); 1167 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1168 1169 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1170 /* VLAN attribute is always parsed before getting here since it 1171 * may occur multiple times. 1172 */ 1173 OVS_NLERR(log, "VLAN attribute unexpected."); 1174 return -EINVAL; 1175 } 1176 1177 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1178 err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, 1179 log); 1180 if (err) 1181 return err; 1182 } else if (!is_mask) { 1183 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1184 } 1185 } else if (!match->key->eth.type) { 1186 OVS_NLERR(log, "Either Ethernet header or EtherType is required."); 1187 return -EINVAL; 1188 } 1189 1190 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1191 const struct ovs_key_ipv4 *ipv4_key; 1192 1193 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1194 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 1195 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 1196 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 1197 return -EINVAL; 1198 } 1199 SW_FLOW_KEY_PUT(match, ip.proto, 1200 ipv4_key->ipv4_proto, is_mask); 1201 SW_FLOW_KEY_PUT(match, ip.tos, 1202 ipv4_key->ipv4_tos, is_mask); 1203 SW_FLOW_KEY_PUT(match, 
ip.ttl, 1204 ipv4_key->ipv4_ttl, is_mask); 1205 SW_FLOW_KEY_PUT(match, ip.frag, 1206 ipv4_key->ipv4_frag, is_mask); 1207 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1208 ipv4_key->ipv4_src, is_mask); 1209 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1210 ipv4_key->ipv4_dst, is_mask); 1211 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 1212 } 1213 1214 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 1215 const struct ovs_key_ipv6 *ipv6_key; 1216 1217 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1218 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 1219 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 1220 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 1221 return -EINVAL; 1222 } 1223 1224 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 1225 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 1226 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 1227 return -EINVAL; 1228 } 1229 1230 SW_FLOW_KEY_PUT(match, ipv6.label, 1231 ipv6_key->ipv6_label, is_mask); 1232 SW_FLOW_KEY_PUT(match, ip.proto, 1233 ipv6_key->ipv6_proto, is_mask); 1234 SW_FLOW_KEY_PUT(match, ip.tos, 1235 ipv6_key->ipv6_tclass, is_mask); 1236 SW_FLOW_KEY_PUT(match, ip.ttl, 1237 ipv6_key->ipv6_hlimit, is_mask); 1238 SW_FLOW_KEY_PUT(match, ip.frag, 1239 ipv6_key->ipv6_frag, is_mask); 1240 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 1241 ipv6_key->ipv6_src, 1242 sizeof(match->key->ipv6.addr.src), 1243 is_mask); 1244 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 1245 ipv6_key->ipv6_dst, 1246 sizeof(match->key->ipv6.addr.dst), 1247 is_mask); 1248 1249 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 1250 } 1251 1252 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 1253 const struct ovs_key_arp *arp_key; 1254 1255 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1256 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 1257 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 1258 arp_key->arp_op); 1259 return -EINVAL; 1260 } 1261 1262 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1263 arp_key->arp_sip, is_mask); 1264 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 
1265 arp_key->arp_tip, is_mask); 1266 SW_FLOW_KEY_PUT(match, ip.proto, 1267 ntohs(arp_key->arp_op), is_mask); 1268 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 1269 arp_key->arp_sha, ETH_ALEN, is_mask); 1270 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 1271 arp_key->arp_tha, ETH_ALEN, is_mask); 1272 1273 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1274 } 1275 1276 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1277 const struct ovs_key_mpls *mpls_key; 1278 1279 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 1280 SW_FLOW_KEY_PUT(match, mpls.top_lse, 1281 mpls_key->mpls_lse, is_mask); 1282 1283 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 1284 } 1285 1286 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 1287 const struct ovs_key_tcp *tcp_key; 1288 1289 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 1290 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 1291 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 1292 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 1293 } 1294 1295 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 1296 SW_FLOW_KEY_PUT(match, tp.flags, 1297 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 1298 is_mask); 1299 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 1300 } 1301 1302 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 1303 const struct ovs_key_udp *udp_key; 1304 1305 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 1306 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 1307 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 1308 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 1309 } 1310 1311 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 1312 const struct ovs_key_sctp *sctp_key; 1313 1314 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 1315 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 1316 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 1317 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 1318 } 1319 1320 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 1321 const struct ovs_key_icmp *icmp_key; 1322 1323 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 1324 SW_FLOW_KEY_PUT(match, tp.src, 1325 htons(icmp_key->icmp_type), is_mask); 
1326 SW_FLOW_KEY_PUT(match, tp.dst, 1327 htons(icmp_key->icmp_code), is_mask); 1328 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 1329 } 1330 1331 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 1332 const struct ovs_key_icmpv6 *icmpv6_key; 1333 1334 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 1335 SW_FLOW_KEY_PUT(match, tp.src, 1336 htons(icmpv6_key->icmpv6_type), is_mask); 1337 SW_FLOW_KEY_PUT(match, tp.dst, 1338 htons(icmpv6_key->icmpv6_code), is_mask); 1339 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 1340 } 1341 1342 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 1343 const struct ovs_key_nd *nd_key; 1344 1345 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 1346 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 1347 nd_key->nd_target, 1348 sizeof(match->key->ipv6.nd.target), 1349 is_mask); 1350 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 1351 nd_key->nd_sll, ETH_ALEN, is_mask); 1352 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 1353 nd_key->nd_tll, ETH_ALEN, is_mask); 1354 attrs &= ~(1 << OVS_KEY_ATTR_ND); 1355 } 1356 1357 if (attrs != 0) { 1358 OVS_NLERR(log, "Unknown key attributes %llx", 1359 (unsigned long long)attrs); 1360 return -EINVAL; 1361 } 1362 1363 return 0; 1364 } 1365 1366 static void nlattr_set(struct nlattr *attr, u8 val, 1367 const struct ovs_len_tbl *tbl) 1368 { 1369 struct nlattr *nla; 1370 int rem; 1371 1372 /* The nlattr stream should already have been validated */ 1373 nla_for_each_nested(nla, attr, rem) { 1374 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { 1375 if (tbl[nla_type(nla)].next) 1376 tbl = tbl[nla_type(nla)].next; 1377 nlattr_set(nla, val, tbl); 1378 } else { 1379 memset(nla_data(nla), val, nla_len(nla)); 1380 } 1381 1382 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) 1383 *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; 1384 } 1385 } 1386 1387 static void mask_set_nlattr(struct nlattr *attr, u8 val) 1388 { 1389 nlattr_set(attr, val, ovs_key_lens); 1390 } 1391 1392 /** 1393 * ovs_nla_get_match - parses Netlink attributes into a flow key and 1394 * mask. 
 * In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @net: Used to determine per-namespace field support.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should be of the packet that triggered the creation
 * of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute specifies the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging.  Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 *
 * Returns 0 on success, -ENOMEM if the temporary exact-match mask cannot be
 * allocated, -EINVAL if match_validate() rejects the result, or the error
 * returned by one of the parsing helpers.
 */
int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	/* Scratch attribute table; reused for the key and then for the mask. */
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	int err;

	/* Nothing has been allocated yet, so key errors can return directly. */
	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
	if (err)
		return err;

	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_proto)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		/* From here on 'newmask' may be live: leave via free_newmask. */
		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match on tci. */
		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);

		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
		if (err)
			goto free_newmask;

		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
					   log);
		if (err)
			goto free_newmask;
	}

	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

free_newmask:
	/* 'newmask' is NULL unless it was allocated above. */
	kfree(newmask);
	return err;
}

/* Returns the payload length of the UFID attribute 'attr', or 0 when 'attr'
 * is NULL or its length is outside 1..MAX_UFID_LENGTH (the latter is logged
 * when 'log' is set).
 */
static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t len;

	if (!attr)
		return 0;

	len = nla_len(attr);
	if (len < 1 || len > MAX_UFID_LENGTH) {
		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
			  nla_len(attr), MAX_UFID_LENGTH);
		return 0;
	}

	return len;
}

/* Initializes 'sfid->ufid', returning true if 'attr' contains a valid UFID,
 * or false otherwise.
1506 */ 1507 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, 1508 bool log) 1509 { 1510 sfid->ufid_len = get_ufid_len(attr, log); 1511 if (sfid->ufid_len) 1512 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); 1513 1514 return sfid->ufid_len; 1515 } 1516 1517 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 1518 const struct sw_flow_key *key, bool log) 1519 { 1520 struct sw_flow_key *new_key; 1521 1522 if (ovs_nla_get_ufid(sfid, ufid, log)) 1523 return 0; 1524 1525 /* If UFID was not provided, use unmasked key. */ 1526 new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); 1527 if (!new_key) 1528 return -ENOMEM; 1529 memcpy(new_key, key, sizeof(*key)); 1530 sfid->unmasked_key = new_key; 1531 1532 return 0; 1533 } 1534 1535 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) 1536 { 1537 return attr ? nla_get_u32(attr) : 0; 1538 } 1539 1540 /** 1541 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 1542 * @net: Network namespace. 1543 * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack 1544 * metadata. 1545 * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink 1546 * attributes. 1547 * @attrs: Bit mask for the netlink attributes included in @a. 1548 * @log: Boolean to allow kernel error logging. Normally true, but when 1549 * probing for feature compatibility this should be passed in as false to 1550 * suppress unnecessary error logging. 1551 * 1552 * This parses a series of Netlink attributes that form a flow key, which must 1553 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1554 * get the metadata, that is, the parts of the flow key that cannot be 1555 * extracted from the packet itself. 1556 * 1557 * This must be called before the packet key fields are filled in 'key'. 
1558 */ 1559 1560 int ovs_nla_get_flow_metadata(struct net *net, 1561 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], 1562 u64 attrs, struct sw_flow_key *key, bool log) 1563 { 1564 struct sw_flow_match match; 1565 1566 memset(&match, 0, sizeof(match)); 1567 match.key = key; 1568 1569 key->ct_state = 0; 1570 key->ct_zone = 0; 1571 key->ct_orig_proto = 0; 1572 memset(&key->ct, 0, sizeof(key->ct)); 1573 memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); 1574 memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); 1575 1576 key->phy.in_port = DP_MAX_PORTS; 1577 1578 return metadata_from_nlattrs(net, &match, &attrs, a, false, log); 1579 } 1580 1581 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, 1582 bool is_mask) 1583 { 1584 __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); 1585 1586 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1587 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) 1588 return -EMSGSIZE; 1589 return 0; 1590 } 1591 1592 static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1593 const struct sw_flow_key *output, bool is_mask, 1594 struct sk_buff *skb) 1595 { 1596 struct ovs_key_ethernet *eth_key; 1597 struct nlattr *nla; 1598 struct nlattr *encap = NULL; 1599 struct nlattr *in_encap = NULL; 1600 1601 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1602 goto nla_put_failure; 1603 1604 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1605 goto nla_put_failure; 1606 1607 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1608 goto nla_put_failure; 1609 1610 if ((swkey->tun_proto || is_mask)) { 1611 const void *opts = NULL; 1612 1613 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1614 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1615 1616 if (ip_tun_to_nlattr(skb, &output->tun_key, opts, 1617 swkey->tun_opts_len, swkey->tun_proto)) 1618 goto nla_put_failure; 1619 } 1620 1621 if (swkey->phy.in_port == DP_MAX_PORTS) { 1622 if 
(is_mask && (output->phy.in_port == 0xffff)) 1623 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1624 goto nla_put_failure; 1625 } else { 1626 u16 upper_u16; 1627 upper_u16 = !is_mask ? 0 : 0xffff; 1628 1629 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1630 (upper_u16 << 16) | output->phy.in_port)) 1631 goto nla_put_failure; 1632 } 1633 1634 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1635 goto nla_put_failure; 1636 1637 if (ovs_ct_put_key(swkey, output, skb)) 1638 goto nla_put_failure; 1639 1640 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { 1641 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1642 if (!nla) 1643 goto nla_put_failure; 1644 1645 eth_key = nla_data(nla); 1646 ether_addr_copy(eth_key->eth_src, output->eth.src); 1647 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1648 1649 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { 1650 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) 1651 goto nla_put_failure; 1652 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1653 if (!swkey->eth.vlan.tci) 1654 goto unencap; 1655 1656 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { 1657 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) 1658 goto nla_put_failure; 1659 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1660 if (!swkey->eth.cvlan.tci) 1661 goto unencap; 1662 } 1663 } 1664 1665 if (swkey->eth.type == htons(ETH_P_802_2)) { 1666 /* 1667 * Ethertype 802.2 is represented in the netlink with omitted 1668 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1669 * 0xffff in the mask attribute. Ethertype can also 1670 * be wildcarded. 
1671 */ 1672 if (is_mask && output->eth.type) 1673 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1674 output->eth.type)) 1675 goto nla_put_failure; 1676 goto unencap; 1677 } 1678 } 1679 1680 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1681 goto nla_put_failure; 1682 1683 if (eth_type_vlan(swkey->eth.type)) { 1684 /* There are 3 VLAN tags, we don't know anything about the rest 1685 * of the packet, so truncate here. 1686 */ 1687 WARN_ON_ONCE(!(encap && in_encap)); 1688 goto unencap; 1689 } 1690 1691 if (swkey->eth.type == htons(ETH_P_IP)) { 1692 struct ovs_key_ipv4 *ipv4_key; 1693 1694 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1695 if (!nla) 1696 goto nla_put_failure; 1697 ipv4_key = nla_data(nla); 1698 ipv4_key->ipv4_src = output->ipv4.addr.src; 1699 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1700 ipv4_key->ipv4_proto = output->ip.proto; 1701 ipv4_key->ipv4_tos = output->ip.tos; 1702 ipv4_key->ipv4_ttl = output->ip.ttl; 1703 ipv4_key->ipv4_frag = output->ip.frag; 1704 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1705 struct ovs_key_ipv6 *ipv6_key; 1706 1707 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1708 if (!nla) 1709 goto nla_put_failure; 1710 ipv6_key = nla_data(nla); 1711 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1712 sizeof(ipv6_key->ipv6_src)); 1713 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1714 sizeof(ipv6_key->ipv6_dst)); 1715 ipv6_key->ipv6_label = output->ipv6.label; 1716 ipv6_key->ipv6_proto = output->ip.proto; 1717 ipv6_key->ipv6_tclass = output->ip.tos; 1718 ipv6_key->ipv6_hlimit = output->ip.ttl; 1719 ipv6_key->ipv6_frag = output->ip.frag; 1720 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1721 swkey->eth.type == htons(ETH_P_RARP)) { 1722 struct ovs_key_arp *arp_key; 1723 1724 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1725 if (!nla) 1726 goto nla_put_failure; 1727 arp_key = nla_data(nla); 1728 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1729 
arp_key->arp_sip = output->ipv4.addr.src; 1730 arp_key->arp_tip = output->ipv4.addr.dst; 1731 arp_key->arp_op = htons(output->ip.proto); 1732 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1733 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1734 } else if (eth_p_mpls(swkey->eth.type)) { 1735 struct ovs_key_mpls *mpls_key; 1736 1737 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1738 if (!nla) 1739 goto nla_put_failure; 1740 mpls_key = nla_data(nla); 1741 mpls_key->mpls_lse = output->mpls.top_lse; 1742 } 1743 1744 if ((swkey->eth.type == htons(ETH_P_IP) || 1745 swkey->eth.type == htons(ETH_P_IPV6)) && 1746 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1747 1748 if (swkey->ip.proto == IPPROTO_TCP) { 1749 struct ovs_key_tcp *tcp_key; 1750 1751 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1752 if (!nla) 1753 goto nla_put_failure; 1754 tcp_key = nla_data(nla); 1755 tcp_key->tcp_src = output->tp.src; 1756 tcp_key->tcp_dst = output->tp.dst; 1757 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1758 output->tp.flags)) 1759 goto nla_put_failure; 1760 } else if (swkey->ip.proto == IPPROTO_UDP) { 1761 struct ovs_key_udp *udp_key; 1762 1763 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1764 if (!nla) 1765 goto nla_put_failure; 1766 udp_key = nla_data(nla); 1767 udp_key->udp_src = output->tp.src; 1768 udp_key->udp_dst = output->tp.dst; 1769 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1770 struct ovs_key_sctp *sctp_key; 1771 1772 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1773 if (!nla) 1774 goto nla_put_failure; 1775 sctp_key = nla_data(nla); 1776 sctp_key->sctp_src = output->tp.src; 1777 sctp_key->sctp_dst = output->tp.dst; 1778 } else if (swkey->eth.type == htons(ETH_P_IP) && 1779 swkey->ip.proto == IPPROTO_ICMP) { 1780 struct ovs_key_icmp *icmp_key; 1781 1782 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1783 if (!nla) 1784 goto nla_put_failure; 1785 icmp_key = nla_data(nla); 1786 
icmp_key->icmp_type = ntohs(output->tp.src); 1787 icmp_key->icmp_code = ntohs(output->tp.dst); 1788 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1789 swkey->ip.proto == IPPROTO_ICMPV6) { 1790 struct ovs_key_icmpv6 *icmpv6_key; 1791 1792 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1793 sizeof(*icmpv6_key)); 1794 if (!nla) 1795 goto nla_put_failure; 1796 icmpv6_key = nla_data(nla); 1797 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1798 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1799 1800 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1801 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1802 struct ovs_key_nd *nd_key; 1803 1804 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1805 if (!nla) 1806 goto nla_put_failure; 1807 nd_key = nla_data(nla); 1808 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1809 sizeof(nd_key->nd_target)); 1810 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1811 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1812 } 1813 } 1814 } 1815 1816 unencap: 1817 if (in_encap) 1818 nla_nest_end(skb, in_encap); 1819 if (encap) 1820 nla_nest_end(skb, encap); 1821 1822 return 0; 1823 1824 nla_put_failure: 1825 return -EMSGSIZE; 1826 } 1827 1828 int ovs_nla_put_key(const struct sw_flow_key *swkey, 1829 const struct sw_flow_key *output, int attr, bool is_mask, 1830 struct sk_buff *skb) 1831 { 1832 int err; 1833 struct nlattr *nla; 1834 1835 nla = nla_nest_start(skb, attr); 1836 if (!nla) 1837 return -EMSGSIZE; 1838 err = __ovs_nla_put_key(swkey, output, is_mask, skb); 1839 if (err) 1840 return err; 1841 nla_nest_end(skb, nla); 1842 1843 return 0; 1844 } 1845 1846 /* Called with ovs_mutex or RCU read lock. 
*/ 1847 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) 1848 { 1849 if (ovs_identifier_is_ufid(&flow->id)) 1850 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, 1851 flow->id.ufid); 1852 1853 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, 1854 OVS_FLOW_ATTR_KEY, false, skb); 1855 } 1856 1857 /* Called with ovs_mutex or RCU read lock. */ 1858 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) 1859 { 1860 return ovs_nla_put_key(&flow->key, &flow->key, 1861 OVS_FLOW_ATTR_KEY, false, skb); 1862 } 1863 1864 /* Called with ovs_mutex or RCU read lock. */ 1865 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) 1866 { 1867 return ovs_nla_put_key(&flow->key, &flow->mask->key, 1868 OVS_FLOW_ATTR_MASK, true, skb); 1869 } 1870 1871 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1872 1873 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1874 { 1875 struct sw_flow_actions *sfa; 1876 1877 if (size > MAX_ACTIONS_BUFSIZE) { 1878 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1879 return ERR_PTR(-EINVAL); 1880 } 1881 1882 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1883 if (!sfa) 1884 return ERR_PTR(-ENOMEM); 1885 1886 sfa->actions_len = 0; 1887 return sfa; 1888 } 1889 1890 static void ovs_nla_free_set_action(const struct nlattr *a) 1891 { 1892 const struct nlattr *ovs_key = nla_data(a); 1893 struct ovs_tunnel_info *ovs_tun; 1894 1895 switch (nla_type(ovs_key)) { 1896 case OVS_KEY_ATTR_TUNNEL_INFO: 1897 ovs_tun = nla_data(ovs_key); 1898 dst_release((struct dst_entry *)ovs_tun->tun_dst); 1899 break; 1900 } 1901 } 1902 1903 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1904 { 1905 const struct nlattr *a; 1906 int rem; 1907 1908 if (!sf_acts) 1909 return; 1910 1911 nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { 1912 switch (nla_type(a)) { 1913 case OVS_ACTION_ATTR_SET: 1914 ovs_nla_free_set_action(a); 1915 
break; 1916 case OVS_ACTION_ATTR_CT: 1917 ovs_ct_free_action(a); 1918 break; 1919 } 1920 } 1921 1922 kfree(sf_acts); 1923 } 1924 1925 static void __ovs_nla_free_flow_actions(struct rcu_head *head) 1926 { 1927 ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu)); 1928 } 1929 1930 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1931 * The caller must hold rcu_read_lock for this to be sensible. */ 1932 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts) 1933 { 1934 call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions); 1935 } 1936 1937 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1938 int attr_len, bool log) 1939 { 1940 1941 struct sw_flow_actions *acts; 1942 int new_acts_size; 1943 int req_size = NLA_ALIGN(attr_len); 1944 int next_offset = offsetof(struct sw_flow_actions, actions) + 1945 (*sfa)->actions_len; 1946 1947 if (req_size <= (ksize(*sfa) - next_offset)) 1948 goto out; 1949 1950 new_acts_size = ksize(*sfa) * 2; 1951 1952 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1953 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1954 return ERR_PTR(-EMSGSIZE); 1955 new_acts_size = MAX_ACTIONS_BUFSIZE; 1956 } 1957 1958 acts = nla_alloc_flow_actions(new_acts_size, log); 1959 if (IS_ERR(acts)) 1960 return (void *)acts; 1961 1962 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1963 acts->actions_len = (*sfa)->actions_len; 1964 acts->orig_len = (*sfa)->orig_len; 1965 kfree(*sfa); 1966 *sfa = acts; 1967 1968 out: 1969 (*sfa)->actions_len += req_size; 1970 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1971 } 1972 1973 static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1974 int attrtype, void *data, int len, bool log) 1975 { 1976 struct nlattr *a; 1977 1978 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 1979 if (IS_ERR(a)) 1980 return a; 1981 1982 a->nla_type = attrtype; 1983 a->nla_len = nla_attr_size(len); 1984 1985 if (data) 1986 
memcpy(nla_data(a), data, len); 1987 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1988 1989 return a; 1990 } 1991 1992 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data, 1993 int len, bool log) 1994 { 1995 struct nlattr *a; 1996 1997 a = __add_action(sfa, attrtype, data, len, log); 1998 1999 return PTR_ERR_OR_ZERO(a); 2000 } 2001 2002 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 2003 int attrtype, bool log) 2004 { 2005 int used = (*sfa)->actions_len; 2006 int err; 2007 2008 err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log); 2009 if (err) 2010 return err; 2011 2012 return used; 2013 } 2014 2015 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 2016 int st_offset) 2017 { 2018 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 2019 st_offset); 2020 2021 a->nla_len = sfa->actions_len - st_offset; 2022 } 2023 2024 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2025 const struct sw_flow_key *key, 2026 int depth, struct sw_flow_actions **sfa, 2027 __be16 eth_type, __be16 vlan_tci, bool log); 2028 2029 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, 2030 const struct sw_flow_key *key, int depth, 2031 struct sw_flow_actions **sfa, 2032 __be16 eth_type, __be16 vlan_tci, bool log) 2033 { 2034 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 2035 const struct nlattr *probability, *actions; 2036 const struct nlattr *a; 2037 int rem, start, err, st_acts; 2038 2039 memset(attrs, 0, sizeof(attrs)); 2040 nla_for_each_nested(a, attr, rem) { 2041 int type = nla_type(a); 2042 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 2043 return -EINVAL; 2044 attrs[type] = a; 2045 } 2046 if (rem) 2047 return -EINVAL; 2048 2049 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 2050 if (!probability || nla_len(probability) != sizeof(u32)) 2051 return -EINVAL; 2052 2053 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 
2054 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 2055 return -EINVAL; 2056 2057 /* validation done, copy sample action. */ 2058 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 2059 if (start < 0) 2060 return start; 2061 err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 2062 nla_data(probability), sizeof(u32), log); 2063 if (err) 2064 return err; 2065 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 2066 if (st_acts < 0) 2067 return st_acts; 2068 2069 err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, 2070 eth_type, vlan_tci, log); 2071 if (err) 2072 return err; 2073 2074 add_nested_action_end(*sfa, st_acts); 2075 add_nested_action_end(*sfa, start); 2076 2077 return 0; 2078 } 2079 2080 void ovs_match_init(struct sw_flow_match *match, 2081 struct sw_flow_key *key, 2082 bool reset_key, 2083 struct sw_flow_mask *mask) 2084 { 2085 memset(match, 0, sizeof(*match)); 2086 match->key = key; 2087 match->mask = mask; 2088 2089 if (reset_key) 2090 memset(key, 0, sizeof(*key)); 2091 2092 if (mask) { 2093 memset(&mask->key, 0, sizeof(mask->key)); 2094 mask->range.start = mask->range.end = 0; 2095 } 2096 } 2097 2098 static int validate_geneve_opts(struct sw_flow_key *key) 2099 { 2100 struct geneve_opt *option; 2101 int opts_len = key->tun_opts_len; 2102 bool crit_opt = false; 2103 2104 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 2105 while (opts_len > 0) { 2106 int len; 2107 2108 if (opts_len < sizeof(*option)) 2109 return -EINVAL; 2110 2111 len = sizeof(*option) + option->length * 4; 2112 if (len > opts_len) 2113 return -EINVAL; 2114 2115 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 2116 2117 option = (struct geneve_opt *)((u8 *)option + len); 2118 opts_len -= len; 2119 }; 2120 2121 key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; 2122 2123 return 0; 2124 } 2125 2126 static int validate_and_copy_set_tun(const struct nlattr *attr, 2127 struct sw_flow_actions **sfa, bool log) 2128 { 2129 struct sw_flow_match match; 2130 struct sw_flow_key key; 2131 struct metadata_dst *tun_dst; 2132 struct ip_tunnel_info *tun_info; 2133 struct ovs_tunnel_info *ovs_tun; 2134 struct nlattr *a; 2135 int err = 0, start, opts_type; 2136 2137 ovs_match_init(&match, &key, true, NULL); 2138 opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); 2139 if (opts_type < 0) 2140 return opts_type; 2141 2142 if (key.tun_opts_len) { 2143 switch (opts_type) { 2144 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 2145 err = validate_geneve_opts(&key); 2146 if (err < 0) 2147 return err; 2148 break; 2149 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2150 break; 2151 } 2152 }; 2153 2154 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 2155 if (start < 0) 2156 return start; 2157 2158 tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL); 2159 if (!tun_dst) 2160 return -ENOMEM; 2161 2162 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); 2163 if (err) { 2164 dst_release((struct dst_entry *)tun_dst); 2165 return err; 2166 } 2167 2168 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 2169 sizeof(*ovs_tun), log); 2170 if (IS_ERR(a)) { 2171 dst_release((struct dst_entry *)tun_dst); 2172 return PTR_ERR(a); 2173 } 2174 2175 ovs_tun = nla_data(a); 2176 ovs_tun->tun_dst = tun_dst; 2177 2178 tun_info = &tun_dst->u.tun_info; 2179 tun_info->mode = IP_TUNNEL_INFO_TX; 2180 if (key.tun_proto == AF_INET6) 2181 tun_info->mode |= IP_TUNNEL_INFO_IPV6; 2182 tun_info->key = key.tun_key; 2183 2184 /* We need to store the options in the action itself since 2185 * everything else will go away after flow setup. We can append 2186 * it to tun_info and then point there. 
2187 */ 2188 ip_tunnel_info_opts_set(tun_info, 2189 TUN_METADATA_OPTS(&key, key.tun_opts_len), 2190 key.tun_opts_len); 2191 add_nested_action_end(*sfa, start); 2192 2193 return err; 2194 } 2195 2196 /* Return false if there are any non-masked bits set. 2197 * Mask follows data immediately, before any netlink padding. 2198 */ 2199 static bool validate_masked(u8 *data, int len) 2200 { 2201 u8 *mask = data + len; 2202 2203 while (len--) 2204 if (*data++ & ~*mask++) 2205 return false; 2206 2207 return true; 2208 } 2209 2210 static int validate_set(const struct nlattr *a, 2211 const struct sw_flow_key *flow_key, 2212 struct sw_flow_actions **sfa, bool *skip_copy, 2213 u8 mac_proto, __be16 eth_type, bool masked, bool log) 2214 { 2215 const struct nlattr *ovs_key = nla_data(a); 2216 int key_type = nla_type(ovs_key); 2217 size_t key_len; 2218 2219 /* There can be only one key in a action */ 2220 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 2221 return -EINVAL; 2222 2223 key_len = nla_len(ovs_key); 2224 if (masked) 2225 key_len /= 2; 2226 2227 if (key_type > OVS_KEY_ATTR_MAX || 2228 !check_attr_len(key_len, ovs_key_lens[key_type].len)) 2229 return -EINVAL; 2230 2231 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 2232 return -EINVAL; 2233 2234 switch (key_type) { 2235 const struct ovs_key_ipv4 *ipv4_key; 2236 const struct ovs_key_ipv6 *ipv6_key; 2237 int err; 2238 2239 case OVS_KEY_ATTR_PRIORITY: 2240 case OVS_KEY_ATTR_SKB_MARK: 2241 case OVS_KEY_ATTR_CT_MARK: 2242 case OVS_KEY_ATTR_CT_LABELS: 2243 break; 2244 2245 case OVS_KEY_ATTR_ETHERNET: 2246 if (mac_proto != MAC_PROTO_ETHERNET) 2247 return -EINVAL; 2248 break; 2249 2250 case OVS_KEY_ATTR_TUNNEL: 2251 if (masked) 2252 return -EINVAL; /* Masked tunnel set not supported. 
*/ 2253 2254 *skip_copy = true; 2255 err = validate_and_copy_set_tun(a, sfa, log); 2256 if (err) 2257 return err; 2258 break; 2259 2260 case OVS_KEY_ATTR_IPV4: 2261 if (eth_type != htons(ETH_P_IP)) 2262 return -EINVAL; 2263 2264 ipv4_key = nla_data(ovs_key); 2265 2266 if (masked) { 2267 const struct ovs_key_ipv4 *mask = ipv4_key + 1; 2268 2269 /* Non-writeable fields. */ 2270 if (mask->ipv4_proto || mask->ipv4_frag) 2271 return -EINVAL; 2272 } else { 2273 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 2274 return -EINVAL; 2275 2276 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 2277 return -EINVAL; 2278 } 2279 break; 2280 2281 case OVS_KEY_ATTR_IPV6: 2282 if (eth_type != htons(ETH_P_IPV6)) 2283 return -EINVAL; 2284 2285 ipv6_key = nla_data(ovs_key); 2286 2287 if (masked) { 2288 const struct ovs_key_ipv6 *mask = ipv6_key + 1; 2289 2290 /* Non-writeable fields. */ 2291 if (mask->ipv6_proto || mask->ipv6_frag) 2292 return -EINVAL; 2293 2294 /* Invalid bits in the flow label mask? */ 2295 if (ntohl(mask->ipv6_label) & 0xFFF00000) 2296 return -EINVAL; 2297 } else { 2298 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 2299 return -EINVAL; 2300 2301 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 2302 return -EINVAL; 2303 } 2304 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 2305 return -EINVAL; 2306 2307 break; 2308 2309 case OVS_KEY_ATTR_TCP: 2310 if ((eth_type != htons(ETH_P_IP) && 2311 eth_type != htons(ETH_P_IPV6)) || 2312 flow_key->ip.proto != IPPROTO_TCP) 2313 return -EINVAL; 2314 2315 break; 2316 2317 case OVS_KEY_ATTR_UDP: 2318 if ((eth_type != htons(ETH_P_IP) && 2319 eth_type != htons(ETH_P_IPV6)) || 2320 flow_key->ip.proto != IPPROTO_UDP) 2321 return -EINVAL; 2322 2323 break; 2324 2325 case OVS_KEY_ATTR_MPLS: 2326 if (!eth_p_mpls(eth_type)) 2327 return -EINVAL; 2328 break; 2329 2330 case OVS_KEY_ATTR_SCTP: 2331 if ((eth_type != htons(ETH_P_IP) && 2332 eth_type != htons(ETH_P_IPV6)) || 2333 flow_key->ip.proto != IPPROTO_SCTP) 2334 return -EINVAL; 2335 2336 break; 
2337 2338 default: 2339 return -EINVAL; 2340 } 2341 2342 /* Convert non-masked non-tunnel set actions to masked set actions. */ 2343 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 2344 int start, len = key_len * 2; 2345 struct nlattr *at; 2346 2347 *skip_copy = true; 2348 2349 start = add_nested_action_start(sfa, 2350 OVS_ACTION_ATTR_SET_TO_MASKED, 2351 log); 2352 if (start < 0) 2353 return start; 2354 2355 at = __add_action(sfa, key_type, NULL, len, log); 2356 if (IS_ERR(at)) 2357 return PTR_ERR(at); 2358 2359 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 2360 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 2361 /* Clear non-writeable bits from otherwise writeable fields. */ 2362 if (key_type == OVS_KEY_ATTR_IPV6) { 2363 struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 2364 2365 mask->ipv6_label &= htonl(0x000FFFFF); 2366 } 2367 add_nested_action_end(*sfa, start); 2368 } 2369 2370 return 0; 2371 } 2372 2373 static int validate_userspace(const struct nlattr *attr) 2374 { 2375 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 2376 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 2377 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 2378 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 2379 }; 2380 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 2381 int error; 2382 2383 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 2384 attr, userspace_policy); 2385 if (error) 2386 return error; 2387 2388 if (!a[OVS_USERSPACE_ATTR_PID] || 2389 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 2390 return -EINVAL; 2391 2392 return 0; 2393 } 2394 2395 static int copy_action(const struct nlattr *from, 2396 struct sw_flow_actions **sfa, bool log) 2397 { 2398 int totlen = NLA_ALIGN(from->nla_len); 2399 struct nlattr *to; 2400 2401 to = reserve_sfa_size(sfa, from->nla_len, log); 2402 if (IS_ERR(to)) 2403 return PTR_ERR(to); 2404 2405 memcpy(to, from, totlen); 2406 return 0; 2407 } 2408 2409 static int 
__ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2410 const struct sw_flow_key *key, 2411 int depth, struct sw_flow_actions **sfa, 2412 __be16 eth_type, __be16 vlan_tci, bool log) 2413 { 2414 u8 mac_proto = ovs_key_mac_proto(key); 2415 const struct nlattr *a; 2416 int rem, err; 2417 2418 if (depth >= SAMPLE_ACTION_DEPTH) 2419 return -EOVERFLOW; 2420 2421 nla_for_each_nested(a, attr, rem) { 2422 /* Expected argument lengths, (u32)-1 for variable length. */ 2423 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 2424 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 2425 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 2426 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 2427 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 2428 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 2429 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2430 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2431 [OVS_ACTION_ATTR_SET] = (u32)-1, 2432 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2433 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2434 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2435 [OVS_ACTION_ATTR_CT] = (u32)-1, 2436 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2437 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2438 [OVS_ACTION_ATTR_POP_ETH] = 0, 2439 }; 2440 const struct ovs_action_push_vlan *vlan; 2441 int type = nla_type(a); 2442 bool skip_copy; 2443 2444 if (type > OVS_ACTION_ATTR_MAX || 2445 (action_lens[type] != nla_len(a) && 2446 action_lens[type] != (u32)-1)) 2447 return -EINVAL; 2448 2449 skip_copy = false; 2450 switch (type) { 2451 case OVS_ACTION_ATTR_UNSPEC: 2452 return -EINVAL; 2453 2454 case OVS_ACTION_ATTR_USERSPACE: 2455 err = validate_userspace(a); 2456 if (err) 2457 return err; 2458 break; 2459 2460 case OVS_ACTION_ATTR_OUTPUT: 2461 if (nla_get_u32(a) >= DP_MAX_PORTS) 2462 return -EINVAL; 2463 break; 2464 2465 case OVS_ACTION_ATTR_TRUNC: { 2466 const struct ovs_action_trunc *trunc = nla_data(a); 2467 2468 if 
(trunc->max_len < ETH_HLEN) 2469 return -EINVAL; 2470 break; 2471 } 2472 2473 case OVS_ACTION_ATTR_HASH: { 2474 const struct ovs_action_hash *act_hash = nla_data(a); 2475 2476 switch (act_hash->hash_alg) { 2477 case OVS_HASH_ALG_L4: 2478 break; 2479 default: 2480 return -EINVAL; 2481 } 2482 2483 break; 2484 } 2485 2486 case OVS_ACTION_ATTR_POP_VLAN: 2487 if (mac_proto != MAC_PROTO_ETHERNET) 2488 return -EINVAL; 2489 vlan_tci = htons(0); 2490 break; 2491 2492 case OVS_ACTION_ATTR_PUSH_VLAN: 2493 if (mac_proto != MAC_PROTO_ETHERNET) 2494 return -EINVAL; 2495 vlan = nla_data(a); 2496 if (!eth_type_vlan(vlan->vlan_tpid)) 2497 return -EINVAL; 2498 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 2499 return -EINVAL; 2500 vlan_tci = vlan->vlan_tci; 2501 break; 2502 2503 case OVS_ACTION_ATTR_RECIRC: 2504 break; 2505 2506 case OVS_ACTION_ATTR_PUSH_MPLS: { 2507 const struct ovs_action_push_mpls *mpls = nla_data(a); 2508 2509 if (!eth_p_mpls(mpls->mpls_ethertype)) 2510 return -EINVAL; 2511 /* Prohibit push MPLS other than to a white list 2512 * for packets that have a known tag order. 2513 */ 2514 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2515 (eth_type != htons(ETH_P_IP) && 2516 eth_type != htons(ETH_P_IPV6) && 2517 eth_type != htons(ETH_P_ARP) && 2518 eth_type != htons(ETH_P_RARP) && 2519 !eth_p_mpls(eth_type))) 2520 return -EINVAL; 2521 eth_type = mpls->mpls_ethertype; 2522 break; 2523 } 2524 2525 case OVS_ACTION_ATTR_POP_MPLS: 2526 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2527 !eth_p_mpls(eth_type)) 2528 return -EINVAL; 2529 2530 /* Disallow subsequent L2.5+ set and mpls_pop actions 2531 * as there is no check here to ensure that the new 2532 * eth_type is valid and thus set actions could 2533 * write off the end of the packet or otherwise 2534 * corrupt it. 2535 * 2536 * Support for these actions is planned using packet 2537 * recirculation. 
2538 */ 2539 eth_type = htons(0); 2540 break; 2541 2542 case OVS_ACTION_ATTR_SET: 2543 err = validate_set(a, key, sfa, 2544 &skip_copy, mac_proto, eth_type, 2545 false, log); 2546 if (err) 2547 return err; 2548 break; 2549 2550 case OVS_ACTION_ATTR_SET_MASKED: 2551 err = validate_set(a, key, sfa, 2552 &skip_copy, mac_proto, eth_type, 2553 true, log); 2554 if (err) 2555 return err; 2556 break; 2557 2558 case OVS_ACTION_ATTR_SAMPLE: 2559 err = validate_and_copy_sample(net, a, key, depth, sfa, 2560 eth_type, vlan_tci, log); 2561 if (err) 2562 return err; 2563 skip_copy = true; 2564 break; 2565 2566 case OVS_ACTION_ATTR_CT: 2567 err = ovs_ct_copy_action(net, a, key, sfa, log); 2568 if (err) 2569 return err; 2570 skip_copy = true; 2571 break; 2572 2573 case OVS_ACTION_ATTR_PUSH_ETH: 2574 /* Disallow pushing an Ethernet header if one 2575 * is already present */ 2576 if (mac_proto != MAC_PROTO_NONE) 2577 return -EINVAL; 2578 mac_proto = MAC_PROTO_NONE; 2579 break; 2580 2581 case OVS_ACTION_ATTR_POP_ETH: 2582 if (mac_proto != MAC_PROTO_ETHERNET) 2583 return -EINVAL; 2584 if (vlan_tci & htons(VLAN_TAG_PRESENT)) 2585 return -EINVAL; 2586 mac_proto = MAC_PROTO_ETHERNET; 2587 break; 2588 2589 default: 2590 OVS_NLERR(log, "Unknown Action type %d", type); 2591 return -EINVAL; 2592 } 2593 if (!skip_copy) { 2594 err = copy_action(a, sfa, log); 2595 if (err) 2596 return err; 2597 } 2598 } 2599 2600 if (rem > 0) 2601 return -EINVAL; 2602 2603 return 0; 2604 } 2605 2606 /* 'key' must be the masked key. 
*/ 2607 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2608 const struct sw_flow_key *key, 2609 struct sw_flow_actions **sfa, bool log) 2610 { 2611 int err; 2612 2613 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 2614 if (IS_ERR(*sfa)) 2615 return PTR_ERR(*sfa); 2616 2617 (*sfa)->orig_len = nla_len(attr); 2618 err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, 2619 key->eth.vlan.tci, log); 2620 if (err) 2621 ovs_nla_free_flow_actions(*sfa); 2622 2623 return err; 2624 } 2625 2626 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 2627 { 2628 const struct nlattr *a; 2629 struct nlattr *start; 2630 int err = 0, rem; 2631 2632 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 2633 if (!start) 2634 return -EMSGSIZE; 2635 2636 nla_for_each_nested(a, attr, rem) { 2637 int type = nla_type(a); 2638 struct nlattr *st_sample; 2639 2640 switch (type) { 2641 case OVS_SAMPLE_ATTR_PROBABILITY: 2642 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 2643 sizeof(u32), nla_data(a))) 2644 return -EMSGSIZE; 2645 break; 2646 case OVS_SAMPLE_ATTR_ACTIONS: 2647 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 2648 if (!st_sample) 2649 return -EMSGSIZE; 2650 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 2651 if (err) 2652 return err; 2653 nla_nest_end(skb, st_sample); 2654 break; 2655 } 2656 } 2657 2658 nla_nest_end(skb, start); 2659 return err; 2660 } 2661 2662 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 2663 { 2664 const struct nlattr *ovs_key = nla_data(a); 2665 int key_type = nla_type(ovs_key); 2666 struct nlattr *start; 2667 int err; 2668 2669 switch (key_type) { 2670 case OVS_KEY_ATTR_TUNNEL_INFO: { 2671 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); 2672 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; 2673 2674 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2675 if (!start) 2676 return -EMSGSIZE; 2677 2678 err = ip_tun_to_nlattr(skb, 
&tun_info->key, 2679 ip_tunnel_info_opts(tun_info), 2680 tun_info->options_len, 2681 ip_tunnel_info_af(tun_info)); 2682 if (err) 2683 return err; 2684 nla_nest_end(skb, start); 2685 break; 2686 } 2687 default: 2688 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 2689 return -EMSGSIZE; 2690 break; 2691 } 2692 2693 return 0; 2694 } 2695 2696 static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2697 struct sk_buff *skb) 2698 { 2699 const struct nlattr *ovs_key = nla_data(a); 2700 struct nlattr *nla; 2701 size_t key_len = nla_len(ovs_key) / 2; 2702 2703 /* Revert the conversion we did from a non-masked set action to 2704 * masked set action. 2705 */ 2706 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2707 if (!nla) 2708 return -EMSGSIZE; 2709 2710 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) 2711 return -EMSGSIZE; 2712 2713 nla_nest_end(skb, nla); 2714 return 0; 2715 } 2716 2717 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2718 { 2719 const struct nlattr *a; 2720 int rem, err; 2721 2722 nla_for_each_attr(a, attr, len, rem) { 2723 int type = nla_type(a); 2724 2725 switch (type) { 2726 case OVS_ACTION_ATTR_SET: 2727 err = set_action_to_attr(a, skb); 2728 if (err) 2729 return err; 2730 break; 2731 2732 case OVS_ACTION_ATTR_SET_TO_MASKED: 2733 err = masked_set_action_to_set_action_attr(a, skb); 2734 if (err) 2735 return err; 2736 break; 2737 2738 case OVS_ACTION_ATTR_SAMPLE: 2739 err = sample_action_to_attr(a, skb); 2740 if (err) 2741 return err; 2742 break; 2743 2744 case OVS_ACTION_ATTR_CT: 2745 err = ovs_ct_action_to_attr(nla_data(a), skb); 2746 if (err) 2747 return err; 2748 break; 2749 2750 default: 2751 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2752 return -EMSGSIZE; 2753 break; 2754 } 2755 } 2756 2757 return 0; 2758 } 2759