// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/gso.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>

#include "datapath.h"
#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations on
 * vports, etc.) and writes to other state (flow table modifications,
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
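/* Global mutex serializing all datapath and vport configuration changes, as
 * described in the Locking section above.
 */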
static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);

        return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        ovs_meters_exit(dp);
        kfree(rcu_dereference_raw(dp->upcall_portids));
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node,
                                 lockdep_ovsl_is_held()) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
                                          const struct dp_upcall_info *upcall_info,
                                          bool upcall_result)
{
        struct vport *p = OVS_CB(skb)->input_vport;
        struct vport_upcall_stats_percpu *stats;

        if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
            upcall_info->cmd != OVS_PACKET_CMD_ACTION)
                return;

        stats = this_cpu_ptr(p->upcall_stats);
        u64_stats_update_begin(&stats->syncp);
        if (upcall_result)
                u64_stats_inc(&stats->n_success);
        else
                u64_stats_inc(&stats->n_fail);
        u64_stats_update_end(&stats->syncp);
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}
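/* Per-packet receive path: look the flow up in the flow table and execute its
 * actions, or send an upcall to userspace on a table miss.
 */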
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;
        u32 n_cache_hit;
        int error;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                         &n_mask_hit, &n_cache_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;

                if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
                        upcall.portid =
                                ovs_dp_get_upcall_portid(dp, smp_processor_id());
                else
                        upcall.portid = ovs_vport_find_upcall_portid(p, skb);

                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                switch (error) {
                case 0:
                case -EAGAIN:
                case -ERESTARTSYS:
                case -EINTR:
                        consume_skb(skb);
                        break;
                default:
                        kfree_skb(skb);
                        break;
                }
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        error = ovs_execute_actions(dp, skb, sf_acts, key);
        if (unlikely(error))
                net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
                                    ovs_dp_name(dp), error);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        stats->n_cache_hit += n_cache_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (trace_ovs_dp_upcall_enabled())
                trace_ovs_dp_upcall(dp, skb, key, upcall_info);

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

        ovs_vport_update_upcall_stats(skb, upcall_info, !err);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        unsigned int gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;
        }

        /* Free all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        }
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
                + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;
        u64 hash;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        if (err)
                goto out;

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start_noflag(user_skb,
                                            OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                if (err)
                        goto out;

                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru &&
            nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0 &&
            nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_HASH */
        hash = skb_get_hash_raw(skb);
        if (skb->sw_hash)
                hash |= OVS_PACKET_HASH_SW_BIT;

        if (skb->l4_hash)
                hash |= OVS_PACKET_HASH_L4_BIT;

        if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy().
         */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        consume_skb(user_skb);
        consume_skb(nskb);

        return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = genl_info_userhdr(info);
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        u64 hash;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        if (a[OVS_PACKET_ATTR_HASH]) {
                hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

                __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
                               !!(hash & OVS_PACKET_HASH_SW_BIT),
                               !!(hash & OVS_PACKET_HASH_L4_BIT));
        }

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
        [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static const struct genl_small_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .policy = packet_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_packet_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
        .module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
                mega_stats->n_cache_hit += local_stats.n_cache_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback,
         * see ovs_nla_put_identifier().
         */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);
        else
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'. This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them. (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        if (WARN_ON_ONCE(retval < 0)) {
                kfree_skb(skb);
                skb = ERR_PTR(retval);
        }
        return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = genl_info_userhdr(info);
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_key *key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        key = kzalloc(sizeof(*key), GFP_KERNEL);
        if (!key) {
                error = -ENOMEM;
                goto err_kfree_flow;
        }

        ovs_match_init(&match, key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_key;

        ovs_flow_mask_key(&new_flow->key, key, true, &mask);

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       key, log);
        if (error)
                goto err_kfree_key;

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_key;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request. We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);

        kfree(key);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_key:
        kfree(key);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
                                         const struct nlattr *a,
                                         const struct sw_flow_key *key,
                                         const struct sw_flow_mask *mask,
                                         bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because the mask is only
 * used to get the actions, we add a new function to save some
 * stack space.
 *
 * If there are no key and action attrs, we return 0
 * directly. In that case, the caller will also not use the
 * match. If there is an action attr, we try to get the
 * actions and save them to *acts. Before returning from
 * the function, we reset the match->mask pointer, because
 * we should not return a match object with a dangling
 * reference to the mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
                              struct sw_flow_match *match,
                              struct sw_flow_key *key,
                              struct nlattr **a,
                              struct sw_flow_actions **acts,
                              bool log)
{
        struct sw_flow_mask mask;
        int error = 0;

        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(match, key, true, &mask);
                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
                                          a[OVS_FLOW_ATTR_MASK], log);
                if (error)
                        goto error;
        }

        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                if (!a[OVS_FLOW_ATTR_KEY]) {
                        OVS_NLERR(log,
                                  "Flow key attribute not present in set flow.");
                        error = -EINVAL;
                        goto error;
                }

                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
                                         &mask, log);
                if (IS_ERR(*acts)) {
                        error = PTR_ERR(*acts);
                        goto error;
                }
        }

        /* On success, error is 0. */
error:
        match->mask = NULL;
        return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = genl_info_userhdr(info);
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
                OVS_NLERR(log,
                          "Flow set message rejected, Key attribute missing.");
                return -EINVAL;
        }

        error = ovs_nla_init_match_and_action(net, &match, &key, a,
                                              &acts, log);
        if (error)
                goto error;

        if (acts) {
                /* Can allocate before locking if we have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_SET,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_SET, false,
                                                ufid_flags);

                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions_rcu(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = genl_info_userhdr(info);
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_GET, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = genl_info_userhdr(info);
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                        NULL, log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *)flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (!IS_ERR(reply)) {
                        rcu_read_lock();  /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        if (WARN_ON_ONCE(err < 0)) {
                                kfree_skb(reply);
                                goto out_free;
                        }

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
                                        PTR_ERR(reply));
                }
        }

out_free:
        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
                                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_GET, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_small_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = 0, /* OK for unprivileged users. */
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .policy = flow_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_flow_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .resv_start_op = OVS_FLOW_CMD_SET + 1,
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
        .module = THIS_MODULE,
};

static size_t ovs_dp_cmd_msg_size(void)
{
        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

        msgsize += nla_total_size(IFNAMSIZ);
        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
        msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
        msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */

        return msgsize;
}
/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        struct ovs_dp_megaflow_stats dp_megaflow_stats;
        struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
        int err, pids_len;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
        if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
                          &dp_stats, OVS_DP_ATTR_PAD))
                goto nla_put_failure;

        if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
                          sizeof(struct ovs_dp_megaflow_stats),
                          &dp_megaflow_stats, OVS_DP_ATTR_PAD))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
                        ovs_flow_tbl_masks_cache_size(&dp->table)))
                goto nla_put_failure;

        if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
                pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
                if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
                        goto nla_put_failure;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
        return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                                        const struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb,
                                       struct genl_info *info)
{
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
                             info->attrs);
        if (IS_ERR(dp))
                return;

        pr_warn("%s: Dropping previously announced user features\n",
                ovs_dp_name(dp));
        dp->user_features = 0;
}

static int ovs_dp_set_upcall_portids(struct datapath *dp,
                                     const struct nlattr *ids)
{
        struct dp_nlsk_pids *old, *dp_nlsk_pids;

        if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
                return -EINVAL;

        old = ovsl_dereference(dp->upcall_portids);

        dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
                               GFP_KERNEL);
        if (!dp_nlsk_pids)
                return -ENOMEM;

        dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
        nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));

        rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);

        kfree_rcu(old, rcu);

        return 0;
}

u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
{
        struct dp_nlsk_pids *dp_nlsk_pids;

        dp_nlsk_pids = rcu_dereference(dp->upcall_portids);

        if (dp_nlsk_pids) {
                if (cpu_id < dp_nlsk_pids->n_pids) {
                        return dp_nlsk_pids->pids[cpu_id];
                } else if (dp_nlsk_pids->n_pids > 0 &&
                           cpu_id >= dp_nlsk_pids->n_pids) {
                        /* If the number of netlink PIDs is mismatched with
                         * the number of CPUs as seen by the kernel, log this
                         * and send the upcall to an available socket
                         * (cpu_id % n_pids) in order not to drop packets.
                         */
                        pr_info_ratelimited("cpu_id mismatch with handler threads");
                        return dp_nlsk_pids->pids[cpu_id %
                                                  dp_nlsk_pids->n_pids];
                } else {
                        return 0;
                }
        } else {
                return 0;
        }
}

static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
        u32 user_features = 0, old_features = dp->user_features;
        int err;

        if (a[OVS_DP_ATTR_USER_FEATURES]) {
                user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);

                if (user_features & ~(OVS_DP_F_VPORT_PIDS |
                                      OVS_DP_F_UNALIGNED |
                                      OVS_DP_F_TC_RECIRC_SHARING |
                                      OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
                        return -EOPNOTSUPP;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
                if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
                        return -EOPNOTSUPP;
#endif
        }

        if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
                int err;
                u32 cache_size;

                cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
                err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
                if (err)
                        return err;
        }

        dp->user_features = user_features;

        if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
            a[OVS_DP_ATTR_PER_CPU_PIDS]) {
                /* Upcall Netlink Port IDs have been updated */
                err = ovs_dp_set_upcall_portids(dp,
                                                a[OVS_DP_ATTR_PER_CPU_PIDS]);
                if (err)
                        return err;
        }

        if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
            !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
                tc_skb_ext_tc_enable();
        else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
                 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
                tc_skb_ext_tc_disable();

        return 0;
}
static int ovs_dp_stats_init(struct datapath *dp)
{
        dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
        if (!dp->stats_percpu)
                return -ENOMEM;

        return 0;
}

static int ovs_dp_vport_init(struct datapath *dp)
{
        int i;

        dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
                                  sizeof(struct hlist_head),
                                  GFP_KERNEL);
        if (!dp->ports)
                return -ENOMEM;

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
                INIT_HLIST_HEAD(&dp->ports[i]);

        return 0;
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
        int err;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        reply = ovs_dp_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_destroy_reply;

        ovs_dp_set_net(dp, sock_net(skb->sk));

        /* Allocate table. */
        err = ovs_flow_tbl_init(&dp->table);
        if (err)
                goto err_destroy_dp;

        err = ovs_dp_stats_init(dp);
        if (err)
                goto err_destroy_table;

        err = ovs_dp_vport_init(dp);
        if (err)
                goto err_destroy_stats;

        err = ovs_meters_init(dp);
        if (err)
                goto err_destroy_ports;

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
        parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
                                ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;

        /* So far only local changes have been made, now need the lock. */
        ovs_lock();

        err = ovs_dp_change(dp, a);
        if (err)
                goto err_unlock_and_destroy_meters;

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                if (err == -EEXIST) {
                        /* An outdated user space instance that does not understand
                         * the concept of user_features has attempted to create a new
                         * datapath and is likely to reuse it. Drop all user features.
                         */
                        if (info->genlhdr->version < OVS_DP_VER_FEATURES)
                                ovs_dp_reset_user_features(skb, info);
                }

                goto err_destroy_portids;
        }

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_NEW);
        BUG_ON(err < 0);

        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
        list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, reply, info);
        return 0;

err_destroy_portids:
        kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
        ovs_unlock();
        ovs_meters_exit(dp);
err_destroy_ports:
        kfree(dp->ports);
err_destroy_stats:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
        kfree(dp);
err_destroy_reply:
        kfree_skb(reply);
err:
        return err;
}
/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
        struct flow_table *table = &dp->table;
        int i;

        if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
                tc_skb_ext_tc_disable();

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;
                struct hlist_node *n;

                hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
                        if (vport->port_no != OVSP_LOCAL)
                                ovs_dp_detach_port(vport);
        }

        list_del_rcu(&dp->list_node);

        /* OVSP_LOCAL is the datapath internal port. We need to make sure
         * that all ports in the datapath are destroyed before freeing the
         * datapath itself.
         */
        ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

        /* Flush the sw_flows in the tables. The RCU callback only releases
         * resources such as the dp, ports and tables, which avoids issues
         * such as RCU usage warnings.
         */
        table_instance_flow_flush(table, ovsl_dereference(table->ti),
                                  ovsl_dereference(table->ufid_ti));

        /* RCU destroy the ports, meters and flow tables. */
        call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
                             info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_DEL);
        BUG_ON(err < 0);

        __dp_destroy(dp);
        ovs_unlock();

        ovs_notify(&dp_datapath_genl_family, reply, info);

        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
                             info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto err_unlock_free;

        err = ovs_dp_change(dp, info->attrs);
        if (err)
                goto err_unlock_free;

        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_SET);
        BUG_ON(err < 0);

        ovs_unlock();
        ovs_notify(&dp_datapath_genl_family, reply, info);

        return 0;

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        reply = ovs_dp_cmd_alloc_info();
        if (!reply)
                return -ENOMEM;

        ovs_lock();
        dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
                             info->attrs);
        if (IS_ERR(dp)) {
                err = PTR_ERR(dp);
                goto err_unlock_free;
        }
        err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
                                   info->snd_seq, 0, OVS_DP_CMD_GET);
        BUG_ON(err < 0);
        ovs_unlock();

        return genlmsg_reply(reply, info);

err_unlock_free:
        ovs_unlock();
        kfree_skb(reply);
        return err;
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        ovs_lock();
        list_for_each_entry(dp, &ovs_net->dps, list_node) {
                if (i >= skip &&
                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         OVS_DP_CMD_GET) < 0)
                        break;
                i++;
        }
        ovs_unlock();

        cb->args[0] = i;

        return skb->len;
}

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
                PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
        [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};

static const struct genl_small_ops dp_datapath_genl_ops[] = {
        { .cmd = OVS_DP_CMD_NEW,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_dp_cmd_new
        },
        { .cmd = OVS_DP_CMD_DEL,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_dp_cmd_del
        },
        { .cmd = OVS_DP_CMD_GET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = 0, /* OK for unprivileged users. */
          .doit = ovs_dp_cmd_get,
          .dumpit = ovs_dp_cmd_dump
        },
        { .cmd = OVS_DP_CMD_SET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_dp_cmd_set,
        },
};

static struct genl_family dp_datapath_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX,
        .policy = datapath_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_datapath_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
        .resv_start_op = OVS_DP_CMD_SET + 1,
        .mcgrps = &ovs_dp_datapath_multicast_group,
        .n_mcgrps = 1,
        .module = THIS_MODULE,
};
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_stats(vport, skb))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

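/* For reference, a vport reply built by ovs_vport_cmd_fill_info() carries,
 * after the ovs_header identifying the owning datapath, the following
 * attributes in order: OVS_VPORT_ATTR_PORT_NO, _TYPE, _NAME, _IFINDEX, an
 * optional _NETNSID when the device lives in another net namespace, _STATS,
 * the nested upcall statistics, the upcall portid(s), and finally any
 * type-specific _OPTIONS.
 */
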
static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}

/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}

static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
	unsigned int dev_headroom, max_headroom = 0;
	struct net_device *dev;
	struct vport *vport;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held()) {
			dev = vport->dev;
			dev_headroom = netdev_get_fwd_headroom(dev);
			if (dev_headroom > max_headroom)
				max_headroom = dev_headroom;
		}
	}

	return max_headroom;
}

/* Called with ovs_mutex */
static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
{
	struct vport *vport;
	int i;

	dp->max_headroom = new_headroom;
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held())
			netdev_set_rx_headroom(vport->dev, new_headroom);
	}
}

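/* dp->max_headroom tracks the largest forwarding headroom needed by any
 * device attached to the datapath.  ovs_get_max_headroom() recomputes that
 * maximum and ovs_update_headroom() pushes a new value to every vport via
 * netdev_set_rx_headroom(); keeping all ports at the common maximum is
 * intended to avoid re-allocating skb headroom when packets are forwarded
 * between ports.
 */
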
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);

	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
		return -EOPNOTSUPP;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
	parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
		? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

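/* OVS_VPORT_CMD_SET only allows a subset of vport state to change: the
 * type-specific options and the set of upcall portids.  The vport type
 * itself cannot be changed; a request carrying a different
 * OVS_VPORT_ATTR_TYPE is rejected with -EINVAL.
 */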
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	bool update_headroom = false;
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	unsigned int new_headroom;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
	BUG_ON(err < 0);

	/* The vport deletion may trigger a datapath headroom update. */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		update_headroom = true;

	netdev_reset_rx_headroom(vport->dev);
	ovs_dp_detach_port(vport);

	if (update_headroom) {
		new_headroom = ovs_get_max_headroom(dp);

		if (new_headroom < dp->max_headroom)
			ovs_update_headroom(dp, new_headroom);
	}
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}

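/* Periodic work: every DP_MASKS_REBALANCE_INTERVAL the flow-mask array of
 * each datapath in the namespace is rebalanced so that more frequently used
 * masks are tried earlier during lookup.  The work re-arms itself; it is
 * first scheduled from ovs_init_net() and cancelled in ovs_exit_net().
 */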
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
	[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};

static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};

struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.resv_start_op = OVS_VPORT_CMD_SET + 1,
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};

static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i]);
}

static int __init dp_register_genl(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		err = genl_register_family(dp_genl_families[i]);
		if (err)
			goto error;
	}

	return 0;

error:
	dp_unregister_genl(i);
	return err;
}

static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}

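/* Namespace teardown helpers.  ovs_exit_net() destroys every datapath owned
 * by the dying namespace, but internal vports belonging to datapaths in
 * *other* namespaces may still have their net_device in the dying one;
 * list_vports_from_net() collects those so they can be detached as well.
 */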
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				if (dev_net(vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}

static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from the given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};

static const char * const ovs_drop_reasons[] = {
#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
	OVS_DROP_REASONS(S)
#undef S
};

static struct drop_reason_list drop_reason_list_ovs = {
	.reasons = ovs_drop_reasons,
	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};

static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = action_fifos_init();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error_action_fifos_exit;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
				     &drop_reason_list_ovs);

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
	action_fifos_exit();
error:
	return err;
}

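/* Module unload: teardown largely mirrors dp_init() in reverse.  The
 * rcu_barrier() ensures all pending destroy_dp_rcu() callbacks have run
 * before the vport and flow caches they depend on are torn down.
 */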
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);