1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2007-2014 Nicira, Inc. 4 */ 5 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8 #include <linux/init.h> 9 #include <linux/module.h> 10 #include <linux/if_arp.h> 11 #include <linux/if_vlan.h> 12 #include <linux/in.h> 13 #include <linux/ip.h> 14 #include <linux/jhash.h> 15 #include <linux/delay.h> 16 #include <linux/time.h> 17 #include <linux/etherdevice.h> 18 #include <linux/genetlink.h> 19 #include <linux/kernel.h> 20 #include <linux/kthread.h> 21 #include <linux/mutex.h> 22 #include <linux/percpu.h> 23 #include <linux/rcupdate.h> 24 #include <linux/tcp.h> 25 #include <linux/udp.h> 26 #include <linux/ethtool.h> 27 #include <linux/wait.h> 28 #include <asm/div64.h> 29 #include <linux/highmem.h> 30 #include <linux/netfilter_bridge.h> 31 #include <linux/netfilter_ipv4.h> 32 #include <linux/inetdevice.h> 33 #include <linux/list.h> 34 #include <linux/openvswitch.h> 35 #include <linux/rculist.h> 36 #include <linux/dmi.h> 37 #include <net/genetlink.h> 38 #include <net/net_namespace.h> 39 #include <net/netns/generic.h> 40 #include <net/pkt_cls.h> 41 42 #include "datapath.h" 43 #include "flow.h" 44 #include "flow_table.h" 45 #include "flow_netlink.h" 46 #include "meter.h" 47 #include "openvswitch_trace.h" 48 #include "vport-internal_dev.h" 49 #include "vport-netdev.h" 50 51 unsigned int ovs_net_id __read_mostly; 52 53 static struct genl_family dp_packet_genl_family; 54 static struct genl_family dp_flow_genl_family; 55 static struct genl_family dp_datapath_genl_family; 56 57 static const struct nla_policy flow_policy[]; 58 59 static const struct genl_multicast_group ovs_dp_flow_multicast_group = { 60 .name = OVS_FLOW_MCGROUP, 61 }; 62 63 static const struct genl_multicast_group ovs_dp_datapath_multicast_group = { 64 .name = OVS_DATAPATH_MCGROUP, 65 }; 66 67 static const struct genl_multicast_group ovs_dp_vport_multicast_group = { 68 .name = OVS_VPORT_MCGROUP, 69 }; 70 71 /* Check if need to build a reply message. 72 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ 73 static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, 74 unsigned int group) 75 { 76 return info->nlhdr->nlmsg_flags & NLM_F_ECHO || 77 genl_has_listeners(family, genl_info_net(info), group); 78 } 79 80 static void ovs_notify(struct genl_family *family, 81 struct sk_buff *skb, struct genl_info *info) 82 { 83 genl_notify(family, skb, info, 0, GFP_KERNEL); 84 } 85 86 /** 87 * DOC: Locking: 88 * 89 * All writes e.g. Writes to device state (add/remove datapath, port, set 90 * operations on vports, etc.), Writes to other state (flow table 91 * modifications, set miscellaneous datapath parameters, etc.) are protected 92 * by ovs_lock. 93 * 94 * Reads are protected by RCU. 95 * 96 * There are a few special cases (mostly stats) that have their own 97 * synchronization but they nest under all of above and don't interact with 98 * each other. 99 * 100 * The RTNL lock nests inside ovs_mutex. 
101 */ 102 103 static DEFINE_MUTEX(ovs_mutex); 104 105 void ovs_lock(void) 106 { 107 mutex_lock(&ovs_mutex); 108 } 109 110 void ovs_unlock(void) 111 { 112 mutex_unlock(&ovs_mutex); 113 } 114 115 #ifdef CONFIG_LOCKDEP 116 int lockdep_ovsl_is_held(void) 117 { 118 if (debug_locks) 119 return lockdep_is_held(&ovs_mutex); 120 else 121 return 1; 122 } 123 #endif 124 125 static struct vport *new_vport(const struct vport_parms *); 126 static int queue_gso_packets(struct datapath *dp, struct sk_buff *, 127 const struct sw_flow_key *, 128 const struct dp_upcall_info *, 129 uint32_t cutlen); 130 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, 131 const struct sw_flow_key *, 132 const struct dp_upcall_info *, 133 uint32_t cutlen); 134 135 static void ovs_dp_masks_rebalance(struct work_struct *work); 136 137 static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *); 138 139 /* Must be called with rcu_read_lock or ovs_mutex. */ 140 const char *ovs_dp_name(const struct datapath *dp) 141 { 142 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); 143 return ovs_vport_name(vport); 144 } 145 146 static int get_dpifindex(const struct datapath *dp) 147 { 148 struct vport *local; 149 int ifindex; 150 151 rcu_read_lock(); 152 153 local = ovs_vport_rcu(dp, OVSP_LOCAL); 154 if (local) 155 ifindex = local->dev->ifindex; 156 else 157 ifindex = 0; 158 159 rcu_read_unlock(); 160 161 return ifindex; 162 } 163 164 static void destroy_dp_rcu(struct rcu_head *rcu) 165 { 166 struct datapath *dp = container_of(rcu, struct datapath, rcu); 167 168 ovs_flow_tbl_destroy(&dp->table); 169 free_percpu(dp->stats_percpu); 170 kfree(dp->ports); 171 ovs_meters_exit(dp); 172 kfree(rcu_dereference_raw(dp->upcall_portids)); 173 kfree(dp); 174 } 175 176 static struct hlist_head *vport_hash_bucket(const struct datapath *dp, 177 u16 port_no) 178 { 179 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; 180 } 181 182 /* Called with ovs_mutex or RCU read lock. */ 183 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) 184 { 185 struct vport *vport; 186 struct hlist_head *head; 187 188 head = vport_hash_bucket(dp, port_no); 189 hlist_for_each_entry_rcu(vport, head, dp_hash_node, 190 lockdep_ovsl_is_held()) { 191 if (vport->port_no == port_no) 192 return vport; 193 } 194 return NULL; 195 } 196 197 /* Called with ovs_mutex. */ 198 static struct vport *new_vport(const struct vport_parms *parms) 199 { 200 struct vport *vport; 201 202 vport = ovs_vport_add(parms); 203 if (!IS_ERR(vport)) { 204 struct datapath *dp = parms->dp; 205 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); 206 207 hlist_add_head_rcu(&vport->dp_hash_node, head); 208 } 209 return vport; 210 } 211 212 void ovs_dp_detach_port(struct vport *p) 213 { 214 ASSERT_OVSL(); 215 216 /* First drop references to device. */ 217 hlist_del_rcu(&p->dp_hash_node); 218 219 /* Then destroy it. */ 220 ovs_vport_del(p); 221 } 222 223 /* Must be called with rcu_read_lock. */ 224 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) 225 { 226 const struct vport *p = OVS_CB(skb)->input_vport; 227 struct datapath *dp = p->dp; 228 struct sw_flow *flow; 229 struct sw_flow_actions *sf_acts; 230 struct dp_stats_percpu *stats; 231 u64 *stats_counter; 232 u32 n_mask_hit; 233 u32 n_cache_hit; 234 int error; 235 236 stats = this_cpu_ptr(dp->stats_percpu); 237 238 /* Look up flow. 
*/ 239 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), 240 &n_mask_hit, &n_cache_hit); 241 if (unlikely(!flow)) { 242 struct dp_upcall_info upcall; 243 244 memset(&upcall, 0, sizeof(upcall)); 245 upcall.cmd = OVS_PACKET_CMD_MISS; 246 247 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) 248 upcall.portid = 249 ovs_dp_get_upcall_portid(dp, smp_processor_id()); 250 else 251 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 252 253 upcall.mru = OVS_CB(skb)->mru; 254 error = ovs_dp_upcall(dp, skb, key, &upcall, 0); 255 if (unlikely(error)) 256 kfree_skb(skb); 257 else 258 consume_skb(skb); 259 stats_counter = &stats->n_missed; 260 goto out; 261 } 262 263 ovs_flow_stats_update(flow, key->tp.flags, skb); 264 sf_acts = rcu_dereference(flow->sf_acts); 265 error = ovs_execute_actions(dp, skb, sf_acts, key); 266 if (unlikely(error)) 267 net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", 268 ovs_dp_name(dp), error); 269 270 stats_counter = &stats->n_hit; 271 272 out: 273 /* Update datapath statistics. */ 274 u64_stats_update_begin(&stats->syncp); 275 (*stats_counter)++; 276 stats->n_mask_hit += n_mask_hit; 277 stats->n_cache_hit += n_cache_hit; 278 u64_stats_update_end(&stats->syncp); 279 } 280 281 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 282 const struct sw_flow_key *key, 283 const struct dp_upcall_info *upcall_info, 284 uint32_t cutlen) 285 { 286 struct dp_stats_percpu *stats; 287 int err; 288 289 if (trace_ovs_dp_upcall_enabled()) 290 trace_ovs_dp_upcall(dp, skb, key, upcall_info); 291 292 if (upcall_info->portid == 0) { 293 err = -ENOTCONN; 294 goto err; 295 } 296 297 if (!skb_is_gso(skb)) 298 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); 299 else 300 err = queue_gso_packets(dp, skb, key, upcall_info, cutlen); 301 if (err) 302 goto err; 303 304 return 0; 305 306 err: 307 stats = this_cpu_ptr(dp->stats_percpu); 308 309 u64_stats_update_begin(&stats->syncp); 310 stats->n_lost++; 311 u64_stats_update_end(&stats->syncp); 312 313 return err; 314 } 315 316 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, 317 const struct sw_flow_key *key, 318 const struct dp_upcall_info *upcall_info, 319 uint32_t cutlen) 320 { 321 unsigned int gso_type = skb_shinfo(skb)->gso_type; 322 struct sw_flow_key later_key; 323 struct sk_buff *segs, *nskb; 324 int err; 325 326 BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET); 327 segs = __skb_gso_segment(skb, NETIF_F_SG, false); 328 if (IS_ERR(segs)) 329 return PTR_ERR(segs); 330 if (segs == NULL) 331 return -EINVAL; 332 333 if (gso_type & SKB_GSO_UDP) { 334 /* The initial flow key extracted by ovs_flow_key_extract() 335 * in this case is for a first fragment, so we need to 336 * properly mark later fragments. 337 */ 338 later_key = *key; 339 later_key.ip.frag = OVS_FRAG_TYPE_LATER; 340 } 341 342 /* Queue all of the segments. */ 343 skb_list_walk_safe(segs, skb, nskb) { 344 if (gso_type & SKB_GSO_UDP && skb != segs) 345 key = &later_key; 346 347 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); 348 if (err) 349 break; 350 351 } 352 353 /* Free all of the segments. 
*/ 354 skb_list_walk_safe(segs, skb, nskb) { 355 if (err) 356 kfree_skb(skb); 357 else 358 consume_skb(skb); 359 } 360 return err; 361 } 362 363 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, 364 unsigned int hdrlen, int actions_attrlen) 365 { 366 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 367 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 368 + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ 369 + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ 370 + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */ 371 372 /* OVS_PACKET_ATTR_USERDATA */ 373 if (upcall_info->userdata) 374 size += NLA_ALIGN(upcall_info->userdata->nla_len); 375 376 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ 377 if (upcall_info->egress_tun_info) 378 size += nla_total_size(ovs_tun_key_attr_size()); 379 380 /* OVS_PACKET_ATTR_ACTIONS */ 381 if (upcall_info->actions_len) 382 size += nla_total_size(actions_attrlen); 383 384 /* OVS_PACKET_ATTR_MRU */ 385 if (upcall_info->mru) 386 size += nla_total_size(sizeof(upcall_info->mru)); 387 388 return size; 389 } 390 391 static void pad_packet(struct datapath *dp, struct sk_buff *skb) 392 { 393 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { 394 size_t plen = NLA_ALIGN(skb->len) - skb->len; 395 396 if (plen > 0) 397 skb_put_zero(skb, plen); 398 } 399 } 400 401 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 402 const struct sw_flow_key *key, 403 const struct dp_upcall_info *upcall_info, 404 uint32_t cutlen) 405 { 406 struct ovs_header *upcall; 407 struct sk_buff *nskb = NULL; 408 struct sk_buff *user_skb = NULL; /* to be queued to userspace */ 409 struct nlattr *nla; 410 size_t len; 411 unsigned int hlen; 412 int err, dp_ifindex; 413 u64 hash; 414 415 dp_ifindex = get_dpifindex(dp); 416 if (!dp_ifindex) 417 return -ENODEV; 418 419 if (skb_vlan_tag_present(skb)) { 420 nskb = skb_clone(skb, GFP_ATOMIC); 421 if (!nskb) 422 return -ENOMEM; 423 424 nskb = __vlan_hwaccel_push_inside(nskb); 425 if (!nskb) 426 return -ENOMEM; 427 428 skb = nskb; 429 } 430 431 if (nla_attr_size(skb->len) > USHRT_MAX) { 432 err = -EFBIG; 433 goto out; 434 } 435 436 /* Complete checksum if needed */ 437 if (skb->ip_summed == CHECKSUM_PARTIAL && 438 (err = skb_csum_hwoffload_help(skb, 0))) 439 goto out; 440 441 /* Older versions of OVS user space enforce alignment of the last 442 * Netlink attribute to NLA_ALIGNTO which would require extensive 443 * padding logic. Only perform zerocopy if padding is not required. 
444 */ 445 if (dp->user_features & OVS_DP_F_UNALIGNED) 446 hlen = skb_zerocopy_headlen(skb); 447 else 448 hlen = skb->len; 449 450 len = upcall_msg_size(upcall_info, hlen - cutlen, 451 OVS_CB(skb)->acts_origlen); 452 user_skb = genlmsg_new(len, GFP_ATOMIC); 453 if (!user_skb) { 454 err = -ENOMEM; 455 goto out; 456 } 457 458 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 459 0, upcall_info->cmd); 460 if (!upcall) { 461 err = -EINVAL; 462 goto out; 463 } 464 upcall->dp_ifindex = dp_ifindex; 465 466 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); 467 if (err) 468 goto out; 469 470 if (upcall_info->userdata) 471 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, 472 nla_len(upcall_info->userdata), 473 nla_data(upcall_info->userdata)); 474 475 if (upcall_info->egress_tun_info) { 476 nla = nla_nest_start_noflag(user_skb, 477 OVS_PACKET_ATTR_EGRESS_TUN_KEY); 478 if (!nla) { 479 err = -EMSGSIZE; 480 goto out; 481 } 482 err = ovs_nla_put_tunnel_info(user_skb, 483 upcall_info->egress_tun_info); 484 if (err) 485 goto out; 486 487 nla_nest_end(user_skb, nla); 488 } 489 490 if (upcall_info->actions_len) { 491 nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); 492 if (!nla) { 493 err = -EMSGSIZE; 494 goto out; 495 } 496 err = ovs_nla_put_actions(upcall_info->actions, 497 upcall_info->actions_len, 498 user_skb); 499 if (!err) 500 nla_nest_end(user_skb, nla); 501 else 502 nla_nest_cancel(user_skb, nla); 503 } 504 505 /* Add OVS_PACKET_ATTR_MRU */ 506 if (upcall_info->mru && 507 nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { 508 err = -ENOBUFS; 509 goto out; 510 } 511 512 /* Add OVS_PACKET_ATTR_LEN when packet is truncated */ 513 if (cutlen > 0 && 514 nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { 515 err = -ENOBUFS; 516 goto out; 517 } 518 519 /* Add OVS_PACKET_ATTR_HASH */ 520 hash = skb_get_hash_raw(skb); 521 if (skb->sw_hash) 522 hash |= OVS_PACKET_HASH_SW_BIT; 523 524 if (skb->l4_hash) 525 hash |= OVS_PACKET_HASH_L4_BIT; 526 527 if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { 528 err = -ENOBUFS; 529 goto out; 530 } 531 532 /* Only reserve room for attribute header, packet data is added 533 * in skb_zerocopy() */ 534 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 535 err = -ENOBUFS; 536 goto out; 537 } 538 nla->nla_len = nla_attr_size(skb->len - cutlen); 539 540 err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen); 541 if (err) 542 goto out; 543 544 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ 545 pad_packet(dp, user_skb); 546 547 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 548 549 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); 550 user_skb = NULL; 551 out: 552 if (err) 553 skb_tx_error(skb); 554 kfree_skb(user_skb); 555 kfree_skb(nskb); 556 return err; 557 } 558 559 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 560 { 561 struct ovs_header *ovs_header = info->userhdr; 562 struct net *net = sock_net(skb->sk); 563 struct nlattr **a = info->attrs; 564 struct sw_flow_actions *acts; 565 struct sk_buff *packet; 566 struct sw_flow *flow; 567 struct sw_flow_actions *sf_acts; 568 struct datapath *dp; 569 struct vport *input_vport; 570 u16 mru = 0; 571 u64 hash; 572 int len; 573 int err; 574 bool log = !a[OVS_PACKET_ATTR_PROBE]; 575 576 err = -EINVAL; 577 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 578 !a[OVS_PACKET_ATTR_ACTIONS]) 579 goto err; 580 581 len = 
nla_len(a[OVS_PACKET_ATTR_PACKET]); 582 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); 583 err = -ENOMEM; 584 if (!packet) 585 goto err; 586 skb_reserve(packet, NET_IP_ALIGN); 587 588 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); 589 590 /* Set packet's mru */ 591 if (a[OVS_PACKET_ATTR_MRU]) { 592 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); 593 packet->ignore_df = 1; 594 } 595 OVS_CB(packet)->mru = mru; 596 597 if (a[OVS_PACKET_ATTR_HASH]) { 598 hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); 599 600 __skb_set_hash(packet, hash & 0xFFFFFFFFULL, 601 !!(hash & OVS_PACKET_HASH_SW_BIT), 602 !!(hash & OVS_PACKET_HASH_L4_BIT)); 603 } 604 605 /* Build an sw_flow for sending this packet. */ 606 flow = ovs_flow_alloc(); 607 err = PTR_ERR(flow); 608 if (IS_ERR(flow)) 609 goto err_kfree_skb; 610 611 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY], 612 packet, &flow->key, log); 613 if (err) 614 goto err_flow_free; 615 616 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS], 617 &flow->key, &acts, log); 618 if (err) 619 goto err_flow_free; 620 621 rcu_assign_pointer(flow->sf_acts, acts); 622 packet->priority = flow->key.phy.priority; 623 packet->mark = flow->key.phy.skb_mark; 624 625 rcu_read_lock(); 626 dp = get_dp_rcu(net, ovs_header->dp_ifindex); 627 err = -ENODEV; 628 if (!dp) 629 goto err_unlock; 630 631 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port); 632 if (!input_vport) 633 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL); 634 635 if (!input_vport) 636 goto err_unlock; 637 638 packet->dev = input_vport->dev; 639 OVS_CB(packet)->input_vport = input_vport; 640 sf_acts = rcu_dereference(flow->sf_acts); 641 642 local_bh_disable(); 643 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); 644 local_bh_enable(); 645 rcu_read_unlock(); 646 647 ovs_flow_free(flow, false); 648 return err; 649 650 err_unlock: 651 rcu_read_unlock(); 652 err_flow_free: 653 ovs_flow_free(flow, false); 654 err_kfree_skb: 655 kfree_skb(packet); 656 err: 657 return err; 658 } 659 660 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 661 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, 662 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 663 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 664 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, 665 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, 666 [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, 667 }; 668 669 static const struct genl_small_ops dp_packet_genl_ops[] = { 670 { .cmd = OVS_PACKET_CMD_EXECUTE, 671 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 672 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ 673 .doit = ovs_packet_cmd_execute 674 } 675 }; 676 677 static struct genl_family dp_packet_genl_family __ro_after_init = { 678 .hdrsize = sizeof(struct ovs_header), 679 .name = OVS_PACKET_FAMILY, 680 .version = OVS_PACKET_VERSION, 681 .maxattr = OVS_PACKET_ATTR_MAX, 682 .policy = packet_policy, 683 .netnsok = true, 684 .parallel_ops = true, 685 .small_ops = dp_packet_genl_ops, 686 .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops), 687 .module = THIS_MODULE, 688 }; 689 690 static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, 691 struct ovs_dp_megaflow_stats *mega_stats) 692 { 693 int i; 694 695 memset(mega_stats, 0, sizeof(*mega_stats)); 696 697 stats->n_flows = ovs_flow_tbl_count(&dp->table); 698 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); 699 700 stats->n_hit = stats->n_missed = stats->n_lost = 0; 701 702 for_each_possible_cpu(i) { 703 const struct dp_stats_percpu *percpu_stats; 704 struct dp_stats_percpu local_stats; 705 unsigned int start; 706 707 percpu_stats = per_cpu_ptr(dp->stats_percpu, i); 708 709 do { 710 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); 711 local_stats = *percpu_stats; 712 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); 713 714 stats->n_hit += local_stats.n_hit; 715 stats->n_missed += local_stats.n_missed; 716 stats->n_lost += local_stats.n_lost; 717 mega_stats->n_mask_hit += local_stats.n_mask_hit; 718 mega_stats->n_cache_hit += local_stats.n_cache_hit; 719 } 720 } 721 722 static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) 723 { 724 return ovs_identifier_is_ufid(sfid) && 725 !(ufid_flags & OVS_UFID_F_OMIT_KEY); 726 } 727 728 static bool should_fill_mask(uint32_t ufid_flags) 729 { 730 return !(ufid_flags & OVS_UFID_F_OMIT_MASK); 731 } 732 733 static bool should_fill_actions(uint32_t ufid_flags) 734 { 735 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); 736 } 737 738 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, 739 const struct sw_flow_id *sfid, 740 uint32_t ufid_flags) 741 { 742 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); 743 744 /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback 745 * see ovs_nla_put_identifier() 746 */ 747 if (sfid && ovs_identifier_is_ufid(sfid)) 748 len += nla_total_size(sfid->ufid_len); 749 else 750 len += nla_total_size(ovs_key_attr_size()); 751 752 /* OVS_FLOW_ATTR_KEY */ 753 if (!sfid || should_fill_key(sfid, ufid_flags)) 754 len += nla_total_size(ovs_key_attr_size()); 755 756 /* OVS_FLOW_ATTR_MASK */ 757 if (should_fill_mask(ufid_flags)) 758 len += nla_total_size(ovs_key_attr_size()); 759 760 /* OVS_FLOW_ATTR_ACTIONS */ 761 if (should_fill_actions(ufid_flags)) 762 len += nla_total_size(acts->orig_len); 763 764 return len 765 + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 766 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 767 + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */ 768 } 769 770 /* Called with ovs_mutex or RCU read lock. 
*/ 771 static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, 772 struct sk_buff *skb) 773 { 774 struct ovs_flow_stats stats; 775 __be16 tcp_flags; 776 unsigned long used; 777 778 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); 779 780 if (used && 781 nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used), 782 OVS_FLOW_ATTR_PAD)) 783 return -EMSGSIZE; 784 785 if (stats.n_packets && 786 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS, 787 sizeof(struct ovs_flow_stats), &stats, 788 OVS_FLOW_ATTR_PAD)) 789 return -EMSGSIZE; 790 791 if ((u8)ntohs(tcp_flags) && 792 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) 793 return -EMSGSIZE; 794 795 return 0; 796 } 797 798 /* Called with ovs_mutex or RCU read lock. */ 799 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, 800 struct sk_buff *skb, int skb_orig_len) 801 { 802 struct nlattr *start; 803 int err; 804 805 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if 806 * this is the first flow to be dumped into 'skb'. This is unusual for 807 * Netlink but individual action lists can be longer than 808 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. 809 * The userspace caller can always fetch the actions separately if it 810 * really wants them. (Most userspace callers in fact don't care.) 811 * 812 * This can only fail for dump operations because the skb is always 813 * properly sized for single flows. 814 */ 815 start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS); 816 if (start) { 817 const struct sw_flow_actions *sf_acts; 818 819 sf_acts = rcu_dereference_ovsl(flow->sf_acts); 820 err = ovs_nla_put_actions(sf_acts->actions, 821 sf_acts->actions_len, skb); 822 823 if (!err) 824 nla_nest_end(skb, start); 825 else { 826 if (skb_orig_len) 827 return err; 828 829 nla_nest_cancel(skb, start); 830 } 831 } else if (skb_orig_len) { 832 return -EMSGSIZE; 833 } 834 835 return 0; 836 } 837 838 /* Called with ovs_mutex or RCU read lock. */ 839 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, 840 struct sk_buff *skb, u32 portid, 841 u32 seq, u32 flags, u8 cmd, u32 ufid_flags) 842 { 843 const int skb_orig_len = skb->len; 844 struct ovs_header *ovs_header; 845 int err; 846 847 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, 848 flags, cmd); 849 if (!ovs_header) 850 return -EMSGSIZE; 851 852 ovs_header->dp_ifindex = dp_ifindex; 853 854 err = ovs_nla_put_identifier(flow, skb); 855 if (err) 856 goto error; 857 858 if (should_fill_key(&flow->id, ufid_flags)) { 859 err = ovs_nla_put_masked_key(flow, skb); 860 if (err) 861 goto error; 862 } 863 864 if (should_fill_mask(ufid_flags)) { 865 err = ovs_nla_put_mask(flow, skb); 866 if (err) 867 goto error; 868 } 869 870 err = ovs_flow_cmd_fill_stats(flow, skb); 871 if (err) 872 goto error; 873 874 if (should_fill_actions(ufid_flags)) { 875 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); 876 if (err) 877 goto error; 878 } 879 880 genlmsg_end(skb, ovs_header); 881 return 0; 882 883 error: 884 genlmsg_cancel(skb, ovs_header); 885 return err; 886 } 887 888 /* May not be called with RCU read lock. 
*/ 889 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, 890 const struct sw_flow_id *sfid, 891 struct genl_info *info, 892 bool always, 893 uint32_t ufid_flags) 894 { 895 struct sk_buff *skb; 896 size_t len; 897 898 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) 899 return NULL; 900 901 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); 902 skb = genlmsg_new(len, GFP_KERNEL); 903 if (!skb) 904 return ERR_PTR(-ENOMEM); 905 906 return skb; 907 } 908 909 /* Called with ovs_mutex. */ 910 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, 911 int dp_ifindex, 912 struct genl_info *info, u8 cmd, 913 bool always, u32 ufid_flags) 914 { 915 struct sk_buff *skb; 916 int retval; 917 918 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), 919 &flow->id, info, always, ufid_flags); 920 if (IS_ERR_OR_NULL(skb)) 921 return skb; 922 923 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, 924 info->snd_portid, info->snd_seq, 0, 925 cmd, ufid_flags); 926 if (WARN_ON_ONCE(retval < 0)) { 927 kfree_skb(skb); 928 skb = ERR_PTR(retval); 929 } 930 return skb; 931 } 932 933 static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) 934 { 935 struct net *net = sock_net(skb->sk); 936 struct nlattr **a = info->attrs; 937 struct ovs_header *ovs_header = info->userhdr; 938 struct sw_flow *flow = NULL, *new_flow; 939 struct sw_flow_mask mask; 940 struct sk_buff *reply; 941 struct datapath *dp; 942 struct sw_flow_actions *acts; 943 struct sw_flow_match match; 944 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 945 int error; 946 bool log = !a[OVS_FLOW_ATTR_PROBE]; 947 948 /* Must have key and actions. */ 949 error = -EINVAL; 950 if (!a[OVS_FLOW_ATTR_KEY]) { 951 OVS_NLERR(log, "Flow key attr not present in new flow."); 952 goto error; 953 } 954 if (!a[OVS_FLOW_ATTR_ACTIONS]) { 955 OVS_NLERR(log, "Flow actions attr not present in new flow."); 956 goto error; 957 } 958 959 /* Most of the time we need to allocate a new flow, do it before 960 * locking. 961 */ 962 new_flow = ovs_flow_alloc(); 963 if (IS_ERR(new_flow)) { 964 error = PTR_ERR(new_flow); 965 goto error; 966 } 967 968 /* Extract key. */ 969 ovs_match_init(&match, &new_flow->key, false, &mask); 970 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], 971 a[OVS_FLOW_ATTR_MASK], log); 972 if (error) 973 goto err_kfree_flow; 974 975 /* Extract flow identifier. */ 976 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], 977 &new_flow->key, log); 978 if (error) 979 goto err_kfree_flow; 980 981 /* unmasked key is needed to match when ufid is not used. */ 982 if (ovs_identifier_is_key(&new_flow->id)) 983 match.key = new_flow->id.unmasked_key; 984 985 ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); 986 987 /* Validate actions. 
*/ 988 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], 989 &new_flow->key, &acts, log); 990 if (error) { 991 OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); 992 goto err_kfree_flow; 993 } 994 995 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, 996 ufid_flags); 997 if (IS_ERR(reply)) { 998 error = PTR_ERR(reply); 999 goto err_kfree_acts; 1000 } 1001 1002 ovs_lock(); 1003 dp = get_dp(net, ovs_header->dp_ifindex); 1004 if (unlikely(!dp)) { 1005 error = -ENODEV; 1006 goto err_unlock_ovs; 1007 } 1008 1009 /* Check if this is a duplicate flow */ 1010 if (ovs_identifier_is_ufid(&new_flow->id)) 1011 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); 1012 if (!flow) 1013 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); 1014 if (likely(!flow)) { 1015 rcu_assign_pointer(new_flow->sf_acts, acts); 1016 1017 /* Put flow in bucket. */ 1018 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); 1019 if (unlikely(error)) { 1020 acts = NULL; 1021 goto err_unlock_ovs; 1022 } 1023 1024 if (unlikely(reply)) { 1025 error = ovs_flow_cmd_fill_info(new_flow, 1026 ovs_header->dp_ifindex, 1027 reply, info->snd_portid, 1028 info->snd_seq, 0, 1029 OVS_FLOW_CMD_NEW, 1030 ufid_flags); 1031 BUG_ON(error < 0); 1032 } 1033 ovs_unlock(); 1034 } else { 1035 struct sw_flow_actions *old_acts; 1036 1037 /* Bail out if we're not allowed to modify an existing flow. 1038 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 1039 * because Generic Netlink treats the latter as a dump 1040 * request. We also accept NLM_F_EXCL in case that bug ever 1041 * gets fixed. 1042 */ 1043 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE 1044 | NLM_F_EXCL))) { 1045 error = -EEXIST; 1046 goto err_unlock_ovs; 1047 } 1048 /* The flow identifier has to be the same for flow updates. 1049 * Look for any overlapping flow. 1050 */ 1051 if (unlikely(!ovs_flow_cmp(flow, &match))) { 1052 if (ovs_identifier_is_key(&flow->id)) 1053 flow = ovs_flow_tbl_lookup_exact(&dp->table, 1054 &match); 1055 else /* UFID matches but key is different */ 1056 flow = NULL; 1057 if (!flow) { 1058 error = -ENOENT; 1059 goto err_unlock_ovs; 1060 } 1061 } 1062 /* Update actions. */ 1063 old_acts = ovsl_dereference(flow->sf_acts); 1064 rcu_assign_pointer(flow->sf_acts, acts); 1065 1066 if (unlikely(reply)) { 1067 error = ovs_flow_cmd_fill_info(flow, 1068 ovs_header->dp_ifindex, 1069 reply, info->snd_portid, 1070 info->snd_seq, 0, 1071 OVS_FLOW_CMD_NEW, 1072 ufid_flags); 1073 BUG_ON(error < 0); 1074 } 1075 ovs_unlock(); 1076 1077 ovs_nla_free_flow_actions_rcu(old_acts); 1078 ovs_flow_free(new_flow, false); 1079 } 1080 1081 if (reply) 1082 ovs_notify(&dp_flow_genl_family, reply, info); 1083 return 0; 1084 1085 err_unlock_ovs: 1086 ovs_unlock(); 1087 kfree_skb(reply); 1088 err_kfree_acts: 1089 ovs_nla_free_flow_actions(acts); 1090 err_kfree_flow: 1091 ovs_flow_free(new_flow, false); 1092 error: 1093 return error; 1094 } 1095 1096 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. 
*/ 1097 static noinline_for_stack 1098 struct sw_flow_actions *get_flow_actions(struct net *net, 1099 const struct nlattr *a, 1100 const struct sw_flow_key *key, 1101 const struct sw_flow_mask *mask, 1102 bool log) 1103 { 1104 struct sw_flow_actions *acts; 1105 struct sw_flow_key masked_key; 1106 int error; 1107 1108 ovs_flow_mask_key(&masked_key, key, true, mask); 1109 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); 1110 if (error) { 1111 OVS_NLERR(log, 1112 "Actions may not be safe on all matching packets"); 1113 return ERR_PTR(error); 1114 } 1115 1116 return acts; 1117 } 1118 1119 /* Factor out match-init and action-copy to avoid 1120 * "Wframe-larger-than=1024" warning. Because mask is only 1121 * used to get actions, we new a function to save some 1122 * stack space. 1123 * 1124 * If there are not key and action attrs, we return 0 1125 * directly. In the case, the caller will also not use the 1126 * match as before. If there is action attr, we try to get 1127 * actions and save them to *acts. Before returning from 1128 * the function, we reset the match->mask pointer. Because 1129 * we should not to return match object with dangling reference 1130 * to mask. 1131 * */ 1132 static noinline_for_stack int 1133 ovs_nla_init_match_and_action(struct net *net, 1134 struct sw_flow_match *match, 1135 struct sw_flow_key *key, 1136 struct nlattr **a, 1137 struct sw_flow_actions **acts, 1138 bool log) 1139 { 1140 struct sw_flow_mask mask; 1141 int error = 0; 1142 1143 if (a[OVS_FLOW_ATTR_KEY]) { 1144 ovs_match_init(match, key, true, &mask); 1145 error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY], 1146 a[OVS_FLOW_ATTR_MASK], log); 1147 if (error) 1148 goto error; 1149 } 1150 1151 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1152 if (!a[OVS_FLOW_ATTR_KEY]) { 1153 OVS_NLERR(log, 1154 "Flow key attribute not present in set flow."); 1155 error = -EINVAL; 1156 goto error; 1157 } 1158 1159 *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, 1160 &mask, log); 1161 if (IS_ERR(*acts)) { 1162 error = PTR_ERR(*acts); 1163 goto error; 1164 } 1165 } 1166 1167 /* On success, error is 0. */ 1168 error: 1169 match->mask = NULL; 1170 return error; 1171 } 1172 1173 static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) 1174 { 1175 struct net *net = sock_net(skb->sk); 1176 struct nlattr **a = info->attrs; 1177 struct ovs_header *ovs_header = info->userhdr; 1178 struct sw_flow_key key; 1179 struct sw_flow *flow; 1180 struct sk_buff *reply = NULL; 1181 struct datapath *dp; 1182 struct sw_flow_actions *old_acts = NULL, *acts = NULL; 1183 struct sw_flow_match match; 1184 struct sw_flow_id sfid; 1185 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1186 int error = 0; 1187 bool log = !a[OVS_FLOW_ATTR_PROBE]; 1188 bool ufid_present; 1189 1190 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); 1191 if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) { 1192 OVS_NLERR(log, 1193 "Flow set message rejected, Key attribute missing."); 1194 return -EINVAL; 1195 } 1196 1197 error = ovs_nla_init_match_and_action(net, &match, &key, a, 1198 &acts, log); 1199 if (error) 1200 goto error; 1201 1202 if (acts) { 1203 /* Can allocate before locking if have acts. 
*/ 1204 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, 1205 ufid_flags); 1206 if (IS_ERR(reply)) { 1207 error = PTR_ERR(reply); 1208 goto err_kfree_acts; 1209 } 1210 } 1211 1212 ovs_lock(); 1213 dp = get_dp(net, ovs_header->dp_ifindex); 1214 if (unlikely(!dp)) { 1215 error = -ENODEV; 1216 goto err_unlock_ovs; 1217 } 1218 /* Check that the flow exists. */ 1219 if (ufid_present) 1220 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); 1221 else 1222 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1223 if (unlikely(!flow)) { 1224 error = -ENOENT; 1225 goto err_unlock_ovs; 1226 } 1227 1228 /* Update actions, if present. */ 1229 if (likely(acts)) { 1230 old_acts = ovsl_dereference(flow->sf_acts); 1231 rcu_assign_pointer(flow->sf_acts, acts); 1232 1233 if (unlikely(reply)) { 1234 error = ovs_flow_cmd_fill_info(flow, 1235 ovs_header->dp_ifindex, 1236 reply, info->snd_portid, 1237 info->snd_seq, 0, 1238 OVS_FLOW_CMD_SET, 1239 ufid_flags); 1240 BUG_ON(error < 0); 1241 } 1242 } else { 1243 /* Could not alloc without acts before locking. */ 1244 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, 1245 info, OVS_FLOW_CMD_SET, false, 1246 ufid_flags); 1247 1248 if (IS_ERR(reply)) { 1249 error = PTR_ERR(reply); 1250 goto err_unlock_ovs; 1251 } 1252 } 1253 1254 /* Clear stats. */ 1255 if (a[OVS_FLOW_ATTR_CLEAR]) 1256 ovs_flow_stats_clear(flow); 1257 ovs_unlock(); 1258 1259 if (reply) 1260 ovs_notify(&dp_flow_genl_family, reply, info); 1261 if (old_acts) 1262 ovs_nla_free_flow_actions_rcu(old_acts); 1263 1264 return 0; 1265 1266 err_unlock_ovs: 1267 ovs_unlock(); 1268 kfree_skb(reply); 1269 err_kfree_acts: 1270 ovs_nla_free_flow_actions(acts); 1271 error: 1272 return error; 1273 } 1274 1275 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) 1276 { 1277 struct nlattr **a = info->attrs; 1278 struct ovs_header *ovs_header = info->userhdr; 1279 struct net *net = sock_net(skb->sk); 1280 struct sw_flow_key key; 1281 struct sk_buff *reply; 1282 struct sw_flow *flow; 1283 struct datapath *dp; 1284 struct sw_flow_match match; 1285 struct sw_flow_id ufid; 1286 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1287 int err = 0; 1288 bool log = !a[OVS_FLOW_ATTR_PROBE]; 1289 bool ufid_present; 1290 1291 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); 1292 if (a[OVS_FLOW_ATTR_KEY]) { 1293 ovs_match_init(&match, &key, true, NULL); 1294 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, 1295 log); 1296 } else if (!ufid_present) { 1297 OVS_NLERR(log, 1298 "Flow get message rejected, Key attribute missing."); 1299 err = -EINVAL; 1300 } 1301 if (err) 1302 return err; 1303 1304 ovs_lock(); 1305 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1306 if (!dp) { 1307 err = -ENODEV; 1308 goto unlock; 1309 } 1310 1311 if (ufid_present) 1312 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); 1313 else 1314 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1315 if (!flow) { 1316 err = -ENOENT; 1317 goto unlock; 1318 } 1319 1320 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, 1321 OVS_FLOW_CMD_GET, true, ufid_flags); 1322 if (IS_ERR(reply)) { 1323 err = PTR_ERR(reply); 1324 goto unlock; 1325 } 1326 1327 ovs_unlock(); 1328 return genlmsg_reply(reply, info); 1329 unlock: 1330 ovs_unlock(); 1331 return err; 1332 } 1333 1334 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1335 { 1336 struct nlattr **a = info->attrs; 1337 struct ovs_header *ovs_header = info->userhdr; 1338 struct 
net *net = sock_net(skb->sk); 1339 struct sw_flow_key key; 1340 struct sk_buff *reply; 1341 struct sw_flow *flow = NULL; 1342 struct datapath *dp; 1343 struct sw_flow_match match; 1344 struct sw_flow_id ufid; 1345 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1346 int err; 1347 bool log = !a[OVS_FLOW_ATTR_PROBE]; 1348 bool ufid_present; 1349 1350 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); 1351 if (a[OVS_FLOW_ATTR_KEY]) { 1352 ovs_match_init(&match, &key, true, NULL); 1353 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], 1354 NULL, log); 1355 if (unlikely(err)) 1356 return err; 1357 } 1358 1359 ovs_lock(); 1360 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1361 if (unlikely(!dp)) { 1362 err = -ENODEV; 1363 goto unlock; 1364 } 1365 1366 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { 1367 err = ovs_flow_tbl_flush(&dp->table); 1368 goto unlock; 1369 } 1370 1371 if (ufid_present) 1372 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); 1373 else 1374 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1375 if (unlikely(!flow)) { 1376 err = -ENOENT; 1377 goto unlock; 1378 } 1379 1380 ovs_flow_tbl_remove(&dp->table, flow); 1381 ovs_unlock(); 1382 1383 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, 1384 &flow->id, info, false, ufid_flags); 1385 if (likely(reply)) { 1386 if (!IS_ERR(reply)) { 1387 rcu_read_lock(); /*To keep RCU checker happy. */ 1388 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, 1389 reply, info->snd_portid, 1390 info->snd_seq, 0, 1391 OVS_FLOW_CMD_DEL, 1392 ufid_flags); 1393 rcu_read_unlock(); 1394 if (WARN_ON_ONCE(err < 0)) { 1395 kfree_skb(reply); 1396 goto out_free; 1397 } 1398 1399 ovs_notify(&dp_flow_genl_family, reply, info); 1400 } else { 1401 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, 1402 PTR_ERR(reply)); 1403 } 1404 } 1405 1406 out_free: 1407 ovs_flow_free(flow, true); 1408 return 0; 1409 unlock: 1410 ovs_unlock(); 1411 return err; 1412 } 1413 1414 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1415 { 1416 struct nlattr *a[__OVS_FLOW_ATTR_MAX]; 1417 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1418 struct table_instance *ti; 1419 struct datapath *dp; 1420 u32 ufid_flags; 1421 int err; 1422 1423 err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, 1424 OVS_FLOW_ATTR_MAX, flow_policy, NULL); 1425 if (err) 1426 return err; 1427 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1428 1429 rcu_read_lock(); 1430 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 1431 if (!dp) { 1432 rcu_read_unlock(); 1433 return -ENODEV; 1434 } 1435 1436 ti = rcu_dereference(dp->table.ti); 1437 for (;;) { 1438 struct sw_flow *flow; 1439 u32 bucket, obj; 1440 1441 bucket = cb->args[0]; 1442 obj = cb->args[1]; 1443 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); 1444 if (!flow) 1445 break; 1446 1447 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, 1448 NETLINK_CB(cb->skb).portid, 1449 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1450 OVS_FLOW_CMD_GET, ufid_flags) < 0) 1451 break; 1452 1453 cb->args[0] = bucket; 1454 cb->args[1] = obj; 1455 } 1456 rcu_read_unlock(); 1457 return skb->len; 1458 } 1459 1460 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { 1461 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, 1462 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED }, 1463 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, 1464 [OVS_FLOW_ATTR_CLEAR] = { .type 
= NLA_FLAG }, 1465 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, 1466 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, 1467 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, 1468 }; 1469 1470 static const struct genl_small_ops dp_flow_genl_ops[] = { 1471 { .cmd = OVS_FLOW_CMD_NEW, 1472 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1473 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1474 .doit = ovs_flow_cmd_new 1475 }, 1476 { .cmd = OVS_FLOW_CMD_DEL, 1477 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1478 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1479 .doit = ovs_flow_cmd_del 1480 }, 1481 { .cmd = OVS_FLOW_CMD_GET, 1482 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1483 .flags = 0, /* OK for unprivileged users. */ 1484 .doit = ovs_flow_cmd_get, 1485 .dumpit = ovs_flow_cmd_dump 1486 }, 1487 { .cmd = OVS_FLOW_CMD_SET, 1488 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1489 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1490 .doit = ovs_flow_cmd_set, 1491 }, 1492 }; 1493 1494 static struct genl_family dp_flow_genl_family __ro_after_init = { 1495 .hdrsize = sizeof(struct ovs_header), 1496 .name = OVS_FLOW_FAMILY, 1497 .version = OVS_FLOW_VERSION, 1498 .maxattr = OVS_FLOW_ATTR_MAX, 1499 .policy = flow_policy, 1500 .netnsok = true, 1501 .parallel_ops = true, 1502 .small_ops = dp_flow_genl_ops, 1503 .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops), 1504 .mcgrps = &ovs_dp_flow_multicast_group, 1505 .n_mcgrps = 1, 1506 .module = THIS_MODULE, 1507 }; 1508 1509 static size_t ovs_dp_cmd_msg_size(void) 1510 { 1511 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); 1512 1513 msgsize += nla_total_size(IFNAMSIZ); 1514 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); 1515 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); 1516 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ 1517 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */ 1518 1519 return msgsize; 1520 } 1521 1522 /* Called with ovs_mutex. 
*/ 1523 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1524 u32 portid, u32 seq, u32 flags, u8 cmd) 1525 { 1526 struct ovs_header *ovs_header; 1527 struct ovs_dp_stats dp_stats; 1528 struct ovs_dp_megaflow_stats dp_megaflow_stats; 1529 int err; 1530 1531 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, 1532 flags, cmd); 1533 if (!ovs_header) 1534 goto error; 1535 1536 ovs_header->dp_ifindex = get_dpifindex(dp); 1537 1538 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); 1539 if (err) 1540 goto nla_put_failure; 1541 1542 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); 1543 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), 1544 &dp_stats, OVS_DP_ATTR_PAD)) 1545 goto nla_put_failure; 1546 1547 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS, 1548 sizeof(struct ovs_dp_megaflow_stats), 1549 &dp_megaflow_stats, OVS_DP_ATTR_PAD)) 1550 goto nla_put_failure; 1551 1552 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) 1553 goto nla_put_failure; 1554 1555 if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, 1556 ovs_flow_tbl_masks_cache_size(&dp->table))) 1557 goto nla_put_failure; 1558 1559 genlmsg_end(skb, ovs_header); 1560 return 0; 1561 1562 nla_put_failure: 1563 genlmsg_cancel(skb, ovs_header); 1564 error: 1565 return -EMSGSIZE; 1566 } 1567 1568 static struct sk_buff *ovs_dp_cmd_alloc_info(void) 1569 { 1570 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); 1571 } 1572 1573 /* Called with rcu_read_lock or ovs_mutex. */ 1574 static struct datapath *lookup_datapath(struct net *net, 1575 const struct ovs_header *ovs_header, 1576 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1577 { 1578 struct datapath *dp; 1579 1580 if (!a[OVS_DP_ATTR_NAME]) 1581 dp = get_dp(net, ovs_header->dp_ifindex); 1582 else { 1583 struct vport *vport; 1584 1585 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); 1586 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; 1587 } 1588 return dp ? 
dp : ERR_PTR(-ENODEV); 1589 } 1590 1591 static void ovs_dp_reset_user_features(struct sk_buff *skb, 1592 struct genl_info *info) 1593 { 1594 struct datapath *dp; 1595 1596 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, 1597 info->attrs); 1598 if (IS_ERR(dp)) 1599 return; 1600 1601 WARN(dp->user_features, "Dropping previously announced user features\n"); 1602 dp->user_features = 0; 1603 } 1604 1605 static int ovs_dp_set_upcall_portids(struct datapath *dp, 1606 const struct nlattr *ids) 1607 { 1608 struct dp_nlsk_pids *old, *dp_nlsk_pids; 1609 1610 if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) 1611 return -EINVAL; 1612 1613 old = ovsl_dereference(dp->upcall_portids); 1614 1615 dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids), 1616 GFP_KERNEL); 1617 if (!dp_nlsk_pids) 1618 return -ENOMEM; 1619 1620 dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32); 1621 nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids)); 1622 1623 rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids); 1624 1625 kfree_rcu(old, rcu); 1626 1627 return 0; 1628 } 1629 1630 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) 1631 { 1632 struct dp_nlsk_pids *dp_nlsk_pids; 1633 1634 dp_nlsk_pids = rcu_dereference(dp->upcall_portids); 1635 1636 if (dp_nlsk_pids) { 1637 if (cpu_id < dp_nlsk_pids->n_pids) { 1638 return dp_nlsk_pids->pids[cpu_id]; 1639 } else if (dp_nlsk_pids->n_pids > 0 && 1640 cpu_id >= dp_nlsk_pids->n_pids) { 1641 /* If the number of netlink PIDs is mismatched with 1642 * the number of CPUs as seen by the kernel, log this 1643 * and send the upcall to an arbitrary socket (0) in 1644 * order to not drop packets 1645 */ 1646 pr_info_ratelimited("cpu_id mismatch with handler threads"); 1647 return dp_nlsk_pids->pids[cpu_id % 1648 dp_nlsk_pids->n_pids]; 1649 } else { 1650 return 0; 1651 } 1652 } else { 1653 return 0; 1654 } 1655 } 1656 1657 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) 1658 { 1659 u32 user_features = 0, old_features = dp->user_features; 1660 int err; 1661 1662 if (a[OVS_DP_ATTR_USER_FEATURES]) { 1663 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); 1664 1665 if (user_features & ~(OVS_DP_F_VPORT_PIDS | 1666 OVS_DP_F_UNALIGNED | 1667 OVS_DP_F_TC_RECIRC_SHARING | 1668 OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) 1669 return -EOPNOTSUPP; 1670 1671 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 1672 if (user_features & OVS_DP_F_TC_RECIRC_SHARING) 1673 return -EOPNOTSUPP; 1674 #endif 1675 } 1676 1677 if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { 1678 int err; 1679 u32 cache_size; 1680 1681 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); 1682 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size); 1683 if (err) 1684 return err; 1685 } 1686 1687 dp->user_features = user_features; 1688 1689 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && 1690 a[OVS_DP_ATTR_PER_CPU_PIDS]) { 1691 /* Upcall Netlink Port IDs have been updated */ 1692 err = ovs_dp_set_upcall_portids(dp, 1693 a[OVS_DP_ATTR_PER_CPU_PIDS]); 1694 if (err) 1695 return err; 1696 } 1697 1698 if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && 1699 !(old_features & OVS_DP_F_TC_RECIRC_SHARING)) 1700 tc_skb_ext_tc_enable(); 1701 else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) && 1702 (old_features & OVS_DP_F_TC_RECIRC_SHARING)) 1703 tc_skb_ext_tc_disable(); 1704 1705 return 0; 1706 } 1707 1708 static int ovs_dp_stats_init(struct datapath *dp) 1709 { 1710 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); 1711 if (!dp->stats_percpu) 1712 return -ENOMEM; 
1713 1714 return 0; 1715 } 1716 1717 static int ovs_dp_vport_init(struct datapath *dp) 1718 { 1719 int i; 1720 1721 dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, 1722 sizeof(struct hlist_head), 1723 GFP_KERNEL); 1724 if (!dp->ports) 1725 return -ENOMEM; 1726 1727 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1728 INIT_HLIST_HEAD(&dp->ports[i]); 1729 1730 return 0; 1731 } 1732 1733 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) 1734 { 1735 struct nlattr **a = info->attrs; 1736 struct vport_parms parms; 1737 struct sk_buff *reply; 1738 struct datapath *dp; 1739 struct vport *vport; 1740 struct ovs_net *ovs_net; 1741 int err; 1742 1743 err = -EINVAL; 1744 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1745 goto err; 1746 1747 reply = ovs_dp_cmd_alloc_info(); 1748 if (!reply) 1749 return -ENOMEM; 1750 1751 err = -ENOMEM; 1752 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1753 if (dp == NULL) 1754 goto err_destroy_reply; 1755 1756 ovs_dp_set_net(dp, sock_net(skb->sk)); 1757 1758 /* Allocate table. */ 1759 err = ovs_flow_tbl_init(&dp->table); 1760 if (err) 1761 goto err_destroy_dp; 1762 1763 err = ovs_dp_stats_init(dp); 1764 if (err) 1765 goto err_destroy_table; 1766 1767 err = ovs_dp_vport_init(dp); 1768 if (err) 1769 goto err_destroy_stats; 1770 1771 err = ovs_meters_init(dp); 1772 if (err) 1773 goto err_destroy_ports; 1774 1775 /* Set up our datapath device. */ 1776 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1777 parms.type = OVS_VPORT_TYPE_INTERNAL; 1778 parms.options = NULL; 1779 parms.dp = dp; 1780 parms.port_no = OVSP_LOCAL; 1781 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; 1782 1783 /* So far only local changes have been made, now need the lock. */ 1784 ovs_lock(); 1785 1786 err = ovs_dp_change(dp, a); 1787 if (err) 1788 goto err_unlock_and_destroy_meters; 1789 1790 vport = new_vport(&parms); 1791 if (IS_ERR(vport)) { 1792 err = PTR_ERR(vport); 1793 if (err == -EBUSY) 1794 err = -EEXIST; 1795 1796 if (err == -EEXIST) { 1797 /* An outdated user space instance that does not understand 1798 * the concept of user_features has attempted to create a new 1799 * datapath and is likely to reuse it. Drop all user features. 1800 */ 1801 if (info->genlhdr->version < OVS_DP_VER_FEATURES) 1802 ovs_dp_reset_user_features(skb, info); 1803 } 1804 1805 goto err_unlock_and_destroy_meters; 1806 } 1807 1808 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1809 info->snd_seq, 0, OVS_DP_CMD_NEW); 1810 BUG_ON(err < 0); 1811 1812 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1813 list_add_tail_rcu(&dp->list_node, &ovs_net->dps); 1814 1815 ovs_unlock(); 1816 1817 ovs_notify(&dp_datapath_genl_family, reply, info); 1818 return 0; 1819 1820 err_unlock_and_destroy_meters: 1821 ovs_unlock(); 1822 ovs_meters_exit(dp); 1823 err_destroy_ports: 1824 kfree(dp->ports); 1825 err_destroy_stats: 1826 free_percpu(dp->stats_percpu); 1827 err_destroy_table: 1828 ovs_flow_tbl_destroy(&dp->table); 1829 err_destroy_dp: 1830 kfree(dp); 1831 err_destroy_reply: 1832 kfree_skb(reply); 1833 err: 1834 return err; 1835 } 1836 1837 /* Called with ovs_mutex. 
*/ 1838 static void __dp_destroy(struct datapath *dp) 1839 { 1840 struct flow_table *table = &dp->table; 1841 int i; 1842 1843 if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) 1844 tc_skb_ext_tc_disable(); 1845 1846 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1847 struct vport *vport; 1848 struct hlist_node *n; 1849 1850 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) 1851 if (vport->port_no != OVSP_LOCAL) 1852 ovs_dp_detach_port(vport); 1853 } 1854 1855 list_del_rcu(&dp->list_node); 1856 1857 /* OVSP_LOCAL is datapath internal port. We need to make sure that 1858 * all ports in datapath are destroyed first before freeing datapath. 1859 */ 1860 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); 1861 1862 /* Flush sw_flow in the tables. RCU cb only releases resource 1863 * such as dp, ports and tables. That may avoid some issues 1864 * such as RCU usage warning. 1865 */ 1866 table_instance_flow_flush(table, ovsl_dereference(table->ti), 1867 ovsl_dereference(table->ufid_ti)); 1868 1869 /* RCU destroy the ports, meters and flow tables. */ 1870 call_rcu(&dp->rcu, destroy_dp_rcu); 1871 } 1872 1873 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) 1874 { 1875 struct sk_buff *reply; 1876 struct datapath *dp; 1877 int err; 1878 1879 reply = ovs_dp_cmd_alloc_info(); 1880 if (!reply) 1881 return -ENOMEM; 1882 1883 ovs_lock(); 1884 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1885 err = PTR_ERR(dp); 1886 if (IS_ERR(dp)) 1887 goto err_unlock_free; 1888 1889 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1890 info->snd_seq, 0, OVS_DP_CMD_DEL); 1891 BUG_ON(err < 0); 1892 1893 __dp_destroy(dp); 1894 ovs_unlock(); 1895 1896 ovs_notify(&dp_datapath_genl_family, reply, info); 1897 1898 return 0; 1899 1900 err_unlock_free: 1901 ovs_unlock(); 1902 kfree_skb(reply); 1903 return err; 1904 } 1905 1906 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1907 { 1908 struct sk_buff *reply; 1909 struct datapath *dp; 1910 int err; 1911 1912 reply = ovs_dp_cmd_alloc_info(); 1913 if (!reply) 1914 return -ENOMEM; 1915 1916 ovs_lock(); 1917 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1918 err = PTR_ERR(dp); 1919 if (IS_ERR(dp)) 1920 goto err_unlock_free; 1921 1922 err = ovs_dp_change(dp, info->attrs); 1923 if (err) 1924 goto err_unlock_free; 1925 1926 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1927 info->snd_seq, 0, OVS_DP_CMD_SET); 1928 BUG_ON(err < 0); 1929 1930 ovs_unlock(); 1931 ovs_notify(&dp_datapath_genl_family, reply, info); 1932 1933 return 0; 1934 1935 err_unlock_free: 1936 ovs_unlock(); 1937 kfree_skb(reply); 1938 return err; 1939 } 1940 1941 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1942 { 1943 struct sk_buff *reply; 1944 struct datapath *dp; 1945 int err; 1946 1947 reply = ovs_dp_cmd_alloc_info(); 1948 if (!reply) 1949 return -ENOMEM; 1950 1951 ovs_lock(); 1952 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1953 if (IS_ERR(dp)) { 1954 err = PTR_ERR(dp); 1955 goto err_unlock_free; 1956 } 1957 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1958 info->snd_seq, 0, OVS_DP_CMD_GET); 1959 BUG_ON(err < 0); 1960 ovs_unlock(); 1961 1962 return genlmsg_reply(reply, info); 1963 1964 err_unlock_free: 1965 ovs_unlock(); 1966 kfree_skb(reply); 1967 return err; 1968 } 1969 1970 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1971 { 1972 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 
1973 struct datapath *dp; 1974 int skip = cb->args[0]; 1975 int i = 0; 1976 1977 ovs_lock(); 1978 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1979 if (i >= skip && 1980 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1981 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1982 OVS_DP_CMD_GET) < 0) 1983 break; 1984 i++; 1985 } 1986 ovs_unlock(); 1987 1988 cb->args[0] = i; 1989 1990 return skb->len; 1991 } 1992 1993 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { 1994 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1995 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1996 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, 1997 [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, 1998 PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), 1999 }; 2000 2001 static const struct genl_small_ops dp_datapath_genl_ops[] = { 2002 { .cmd = OVS_DP_CMD_NEW, 2003 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2004 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2005 .doit = ovs_dp_cmd_new 2006 }, 2007 { .cmd = OVS_DP_CMD_DEL, 2008 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2009 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2010 .doit = ovs_dp_cmd_del 2011 }, 2012 { .cmd = OVS_DP_CMD_GET, 2013 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2014 .flags = 0, /* OK for unprivileged users. */ 2015 .doit = ovs_dp_cmd_get, 2016 .dumpit = ovs_dp_cmd_dump 2017 }, 2018 { .cmd = OVS_DP_CMD_SET, 2019 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2020 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2021 .doit = ovs_dp_cmd_set, 2022 }, 2023 }; 2024 2025 static struct genl_family dp_datapath_genl_family __ro_after_init = { 2026 .hdrsize = sizeof(struct ovs_header), 2027 .name = OVS_DATAPATH_FAMILY, 2028 .version = OVS_DATAPATH_VERSION, 2029 .maxattr = OVS_DP_ATTR_MAX, 2030 .policy = datapath_policy, 2031 .netnsok = true, 2032 .parallel_ops = true, 2033 .small_ops = dp_datapath_genl_ops, 2034 .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops), 2035 .mcgrps = &ovs_dp_datapath_multicast_group, 2036 .n_mcgrps = 1, 2037 .module = THIS_MODULE, 2038 }; 2039 2040 /* Called with ovs_mutex or RCU read lock. 
*/ 2041 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 2042 struct net *net, u32 portid, u32 seq, 2043 u32 flags, u8 cmd, gfp_t gfp) 2044 { 2045 struct ovs_header *ovs_header; 2046 struct ovs_vport_stats vport_stats; 2047 int err; 2048 2049 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, 2050 flags, cmd); 2051 if (!ovs_header) 2052 return -EMSGSIZE; 2053 2054 ovs_header->dp_ifindex = get_dpifindex(vport->dp); 2055 2056 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 2057 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 2058 nla_put_string(skb, OVS_VPORT_ATTR_NAME, 2059 ovs_vport_name(vport)) || 2060 nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) 2061 goto nla_put_failure; 2062 2063 if (!net_eq(net, dev_net(vport->dev))) { 2064 int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); 2065 2066 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) 2067 goto nla_put_failure; 2068 } 2069 2070 ovs_vport_get_stats(vport, &vport_stats); 2071 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, 2072 sizeof(struct ovs_vport_stats), &vport_stats, 2073 OVS_VPORT_ATTR_PAD)) 2074 goto nla_put_failure; 2075 2076 if (ovs_vport_get_upcall_portids(vport, skb)) 2077 goto nla_put_failure; 2078 2079 err = ovs_vport_get_options(vport, skb); 2080 if (err == -EMSGSIZE) 2081 goto error; 2082 2083 genlmsg_end(skb, ovs_header); 2084 return 0; 2085 2086 nla_put_failure: 2087 err = -EMSGSIZE; 2088 error: 2089 genlmsg_cancel(skb, ovs_header); 2090 return err; 2091 } 2092 2093 static struct sk_buff *ovs_vport_cmd_alloc_info(void) 2094 { 2095 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2096 } 2097 2098 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ 2099 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, 2100 u32 portid, u32 seq, u8 cmd) 2101 { 2102 struct sk_buff *skb; 2103 int retval; 2104 2105 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2106 if (!skb) 2107 return ERR_PTR(-ENOMEM); 2108 2109 retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, 2110 GFP_KERNEL); 2111 BUG_ON(retval < 0); 2112 2113 return skb; 2114 } 2115 2116 /* Called with ovs_mutex or RCU read lock. 
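 * Resolves the vport that a userspace request refers to: either by
 * OVS_VPORT_ATTR_NAME, or by the datapath ifindex from the ovs_header
 * combined with OVS_VPORT_ATTR_PORT_NO.  Requests carrying
 * OVS_VPORT_ATTR_IFINDEX are rejected with -EOPNOTSUPP.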
*/ 2117 static struct vport *lookup_vport(struct net *net, 2118 const struct ovs_header *ovs_header, 2119 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 2120 { 2121 struct datapath *dp; 2122 struct vport *vport; 2123 2124 if (a[OVS_VPORT_ATTR_IFINDEX]) 2125 return ERR_PTR(-EOPNOTSUPP); 2126 if (a[OVS_VPORT_ATTR_NAME]) { 2127 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 2128 if (!vport) 2129 return ERR_PTR(-ENODEV); 2130 if (ovs_header->dp_ifindex && 2131 ovs_header->dp_ifindex != get_dpifindex(vport->dp)) 2132 return ERR_PTR(-ENODEV); 2133 return vport; 2134 } else if (a[OVS_VPORT_ATTR_PORT_NO]) { 2135 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 2136 2137 if (port_no >= DP_MAX_PORTS) 2138 return ERR_PTR(-EFBIG); 2139 2140 dp = get_dp(net, ovs_header->dp_ifindex); 2141 if (!dp) 2142 return ERR_PTR(-ENODEV); 2143 2144 vport = ovs_vport_ovsl_rcu(dp, port_no); 2145 if (!vport) 2146 return ERR_PTR(-ENODEV); 2147 return vport; 2148 } else 2149 return ERR_PTR(-EINVAL); 2150 2151 } 2152 2153 static unsigned int ovs_get_max_headroom(struct datapath *dp) 2154 { 2155 unsigned int dev_headroom, max_headroom = 0; 2156 struct net_device *dev; 2157 struct vport *vport; 2158 int i; 2159 2160 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2161 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2162 lockdep_ovsl_is_held()) { 2163 dev = vport->dev; 2164 dev_headroom = netdev_get_fwd_headroom(dev); 2165 if (dev_headroom > max_headroom) 2166 max_headroom = dev_headroom; 2167 } 2168 } 2169 2170 return max_headroom; 2171 } 2172 2173 /* Called with ovs_mutex */ 2174 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) 2175 { 2176 struct vport *vport; 2177 int i; 2178 2179 dp->max_headroom = new_headroom; 2180 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2181 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, 2182 lockdep_ovsl_is_held()) 2183 netdev_set_rx_headroom(vport->dev, new_headroom); 2184 } 2185 } 2186 2187 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 2188 { 2189 struct nlattr **a = info->attrs; 2190 struct ovs_header *ovs_header = info->userhdr; 2191 struct vport_parms parms; 2192 struct sk_buff *reply; 2193 struct vport *vport; 2194 struct datapath *dp; 2195 unsigned int new_headroom; 2196 u32 port_no; 2197 int err; 2198 2199 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 2200 !a[OVS_VPORT_ATTR_UPCALL_PID]) 2201 return -EINVAL; 2202 if (a[OVS_VPORT_ATTR_IFINDEX]) 2203 return -EOPNOTSUPP; 2204 2205 port_no = a[OVS_VPORT_ATTR_PORT_NO] 2206 ? 
nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 2207 if (port_no >= DP_MAX_PORTS) 2208 return -EFBIG; 2209 2210 reply = ovs_vport_cmd_alloc_info(); 2211 if (!reply) 2212 return -ENOMEM; 2213 2214 ovs_lock(); 2215 restart: 2216 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 2217 err = -ENODEV; 2218 if (!dp) 2219 goto exit_unlock_free; 2220 2221 if (port_no) { 2222 vport = ovs_vport_ovsl(dp, port_no); 2223 err = -EBUSY; 2224 if (vport) 2225 goto exit_unlock_free; 2226 } else { 2227 for (port_no = 1; ; port_no++) { 2228 if (port_no >= DP_MAX_PORTS) { 2229 err = -EFBIG; 2230 goto exit_unlock_free; 2231 } 2232 vport = ovs_vport_ovsl(dp, port_no); 2233 if (!vport) 2234 break; 2235 } 2236 } 2237 2238 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); 2239 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); 2240 parms.options = a[OVS_VPORT_ATTR_OPTIONS]; 2241 parms.dp = dp; 2242 parms.port_no = port_no; 2243 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; 2244 2245 vport = new_vport(&parms); 2246 err = PTR_ERR(vport); 2247 if (IS_ERR(vport)) { 2248 if (err == -EAGAIN) 2249 goto restart; 2250 goto exit_unlock_free; 2251 } 2252 2253 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2254 info->snd_portid, info->snd_seq, 0, 2255 OVS_VPORT_CMD_NEW, GFP_KERNEL); 2256 2257 new_headroom = netdev_get_fwd_headroom(vport->dev); 2258 2259 if (new_headroom > dp->max_headroom) 2260 ovs_update_headroom(dp, new_headroom); 2261 else 2262 netdev_set_rx_headroom(vport->dev, dp->max_headroom); 2263 2264 BUG_ON(err < 0); 2265 ovs_unlock(); 2266 2267 ovs_notify(&dp_vport_genl_family, reply, info); 2268 return 0; 2269 2270 exit_unlock_free: 2271 ovs_unlock(); 2272 kfree_skb(reply); 2273 return err; 2274 } 2275 2276 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) 2277 { 2278 struct nlattr **a = info->attrs; 2279 struct sk_buff *reply; 2280 struct vport *vport; 2281 int err; 2282 2283 reply = ovs_vport_cmd_alloc_info(); 2284 if (!reply) 2285 return -ENOMEM; 2286 2287 ovs_lock(); 2288 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 2289 err = PTR_ERR(vport); 2290 if (IS_ERR(vport)) 2291 goto exit_unlock_free; 2292 2293 if (a[OVS_VPORT_ATTR_TYPE] && 2294 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { 2295 err = -EINVAL; 2296 goto exit_unlock_free; 2297 } 2298 2299 if (a[OVS_VPORT_ATTR_OPTIONS]) { 2300 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 2301 if (err) 2302 goto exit_unlock_free; 2303 } 2304 2305 2306 if (a[OVS_VPORT_ATTR_UPCALL_PID]) { 2307 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; 2308 2309 err = ovs_vport_set_upcall_portids(vport, ids); 2310 if (err) 2311 goto exit_unlock_free; 2312 } 2313 2314 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2315 info->snd_portid, info->snd_seq, 0, 2316 OVS_VPORT_CMD_SET, GFP_KERNEL); 2317 BUG_ON(err < 0); 2318 2319 ovs_unlock(); 2320 ovs_notify(&dp_vport_genl_family, reply, info); 2321 return 0; 2322 2323 exit_unlock_free: 2324 ovs_unlock(); 2325 kfree_skb(reply); 2326 return err; 2327 } 2328 2329 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 2330 { 2331 bool update_headroom = false; 2332 struct nlattr **a = info->attrs; 2333 struct sk_buff *reply; 2334 struct datapath *dp; 2335 struct vport *vport; 2336 unsigned int new_headroom; 2337 int err; 2338 2339 reply = ovs_vport_cmd_alloc_info(); 2340 if (!reply) 2341 return -ENOMEM; 2342 2343 ovs_lock(); 2344 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 2345 err = PTR_ERR(vport); 2346 
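/* err is taken from the potential ERR_PTR() value before the IS_ERR()
 * check so that the failure path can jump straight to exit_unlock_free
 * with the error code already set; on success it is overwritten below.
 */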
if (IS_ERR(vport)) 2347 goto exit_unlock_free; 2348 2349 if (vport->port_no == OVSP_LOCAL) { 2350 err = -EINVAL; 2351 goto exit_unlock_free; 2352 } 2353 2354 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2355 info->snd_portid, info->snd_seq, 0, 2356 OVS_VPORT_CMD_DEL, GFP_KERNEL); 2357 BUG_ON(err < 0); 2358 2359 /* the vport deletion may trigger dp headroom update */ 2360 dp = vport->dp; 2361 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) 2362 update_headroom = true; 2363 2364 netdev_reset_rx_headroom(vport->dev); 2365 ovs_dp_detach_port(vport); 2366 2367 if (update_headroom) { 2368 new_headroom = ovs_get_max_headroom(dp); 2369 2370 if (new_headroom < dp->max_headroom) 2371 ovs_update_headroom(dp, new_headroom); 2372 } 2373 ovs_unlock(); 2374 2375 ovs_notify(&dp_vport_genl_family, reply, info); 2376 return 0; 2377 2378 exit_unlock_free: 2379 ovs_unlock(); 2380 kfree_skb(reply); 2381 return err; 2382 } 2383 2384 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) 2385 { 2386 struct nlattr **a = info->attrs; 2387 struct ovs_header *ovs_header = info->userhdr; 2388 struct sk_buff *reply; 2389 struct vport *vport; 2390 int err; 2391 2392 reply = ovs_vport_cmd_alloc_info(); 2393 if (!reply) 2394 return -ENOMEM; 2395 2396 rcu_read_lock(); 2397 vport = lookup_vport(sock_net(skb->sk), ovs_header, a); 2398 err = PTR_ERR(vport); 2399 if (IS_ERR(vport)) 2400 goto exit_unlock_free; 2401 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), 2402 info->snd_portid, info->snd_seq, 0, 2403 OVS_VPORT_CMD_GET, GFP_ATOMIC); 2404 BUG_ON(err < 0); 2405 rcu_read_unlock(); 2406 2407 return genlmsg_reply(reply, info); 2408 2409 exit_unlock_free: 2410 rcu_read_unlock(); 2411 kfree_skb(reply); 2412 return err; 2413 } 2414 2415 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 2416 { 2417 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 2418 struct datapath *dp; 2419 int bucket = cb->args[0], skip = cb->args[1]; 2420 int i, j = 0; 2421 2422 rcu_read_lock(); 2423 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 2424 if (!dp) { 2425 rcu_read_unlock(); 2426 return -ENODEV; 2427 } 2428 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { 2429 struct vport *vport; 2430 2431 j = 0; 2432 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 2433 if (j >= skip && 2434 ovs_vport_cmd_fill_info(vport, skb, 2435 sock_net(skb->sk), 2436 NETLINK_CB(cb->skb).portid, 2437 cb->nlh->nlmsg_seq, 2438 NLM_F_MULTI, 2439 OVS_VPORT_CMD_GET, 2440 GFP_ATOMIC) < 0) 2441 goto out; 2442 2443 j++; 2444 } 2445 skip = 0; 2446 } 2447 out: 2448 rcu_read_unlock(); 2449 2450 cb->args[0] = i; 2451 cb->args[1] = j; 2452 2453 return skb->len; 2454 } 2455 2456 static void ovs_dp_masks_rebalance(struct work_struct *work) 2457 { 2458 struct ovs_net *ovs_net = container_of(work, struct ovs_net, 2459 masks_rebalance.work); 2460 struct datapath *dp; 2461 2462 ovs_lock(); 2463 2464 list_for_each_entry(dp, &ovs_net->dps, list_node) 2465 ovs_flow_masks_rebalance(&dp->table); 2466 2467 ovs_unlock(); 2468 2469 schedule_delayed_work(&ovs_net->masks_rebalance, 2470 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); 2471 } 2472 2473 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { 2474 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 2475 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, 2476 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, 2477 [OVS_VPORT_ATTR_TYPE] = { .type = 
NLA_U32 }, 2478 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC }, 2479 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 2480 [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, 2481 [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, 2482 }; 2483 2484 static const struct genl_small_ops dp_vport_genl_ops[] = { 2485 { .cmd = OVS_VPORT_CMD_NEW, 2486 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2487 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2488 .doit = ovs_vport_cmd_new 2489 }, 2490 { .cmd = OVS_VPORT_CMD_DEL, 2491 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2492 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2493 .doit = ovs_vport_cmd_del 2494 }, 2495 { .cmd = OVS_VPORT_CMD_GET, 2496 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2497 .flags = 0, /* OK for unprivileged users. */ 2498 .doit = ovs_vport_cmd_get, 2499 .dumpit = ovs_vport_cmd_dump 2500 }, 2501 { .cmd = OVS_VPORT_CMD_SET, 2502 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2503 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2504 .doit = ovs_vport_cmd_set, 2505 }, 2506 }; 2507 2508 struct genl_family dp_vport_genl_family __ro_after_init = { 2509 .hdrsize = sizeof(struct ovs_header), 2510 .name = OVS_VPORT_FAMILY, 2511 .version = OVS_VPORT_VERSION, 2512 .maxattr = OVS_VPORT_ATTR_MAX, 2513 .policy = vport_policy, 2514 .netnsok = true, 2515 .parallel_ops = true, 2516 .small_ops = dp_vport_genl_ops, 2517 .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops), 2518 .mcgrps = &ovs_dp_vport_multicast_group, 2519 .n_mcgrps = 1, 2520 .module = THIS_MODULE, 2521 }; 2522 2523 static struct genl_family * const dp_genl_families[] = { 2524 &dp_datapath_genl_family, 2525 &dp_vport_genl_family, 2526 &dp_flow_genl_family, 2527 &dp_packet_genl_family, 2528 &dp_meter_genl_family, 2529 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) 2530 &dp_ct_limit_genl_family, 2531 #endif 2532 }; 2533 2534 static void dp_unregister_genl(int n_families) 2535 { 2536 int i; 2537 2538 for (i = 0; i < n_families; i++) 2539 genl_unregister_family(dp_genl_families[i]); 2540 } 2541 2542 static int __init dp_register_genl(void) 2543 { 2544 int err; 2545 int i; 2546 2547 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { 2548 2549 err = genl_register_family(dp_genl_families[i]); 2550 if (err) 2551 goto error; 2552 } 2553 2554 return 0; 2555 2556 error: 2557 dp_unregister_genl(i); 2558 return err; 2559 } 2560 2561 static int __net_init ovs_init_net(struct net *net) 2562 { 2563 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2564 int err; 2565 2566 INIT_LIST_HEAD(&ovs_net->dps); 2567 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); 2568 INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); 2569 2570 err = ovs_ct_init(net); 2571 if (err) 2572 return err; 2573 2574 schedule_delayed_work(&ovs_net->masks_rebalance, 2575 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); 2576 return 0; 2577 } 2578 2579 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, 2580 struct list_head *head) 2581 { 2582 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2583 struct datapath *dp; 2584 2585 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2586 int i; 2587 2588 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 2589 struct vport *vport; 2590 2591 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { 2592 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) 2593 continue; 2594 2595 if 
(dev_net(vport->dev) == dnet) 2596 list_add(&vport->detach_list, head); 2597 } 2598 } 2599 } 2600 } 2601 2602 static void __net_exit ovs_exit_net(struct net *dnet) 2603 { 2604 struct datapath *dp, *dp_next; 2605 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); 2606 struct vport *vport, *vport_next; 2607 struct net *net; 2608 LIST_HEAD(head); 2609 2610 ovs_lock(); 2611 2612 ovs_ct_exit(dnet); 2613 2614 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2615 __dp_destroy(dp); 2616 2617 down_read(&net_rwsem); 2618 for_each_net(net) 2619 list_vports_from_net(net, dnet, &head); 2620 up_read(&net_rwsem); 2621 2622 /* Detach all vports from given namespace. */ 2623 list_for_each_entry_safe(vport, vport_next, &head, detach_list) { 2624 list_del(&vport->detach_list); 2625 ovs_dp_detach_port(vport); 2626 } 2627 2628 ovs_unlock(); 2629 2630 cancel_delayed_work_sync(&ovs_net->masks_rebalance); 2631 cancel_work_sync(&ovs_net->dp_notify_work); 2632 } 2633 2634 static struct pernet_operations ovs_net_ops = { 2635 .init = ovs_init_net, 2636 .exit = ovs_exit_net, 2637 .id = &ovs_net_id, 2638 .size = sizeof(struct ovs_net), 2639 }; 2640 2641 static int __init dp_init(void) 2642 { 2643 int err; 2644 2645 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > 2646 sizeof_field(struct sk_buff, cb)); 2647 2648 pr_info("Open vSwitch switching datapath\n"); 2649 2650 err = action_fifos_init(); 2651 if (err) 2652 goto error; 2653 2654 err = ovs_internal_dev_rtnl_link_register(); 2655 if (err) 2656 goto error_action_fifos_exit; 2657 2658 err = ovs_flow_init(); 2659 if (err) 2660 goto error_unreg_rtnl_link; 2661 2662 err = ovs_vport_init(); 2663 if (err) 2664 goto error_flow_exit; 2665 2666 err = register_pernet_device(&ovs_net_ops); 2667 if (err) 2668 goto error_vport_exit; 2669 2670 err = register_netdevice_notifier(&ovs_dp_device_notifier); 2671 if (err) 2672 goto error_netns_exit; 2673 2674 err = ovs_netdev_init(); 2675 if (err) 2676 goto error_unreg_notifier; 2677 2678 err = dp_register_genl(); 2679 if (err < 0) 2680 goto error_unreg_netdev; 2681 2682 return 0; 2683 2684 error_unreg_netdev: 2685 ovs_netdev_exit(); 2686 error_unreg_notifier: 2687 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2688 error_netns_exit: 2689 unregister_pernet_device(&ovs_net_ops); 2690 error_vport_exit: 2691 ovs_vport_exit(); 2692 error_flow_exit: 2693 ovs_flow_exit(); 2694 error_unreg_rtnl_link: 2695 ovs_internal_dev_rtnl_link_unregister(); 2696 error_action_fifos_exit: 2697 action_fifos_exit(); 2698 error: 2699 return err; 2700 } 2701 2702 static void dp_cleanup(void) 2703 { 2704 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2705 ovs_netdev_exit(); 2706 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2707 unregister_pernet_device(&ovs_net_ops); 2708 rcu_barrier(); 2709 ovs_vport_exit(); 2710 ovs_flow_exit(); 2711 ovs_internal_dev_rtnl_link_unregister(); 2712 action_fifos_exit(); 2713 } 2714 2715 module_init(dp_init); 2716 module_exit(dp_cleanup); 2717 2718 MODULE_DESCRIPTION("Open vSwitch switching datapath"); 2719 MODULE_LICENSE("GPL"); 2720 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); 2721 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); 2722 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); 2723 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); 2724 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); 2725 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY); 2726
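/* Illustrative sketch of userspace usage: one way a program could drive
 * the OVS_DATAPATH_FAMILY interface registered above, assuming the
 * libnl-3 generic netlink helpers; the datapath name "dp0", the upcall
 * portid of 0 and the omitted error handling are placeholders.
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	struct ovs_header *hdr;
 *	struct nl_msg *msg;
 *	int family;
 *
 *	genl_connect(sk);
 *	family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *
 *	msg = nlmsg_alloc();
 *	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *			  sizeof(*hdr), NLM_F_REQUEST | NLM_F_ACK,
 *			  OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
 *	hdr->dp_ifindex = 0;
 *	nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
 *	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, 0);
 *
 *	nl_send_auto(sk, msg);
 *	nl_wait_for_ack(sk);
 */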