/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info)->genl_sock,
				  group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
		    0, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, set miscellaneous datapath parameters, etc.), are protected
 * by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
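 *
 * As a minimal illustrative sketch (not a verbatim excerpt of any single
 * function below), writers follow the pattern
 *
 *	ovs_lock();
 *	... modify datapath, vport or flow table state ...
 *	ovs_unlock();
 *
 * while readers wrap lookups such as get_dp_rcu() in
 * rcu_read_lock()/rcu_read_unlock().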
110 * 111 * The RTNL lock nests inside ovs_mutex. 112 */ 113 114 static DEFINE_MUTEX(ovs_mutex); 115 116 void ovs_lock(void) 117 { 118 mutex_lock(&ovs_mutex); 119 } 120 121 void ovs_unlock(void) 122 { 123 mutex_unlock(&ovs_mutex); 124 } 125 126 #ifdef CONFIG_LOCKDEP 127 int lockdep_ovsl_is_held(void) 128 { 129 if (debug_locks) 130 return lockdep_is_held(&ovs_mutex); 131 else 132 return 1; 133 } 134 EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held); 135 #endif 136 137 static struct vport *new_vport(const struct vport_parms *); 138 static int queue_gso_packets(struct datapath *dp, struct sk_buff *, 139 const struct sw_flow_key *, 140 const struct dp_upcall_info *); 141 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, 142 const struct sw_flow_key *, 143 const struct dp_upcall_info *); 144 145 /* Must be called with rcu_read_lock. */ 146 static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) 147 { 148 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); 149 150 if (dev) { 151 struct vport *vport = ovs_internal_dev_get_vport(dev); 152 if (vport) 153 return vport->dp; 154 } 155 156 return NULL; 157 } 158 159 /* The caller must hold either ovs_mutex or rcu_read_lock to keep the 160 * returned dp pointer valid. 161 */ 162 static inline struct datapath *get_dp(struct net *net, int dp_ifindex) 163 { 164 struct datapath *dp; 165 166 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); 167 rcu_read_lock(); 168 dp = get_dp_rcu(net, dp_ifindex); 169 rcu_read_unlock(); 170 171 return dp; 172 } 173 174 /* Must be called with rcu_read_lock or ovs_mutex. */ 175 const char *ovs_dp_name(const struct datapath *dp) 176 { 177 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); 178 return vport->ops->get_name(vport); 179 } 180 181 static int get_dpifindex(struct datapath *dp) 182 { 183 struct vport *local; 184 int ifindex; 185 186 rcu_read_lock(); 187 188 local = ovs_vport_rcu(dp, OVSP_LOCAL); 189 if (local) 190 ifindex = netdev_vport_priv(local)->dev->ifindex; 191 else 192 ifindex = 0; 193 194 rcu_read_unlock(); 195 196 return ifindex; 197 } 198 199 static void destroy_dp_rcu(struct rcu_head *rcu) 200 { 201 struct datapath *dp = container_of(rcu, struct datapath, rcu); 202 203 ovs_flow_tbl_destroy(&dp->table); 204 free_percpu(dp->stats_percpu); 205 release_net(ovs_dp_get_net(dp)); 206 kfree(dp->ports); 207 kfree(dp); 208 } 209 210 static struct hlist_head *vport_hash_bucket(const struct datapath *dp, 211 u16 port_no) 212 { 213 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; 214 } 215 216 /* Called with ovs_mutex or RCU read lock. */ 217 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) 218 { 219 struct vport *vport; 220 struct hlist_head *head; 221 222 head = vport_hash_bucket(dp, port_no); 223 hlist_for_each_entry_rcu(vport, head, dp_hash_node) { 224 if (vport->port_no == port_no) 225 return vport; 226 } 227 return NULL; 228 } 229 230 /* Called with ovs_mutex. */ 231 static struct vport *new_vport(const struct vport_parms *parms) 232 { 233 struct vport *vport; 234 235 vport = ovs_vport_add(parms); 236 if (!IS_ERR(vport)) { 237 struct datapath *dp = parms->dp; 238 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); 239 240 hlist_add_head_rcu(&vport->dp_hash_node, head); 241 } 242 return vport; 243 } 244 245 void ovs_dp_detach_port(struct vport *p) 246 { 247 ASSERT_OVSL(); 248 249 /* First drop references to device. */ 250 hlist_del_rcu(&p->dp_hash_node); 251 252 /* Then destroy it. 
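 * hlist_del_rcu() above only unlinks the port, so RCU readers that already
 * found it via ovs_lookup_vport() may still be using it; ovs_vport_del()
 * is responsible for the actual teardown.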
*/ 253 ovs_vport_del(p); 254 } 255 256 /* Must be called with rcu_read_lock. */ 257 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) 258 { 259 const struct vport *p = OVS_CB(skb)->input_vport; 260 struct datapath *dp = p->dp; 261 struct sw_flow *flow; 262 struct sw_flow_actions *sf_acts; 263 struct dp_stats_percpu *stats; 264 u64 *stats_counter; 265 u32 n_mask_hit; 266 267 stats = this_cpu_ptr(dp->stats_percpu); 268 269 /* Look up flow. */ 270 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); 271 if (unlikely(!flow)) { 272 struct dp_upcall_info upcall; 273 int error; 274 275 upcall.cmd = OVS_PACKET_CMD_MISS; 276 upcall.userdata = NULL; 277 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 278 upcall.egress_tun_info = NULL; 279 error = ovs_dp_upcall(dp, skb, key, &upcall); 280 if (unlikely(error)) 281 kfree_skb(skb); 282 else 283 consume_skb(skb); 284 stats_counter = &stats->n_missed; 285 goto out; 286 } 287 288 ovs_flow_stats_update(flow, key->tp.flags, skb); 289 sf_acts = rcu_dereference(flow->sf_acts); 290 ovs_execute_actions(dp, skb, sf_acts, key); 291 292 stats_counter = &stats->n_hit; 293 294 out: 295 /* Update datapath statistics. */ 296 u64_stats_update_begin(&stats->syncp); 297 (*stats_counter)++; 298 stats->n_mask_hit += n_mask_hit; 299 u64_stats_update_end(&stats->syncp); 300 } 301 302 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 303 const struct sw_flow_key *key, 304 const struct dp_upcall_info *upcall_info) 305 { 306 struct dp_stats_percpu *stats; 307 int err; 308 309 if (upcall_info->portid == 0) { 310 err = -ENOTCONN; 311 goto err; 312 } 313 314 if (!skb_is_gso(skb)) 315 err = queue_userspace_packet(dp, skb, key, upcall_info); 316 else 317 err = queue_gso_packets(dp, skb, key, upcall_info); 318 if (err) 319 goto err; 320 321 return 0; 322 323 err: 324 stats = this_cpu_ptr(dp->stats_percpu); 325 326 u64_stats_update_begin(&stats->syncp); 327 stats->n_lost++; 328 u64_stats_update_end(&stats->syncp); 329 330 return err; 331 } 332 333 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, 334 const struct sw_flow_key *key, 335 const struct dp_upcall_info *upcall_info) 336 { 337 unsigned short gso_type = skb_shinfo(skb)->gso_type; 338 struct sw_flow_key later_key; 339 struct sk_buff *segs, *nskb; 340 struct ovs_skb_cb ovs_cb; 341 int err; 342 343 ovs_cb = *OVS_CB(skb); 344 segs = __skb_gso_segment(skb, NETIF_F_SG, false); 345 *OVS_CB(skb) = ovs_cb; 346 if (IS_ERR(segs)) 347 return PTR_ERR(segs); 348 if (segs == NULL) 349 return -EINVAL; 350 351 if (gso_type & SKB_GSO_UDP) { 352 /* The initial flow key extracted by ovs_flow_key_extract() 353 * in this case is for a first fragment, so we need to 354 * properly mark later fragments. 355 */ 356 later_key = *key; 357 later_key.ip.frag = OVS_FRAG_TYPE_LATER; 358 } 359 360 /* Queue all of the segments. */ 361 skb = segs; 362 do { 363 *OVS_CB(skb) = ovs_cb; 364 if (gso_type & SKB_GSO_UDP && skb != segs) 365 key = &later_key; 366 367 err = queue_userspace_packet(dp, skb, key, upcall_info); 368 if (err) 369 break; 370 371 } while ((skb = skb->next)); 372 373 /* Free all of the segments. 
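 * queue_userspace_packet() does not consume the segment skbs, so walk the
 * list once more: consume_skb() on success, kfree_skb() if the loop above
 * bailed out with an error (err then reflects the failed segment).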
*/ 374 skb = segs; 375 do { 376 nskb = skb->next; 377 if (err) 378 kfree_skb(skb); 379 else 380 consume_skb(skb); 381 } while ((skb = nskb)); 382 return err; 383 } 384 385 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, 386 unsigned int hdrlen) 387 { 388 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 389 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 390 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */ 391 392 /* OVS_PACKET_ATTR_USERDATA */ 393 if (upcall_info->userdata) 394 size += NLA_ALIGN(upcall_info->userdata->nla_len); 395 396 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ 397 if (upcall_info->egress_tun_info) 398 size += nla_total_size(ovs_tun_key_attr_size()); 399 400 return size; 401 } 402 403 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 404 const struct sw_flow_key *key, 405 const struct dp_upcall_info *upcall_info) 406 { 407 struct ovs_header *upcall; 408 struct sk_buff *nskb = NULL; 409 struct sk_buff *user_skb = NULL; /* to be queued to userspace */ 410 struct nlattr *nla; 411 struct genl_info info = { 412 .dst_sk = ovs_dp_get_net(dp)->genl_sock, 413 .snd_portid = upcall_info->portid, 414 }; 415 size_t len; 416 unsigned int hlen; 417 int err, dp_ifindex; 418 419 dp_ifindex = get_dpifindex(dp); 420 if (!dp_ifindex) 421 return -ENODEV; 422 423 if (vlan_tx_tag_present(skb)) { 424 nskb = skb_clone(skb, GFP_ATOMIC); 425 if (!nskb) 426 return -ENOMEM; 427 428 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); 429 if (!nskb) 430 return -ENOMEM; 431 432 nskb->vlan_tci = 0; 433 skb = nskb; 434 } 435 436 if (nla_attr_size(skb->len) > USHRT_MAX) { 437 err = -EFBIG; 438 goto out; 439 } 440 441 /* Complete checksum if needed */ 442 if (skb->ip_summed == CHECKSUM_PARTIAL && 443 (err = skb_checksum_help(skb))) 444 goto out; 445 446 /* Older versions of OVS user space enforce alignment of the last 447 * Netlink attribute to NLA_ALIGNTO which would require extensive 448 * padding logic. Only perform zerocopy if padding is not required. 
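 * With OVS_DP_F_UNALIGNED set, only the linear part reported by
 * skb_zerocopy_headlen() is copied and the rest of the packet is attached
 * as fragments by skb_zerocopy(); otherwise 'hlen' covers the whole packet
 * and the attribute is padded out to NLA_ALIGNTO further down.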
449 */ 450 if (dp->user_features & OVS_DP_F_UNALIGNED) 451 hlen = skb_zerocopy_headlen(skb); 452 else 453 hlen = skb->len; 454 455 len = upcall_msg_size(upcall_info, hlen); 456 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 457 if (!user_skb) { 458 err = -ENOMEM; 459 goto out; 460 } 461 462 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 463 0, upcall_info->cmd); 464 upcall->dp_ifindex = dp_ifindex; 465 466 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 467 err = ovs_nla_put_flow(key, key, user_skb); 468 BUG_ON(err); 469 nla_nest_end(user_skb, nla); 470 471 if (upcall_info->userdata) 472 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, 473 nla_len(upcall_info->userdata), 474 nla_data(upcall_info->userdata)); 475 476 if (upcall_info->egress_tun_info) { 477 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); 478 err = ovs_nla_put_egress_tunnel_key(user_skb, 479 upcall_info->egress_tun_info); 480 BUG_ON(err); 481 nla_nest_end(user_skb, nla); 482 } 483 484 /* Only reserve room for attribute header, packet data is added 485 * in skb_zerocopy() */ 486 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 487 err = -ENOBUFS; 488 goto out; 489 } 490 nla->nla_len = nla_attr_size(skb->len); 491 492 err = skb_zerocopy(user_skb, skb, skb->len, hlen); 493 if (err) 494 goto out; 495 496 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ 497 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { 498 size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len; 499 500 if (plen > 0) 501 memset(skb_put(user_skb, plen), 0, plen); 502 } 503 504 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 505 506 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); 507 user_skb = NULL; 508 out: 509 if (err) 510 skb_tx_error(skb); 511 kfree_skb(user_skb); 512 kfree_skb(nskb); 513 return err; 514 } 515 516 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 517 { 518 struct ovs_header *ovs_header = info->userhdr; 519 struct nlattr **a = info->attrs; 520 struct sw_flow_actions *acts; 521 struct sk_buff *packet; 522 struct sw_flow *flow; 523 struct sw_flow_actions *sf_acts; 524 struct datapath *dp; 525 struct ethhdr *eth; 526 struct vport *input_vport; 527 int len; 528 int err; 529 530 err = -EINVAL; 531 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 532 !a[OVS_PACKET_ATTR_ACTIONS]) 533 goto err; 534 535 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 536 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); 537 err = -ENOMEM; 538 if (!packet) 539 goto err; 540 skb_reserve(packet, NET_IP_ALIGN); 541 542 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); 543 544 skb_reset_mac_header(packet); 545 eth = eth_hdr(packet); 546 547 /* Normally, setting the skb 'protocol' field would be handled by a 548 * call to eth_type_trans(), but it assumes there's a sending 549 * device, which we may not have. */ 550 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) 551 packet->protocol = eth->h_proto; 552 else 553 packet->protocol = htons(ETH_P_802_2); 554 555 /* Build an sw_flow for sending this packet. 
*/ 556 flow = ovs_flow_alloc(); 557 err = PTR_ERR(flow); 558 if (IS_ERR(flow)) 559 goto err_kfree_skb; 560 561 err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, 562 &flow->key); 563 if (err) 564 goto err_flow_free; 565 566 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 567 &flow->key, &acts); 568 if (err) 569 goto err_flow_free; 570 571 rcu_assign_pointer(flow->sf_acts, acts); 572 OVS_CB(packet)->egress_tun_info = NULL; 573 packet->priority = flow->key.phy.priority; 574 packet->mark = flow->key.phy.skb_mark; 575 576 rcu_read_lock(); 577 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 578 err = -ENODEV; 579 if (!dp) 580 goto err_unlock; 581 582 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port); 583 if (!input_vport) 584 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL); 585 586 if (!input_vport) 587 goto err_unlock; 588 589 OVS_CB(packet)->input_vport = input_vport; 590 sf_acts = rcu_dereference(flow->sf_acts); 591 592 local_bh_disable(); 593 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); 594 local_bh_enable(); 595 rcu_read_unlock(); 596 597 ovs_flow_free(flow, false); 598 return err; 599 600 err_unlock: 601 rcu_read_unlock(); 602 err_flow_free: 603 ovs_flow_free(flow, false); 604 err_kfree_skb: 605 kfree_skb(packet); 606 err: 607 return err; 608 } 609 610 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 611 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, 612 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 613 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 614 }; 615 616 static const struct genl_ops dp_packet_genl_ops[] = { 617 { .cmd = OVS_PACKET_CMD_EXECUTE, 618 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 619 .policy = packet_policy, 620 .doit = ovs_packet_cmd_execute 621 } 622 }; 623 624 static struct genl_family dp_packet_genl_family = { 625 .id = GENL_ID_GENERATE, 626 .hdrsize = sizeof(struct ovs_header), 627 .name = OVS_PACKET_FAMILY, 628 .version = OVS_PACKET_VERSION, 629 .maxattr = OVS_PACKET_ATTR_MAX, 630 .netnsok = true, 631 .parallel_ops = true, 632 .ops = dp_packet_genl_ops, 633 .n_ops = ARRAY_SIZE(dp_packet_genl_ops), 634 }; 635 636 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, 637 struct ovs_dp_megaflow_stats *mega_stats) 638 { 639 int i; 640 641 memset(mega_stats, 0, sizeof(*mega_stats)); 642 643 stats->n_flows = ovs_flow_tbl_count(&dp->table); 644 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); 645 646 stats->n_hit = stats->n_missed = stats->n_lost = 0; 647 648 for_each_possible_cpu(i) { 649 const struct dp_stats_percpu *percpu_stats; 650 struct dp_stats_percpu local_stats; 651 unsigned int start; 652 653 percpu_stats = per_cpu_ptr(dp->stats_percpu, i); 654 655 do { 656 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); 657 local_stats = *percpu_stats; 658 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); 659 660 stats->n_hit += local_stats.n_hit; 661 stats->n_missed += local_stats.n_missed; 662 stats->n_lost += local_stats.n_lost; 663 mega_stats->n_mask_hit += local_stats.n_mask_hit; 664 } 665 } 666 667 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 668 { 669 return NLMSG_ALIGN(sizeof(struct ovs_header)) 670 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 671 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ 672 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 673 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 674 + nla_total_size(8) 
/* OVS_FLOW_ATTR_USED */ 675 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ 676 } 677 678 /* Called with ovs_mutex or RCU read lock. */ 679 static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, 680 struct sk_buff *skb) 681 { 682 struct nlattr *nla; 683 int err; 684 685 /* Fill flow key. */ 686 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 687 if (!nla) 688 return -EMSGSIZE; 689 690 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); 691 if (err) 692 return err; 693 694 nla_nest_end(skb, nla); 695 696 /* Fill flow mask. */ 697 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); 698 if (!nla) 699 return -EMSGSIZE; 700 701 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); 702 if (err) 703 return err; 704 705 nla_nest_end(skb, nla); 706 return 0; 707 } 708 709 /* Called with ovs_mutex or RCU read lock. */ 710 static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, 711 struct sk_buff *skb) 712 { 713 struct ovs_flow_stats stats; 714 __be16 tcp_flags; 715 unsigned long used; 716 717 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); 718 719 if (used && 720 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) 721 return -EMSGSIZE; 722 723 if (stats.n_packets && 724 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) 725 return -EMSGSIZE; 726 727 if ((u8)ntohs(tcp_flags) && 728 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) 729 return -EMSGSIZE; 730 731 return 0; 732 } 733 734 /* Called with ovs_mutex or RCU read lock. */ 735 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, 736 struct sk_buff *skb, int skb_orig_len) 737 { 738 struct nlattr *start; 739 int err; 740 741 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if 742 * this is the first flow to be dumped into 'skb'. This is unusual for 743 * Netlink but individual action lists can be longer than 744 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. 745 * The userspace caller can always fetch the actions separately if it 746 * really wants them. (Most userspace callers in fact don't care.) 747 * 748 * This can only fail for dump operations because the skb is always 749 * properly sized for single flows. 750 */ 751 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); 752 if (start) { 753 const struct sw_flow_actions *sf_acts; 754 755 sf_acts = rcu_dereference_ovsl(flow->sf_acts); 756 err = ovs_nla_put_actions(sf_acts->actions, 757 sf_acts->actions_len, skb); 758 759 if (!err) 760 nla_nest_end(skb, start); 761 else { 762 if (skb_orig_len) 763 return err; 764 765 nla_nest_cancel(skb, start); 766 } 767 } else if (skb_orig_len) { 768 return -EMSGSIZE; 769 } 770 771 return 0; 772 } 773 774 /* Called with ovs_mutex or RCU read lock. 
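 * Serializes one flow into 'skb': the match (key and mask), the stats and,
 * when they fit, the actions, using the three fill helpers above.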
*/ 775 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, 776 struct sk_buff *skb, u32 portid, 777 u32 seq, u32 flags, u8 cmd) 778 { 779 const int skb_orig_len = skb->len; 780 struct ovs_header *ovs_header; 781 int err; 782 783 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, 784 flags, cmd); 785 if (!ovs_header) 786 return -EMSGSIZE; 787 788 ovs_header->dp_ifindex = dp_ifindex; 789 790 err = ovs_flow_cmd_fill_match(flow, skb); 791 if (err) 792 goto error; 793 794 err = ovs_flow_cmd_fill_stats(flow, skb); 795 if (err) 796 goto error; 797 798 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); 799 if (err) 800 goto error; 801 802 return genlmsg_end(skb, ovs_header); 803 804 error: 805 genlmsg_cancel(skb, ovs_header); 806 return err; 807 } 808 809 /* May not be called with RCU read lock. */ 810 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, 811 struct genl_info *info, 812 bool always) 813 { 814 struct sk_buff *skb; 815 816 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) 817 return NULL; 818 819 skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); 820 if (!skb) 821 return ERR_PTR(-ENOMEM); 822 823 return skb; 824 } 825 826 /* Called with ovs_mutex. */ 827 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, 828 int dp_ifindex, 829 struct genl_info *info, u8 cmd, 830 bool always) 831 { 832 struct sk_buff *skb; 833 int retval; 834 835 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, 836 always); 837 if (IS_ERR_OR_NULL(skb)) 838 return skb; 839 840 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, 841 info->snd_portid, info->snd_seq, 0, 842 cmd); 843 BUG_ON(retval < 0); 844 return skb; 845 } 846 847 static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) 848 { 849 struct nlattr **a = info->attrs; 850 struct ovs_header *ovs_header = info->userhdr; 851 struct sw_flow *flow, *new_flow; 852 struct sw_flow_mask mask; 853 struct sk_buff *reply; 854 struct datapath *dp; 855 struct sw_flow_actions *acts; 856 struct sw_flow_match match; 857 int error; 858 859 /* Must have key and actions. */ 860 error = -EINVAL; 861 if (!a[OVS_FLOW_ATTR_KEY]) { 862 OVS_NLERR("Flow key attribute not present in new flow.\n"); 863 goto error; 864 } 865 if (!a[OVS_FLOW_ATTR_ACTIONS]) { 866 OVS_NLERR("Flow actions attribute not present in new flow.\n"); 867 goto error; 868 } 869 870 /* Most of the time we need to allocate a new flow, do it before 871 * locking. 872 */ 873 new_flow = ovs_flow_alloc(); 874 if (IS_ERR(new_flow)) { 875 error = PTR_ERR(new_flow); 876 goto error; 877 } 878 879 /* Extract key. */ 880 ovs_match_init(&match, &new_flow->unmasked_key, &mask); 881 error = ovs_nla_get_match(&match, 882 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 883 if (error) 884 goto err_kfree_flow; 885 886 ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); 887 888 /* Validate actions. 
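 * ovs_nla_copy_actions() verifies OVS_FLOW_ATTR_ACTIONS against the masked
 * key and copies it into a newly allocated sw_flow_actions; a failure here
 * means the actions cannot be guaranteed safe for every packet that the
 * (possibly wildcarded) key can match.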
*/ 889 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, 890 &acts); 891 if (error) { 892 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 893 goto err_kfree_flow; 894 } 895 896 reply = ovs_flow_cmd_alloc_info(acts, info, false); 897 if (IS_ERR(reply)) { 898 error = PTR_ERR(reply); 899 goto err_kfree_acts; 900 } 901 902 ovs_lock(); 903 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 904 if (unlikely(!dp)) { 905 error = -ENODEV; 906 goto err_unlock_ovs; 907 } 908 /* Check if this is a duplicate flow */ 909 flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); 910 if (likely(!flow)) { 911 rcu_assign_pointer(new_flow->sf_acts, acts); 912 913 /* Put flow in bucket. */ 914 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); 915 if (unlikely(error)) { 916 acts = NULL; 917 goto err_unlock_ovs; 918 } 919 920 if (unlikely(reply)) { 921 error = ovs_flow_cmd_fill_info(new_flow, 922 ovs_header->dp_ifindex, 923 reply, info->snd_portid, 924 info->snd_seq, 0, 925 OVS_FLOW_CMD_NEW); 926 BUG_ON(error < 0); 927 } 928 ovs_unlock(); 929 } else { 930 struct sw_flow_actions *old_acts; 931 932 /* Bail out if we're not allowed to modify an existing flow. 933 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 934 * because Generic Netlink treats the latter as a dump 935 * request. We also accept NLM_F_EXCL in case that bug ever 936 * gets fixed. 937 */ 938 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE 939 | NLM_F_EXCL))) { 940 error = -EEXIST; 941 goto err_unlock_ovs; 942 } 943 /* The unmasked key has to be the same for flow updates. */ 944 if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { 945 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 946 if (!flow) { 947 error = -ENOENT; 948 goto err_unlock_ovs; 949 } 950 } 951 /* Update actions. */ 952 old_acts = ovsl_dereference(flow->sf_acts); 953 rcu_assign_pointer(flow->sf_acts, acts); 954 955 if (unlikely(reply)) { 956 error = ovs_flow_cmd_fill_info(flow, 957 ovs_header->dp_ifindex, 958 reply, info->snd_portid, 959 info->snd_seq, 0, 960 OVS_FLOW_CMD_NEW); 961 BUG_ON(error < 0); 962 } 963 ovs_unlock(); 964 965 ovs_nla_free_flow_actions(old_acts); 966 ovs_flow_free(new_flow, false); 967 } 968 969 if (reply) 970 ovs_notify(&dp_flow_genl_family, reply, info); 971 return 0; 972 973 err_unlock_ovs: 974 ovs_unlock(); 975 kfree_skb(reply); 976 err_kfree_acts: 977 kfree(acts); 978 err_kfree_flow: 979 ovs_flow_free(new_flow, false); 980 error: 981 return error; 982 } 983 984 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. 
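 * The large struct sw_flow_key used for validation lives on this helper's
 * stack rather than on ovs_flow_cmd_set()'s, which keeps the caller's
 * frame below the warning limit.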
*/ 985 static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, 986 const struct sw_flow_key *key, 987 const struct sw_flow_mask *mask) 988 { 989 struct sw_flow_actions *acts; 990 struct sw_flow_key masked_key; 991 int error; 992 993 ovs_flow_mask_key(&masked_key, key, mask); 994 error = ovs_nla_copy_actions(a, &masked_key, &acts); 995 if (error) { 996 OVS_NLERR("Actions may not be safe on all matching packets.\n"); 997 return ERR_PTR(error); 998 } 999 1000 return acts; 1001 } 1002 1003 static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) 1004 { 1005 struct nlattr **a = info->attrs; 1006 struct ovs_header *ovs_header = info->userhdr; 1007 struct sw_flow_key key; 1008 struct sw_flow *flow; 1009 struct sw_flow_mask mask; 1010 struct sk_buff *reply = NULL; 1011 struct datapath *dp; 1012 struct sw_flow_actions *old_acts = NULL, *acts = NULL; 1013 struct sw_flow_match match; 1014 int error; 1015 1016 /* Extract key. */ 1017 error = -EINVAL; 1018 if (!a[OVS_FLOW_ATTR_KEY]) { 1019 OVS_NLERR("Flow key attribute not present in set flow.\n"); 1020 goto error; 1021 } 1022 1023 ovs_match_init(&match, &key, &mask); 1024 error = ovs_nla_get_match(&match, 1025 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 1026 if (error) 1027 goto error; 1028 1029 /* Validate actions. */ 1030 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1031 acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); 1032 if (IS_ERR(acts)) { 1033 error = PTR_ERR(acts); 1034 goto error; 1035 } 1036 1037 /* Can allocate before locking if have acts. */ 1038 reply = ovs_flow_cmd_alloc_info(acts, info, false); 1039 if (IS_ERR(reply)) { 1040 error = PTR_ERR(reply); 1041 goto err_kfree_acts; 1042 } 1043 } 1044 1045 ovs_lock(); 1046 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1047 if (unlikely(!dp)) { 1048 error = -ENODEV; 1049 goto err_unlock_ovs; 1050 } 1051 /* Check that the flow exists. */ 1052 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1053 if (unlikely(!flow)) { 1054 error = -ENOENT; 1055 goto err_unlock_ovs; 1056 } 1057 1058 /* Update actions, if present. */ 1059 if (likely(acts)) { 1060 old_acts = ovsl_dereference(flow->sf_acts); 1061 rcu_assign_pointer(flow->sf_acts, acts); 1062 1063 if (unlikely(reply)) { 1064 error = ovs_flow_cmd_fill_info(flow, 1065 ovs_header->dp_ifindex, 1066 reply, info->snd_portid, 1067 info->snd_seq, 0, 1068 OVS_FLOW_CMD_NEW); 1069 BUG_ON(error < 0); 1070 } 1071 } else { 1072 /* Could not alloc without acts before locking. */ 1073 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, 1074 info, OVS_FLOW_CMD_NEW, false); 1075 if (unlikely(IS_ERR(reply))) { 1076 error = PTR_ERR(reply); 1077 goto err_unlock_ovs; 1078 } 1079 } 1080 1081 /* Clear stats. 
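 * OVS_FLOW_ATTR_CLEAR is a plain flag attribute; when present, only the
 * per-flow statistics are reset, while the flow and any actions installed
 * above stay in place.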
*/ 1082 if (a[OVS_FLOW_ATTR_CLEAR]) 1083 ovs_flow_stats_clear(flow); 1084 ovs_unlock(); 1085 1086 if (reply) 1087 ovs_notify(&dp_flow_genl_family, reply, info); 1088 if (old_acts) 1089 ovs_nla_free_flow_actions(old_acts); 1090 1091 return 0; 1092 1093 err_unlock_ovs: 1094 ovs_unlock(); 1095 kfree_skb(reply); 1096 err_kfree_acts: 1097 kfree(acts); 1098 error: 1099 return error; 1100 } 1101 1102 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) 1103 { 1104 struct nlattr **a = info->attrs; 1105 struct ovs_header *ovs_header = info->userhdr; 1106 struct sw_flow_key key; 1107 struct sk_buff *reply; 1108 struct sw_flow *flow; 1109 struct datapath *dp; 1110 struct sw_flow_match match; 1111 int err; 1112 1113 if (!a[OVS_FLOW_ATTR_KEY]) { 1114 OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); 1115 return -EINVAL; 1116 } 1117 1118 ovs_match_init(&match, &key, NULL); 1119 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1120 if (err) 1121 return err; 1122 1123 ovs_lock(); 1124 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1125 if (!dp) { 1126 err = -ENODEV; 1127 goto unlock; 1128 } 1129 1130 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1131 if (!flow) { 1132 err = -ENOENT; 1133 goto unlock; 1134 } 1135 1136 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, 1137 OVS_FLOW_CMD_NEW, true); 1138 if (IS_ERR(reply)) { 1139 err = PTR_ERR(reply); 1140 goto unlock; 1141 } 1142 1143 ovs_unlock(); 1144 return genlmsg_reply(reply, info); 1145 unlock: 1146 ovs_unlock(); 1147 return err; 1148 } 1149 1150 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1151 { 1152 struct nlattr **a = info->attrs; 1153 struct ovs_header *ovs_header = info->userhdr; 1154 struct sw_flow_key key; 1155 struct sk_buff *reply; 1156 struct sw_flow *flow; 1157 struct datapath *dp; 1158 struct sw_flow_match match; 1159 int err; 1160 1161 if (likely(a[OVS_FLOW_ATTR_KEY])) { 1162 ovs_match_init(&match, &key, NULL); 1163 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1164 if (unlikely(err)) 1165 return err; 1166 } 1167 1168 ovs_lock(); 1169 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1170 if (unlikely(!dp)) { 1171 err = -ENODEV; 1172 goto unlock; 1173 } 1174 1175 if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { 1176 err = ovs_flow_tbl_flush(&dp->table); 1177 goto unlock; 1178 } 1179 1180 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 1181 if (unlikely(!flow)) { 1182 err = -ENOENT; 1183 goto unlock; 1184 } 1185 1186 ovs_flow_tbl_remove(&dp->table, flow); 1187 ovs_unlock(); 1188 1189 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, 1190 info, false); 1191 if (likely(reply)) { 1192 if (likely(!IS_ERR(reply))) { 1193 rcu_read_lock(); /*To keep RCU checker happy. 
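 * The flow was already removed from the table under ovs_mutex and will not
 * be freed before ovs_flow_free() below, but ovs_flow_cmd_fill_info() goes
 * through rcu_dereference_ovsl(), so hold the RCU read lock for lockdep's
 * benefit.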
*/ 1194 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, 1195 reply, info->snd_portid, 1196 info->snd_seq, 0, 1197 OVS_FLOW_CMD_DEL); 1198 rcu_read_unlock(); 1199 BUG_ON(err < 0); 1200 1201 ovs_notify(&dp_flow_genl_family, reply, info); 1202 } else { 1203 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); 1204 } 1205 } 1206 1207 ovs_flow_free(flow, true); 1208 return 0; 1209 unlock: 1210 ovs_unlock(); 1211 return err; 1212 } 1213 1214 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1215 { 1216 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1217 struct table_instance *ti; 1218 struct datapath *dp; 1219 1220 rcu_read_lock(); 1221 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 1222 if (!dp) { 1223 rcu_read_unlock(); 1224 return -ENODEV; 1225 } 1226 1227 ti = rcu_dereference(dp->table.ti); 1228 for (;;) { 1229 struct sw_flow *flow; 1230 u32 bucket, obj; 1231 1232 bucket = cb->args[0]; 1233 obj = cb->args[1]; 1234 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); 1235 if (!flow) 1236 break; 1237 1238 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, 1239 NETLINK_CB(cb->skb).portid, 1240 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1241 OVS_FLOW_CMD_NEW) < 0) 1242 break; 1243 1244 cb->args[0] = bucket; 1245 cb->args[1] = obj; 1246 } 1247 rcu_read_unlock(); 1248 return skb->len; 1249 } 1250 1251 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { 1252 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, 1253 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, 1254 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, 1255 }; 1256 1257 static const struct genl_ops dp_flow_genl_ops[] = { 1258 { .cmd = OVS_FLOW_CMD_NEW, 1259 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1260 .policy = flow_policy, 1261 .doit = ovs_flow_cmd_new 1262 }, 1263 { .cmd = OVS_FLOW_CMD_DEL, 1264 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1265 .policy = flow_policy, 1266 .doit = ovs_flow_cmd_del 1267 }, 1268 { .cmd = OVS_FLOW_CMD_GET, 1269 .flags = 0, /* OK for unprivileged users. */ 1270 .policy = flow_policy, 1271 .doit = ovs_flow_cmd_get, 1272 .dumpit = ovs_flow_cmd_dump 1273 }, 1274 { .cmd = OVS_FLOW_CMD_SET, 1275 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1276 .policy = flow_policy, 1277 .doit = ovs_flow_cmd_set, 1278 }, 1279 }; 1280 1281 static struct genl_family dp_flow_genl_family = { 1282 .id = GENL_ID_GENERATE, 1283 .hdrsize = sizeof(struct ovs_header), 1284 .name = OVS_FLOW_FAMILY, 1285 .version = OVS_FLOW_VERSION, 1286 .maxattr = OVS_FLOW_ATTR_MAX, 1287 .netnsok = true, 1288 .parallel_ops = true, 1289 .ops = dp_flow_genl_ops, 1290 .n_ops = ARRAY_SIZE(dp_flow_genl_ops), 1291 .mcgrps = &ovs_dp_flow_multicast_group, 1292 .n_mcgrps = 1, 1293 }; 1294 1295 static size_t ovs_dp_cmd_msg_size(void) 1296 { 1297 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); 1298 1299 msgsize += nla_total_size(IFNAMSIZ); 1300 msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); 1301 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); 1302 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ 1303 1304 return msgsize; 1305 } 1306 1307 /* Called with ovs_mutex or RCU read lock. 
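 * Builds one OVS_DP_CMD_* message for 'dp': interface name, aggregated
 * ovs_dp_stats/ovs_dp_megaflow_stats from get_dp_stats() and the
 * user_features bits.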
*/ 1308 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1309 u32 portid, u32 seq, u32 flags, u8 cmd) 1310 { 1311 struct ovs_header *ovs_header; 1312 struct ovs_dp_stats dp_stats; 1313 struct ovs_dp_megaflow_stats dp_megaflow_stats; 1314 int err; 1315 1316 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, 1317 flags, cmd); 1318 if (!ovs_header) 1319 goto error; 1320 1321 ovs_header->dp_ifindex = get_dpifindex(dp); 1322 1323 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); 1324 if (err) 1325 goto nla_put_failure; 1326 1327 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); 1328 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), 1329 &dp_stats)) 1330 goto nla_put_failure; 1331 1332 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, 1333 sizeof(struct ovs_dp_megaflow_stats), 1334 &dp_megaflow_stats)) 1335 goto nla_put_failure; 1336 1337 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) 1338 goto nla_put_failure; 1339 1340 return genlmsg_end(skb, ovs_header); 1341 1342 nla_put_failure: 1343 genlmsg_cancel(skb, ovs_header); 1344 error: 1345 return -EMSGSIZE; 1346 } 1347 1348 static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) 1349 { 1350 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); 1351 } 1352 1353 /* Called with rcu_read_lock or ovs_mutex. */ 1354 static struct datapath *lookup_datapath(struct net *net, 1355 struct ovs_header *ovs_header, 1356 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1357 { 1358 struct datapath *dp; 1359 1360 if (!a[OVS_DP_ATTR_NAME]) 1361 dp = get_dp(net, ovs_header->dp_ifindex); 1362 else { 1363 struct vport *vport; 1364 1365 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); 1366 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; 1367 } 1368 return dp ? dp : ERR_PTR(-ENODEV); 1369 } 1370 1371 static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) 1372 { 1373 struct datapath *dp; 1374 1375 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1376 if (IS_ERR(dp)) 1377 return; 1378 1379 WARN(dp->user_features, "Dropping previously announced user features\n"); 1380 dp->user_features = 0; 1381 } 1382 1383 static void ovs_dp_change(struct datapath *dp, struct nlattr **a) 1384 { 1385 if (a[OVS_DP_ATTR_USER_FEATURES]) 1386 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); 1387 } 1388 1389 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) 1390 { 1391 struct nlattr **a = info->attrs; 1392 struct vport_parms parms; 1393 struct sk_buff *reply; 1394 struct datapath *dp; 1395 struct vport *vport; 1396 struct ovs_net *ovs_net; 1397 int err, i; 1398 1399 err = -EINVAL; 1400 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1401 goto err; 1402 1403 reply = ovs_dp_cmd_alloc_info(info); 1404 if (!reply) 1405 return -ENOMEM; 1406 1407 err = -ENOMEM; 1408 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1409 if (dp == NULL) 1410 goto err_free_reply; 1411 1412 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1413 1414 /* Allocate table. 
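 * The flow table, the per-CPU stats and the vport hash buckets are all set
 * up before ovs_lock() is taken below, so the locked region only needs to
 * create the local port and add the datapath to the per-netns list.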
*/ 1415 err = ovs_flow_tbl_init(&dp->table); 1416 if (err) 1417 goto err_free_dp; 1418 1419 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); 1420 if (!dp->stats_percpu) { 1421 err = -ENOMEM; 1422 goto err_destroy_table; 1423 } 1424 1425 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 1426 GFP_KERNEL); 1427 if (!dp->ports) { 1428 err = -ENOMEM; 1429 goto err_destroy_percpu; 1430 } 1431 1432 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1433 INIT_HLIST_HEAD(&dp->ports[i]); 1434 1435 /* Set up our datapath device. */ 1436 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1437 parms.type = OVS_VPORT_TYPE_INTERNAL; 1438 parms.options = NULL; 1439 parms.dp = dp; 1440 parms.port_no = OVSP_LOCAL; 1441 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; 1442 1443 ovs_dp_change(dp, a); 1444 1445 /* So far only local changes have been made, now need the lock. */ 1446 ovs_lock(); 1447 1448 vport = new_vport(&parms); 1449 if (IS_ERR(vport)) { 1450 err = PTR_ERR(vport); 1451 if (err == -EBUSY) 1452 err = -EEXIST; 1453 1454 if (err == -EEXIST) { 1455 /* An outdated user space instance that does not understand 1456 * the concept of user_features has attempted to create a new 1457 * datapath and is likely to reuse it. Drop all user features. 1458 */ 1459 if (info->genlhdr->version < OVS_DP_VER_FEATURES) 1460 ovs_dp_reset_user_features(skb, info); 1461 } 1462 1463 goto err_destroy_ports_array; 1464 } 1465 1466 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1467 info->snd_seq, 0, OVS_DP_CMD_NEW); 1468 BUG_ON(err < 0); 1469 1470 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1471 list_add_tail_rcu(&dp->list_node, &ovs_net->dps); 1472 1473 ovs_unlock(); 1474 1475 ovs_notify(&dp_datapath_genl_family, reply, info); 1476 return 0; 1477 1478 err_destroy_ports_array: 1479 ovs_unlock(); 1480 kfree(dp->ports); 1481 err_destroy_percpu: 1482 free_percpu(dp->stats_percpu); 1483 err_destroy_table: 1484 ovs_flow_tbl_destroy(&dp->table); 1485 err_free_dp: 1486 release_net(ovs_dp_get_net(dp)); 1487 kfree(dp); 1488 err_free_reply: 1489 kfree_skb(reply); 1490 err: 1491 return err; 1492 } 1493 1494 /* Called with ovs_mutex. */ 1495 static void __dp_destroy(struct datapath *dp) 1496 { 1497 int i; 1498 1499 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1500 struct vport *vport; 1501 struct hlist_node *n; 1502 1503 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) 1504 if (vport->port_no != OVSP_LOCAL) 1505 ovs_dp_detach_port(vport); 1506 } 1507 1508 list_del_rcu(&dp->list_node); 1509 1510 /* OVSP_LOCAL is datapath internal port. We need to make sure that 1511 * all ports in datapath are destroyed first before freeing datapath. 
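 * It is therefore detached last, after the loop above has removed every
 * other port, and the datapath structure itself is only freed from
 * destroy_dp_rcu() once a grace period has passed.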
1512 */ 1513 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); 1514 1515 /* RCU destroy the flow table */ 1516 call_rcu(&dp->rcu, destroy_dp_rcu); 1517 } 1518 1519 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) 1520 { 1521 struct sk_buff *reply; 1522 struct datapath *dp; 1523 int err; 1524 1525 reply = ovs_dp_cmd_alloc_info(info); 1526 if (!reply) 1527 return -ENOMEM; 1528 1529 ovs_lock(); 1530 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1531 err = PTR_ERR(dp); 1532 if (IS_ERR(dp)) 1533 goto err_unlock_free; 1534 1535 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1536 info->snd_seq, 0, OVS_DP_CMD_DEL); 1537 BUG_ON(err < 0); 1538 1539 __dp_destroy(dp); 1540 ovs_unlock(); 1541 1542 ovs_notify(&dp_datapath_genl_family, reply, info); 1543 1544 return 0; 1545 1546 err_unlock_free: 1547 ovs_unlock(); 1548 kfree_skb(reply); 1549 return err; 1550 } 1551 1552 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1553 { 1554 struct sk_buff *reply; 1555 struct datapath *dp; 1556 int err; 1557 1558 reply = ovs_dp_cmd_alloc_info(info); 1559 if (!reply) 1560 return -ENOMEM; 1561 1562 ovs_lock(); 1563 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1564 err = PTR_ERR(dp); 1565 if (IS_ERR(dp)) 1566 goto err_unlock_free; 1567 1568 ovs_dp_change(dp, info->attrs); 1569 1570 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1571 info->snd_seq, 0, OVS_DP_CMD_NEW); 1572 BUG_ON(err < 0); 1573 1574 ovs_unlock(); 1575 ovs_notify(&dp_datapath_genl_family, reply, info); 1576 1577 return 0; 1578 1579 err_unlock_free: 1580 ovs_unlock(); 1581 kfree_skb(reply); 1582 return err; 1583 } 1584 1585 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1586 { 1587 struct sk_buff *reply; 1588 struct datapath *dp; 1589 int err; 1590 1591 reply = ovs_dp_cmd_alloc_info(info); 1592 if (!reply) 1593 return -ENOMEM; 1594 1595 rcu_read_lock(); 1596 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1597 if (IS_ERR(dp)) { 1598 err = PTR_ERR(dp); 1599 goto err_unlock_free; 1600 } 1601 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1602 info->snd_seq, 0, OVS_DP_CMD_NEW); 1603 BUG_ON(err < 0); 1604 rcu_read_unlock(); 1605 1606 return genlmsg_reply(reply, info); 1607 1608 err_unlock_free: 1609 rcu_read_unlock(); 1610 kfree_skb(reply); 1611 return err; 1612 } 1613 1614 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1615 { 1616 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 1617 struct datapath *dp; 1618 int skip = cb->args[0]; 1619 int i = 0; 1620 1621 rcu_read_lock(); 1622 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { 1623 if (i >= skip && 1624 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1625 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1626 OVS_DP_CMD_NEW) < 0) 1627 break; 1628 i++; 1629 } 1630 rcu_read_unlock(); 1631 1632 cb->args[0] = i; 1633 1634 return skb->len; 1635 } 1636 1637 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { 1638 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1639 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1640 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, 1641 }; 1642 1643 static const struct genl_ops dp_datapath_genl_ops[] = { 1644 { .cmd = OVS_DP_CMD_NEW, 1645 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ 1646 .policy = datapath_policy, 1647 .doit = ovs_dp_cmd_new 1648 }, 1649 { .cmd = OVS_DP_CMD_DEL, 1650 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1651 .policy = datapath_policy, 1652 .doit = ovs_dp_cmd_del 1653 }, 1654 { .cmd = OVS_DP_CMD_GET, 1655 .flags = 0, /* OK for unprivileged users. */ 1656 .policy = datapath_policy, 1657 .doit = ovs_dp_cmd_get, 1658 .dumpit = ovs_dp_cmd_dump 1659 }, 1660 { .cmd = OVS_DP_CMD_SET, 1661 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1662 .policy = datapath_policy, 1663 .doit = ovs_dp_cmd_set, 1664 }, 1665 }; 1666 1667 static struct genl_family dp_datapath_genl_family = { 1668 .id = GENL_ID_GENERATE, 1669 .hdrsize = sizeof(struct ovs_header), 1670 .name = OVS_DATAPATH_FAMILY, 1671 .version = OVS_DATAPATH_VERSION, 1672 .maxattr = OVS_DP_ATTR_MAX, 1673 .netnsok = true, 1674 .parallel_ops = true, 1675 .ops = dp_datapath_genl_ops, 1676 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), 1677 .mcgrps = &ovs_dp_datapath_multicast_group, 1678 .n_mcgrps = 1, 1679 }; 1680 1681 /* Called with ovs_mutex or RCU read lock. */ 1682 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1683 u32 portid, u32 seq, u32 flags, u8 cmd) 1684 { 1685 struct ovs_header *ovs_header; 1686 struct ovs_vport_stats vport_stats; 1687 int err; 1688 1689 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, 1690 flags, cmd); 1691 if (!ovs_header) 1692 return -EMSGSIZE; 1693 1694 ovs_header->dp_ifindex = get_dpifindex(vport->dp); 1695 1696 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 1697 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 1698 nla_put_string(skb, OVS_VPORT_ATTR_NAME, 1699 vport->ops->get_name(vport))) 1700 goto nla_put_failure; 1701 1702 ovs_vport_get_stats(vport, &vport_stats); 1703 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), 1704 &vport_stats)) 1705 goto nla_put_failure; 1706 1707 if (ovs_vport_get_upcall_portids(vport, skb)) 1708 goto nla_put_failure; 1709 1710 err = ovs_vport_get_options(vport, skb); 1711 if (err == -EMSGSIZE) 1712 goto error; 1713 1714 return genlmsg_end(skb, ovs_header); 1715 1716 nla_put_failure: 1717 err = -EMSGSIZE; 1718 error: 1719 genlmsg_cancel(skb, ovs_header); 1720 return err; 1721 } 1722 1723 static struct sk_buff *ovs_vport_cmd_alloc_info(void) 1724 { 1725 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1726 } 1727 1728 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ 1729 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1730 u32 seq, u8 cmd) 1731 { 1732 struct sk_buff *skb; 1733 int retval; 1734 1735 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1736 if (!skb) 1737 return ERR_PTR(-ENOMEM); 1738 1739 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1740 BUG_ON(retval < 0); 1741 1742 return skb; 1743 } 1744 1745 /* Called with ovs_mutex or RCU read lock. 
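 * Resolves a vport either by OVS_VPORT_ATTR_NAME or by the datapath
 * ifindex plus OVS_VPORT_ATTR_PORT_NO, and returns an ERR_PTR() rather
 * than NULL on failure.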
*/ 1746 static struct vport *lookup_vport(struct net *net, 1747 struct ovs_header *ovs_header, 1748 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1749 { 1750 struct datapath *dp; 1751 struct vport *vport; 1752 1753 if (a[OVS_VPORT_ATTR_NAME]) { 1754 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 1755 if (!vport) 1756 return ERR_PTR(-ENODEV); 1757 if (ovs_header->dp_ifindex && 1758 ovs_header->dp_ifindex != get_dpifindex(vport->dp)) 1759 return ERR_PTR(-ENODEV); 1760 return vport; 1761 } else if (a[OVS_VPORT_ATTR_PORT_NO]) { 1762 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 1763 1764 if (port_no >= DP_MAX_PORTS) 1765 return ERR_PTR(-EFBIG); 1766 1767 dp = get_dp(net, ovs_header->dp_ifindex); 1768 if (!dp) 1769 return ERR_PTR(-ENODEV); 1770 1771 vport = ovs_vport_ovsl_rcu(dp, port_no); 1772 if (!vport) 1773 return ERR_PTR(-ENODEV); 1774 return vport; 1775 } else 1776 return ERR_PTR(-EINVAL); 1777 } 1778 1779 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 1780 { 1781 struct nlattr **a = info->attrs; 1782 struct ovs_header *ovs_header = info->userhdr; 1783 struct vport_parms parms; 1784 struct sk_buff *reply; 1785 struct vport *vport; 1786 struct datapath *dp; 1787 u32 port_no; 1788 int err; 1789 1790 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 1791 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1792 return -EINVAL; 1793 1794 port_no = a[OVS_VPORT_ATTR_PORT_NO] 1795 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 1796 if (port_no >= DP_MAX_PORTS) 1797 return -EFBIG; 1798 1799 reply = ovs_vport_cmd_alloc_info(); 1800 if (!reply) 1801 return -ENOMEM; 1802 1803 ovs_lock(); 1804 restart: 1805 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1806 err = -ENODEV; 1807 if (!dp) 1808 goto exit_unlock_free; 1809 1810 if (port_no) { 1811 vport = ovs_vport_ovsl(dp, port_no); 1812 err = -EBUSY; 1813 if (vport) 1814 goto exit_unlock_free; 1815 } else { 1816 for (port_no = 1; ; port_no++) { 1817 if (port_no >= DP_MAX_PORTS) { 1818 err = -EFBIG; 1819 goto exit_unlock_free; 1820 } 1821 vport = ovs_vport_ovsl(dp, port_no); 1822 if (!vport) 1823 break; 1824 } 1825 } 1826 1827 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); 1828 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); 1829 parms.options = a[OVS_VPORT_ATTR_OPTIONS]; 1830 parms.dp = dp; 1831 parms.port_no = port_no; 1832 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; 1833 1834 vport = new_vport(&parms); 1835 err = PTR_ERR(vport); 1836 if (IS_ERR(vport)) { 1837 if (err == -EAGAIN) 1838 goto restart; 1839 goto exit_unlock_free; 1840 } 1841 1842 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1843 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1844 BUG_ON(err < 0); 1845 ovs_unlock(); 1846 1847 ovs_notify(&dp_vport_genl_family, reply, info); 1848 return 0; 1849 1850 exit_unlock_free: 1851 ovs_unlock(); 1852 kfree_skb(reply); 1853 return err; 1854 } 1855 1856 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) 1857 { 1858 struct nlattr **a = info->attrs; 1859 struct sk_buff *reply; 1860 struct vport *vport; 1861 int err; 1862 1863 reply = ovs_vport_cmd_alloc_info(); 1864 if (!reply) 1865 return -ENOMEM; 1866 1867 ovs_lock(); 1868 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1869 err = PTR_ERR(vport); 1870 if (IS_ERR(vport)) 1871 goto exit_unlock_free; 1872 1873 if (a[OVS_VPORT_ATTR_TYPE] && 1874 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { 1875 err = -EINVAL; 1876 goto exit_unlock_free; 1877 } 1878 1879 if (a[OVS_VPORT_ATTR_OPTIONS]) { 1880 err = 
ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 1881 if (err) 1882 goto exit_unlock_free; 1883 } 1884 1885 1886 if (a[OVS_VPORT_ATTR_UPCALL_PID]) { 1887 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; 1888 1889 err = ovs_vport_set_upcall_portids(vport, ids); 1890 if (err) 1891 goto exit_unlock_free; 1892 } 1893 1894 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1895 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1896 BUG_ON(err < 0); 1897 1898 ovs_unlock(); 1899 ovs_notify(&dp_vport_genl_family, reply, info); 1900 return 0; 1901 1902 exit_unlock_free: 1903 ovs_unlock(); 1904 kfree_skb(reply); 1905 return err; 1906 } 1907 1908 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 1909 { 1910 struct nlattr **a = info->attrs; 1911 struct sk_buff *reply; 1912 struct vport *vport; 1913 int err; 1914 1915 reply = ovs_vport_cmd_alloc_info(); 1916 if (!reply) 1917 return -ENOMEM; 1918 1919 ovs_lock(); 1920 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1921 err = PTR_ERR(vport); 1922 if (IS_ERR(vport)) 1923 goto exit_unlock_free; 1924 1925 if (vport->port_no == OVSP_LOCAL) { 1926 err = -EINVAL; 1927 goto exit_unlock_free; 1928 } 1929 1930 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1931 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 1932 BUG_ON(err < 0); 1933 ovs_dp_detach_port(vport); 1934 ovs_unlock(); 1935 1936 ovs_notify(&dp_vport_genl_family, reply, info); 1937 return 0; 1938 1939 exit_unlock_free: 1940 ovs_unlock(); 1941 kfree_skb(reply); 1942 return err; 1943 } 1944 1945 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) 1946 { 1947 struct nlattr **a = info->attrs; 1948 struct ovs_header *ovs_header = info->userhdr; 1949 struct sk_buff *reply; 1950 struct vport *vport; 1951 int err; 1952 1953 reply = ovs_vport_cmd_alloc_info(); 1954 if (!reply) 1955 return -ENOMEM; 1956 1957 rcu_read_lock(); 1958 vport = lookup_vport(sock_net(skb->sk), ovs_header, a); 1959 err = PTR_ERR(vport); 1960 if (IS_ERR(vport)) 1961 goto exit_unlock_free; 1962 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1963 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1964 BUG_ON(err < 0); 1965 rcu_read_unlock(); 1966 1967 return genlmsg_reply(reply, info); 1968 1969 exit_unlock_free: 1970 rcu_read_unlock(); 1971 kfree_skb(reply); 1972 return err; 1973 } 1974 1975 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1976 { 1977 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1978 struct datapath *dp; 1979 int bucket = cb->args[0], skip = cb->args[1]; 1980 int i, j = 0; 1981 1982 rcu_read_lock(); 1983 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); 1984 if (!dp) { 1985 rcu_read_unlock(); 1986 return -ENODEV; 1987 } 1988 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { 1989 struct vport *vport; 1990 1991 j = 0; 1992 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 1993 if (j >= skip && 1994 ovs_vport_cmd_fill_info(vport, skb, 1995 NETLINK_CB(cb->skb).portid, 1996 cb->nlh->nlmsg_seq, 1997 NLM_F_MULTI, 1998 OVS_VPORT_CMD_NEW) < 0) 1999 goto out; 2000 2001 j++; 2002 } 2003 skip = 0; 2004 } 2005 out: 2006 rcu_read_unlock(); 2007 2008 cb->args[0] = i; 2009 cb->args[1] = j; 2010 2011 return skb->len; 2012 } 2013 2014 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { 2015 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 2016 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, 2017 [OVS_VPORT_ATTR_PORT_NO] = { .type 
= NLA_U32 }, 2018 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 2019 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 2020 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 2021 }; 2022 2023 static const struct genl_ops dp_vport_genl_ops[] = { 2024 { .cmd = OVS_VPORT_CMD_NEW, 2025 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2026 .policy = vport_policy, 2027 .doit = ovs_vport_cmd_new 2028 }, 2029 { .cmd = OVS_VPORT_CMD_DEL, 2030 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2031 .policy = vport_policy, 2032 .doit = ovs_vport_cmd_del 2033 }, 2034 { .cmd = OVS_VPORT_CMD_GET, 2035 .flags = 0, /* OK for unprivileged users. */ 2036 .policy = vport_policy, 2037 .doit = ovs_vport_cmd_get, 2038 .dumpit = ovs_vport_cmd_dump 2039 }, 2040 { .cmd = OVS_VPORT_CMD_SET, 2041 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2042 .policy = vport_policy, 2043 .doit = ovs_vport_cmd_set, 2044 }, 2045 }; 2046 2047 struct genl_family dp_vport_genl_family = { 2048 .id = GENL_ID_GENERATE, 2049 .hdrsize = sizeof(struct ovs_header), 2050 .name = OVS_VPORT_FAMILY, 2051 .version = OVS_VPORT_VERSION, 2052 .maxattr = OVS_VPORT_ATTR_MAX, 2053 .netnsok = true, 2054 .parallel_ops = true, 2055 .ops = dp_vport_genl_ops, 2056 .n_ops = ARRAY_SIZE(dp_vport_genl_ops), 2057 .mcgrps = &ovs_dp_vport_multicast_group, 2058 .n_mcgrps = 1, 2059 }; 2060 2061 static struct genl_family * const dp_genl_families[] = { 2062 &dp_datapath_genl_family, 2063 &dp_vport_genl_family, 2064 &dp_flow_genl_family, 2065 &dp_packet_genl_family, 2066 }; 2067 2068 static void dp_unregister_genl(int n_families) 2069 { 2070 int i; 2071 2072 for (i = 0; i < n_families; i++) 2073 genl_unregister_family(dp_genl_families[i]); 2074 } 2075 2076 static int dp_register_genl(void) 2077 { 2078 int err; 2079 int i; 2080 2081 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { 2082 2083 err = genl_register_family(dp_genl_families[i]); 2084 if (err) 2085 goto error; 2086 } 2087 2088 return 0; 2089 2090 error: 2091 dp_unregister_genl(i); 2092 return err; 2093 } 2094 2095 static int __net_init ovs_init_net(struct net *net) 2096 { 2097 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2098 2099 INIT_LIST_HEAD(&ovs_net->dps); 2100 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); 2101 return 0; 2102 } 2103 2104 static void __net_exit ovs_exit_net(struct net *net) 2105 { 2106 struct datapath *dp, *dp_next; 2107 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2108 2109 ovs_lock(); 2110 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2111 __dp_destroy(dp); 2112 ovs_unlock(); 2113 2114 cancel_work_sync(&ovs_net->dp_notify_work); 2115 } 2116 2117 static struct pernet_operations ovs_net_ops = { 2118 .init = ovs_init_net, 2119 .exit = ovs_exit_net, 2120 .id = &ovs_net_id, 2121 .size = sizeof(struct ovs_net), 2122 }; 2123 2124 static int __init dp_init(void) 2125 { 2126 int err; 2127 2128 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); 2129 2130 pr_info("Open vSwitch switching datapath\n"); 2131 2132 err = action_fifos_init(); 2133 if (err) 2134 goto error; 2135 2136 err = ovs_internal_dev_rtnl_link_register(); 2137 if (err) 2138 goto error_action_fifos_exit; 2139 2140 err = ovs_flow_init(); 2141 if (err) 2142 goto error_unreg_rtnl_link; 2143 2144 err = ovs_vport_init(); 2145 if (err) 2146 goto error_flow_exit; 2147 2148 err = register_pernet_device(&ovs_net_ops); 2149 if (err) 2150 goto error_vport_exit; 2151 2152 err = 
register_netdevice_notifier(&ovs_dp_device_notifier); 2153 if (err) 2154 goto error_netns_exit; 2155 2156 err = ovs_netdev_init(); 2157 if (err) 2158 goto error_unreg_notifier; 2159 2160 err = dp_register_genl(); 2161 if (err < 0) 2162 goto error_unreg_netdev; 2163 2164 return 0; 2165 2166 error_unreg_netdev: 2167 ovs_netdev_exit(); 2168 error_unreg_notifier: 2169 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2170 error_netns_exit: 2171 unregister_pernet_device(&ovs_net_ops); 2172 error_vport_exit: 2173 ovs_vport_exit(); 2174 error_flow_exit: 2175 ovs_flow_exit(); 2176 error_unreg_rtnl_link: 2177 ovs_internal_dev_rtnl_link_unregister(); 2178 error_action_fifos_exit: 2179 action_fifos_exit(); 2180 error: 2181 return err; 2182 } 2183 2184 static void dp_cleanup(void) 2185 { 2186 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2187 ovs_netdev_exit(); 2188 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2189 unregister_pernet_device(&ovs_net_ops); 2190 rcu_barrier(); 2191 ovs_vport_exit(); 2192 ovs_flow_exit(); 2193 ovs_internal_dev_rtnl_link_unregister(); 2194 action_fifos_exit(); 2195 } 2196 2197 module_init(dp_init); 2198 module_exit(dp_cleanup); 2199 2200 MODULE_DESCRIPTION("Open vSwitch switching datapath"); 2201 MODULE_LICENSE("GPL"); 2202
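
/*
 * Userspace drives this module through the four generic netlink families
 * registered above (OVS_DATAPATH_FAMILY, OVS_VPORT_FAMILY, OVS_FLOW_FAMILY
 * and OVS_PACKET_FAMILY).  As a rough, illustrative sketch only (libnl-style
 * pseudo code, not part of this build), creating a datapath boils down to an
 * OVS_DP_CMD_NEW request carrying the two mandatory attributes checked in
 * ovs_dp_cmd_new():
 *
 *	genlmsg_put(msg, ..., ovs_datapath_family_id,
 *		    sizeof(struct ovs_header), NLM_F_REQUEST | NLM_F_ACK,
 *		    OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
 *	nla_put_string(msg, OVS_DP_ATTR_NAME, "ovs-system");
 *	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, upcall_portid);
 *
 * and the kernel answers with the message built by ovs_dp_cmd_fill_info().
 */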