1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2017 Nicira, Inc. 4 */ 5 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8 #include <linux/if.h> 9 #include <linux/skbuff.h> 10 #include <linux/ip.h> 11 #include <linux/kernel.h> 12 #include <linux/openvswitch.h> 13 #include <linux/netlink.h> 14 #include <linux/rculist.h> 15 16 #include <net/netlink.h> 17 #include <net/genetlink.h> 18 19 #include "datapath.h" 20 #include "meter.h" 21 22 #define METER_HASH_BUCKETS 1024 23 24 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { 25 [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, 26 [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, 27 [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, 28 [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, 29 [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, 30 [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, 31 [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, 32 [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, 33 }; 34 35 static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { 36 [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, 37 [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, 38 [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, 39 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, 40 }; 41 42 static void ovs_meter_free(struct dp_meter *meter) 43 { 44 if (!meter) 45 return; 46 47 kfree_rcu(meter, rcu); 48 } 49 50 static struct hlist_head *meter_hash_bucket(const struct datapath *dp, 51 u32 meter_id) 52 { 53 return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; 54 } 55 56 /* Call with ovs_mutex or RCU read lock. */ 57 static struct dp_meter *lookup_meter(const struct datapath *dp, 58 u32 meter_id) 59 { 60 struct dp_meter *meter; 61 struct hlist_head *head; 62 63 head = meter_hash_bucket(dp, meter_id); 64 hlist_for_each_entry_rcu(meter, head, dp_hash_node) { 65 if (meter->id == meter_id) 66 return meter; 67 } 68 return NULL; 69 } 70 71 static void attach_meter(struct datapath *dp, struct dp_meter *meter) 72 { 73 struct hlist_head *head = meter_hash_bucket(dp, meter->id); 74 75 hlist_add_head_rcu(&meter->dp_hash_node, head); 76 } 77 78 static void detach_meter(struct dp_meter *meter) 79 { 80 ASSERT_OVSL(); 81 if (meter) 82 hlist_del_rcu(&meter->dp_hash_node); 83 } 84 85 static struct sk_buff * 86 ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, 87 struct ovs_header **ovs_reply_header) 88 { 89 struct sk_buff *skb; 90 struct ovs_header *ovs_header = info->userhdr; 91 92 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 93 if (!skb) 94 return ERR_PTR(-ENOMEM); 95 96 *ovs_reply_header = genlmsg_put(skb, info->snd_portid, 97 info->snd_seq, 98 &dp_meter_genl_family, 0, cmd); 99 if (!*ovs_reply_header) { 100 nlmsg_free(skb); 101 return ERR_PTR(-EMSGSIZE); 102 } 103 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; 104 105 return skb; 106 } 107 108 static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, 109 struct dp_meter *meter) 110 { 111 struct nlattr *nla; 112 struct dp_meter_band *band; 113 u16 i; 114 115 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) 116 goto error; 117 118 if (!meter) 119 return 0; 120 121 if (nla_put(reply, OVS_METER_ATTR_STATS, 122 sizeof(struct ovs_flow_stats), &meter->stats) || 123 nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, 124 OVS_METER_ATTR_PAD)) 125 goto error; 126 127 nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); 128 if (!nla) 129 goto error; 130 131 band = meter->bands; 132 133 for (i = 0; i < meter->n_bands; ++i, ++band) { 134 struct nlattr *band_nla; 135 136 band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); 137 if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, 138 sizeof(struct ovs_flow_stats), 139 &band->stats)) 140 goto error; 141 nla_nest_end(reply, band_nla); 142 } 143 nla_nest_end(reply, nla); 144 145 return 0; 146 error: 147 return -EMSGSIZE; 148 } 149 150 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) 151 { 152 struct sk_buff *reply; 153 struct ovs_header *ovs_reply_header; 154 struct nlattr *nla, *band_nla; 155 int err; 156 157 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, 158 &ovs_reply_header); 159 if (IS_ERR(reply)) 160 return PTR_ERR(reply); 161 162 if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || 163 nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) 164 goto nla_put_failure; 165 166 nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); 167 if (!nla) 168 goto nla_put_failure; 169 170 band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); 171 if (!band_nla) 172 goto nla_put_failure; 173 /* Currently only DROP band type is supported. */ 174 if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) 175 goto nla_put_failure; 176 nla_nest_end(reply, band_nla); 177 nla_nest_end(reply, nla); 178 179 genlmsg_end(reply, ovs_reply_header); 180 return genlmsg_reply(reply, info); 181 182 nla_put_failure: 183 nlmsg_free(reply); 184 err = -EMSGSIZE; 185 return err; 186 } 187 188 static struct dp_meter *dp_meter_create(struct nlattr **a) 189 { 190 struct nlattr *nla; 191 int rem; 192 u16 n_bands = 0; 193 struct dp_meter *meter; 194 struct dp_meter_band *band; 195 int err; 196 197 /* Validate attributes, count the bands. */ 198 if (!a[OVS_METER_ATTR_BANDS]) 199 return ERR_PTR(-EINVAL); 200 201 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) 202 if (++n_bands > DP_MAX_BANDS) 203 return ERR_PTR(-EINVAL); 204 205 /* Allocate and set up the meter before locking anything. */ 206 meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); 207 if (!meter) 208 return ERR_PTR(-ENOMEM); 209 210 meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); 211 meter->used = div_u64(ktime_get_ns(), 1000 * 1000); 212 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; 213 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; 214 spin_lock_init(&meter->lock); 215 if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { 216 meter->stats = *(struct ovs_flow_stats *) 217 nla_data(a[OVS_METER_ATTR_STATS]); 218 } 219 meter->n_bands = n_bands; 220 221 /* Set up meter bands. */ 222 band = meter->bands; 223 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { 224 struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; 225 u32 band_max_delta_t; 226 227 err = nla_parse_deprecated((struct nlattr **)&attr, 228 OVS_BAND_ATTR_MAX, nla_data(nla), 229 nla_len(nla), band_policy, NULL); 230 if (err) 231 goto exit_free_meter; 232 233 if (!attr[OVS_BAND_ATTR_TYPE] || 234 !attr[OVS_BAND_ATTR_RATE] || 235 !attr[OVS_BAND_ATTR_BURST]) { 236 err = -EINVAL; 237 goto exit_free_meter; 238 } 239 240 band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); 241 band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); 242 if (band->rate == 0) { 243 err = -EINVAL; 244 goto exit_free_meter; 245 } 246 247 band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); 248 /* Figure out max delta_t that is enough to fill any bucket. 249 * Keep max_delta_t size to the bucket units: 250 * pkts => 1/1000 packets, kilobits => bits. 251 * 252 * Start with a full bucket. 253 */ 254 band->bucket = (band->burst_size + band->rate) * 1000; 255 band_max_delta_t = band->bucket / band->rate; 256 if (band_max_delta_t > meter->max_delta_t) 257 meter->max_delta_t = band_max_delta_t; 258 band++; 259 } 260 261 return meter; 262 263 exit_free_meter: 264 kfree(meter); 265 return ERR_PTR(err); 266 } 267 268 static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) 269 { 270 struct nlattr **a = info->attrs; 271 struct dp_meter *meter, *old_meter; 272 struct sk_buff *reply; 273 struct ovs_header *ovs_reply_header; 274 struct ovs_header *ovs_header = info->userhdr; 275 struct datapath *dp; 276 int err; 277 u32 meter_id; 278 bool failed; 279 280 if (!a[OVS_METER_ATTR_ID]) { 281 return -ENODEV; 282 } 283 284 meter = dp_meter_create(a); 285 if (IS_ERR_OR_NULL(meter)) 286 return PTR_ERR(meter); 287 288 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, 289 &ovs_reply_header); 290 if (IS_ERR(reply)) { 291 err = PTR_ERR(reply); 292 goto exit_free_meter; 293 } 294 295 ovs_lock(); 296 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 297 if (!dp) { 298 err = -ENODEV; 299 goto exit_unlock; 300 } 301 302 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 303 304 /* Cannot fail after this. */ 305 old_meter = lookup_meter(dp, meter_id); 306 detach_meter(old_meter); 307 attach_meter(dp, meter); 308 ovs_unlock(); 309 310 /* Build response with the meter_id and stats from 311 * the old meter, if any. 312 */ 313 failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); 314 WARN_ON(failed); 315 if (old_meter) { 316 spin_lock_bh(&old_meter->lock); 317 if (old_meter->keep_stats) { 318 err = ovs_meter_cmd_reply_stats(reply, meter_id, 319 old_meter); 320 WARN_ON(err); 321 } 322 spin_unlock_bh(&old_meter->lock); 323 ovs_meter_free(old_meter); 324 } 325 326 genlmsg_end(reply, ovs_reply_header); 327 return genlmsg_reply(reply, info); 328 329 exit_unlock: 330 ovs_unlock(); 331 nlmsg_free(reply); 332 exit_free_meter: 333 kfree(meter); 334 return err; 335 } 336 337 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) 338 { 339 struct nlattr **a = info->attrs; 340 u32 meter_id; 341 struct ovs_header *ovs_header = info->userhdr; 342 struct ovs_header *ovs_reply_header; 343 struct datapath *dp; 344 int err; 345 struct sk_buff *reply; 346 struct dp_meter *meter; 347 348 if (!a[OVS_METER_ATTR_ID]) 349 return -EINVAL; 350 351 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 352 353 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, 354 &ovs_reply_header); 355 if (IS_ERR(reply)) 356 return PTR_ERR(reply); 357 358 ovs_lock(); 359 360 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 361 if (!dp) { 362 err = -ENODEV; 363 goto exit_unlock; 364 } 365 366 /* Locate meter, copy stats. */ 367 meter = lookup_meter(dp, meter_id); 368 if (!meter) { 369 err = -ENOENT; 370 goto exit_unlock; 371 } 372 373 spin_lock_bh(&meter->lock); 374 err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); 375 spin_unlock_bh(&meter->lock); 376 if (err) 377 goto exit_unlock; 378 379 ovs_unlock(); 380 381 genlmsg_end(reply, ovs_reply_header); 382 return genlmsg_reply(reply, info); 383 384 exit_unlock: 385 ovs_unlock(); 386 nlmsg_free(reply); 387 return err; 388 } 389 390 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) 391 { 392 struct nlattr **a = info->attrs; 393 u32 meter_id; 394 struct ovs_header *ovs_header = info->userhdr; 395 struct ovs_header *ovs_reply_header; 396 struct datapath *dp; 397 int err; 398 struct sk_buff *reply; 399 struct dp_meter *old_meter; 400 401 if (!a[OVS_METER_ATTR_ID]) 402 return -EINVAL; 403 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 404 405 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, 406 &ovs_reply_header); 407 if (IS_ERR(reply)) 408 return PTR_ERR(reply); 409 410 ovs_lock(); 411 412 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 413 if (!dp) { 414 err = -ENODEV; 415 goto exit_unlock; 416 } 417 418 old_meter = lookup_meter(dp, meter_id); 419 if (old_meter) { 420 spin_lock_bh(&old_meter->lock); 421 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); 422 WARN_ON(err); 423 spin_unlock_bh(&old_meter->lock); 424 detach_meter(old_meter); 425 } 426 ovs_unlock(); 427 ovs_meter_free(old_meter); 428 genlmsg_end(reply, ovs_reply_header); 429 return genlmsg_reply(reply, info); 430 431 exit_unlock: 432 ovs_unlock(); 433 nlmsg_free(reply); 434 return err; 435 } 436 437 /* Meter action execution. 438 * 439 * Return true 'meter_id' drop band is triggered. The 'skb' should be 440 * dropped by the caller'. 441 */ 442 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, 443 struct sw_flow_key *key, u32 meter_id) 444 { 445 struct dp_meter *meter; 446 struct dp_meter_band *band; 447 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); 448 long long int long_delta_ms; 449 u32 delta_ms; 450 u32 cost; 451 int i, band_exceeded_max = -1; 452 u32 band_exceeded_rate = 0; 453 454 meter = lookup_meter(dp, meter_id); 455 /* Do not drop the packet when there is no meter. */ 456 if (!meter) 457 return false; 458 459 /* Lock the meter while using it. */ 460 spin_lock(&meter->lock); 461 462 long_delta_ms = (now_ms - meter->used); /* ms */ 463 464 /* Make sure delta_ms will not be too large, so that bucket will not 465 * wrap around below. 466 */ 467 delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) 468 ? meter->max_delta_t : (u32)long_delta_ms; 469 470 /* Update meter statistics. 471 */ 472 meter->used = now_ms; 473 meter->stats.n_packets += 1; 474 meter->stats.n_bytes += skb->len; 475 476 /* Bucket rate is either in kilobits per second, or in packets per 477 * second. We maintain the bucket in the units of either bits or 478 * 1/1000th of a packet, correspondingly. 479 * Then, when rate is multiplied with milliseconds, we get the 480 * bucket units: 481 * msec * kbps = bits, and 482 * msec * packets/sec = 1/1000 packets. 483 * 484 * 'cost' is the number of bucket units in this packet. 485 */ 486 cost = (meter->kbps) ? skb->len * 8 : 1000; 487 488 /* Update all bands and find the one hit with the highest rate. */ 489 for (i = 0; i < meter->n_bands; ++i) { 490 long long int max_bucket_size; 491 492 band = &meter->bands[i]; 493 max_bucket_size = (band->burst_size + band->rate) * 1000LL; 494 495 band->bucket += delta_ms * band->rate; 496 if (band->bucket > max_bucket_size) 497 band->bucket = max_bucket_size; 498 499 if (band->bucket >= cost) { 500 band->bucket -= cost; 501 } else if (band->rate > band_exceeded_rate) { 502 band_exceeded_rate = band->rate; 503 band_exceeded_max = i; 504 } 505 } 506 507 if (band_exceeded_max >= 0) { 508 /* Update band statistics. */ 509 band = &meter->bands[band_exceeded_max]; 510 band->stats.n_packets += 1; 511 band->stats.n_bytes += skb->len; 512 513 /* Drop band triggered, let the caller drop the 'skb'. */ 514 if (band->type == OVS_METER_BAND_TYPE_DROP) { 515 spin_unlock(&meter->lock); 516 return true; 517 } 518 } 519 520 spin_unlock(&meter->lock); 521 return false; 522 } 523 524 static struct genl_ops dp_meter_genl_ops[] = { 525 { .cmd = OVS_METER_CMD_FEATURES, 526 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 527 .flags = 0, /* OK for unprivileged users. */ 528 .doit = ovs_meter_cmd_features 529 }, 530 { .cmd = OVS_METER_CMD_SET, 531 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 532 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 533 * privilege. 534 */ 535 .doit = ovs_meter_cmd_set, 536 }, 537 { .cmd = OVS_METER_CMD_GET, 538 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 539 .flags = 0, /* OK for unprivileged users. */ 540 .doit = ovs_meter_cmd_get, 541 }, 542 { .cmd = OVS_METER_CMD_DEL, 543 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 544 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 545 * privilege. 546 */ 547 .doit = ovs_meter_cmd_del 548 }, 549 }; 550 551 static const struct genl_multicast_group ovs_meter_multicast_group = { 552 .name = OVS_METER_MCGROUP, 553 }; 554 555 struct genl_family dp_meter_genl_family __ro_after_init = { 556 .hdrsize = sizeof(struct ovs_header), 557 .name = OVS_METER_FAMILY, 558 .version = OVS_METER_VERSION, 559 .maxattr = OVS_METER_ATTR_MAX, 560 .policy = meter_policy, 561 .netnsok = true, 562 .parallel_ops = true, 563 .ops = dp_meter_genl_ops, 564 .n_ops = ARRAY_SIZE(dp_meter_genl_ops), 565 .mcgrps = &ovs_meter_multicast_group, 566 .n_mcgrps = 1, 567 .module = THIS_MODULE, 568 }; 569 570 int ovs_meters_init(struct datapath *dp) 571 { 572 int i; 573 574 dp->meters = kmalloc_array(METER_HASH_BUCKETS, 575 sizeof(struct hlist_head), GFP_KERNEL); 576 577 if (!dp->meters) 578 return -ENOMEM; 579 580 for (i = 0; i < METER_HASH_BUCKETS; i++) 581 INIT_HLIST_HEAD(&dp->meters[i]); 582 583 return 0; 584 } 585 586 void ovs_meters_exit(struct datapath *dp) 587 { 588 int i; 589 590 for (i = 0; i < METER_HASH_BUCKETS; i++) { 591 struct hlist_head *head = &dp->meters[i]; 592 struct dp_meter *meter; 593 struct hlist_node *n; 594 595 hlist_for_each_entry_safe(meter, n, head, dp_hash_node) 596 kfree(meter); 597 } 598 599 kfree(dp->meters); 600 } 601