1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2017 Nicira, Inc. 4 */ 5 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8 #include <linux/if.h> 9 #include <linux/skbuff.h> 10 #include <linux/ip.h> 11 #include <linux/kernel.h> 12 #include <linux/openvswitch.h> 13 #include <linux/netlink.h> 14 #include <linux/rculist.h> 15 16 #include <net/netlink.h> 17 #include <net/genetlink.h> 18 19 #include "datapath.h" 20 #include "meter.h" 21 22 #define METER_HASH_BUCKETS 1024 23 24 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { 25 [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, 26 [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, 27 [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, 28 [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, 29 [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, 30 [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, 31 [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, 32 [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, 33 }; 34 35 static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { 36 [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, 37 [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, 38 [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, 39 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, 40 }; 41 42 static void ovs_meter_free(struct dp_meter *meter) 43 { 44 if (!meter) 45 return; 46 47 kfree_rcu(meter, rcu); 48 } 49 50 static struct hlist_head *meter_hash_bucket(const struct datapath *dp, 51 u32 meter_id) 52 { 53 return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; 54 } 55 56 /* Call with ovs_mutex or RCU read lock. */ 57 static struct dp_meter *lookup_meter(const struct datapath *dp, 58 u32 meter_id) 59 { 60 struct dp_meter *meter; 61 struct hlist_head *head; 62 63 head = meter_hash_bucket(dp, meter_id); 64 hlist_for_each_entry_rcu(meter, head, dp_hash_node, 65 lockdep_ovsl_is_held()) { 66 if (meter->id == meter_id) 67 return meter; 68 } 69 return NULL; 70 } 71 72 static void attach_meter(struct datapath *dp, struct dp_meter *meter) 73 { 74 struct hlist_head *head = meter_hash_bucket(dp, meter->id); 75 76 hlist_add_head_rcu(&meter->dp_hash_node, head); 77 } 78 79 static void detach_meter(struct dp_meter *meter) 80 { 81 ASSERT_OVSL(); 82 if (meter) 83 hlist_del_rcu(&meter->dp_hash_node); 84 } 85 86 static struct sk_buff * 87 ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, 88 struct ovs_header **ovs_reply_header) 89 { 90 struct sk_buff *skb; 91 struct ovs_header *ovs_header = info->userhdr; 92 93 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 94 if (!skb) 95 return ERR_PTR(-ENOMEM); 96 97 *ovs_reply_header = genlmsg_put(skb, info->snd_portid, 98 info->snd_seq, 99 &dp_meter_genl_family, 0, cmd); 100 if (!*ovs_reply_header) { 101 nlmsg_free(skb); 102 return ERR_PTR(-EMSGSIZE); 103 } 104 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; 105 106 return skb; 107 } 108 109 static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, 110 struct dp_meter *meter) 111 { 112 struct nlattr *nla; 113 struct dp_meter_band *band; 114 u16 i; 115 116 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) 117 goto error; 118 119 if (!meter) 120 return 0; 121 122 if (nla_put(reply, OVS_METER_ATTR_STATS, 123 sizeof(struct ovs_flow_stats), &meter->stats) || 124 nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, 125 OVS_METER_ATTR_PAD)) 126 goto error; 127 128 nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); 129 if (!nla) 130 goto error; 131 132 band = meter->bands; 133 134 for (i = 0; i < meter->n_bands; ++i, ++band) { 135 struct nlattr *band_nla; 136 137 band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); 138 if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, 139 sizeof(struct ovs_flow_stats), 140 &band->stats)) 141 goto error; 142 nla_nest_end(reply, band_nla); 143 } 144 nla_nest_end(reply, nla); 145 146 return 0; 147 error: 148 return -EMSGSIZE; 149 } 150 151 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) 152 { 153 struct sk_buff *reply; 154 struct ovs_header *ovs_reply_header; 155 struct nlattr *nla, *band_nla; 156 int err; 157 158 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, 159 &ovs_reply_header); 160 if (IS_ERR(reply)) 161 return PTR_ERR(reply); 162 163 if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || 164 nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) 165 goto nla_put_failure; 166 167 nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); 168 if (!nla) 169 goto nla_put_failure; 170 171 band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); 172 if (!band_nla) 173 goto nla_put_failure; 174 /* Currently only DROP band type is supported. */ 175 if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) 176 goto nla_put_failure; 177 nla_nest_end(reply, band_nla); 178 nla_nest_end(reply, nla); 179 180 genlmsg_end(reply, ovs_reply_header); 181 return genlmsg_reply(reply, info); 182 183 nla_put_failure: 184 nlmsg_free(reply); 185 err = -EMSGSIZE; 186 return err; 187 } 188 189 static struct dp_meter *dp_meter_create(struct nlattr **a) 190 { 191 struct nlattr *nla; 192 int rem; 193 u16 n_bands = 0; 194 struct dp_meter *meter; 195 struct dp_meter_band *band; 196 int err; 197 198 /* Validate attributes, count the bands. */ 199 if (!a[OVS_METER_ATTR_BANDS]) 200 return ERR_PTR(-EINVAL); 201 202 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) 203 if (++n_bands > DP_MAX_BANDS) 204 return ERR_PTR(-EINVAL); 205 206 /* Allocate and set up the meter before locking anything. */ 207 meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); 208 if (!meter) 209 return ERR_PTR(-ENOMEM); 210 211 meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); 212 meter->used = div_u64(ktime_get_ns(), 1000 * 1000); 213 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; 214 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; 215 spin_lock_init(&meter->lock); 216 if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { 217 meter->stats = *(struct ovs_flow_stats *) 218 nla_data(a[OVS_METER_ATTR_STATS]); 219 } 220 meter->n_bands = n_bands; 221 222 /* Set up meter bands. */ 223 band = meter->bands; 224 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { 225 struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; 226 u32 band_max_delta_t; 227 228 err = nla_parse_deprecated((struct nlattr **)&attr, 229 OVS_BAND_ATTR_MAX, nla_data(nla), 230 nla_len(nla), band_policy, NULL); 231 if (err) 232 goto exit_free_meter; 233 234 if (!attr[OVS_BAND_ATTR_TYPE] || 235 !attr[OVS_BAND_ATTR_RATE] || 236 !attr[OVS_BAND_ATTR_BURST]) { 237 err = -EINVAL; 238 goto exit_free_meter; 239 } 240 241 band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); 242 band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); 243 if (band->rate == 0) { 244 err = -EINVAL; 245 goto exit_free_meter; 246 } 247 248 band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); 249 /* Figure out max delta_t that is enough to fill any bucket. 250 * Keep max_delta_t size to the bucket units: 251 * pkts => 1/1000 packets, kilobits => bits. 252 * 253 * Start with a full bucket. 254 */ 255 band->bucket = (band->burst_size + band->rate) * 1000; 256 band_max_delta_t = band->bucket / band->rate; 257 if (band_max_delta_t > meter->max_delta_t) 258 meter->max_delta_t = band_max_delta_t; 259 band++; 260 } 261 262 return meter; 263 264 exit_free_meter: 265 kfree(meter); 266 return ERR_PTR(err); 267 } 268 269 static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) 270 { 271 struct nlattr **a = info->attrs; 272 struct dp_meter *meter, *old_meter; 273 struct sk_buff *reply; 274 struct ovs_header *ovs_reply_header; 275 struct ovs_header *ovs_header = info->userhdr; 276 struct datapath *dp; 277 int err; 278 u32 meter_id; 279 bool failed; 280 281 if (!a[OVS_METER_ATTR_ID]) { 282 return -ENODEV; 283 } 284 285 meter = dp_meter_create(a); 286 if (IS_ERR_OR_NULL(meter)) 287 return PTR_ERR(meter); 288 289 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, 290 &ovs_reply_header); 291 if (IS_ERR(reply)) { 292 err = PTR_ERR(reply); 293 goto exit_free_meter; 294 } 295 296 ovs_lock(); 297 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 298 if (!dp) { 299 err = -ENODEV; 300 goto exit_unlock; 301 } 302 303 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 304 305 /* Cannot fail after this. */ 306 old_meter = lookup_meter(dp, meter_id); 307 detach_meter(old_meter); 308 attach_meter(dp, meter); 309 ovs_unlock(); 310 311 /* Build response with the meter_id and stats from 312 * the old meter, if any. 313 */ 314 failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); 315 WARN_ON(failed); 316 if (old_meter) { 317 spin_lock_bh(&old_meter->lock); 318 if (old_meter->keep_stats) { 319 err = ovs_meter_cmd_reply_stats(reply, meter_id, 320 old_meter); 321 WARN_ON(err); 322 } 323 spin_unlock_bh(&old_meter->lock); 324 ovs_meter_free(old_meter); 325 } 326 327 genlmsg_end(reply, ovs_reply_header); 328 return genlmsg_reply(reply, info); 329 330 exit_unlock: 331 ovs_unlock(); 332 nlmsg_free(reply); 333 exit_free_meter: 334 kfree(meter); 335 return err; 336 } 337 338 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) 339 { 340 struct nlattr **a = info->attrs; 341 u32 meter_id; 342 struct ovs_header *ovs_header = info->userhdr; 343 struct ovs_header *ovs_reply_header; 344 struct datapath *dp; 345 int err; 346 struct sk_buff *reply; 347 struct dp_meter *meter; 348 349 if (!a[OVS_METER_ATTR_ID]) 350 return -EINVAL; 351 352 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 353 354 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, 355 &ovs_reply_header); 356 if (IS_ERR(reply)) 357 return PTR_ERR(reply); 358 359 ovs_lock(); 360 361 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 362 if (!dp) { 363 err = -ENODEV; 364 goto exit_unlock; 365 } 366 367 /* Locate meter, copy stats. */ 368 meter = lookup_meter(dp, meter_id); 369 if (!meter) { 370 err = -ENOENT; 371 goto exit_unlock; 372 } 373 374 spin_lock_bh(&meter->lock); 375 err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); 376 spin_unlock_bh(&meter->lock); 377 if (err) 378 goto exit_unlock; 379 380 ovs_unlock(); 381 382 genlmsg_end(reply, ovs_reply_header); 383 return genlmsg_reply(reply, info); 384 385 exit_unlock: 386 ovs_unlock(); 387 nlmsg_free(reply); 388 return err; 389 } 390 391 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) 392 { 393 struct nlattr **a = info->attrs; 394 u32 meter_id; 395 struct ovs_header *ovs_header = info->userhdr; 396 struct ovs_header *ovs_reply_header; 397 struct datapath *dp; 398 int err; 399 struct sk_buff *reply; 400 struct dp_meter *old_meter; 401 402 if (!a[OVS_METER_ATTR_ID]) 403 return -EINVAL; 404 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 405 406 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, 407 &ovs_reply_header); 408 if (IS_ERR(reply)) 409 return PTR_ERR(reply); 410 411 ovs_lock(); 412 413 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 414 if (!dp) { 415 err = -ENODEV; 416 goto exit_unlock; 417 } 418 419 old_meter = lookup_meter(dp, meter_id); 420 if (old_meter) { 421 spin_lock_bh(&old_meter->lock); 422 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); 423 WARN_ON(err); 424 spin_unlock_bh(&old_meter->lock); 425 detach_meter(old_meter); 426 } 427 ovs_unlock(); 428 ovs_meter_free(old_meter); 429 genlmsg_end(reply, ovs_reply_header); 430 return genlmsg_reply(reply, info); 431 432 exit_unlock: 433 ovs_unlock(); 434 nlmsg_free(reply); 435 return err; 436 } 437 438 /* Meter action execution. 439 * 440 * Return true 'meter_id' drop band is triggered. The 'skb' should be 441 * dropped by the caller'. 442 */ 443 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, 444 struct sw_flow_key *key, u32 meter_id) 445 { 446 struct dp_meter *meter; 447 struct dp_meter_band *band; 448 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); 449 long long int long_delta_ms; 450 u32 delta_ms; 451 u32 cost; 452 int i, band_exceeded_max = -1; 453 u32 band_exceeded_rate = 0; 454 455 meter = lookup_meter(dp, meter_id); 456 /* Do not drop the packet when there is no meter. */ 457 if (!meter) 458 return false; 459 460 /* Lock the meter while using it. */ 461 spin_lock(&meter->lock); 462 463 long_delta_ms = (now_ms - meter->used); /* ms */ 464 465 /* Make sure delta_ms will not be too large, so that bucket will not 466 * wrap around below. 467 */ 468 delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) 469 ? meter->max_delta_t : (u32)long_delta_ms; 470 471 /* Update meter statistics. 472 */ 473 meter->used = now_ms; 474 meter->stats.n_packets += 1; 475 meter->stats.n_bytes += skb->len; 476 477 /* Bucket rate is either in kilobits per second, or in packets per 478 * second. We maintain the bucket in the units of either bits or 479 * 1/1000th of a packet, correspondingly. 480 * Then, when rate is multiplied with milliseconds, we get the 481 * bucket units: 482 * msec * kbps = bits, and 483 * msec * packets/sec = 1/1000 packets. 484 * 485 * 'cost' is the number of bucket units in this packet. 486 */ 487 cost = (meter->kbps) ? skb->len * 8 : 1000; 488 489 /* Update all bands and find the one hit with the highest rate. */ 490 for (i = 0; i < meter->n_bands; ++i) { 491 long long int max_bucket_size; 492 493 band = &meter->bands[i]; 494 max_bucket_size = (band->burst_size + band->rate) * 1000LL; 495 496 band->bucket += delta_ms * band->rate; 497 if (band->bucket > max_bucket_size) 498 band->bucket = max_bucket_size; 499 500 if (band->bucket >= cost) { 501 band->bucket -= cost; 502 } else if (band->rate > band_exceeded_rate) { 503 band_exceeded_rate = band->rate; 504 band_exceeded_max = i; 505 } 506 } 507 508 if (band_exceeded_max >= 0) { 509 /* Update band statistics. */ 510 band = &meter->bands[band_exceeded_max]; 511 band->stats.n_packets += 1; 512 band->stats.n_bytes += skb->len; 513 514 /* Drop band triggered, let the caller drop the 'skb'. */ 515 if (band->type == OVS_METER_BAND_TYPE_DROP) { 516 spin_unlock(&meter->lock); 517 return true; 518 } 519 } 520 521 spin_unlock(&meter->lock); 522 return false; 523 } 524 525 static struct genl_ops dp_meter_genl_ops[] = { 526 { .cmd = OVS_METER_CMD_FEATURES, 527 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 528 .flags = 0, /* OK for unprivileged users. */ 529 .doit = ovs_meter_cmd_features 530 }, 531 { .cmd = OVS_METER_CMD_SET, 532 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 533 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 534 * privilege. 535 */ 536 .doit = ovs_meter_cmd_set, 537 }, 538 { .cmd = OVS_METER_CMD_GET, 539 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 540 .flags = 0, /* OK for unprivileged users. */ 541 .doit = ovs_meter_cmd_get, 542 }, 543 { .cmd = OVS_METER_CMD_DEL, 544 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 545 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN 546 * privilege. 547 */ 548 .doit = ovs_meter_cmd_del 549 }, 550 }; 551 552 static const struct genl_multicast_group ovs_meter_multicast_group = { 553 .name = OVS_METER_MCGROUP, 554 }; 555 556 struct genl_family dp_meter_genl_family __ro_after_init = { 557 .hdrsize = sizeof(struct ovs_header), 558 .name = OVS_METER_FAMILY, 559 .version = OVS_METER_VERSION, 560 .maxattr = OVS_METER_ATTR_MAX, 561 .policy = meter_policy, 562 .netnsok = true, 563 .parallel_ops = true, 564 .ops = dp_meter_genl_ops, 565 .n_ops = ARRAY_SIZE(dp_meter_genl_ops), 566 .mcgrps = &ovs_meter_multicast_group, 567 .n_mcgrps = 1, 568 .module = THIS_MODULE, 569 }; 570 571 int ovs_meters_init(struct datapath *dp) 572 { 573 int i; 574 575 dp->meters = kmalloc_array(METER_HASH_BUCKETS, 576 sizeof(struct hlist_head), GFP_KERNEL); 577 578 if (!dp->meters) 579 return -ENOMEM; 580 581 for (i = 0; i < METER_HASH_BUCKETS; i++) 582 INIT_HLIST_HEAD(&dp->meters[i]); 583 584 return 0; 585 } 586 587 void ovs_meters_exit(struct datapath *dp) 588 { 589 int i; 590 591 for (i = 0; i < METER_HASH_BUCKETS; i++) { 592 struct hlist_head *head = &dp->meters[i]; 593 struct dp_meter *meter; 594 struct hlist_node *n; 595 596 hlist_for_each_entry_safe(meter, n, head, dp_hash_node) 597 kfree(meter); 598 } 599 600 kfree(dp->meters); 601 } 602