1 /* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13 #include <linux/module.h> 14 #include <linux/types.h> 15 #include <linux/kernel.h> 16 #include <linux/string.h> 17 #include <linux/errno.h> 18 #include <linux/skbuff.h> 19 #include <linux/rtnetlink.h> 20 #include <linux/init.h> 21 #include <linux/slab.h> 22 #include <net/act_api.h> 23 #include <net/netlink.h> 24 25 struct tcf_police { 26 struct tcf_common common; 27 int tcfp_result; 28 u32 tcfp_ewma_rate; 29 s64 tcfp_burst; 30 u32 tcfp_mtu; 31 s64 tcfp_toks; 32 s64 tcfp_ptoks; 33 s64 tcfp_mtu_ptoks; 34 s64 tcfp_t_c; 35 struct psched_ratecfg rate; 36 bool rate_present; 37 struct psched_ratecfg peak; 38 bool peak_present; 39 }; 40 #define to_police(pc) \ 41 container_of(pc, struct tcf_police, common) 42 43 #define POL_TAB_MASK 15 44 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 45 static u32 police_idx_gen; 46 static DEFINE_RWLOCK(police_lock); 47 48 static struct tcf_hashinfo police_hash_info = { 49 .htab = tcf_police_ht, 50 .hmask = POL_TAB_MASK, 51 .lock = &police_lock, 52 }; 53 54 /* old policer structure from before tc actions */ 55 struct tc_police_compat { 56 u32 index; 57 int action; 58 u32 limit; 59 u32 burst; 60 u32 mtu; 61 struct tc_ratespec rate; 62 struct tc_ratespec peakrate; 63 }; 64 65 /* Each policer is serialized by its individual spinlock */ 66 67 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 68 int type, struct tc_action *a) 69 { 70 struct tcf_common *p; 71 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 72 struct nlattr *nest; 73 74 read_lock_bh(&police_lock); 75 76 s_i = cb->args[0]; 77 78 for (i = 0; i < (POL_TAB_MASK + 1); i++) { 79 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; 80 81 for (; p; p = p->tcfc_next) { 82 index++; 83 if (index < s_i) 84 continue; 85 a->priv = p; 86 a->order = index; 87 nest = nla_nest_start(skb, a->order); 88 if (nest == NULL) 89 goto nla_put_failure; 90 if (type == RTM_DELACTION) 91 err = tcf_action_dump_1(skb, a, 0, 1); 92 else 93 err = tcf_action_dump_1(skb, a, 0, 0); 94 if (err < 0) { 95 index--; 96 nla_nest_cancel(skb, nest); 97 goto done; 98 } 99 nla_nest_end(skb, nest); 100 n_i++; 101 } 102 } 103 done: 104 read_unlock_bh(&police_lock); 105 if (n_i) 106 cb->args[0] += n_i; 107 return n_i; 108 109 nla_put_failure: 110 nla_nest_cancel(skb, nest); 111 goto done; 112 } 113 114 static void tcf_police_destroy(struct tcf_police *p) 115 { 116 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 117 struct tcf_common **p1p; 118 119 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { 120 if (*p1p == &p->common) { 121 write_lock_bh(&police_lock); 122 *p1p = p->tcf_next; 123 write_unlock_bh(&police_lock); 124 gen_kill_estimator(&p->tcf_bstats, 125 &p->tcf_rate_est); 126 /* 127 * gen_estimator est_timer() might access p->tcf_lock 128 * or bstats, wait a RCU grace period before freeing p 129 */ 130 kfree_rcu(p, tcf_rcu); 131 return; 132 } 133 } 134 WARN_ON(1); 135 } 136 137 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { 138 [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE }, 139 [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, 140 [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, 141 [TCA_POLICE_RESULT] = { .type = NLA_U32 }, 142 }; 143 144 static int tcf_act_police_locate(struct net *net, struct nlattr *nla, 145 struct nlattr *est, struct tc_action *a, 146 int ovr, int bind) 147 { 148 unsigned int h; 149 int ret = 0, err; 150 struct nlattr *tb[TCA_POLICE_MAX + 1]; 151 struct tc_police *parm; 152 struct tcf_police *police; 153 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 154 int size; 155 156 if (nla == NULL) 157 return -EINVAL; 158 159 err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy); 160 if (err < 0) 161 return err; 162 163 if (tb[TCA_POLICE_TBF] == NULL) 164 return -EINVAL; 165 size = nla_len(tb[TCA_POLICE_TBF]); 166 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 167 return -EINVAL; 168 parm = nla_data(tb[TCA_POLICE_TBF]); 169 170 if (parm->index) { 171 struct tcf_common *pc; 172 173 pc = tcf_hash_lookup(parm->index, &police_hash_info); 174 if (pc != NULL) { 175 a->priv = pc; 176 police = to_police(pc); 177 if (bind) { 178 police->tcf_bindcnt += 1; 179 police->tcf_refcnt += 1; 180 } 181 if (ovr) 182 goto override; 183 return ret; 184 } 185 } 186 187 police = kzalloc(sizeof(*police), GFP_KERNEL); 188 if (police == NULL) 189 return -ENOMEM; 190 ret = ACT_P_CREATED; 191 police->tcf_refcnt = 1; 192 spin_lock_init(&police->tcf_lock); 193 if (bind) 194 police->tcf_bindcnt = 1; 195 override: 196 if (parm->rate.rate) { 197 err = -ENOMEM; 198 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); 199 if (R_tab == NULL) 200 goto failure; 201 202 if (parm->peakrate.rate) { 203 P_tab = qdisc_get_rtab(&parm->peakrate, 204 tb[TCA_POLICE_PEAKRATE]); 205 if (P_tab == NULL) 206 goto failure; 207 } 208 } 209 210 spin_lock_bh(&police->tcf_lock); 211 if (est) { 212 err = gen_replace_estimator(&police->tcf_bstats, 213 &police->tcf_rate_est, 214 &police->tcf_lock, est); 215 if (err) 216 goto failure_unlock; 217 } else if (tb[TCA_POLICE_AVRATE] && 218 (ret == ACT_P_CREATED || 219 !gen_estimator_active(&police->tcf_bstats, 220 &police->tcf_rate_est))) { 221 err = -EINVAL; 222 goto failure_unlock; 223 } 224 225 /* No failure allowed after this point */ 226 police->tcfp_mtu = parm->mtu; 227 if (police->tcfp_mtu == 0) { 228 police->tcfp_mtu = ~0; 229 if (R_tab) 230 police->tcfp_mtu = 255 << R_tab->rate.cell_log; 231 } 232 if (R_tab) { 233 police->rate_present = true; 234 psched_ratecfg_precompute(&police->rate, &R_tab->rate); 235 qdisc_put_rtab(R_tab); 236 } else { 237 police->rate_present = false; 238 } 239 if (P_tab) { 240 police->peak_present = true; 241 psched_ratecfg_precompute(&police->peak, &P_tab->rate); 242 qdisc_put_rtab(P_tab); 243 } else { 244 police->peak_present = false; 245 } 246 247 if (tb[TCA_POLICE_RESULT]) 248 police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); 249 police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); 250 police->tcfp_toks = police->tcfp_burst; 251 if (police->peak_present) { 252 police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, 253 police->tcfp_mtu); 254 police->tcfp_ptoks = police->tcfp_mtu_ptoks; 255 } 256 police->tcf_action = parm->action; 257 258 if (tb[TCA_POLICE_AVRATE]) 259 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); 260 261 spin_unlock_bh(&police->tcf_lock); 262 if (ret != ACT_P_CREATED) 263 return ret; 264 265 police->tcfp_t_c = ktime_to_ns(ktime_get()); 266 police->tcf_index = parm->index ? parm->index : 267 tcf_hash_new_index(&police_idx_gen, &police_hash_info); 268 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 269 write_lock_bh(&police_lock); 270 police->tcf_next = tcf_police_ht[h]; 271 tcf_police_ht[h] = &police->common; 272 write_unlock_bh(&police_lock); 273 274 a->priv = police; 275 return ret; 276 277 failure_unlock: 278 spin_unlock_bh(&police->tcf_lock); 279 failure: 280 if (P_tab) 281 qdisc_put_rtab(P_tab); 282 if (R_tab) 283 qdisc_put_rtab(R_tab); 284 if (ret == ACT_P_CREATED) 285 kfree(police); 286 return err; 287 } 288 289 static int tcf_act_police_cleanup(struct tc_action *a, int bind) 290 { 291 struct tcf_police *p = a->priv; 292 int ret = 0; 293 294 if (p != NULL) { 295 if (bind) 296 p->tcf_bindcnt--; 297 298 p->tcf_refcnt--; 299 if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { 300 tcf_police_destroy(p); 301 ret = 1; 302 } 303 } 304 return ret; 305 } 306 307 static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, 308 struct tcf_result *res) 309 { 310 struct tcf_police *police = a->priv; 311 s64 now; 312 s64 toks; 313 s64 ptoks = 0; 314 315 spin_lock(&police->tcf_lock); 316 317 bstats_update(&police->tcf_bstats, skb); 318 319 if (police->tcfp_ewma_rate && 320 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 321 police->tcf_qstats.overlimits++; 322 if (police->tcf_action == TC_ACT_SHOT) 323 police->tcf_qstats.drops++; 324 spin_unlock(&police->tcf_lock); 325 return police->tcf_action; 326 } 327 328 if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { 329 if (!police->rate_present) { 330 spin_unlock(&police->tcf_lock); 331 return police->tcfp_result; 332 } 333 334 now = ktime_to_ns(ktime_get()); 335 toks = min_t(s64, now - police->tcfp_t_c, 336 police->tcfp_burst); 337 if (police->peak_present) { 338 ptoks = toks + police->tcfp_ptoks; 339 if (ptoks > police->tcfp_mtu_ptoks) 340 ptoks = police->tcfp_mtu_ptoks; 341 ptoks -= (s64) psched_l2t_ns(&police->peak, 342 qdisc_pkt_len(skb)); 343 } 344 toks += police->tcfp_toks; 345 if (toks > police->tcfp_burst) 346 toks = police->tcfp_burst; 347 toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); 348 if ((toks|ptoks) >= 0) { 349 police->tcfp_t_c = now; 350 police->tcfp_toks = toks; 351 police->tcfp_ptoks = ptoks; 352 spin_unlock(&police->tcf_lock); 353 return police->tcfp_result; 354 } 355 } 356 357 police->tcf_qstats.overlimits++; 358 if (police->tcf_action == TC_ACT_SHOT) 359 police->tcf_qstats.drops++; 360 spin_unlock(&police->tcf_lock); 361 return police->tcf_action; 362 } 363 364 static int 365 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 366 { 367 unsigned char *b = skb_tail_pointer(skb); 368 struct tcf_police *police = a->priv; 369 struct tc_police opt = { 370 .index = police->tcf_index, 371 .action = police->tcf_action, 372 .mtu = police->tcfp_mtu, 373 .burst = PSCHED_NS2TICKS(police->tcfp_burst), 374 .refcnt = police->tcf_refcnt - ref, 375 .bindcnt = police->tcf_bindcnt - bind, 376 }; 377 378 if (police->rate_present) 379 psched_ratecfg_getrate(&opt.rate, &police->rate); 380 if (police->peak_present) 381 psched_ratecfg_getrate(&opt.peakrate, &police->peak); 382 if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) 383 goto nla_put_failure; 384 if (police->tcfp_result && 385 nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result)) 386 goto nla_put_failure; 387 if (police->tcfp_ewma_rate && 388 nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate)) 389 goto nla_put_failure; 390 return skb->len; 391 392 nla_put_failure: 393 nlmsg_trim(skb, b); 394 return -1; 395 } 396 397 MODULE_AUTHOR("Alexey Kuznetsov"); 398 MODULE_DESCRIPTION("Policing actions"); 399 MODULE_LICENSE("GPL"); 400 401 static struct tc_action_ops act_police_ops = { 402 .kind = "police", 403 .hinfo = &police_hash_info, 404 .type = TCA_ID_POLICE, 405 .capab = TCA_CAP_NONE, 406 .owner = THIS_MODULE, 407 .act = tcf_act_police, 408 .dump = tcf_act_police_dump, 409 .cleanup = tcf_act_police_cleanup, 410 .lookup = tcf_hash_search, 411 .init = tcf_act_police_locate, 412 .walk = tcf_act_police_walker 413 }; 414 415 static int __init 416 police_init_module(void) 417 { 418 return tcf_register_action(&act_police_ops); 419 } 420 421 static void __exit 422 police_cleanup_module(void) 423 { 424 tcf_unregister_action(&act_police_ops); 425 } 426 427 module_init(police_init_module); 428 module_exit(police_cleanup_module); 429