1 /* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13 #include <linux/module.h> 14 #include <linux/types.h> 15 #include <linux/kernel.h> 16 #include <linux/string.h> 17 #include <linux/errno.h> 18 #include <linux/skbuff.h> 19 #include <linux/rtnetlink.h> 20 #include <linux/init.h> 21 #include <linux/slab.h> 22 #include <net/act_api.h> 23 #include <net/netlink.h> 24 25 struct tcf_police { 26 struct tcf_common common; 27 int tcfp_result; 28 u32 tcfp_ewma_rate; 29 s64 tcfp_burst; 30 u32 tcfp_mtu; 31 s64 tcfp_toks; 32 s64 tcfp_ptoks; 33 s64 tcfp_mtu_ptoks; 34 s64 tcfp_t_c; 35 struct psched_ratecfg rate; 36 bool rate_present; 37 struct psched_ratecfg peak; 38 bool peak_present; 39 }; 40 #define to_police(pc) \ 41 container_of(pc, struct tcf_police, common) 42 43 #define POL_TAB_MASK 15 44 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 45 static u32 police_idx_gen; 46 static DEFINE_RWLOCK(police_lock); 47 48 static struct tcf_hashinfo police_hash_info = { 49 .htab = tcf_police_ht, 50 .hmask = POL_TAB_MASK, 51 .lock = &police_lock, 52 }; 53 54 /* old policer structure from before tc actions */ 55 struct tc_police_compat { 56 u32 index; 57 int action; 58 u32 limit; 59 u32 burst; 60 u32 mtu; 61 struct tc_ratespec rate; 62 struct tc_ratespec peakrate; 63 }; 64 65 /* Each policer is serialized by its individual spinlock */ 66 67 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 68 int type, struct tc_action *a) 69 { 70 struct tcf_common *p; 71 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 72 struct nlattr *nest; 73 74 read_lock_bh(&police_lock); 75 76 s_i = cb->args[0]; 77 78 for (i = 0; i < (POL_TAB_MASK + 1); i++) { 79 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; 80 81 for (; p; p = p->tcfc_next) { 82 index++; 83 if (index < s_i) 84 continue; 85 a->priv = p; 86 a->order = index; 87 nest = nla_nest_start(skb, a->order); 88 if (nest == NULL) 89 goto nla_put_failure; 90 if (type == RTM_DELACTION) 91 err = tcf_action_dump_1(skb, a, 0, 1); 92 else 93 err = tcf_action_dump_1(skb, a, 0, 0); 94 if (err < 0) { 95 index--; 96 nla_nest_cancel(skb, nest); 97 goto done; 98 } 99 nla_nest_end(skb, nest); 100 n_i++; 101 } 102 } 103 done: 104 read_unlock_bh(&police_lock); 105 if (n_i) 106 cb->args[0] += n_i; 107 return n_i; 108 109 nla_put_failure: 110 nla_nest_cancel(skb, nest); 111 goto done; 112 } 113 114 static void tcf_police_destroy(struct tcf_police *p) 115 { 116 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 117 struct tcf_common **p1p; 118 119 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { 120 if (*p1p == &p->common) { 121 write_lock_bh(&police_lock); 122 *p1p = p->tcf_next; 123 write_unlock_bh(&police_lock); 124 gen_kill_estimator(&p->tcf_bstats, 125 &p->tcf_rate_est); 126 /* 127 * gen_estimator est_timer() might access p->tcf_lock 128 * or bstats, wait a RCU grace period before freeing p 129 */ 130 kfree_rcu(p, tcf_rcu); 131 return; 132 } 133 } 134 WARN_ON(1); 135 } 136 137 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { 138 [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE }, 139 [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, 140 [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, 141 [TCA_POLICE_RESULT] = { .type = NLA_U32 }, 142 }; 143 144 static int tcf_act_police_locate(struct net *net, struct nlattr *nla, 145 struct nlattr *est, struct tc_action *a, 146 int ovr, int bind) 147 { 148 unsigned int h; 149 int ret = 0, err; 150 struct nlattr *tb[TCA_POLICE_MAX + 1]; 151 struct tc_police *parm; 152 struct tcf_police *police; 153 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 154 int size; 155 156 if (nla == NULL) 157 return -EINVAL; 158 159 err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy); 160 if (err < 0) 161 return err; 162 163 if (tb[TCA_POLICE_TBF] == NULL) 164 return -EINVAL; 165 size = nla_len(tb[TCA_POLICE_TBF]); 166 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 167 return -EINVAL; 168 parm = nla_data(tb[TCA_POLICE_TBF]); 169 170 if (parm->index) { 171 struct tcf_common *pc; 172 173 pc = tcf_hash_lookup(parm->index, &police_hash_info); 174 if (pc != NULL) { 175 a->priv = pc; 176 police = to_police(pc); 177 if (bind) { 178 police->tcf_bindcnt += 1; 179 police->tcf_refcnt += 1; 180 return 0; 181 } 182 if (ovr) 183 goto override; 184 /* not replacing */ 185 return -EEXIST; 186 } 187 } 188 189 police = kzalloc(sizeof(*police), GFP_KERNEL); 190 if (police == NULL) 191 return -ENOMEM; 192 ret = ACT_P_CREATED; 193 police->tcf_refcnt = 1; 194 spin_lock_init(&police->tcf_lock); 195 if (bind) 196 police->tcf_bindcnt = 1; 197 override: 198 if (parm->rate.rate) { 199 err = -ENOMEM; 200 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); 201 if (R_tab == NULL) 202 goto failure; 203 204 if (parm->peakrate.rate) { 205 P_tab = qdisc_get_rtab(&parm->peakrate, 206 tb[TCA_POLICE_PEAKRATE]); 207 if (P_tab == NULL) 208 goto failure; 209 } 210 } 211 212 spin_lock_bh(&police->tcf_lock); 213 if (est) { 214 err = gen_replace_estimator(&police->tcf_bstats, 215 &police->tcf_rate_est, 216 &police->tcf_lock, est); 217 if (err) 218 goto failure_unlock; 219 } else if (tb[TCA_POLICE_AVRATE] && 220 (ret == ACT_P_CREATED || 221 !gen_estimator_active(&police->tcf_bstats, 222 &police->tcf_rate_est))) { 223 err = -EINVAL; 224 goto failure_unlock; 225 } 226 227 /* No failure allowed after this point */ 228 police->tcfp_mtu = parm->mtu; 229 if (police->tcfp_mtu == 0) { 230 police->tcfp_mtu = ~0; 231 if (R_tab) 232 police->tcfp_mtu = 255 << R_tab->rate.cell_log; 233 } 234 if (R_tab) { 235 police->rate_present = true; 236 psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0); 237 qdisc_put_rtab(R_tab); 238 } else { 239 police->rate_present = false; 240 } 241 if (P_tab) { 242 police->peak_present = true; 243 psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0); 244 qdisc_put_rtab(P_tab); 245 } else { 246 police->peak_present = false; 247 } 248 249 if (tb[TCA_POLICE_RESULT]) 250 police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); 251 police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); 252 police->tcfp_toks = police->tcfp_burst; 253 if (police->peak_present) { 254 police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, 255 police->tcfp_mtu); 256 police->tcfp_ptoks = police->tcfp_mtu_ptoks; 257 } 258 police->tcf_action = parm->action; 259 260 if (tb[TCA_POLICE_AVRATE]) 261 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); 262 263 spin_unlock_bh(&police->tcf_lock); 264 if (ret != ACT_P_CREATED) 265 return ret; 266 267 police->tcfp_t_c = ktime_to_ns(ktime_get()); 268 police->tcf_index = parm->index ? parm->index : 269 tcf_hash_new_index(&police_idx_gen, &police_hash_info); 270 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 271 write_lock_bh(&police_lock); 272 police->tcf_next = tcf_police_ht[h]; 273 tcf_police_ht[h] = &police->common; 274 write_unlock_bh(&police_lock); 275 276 a->priv = police; 277 return ret; 278 279 failure_unlock: 280 spin_unlock_bh(&police->tcf_lock); 281 failure: 282 if (P_tab) 283 qdisc_put_rtab(P_tab); 284 if (R_tab) 285 qdisc_put_rtab(R_tab); 286 if (ret == ACT_P_CREATED) 287 kfree(police); 288 return err; 289 } 290 291 static int tcf_act_police_cleanup(struct tc_action *a, int bind) 292 { 293 struct tcf_police *p = a->priv; 294 int ret = 0; 295 296 if (p != NULL) { 297 if (bind) 298 p->tcf_bindcnt--; 299 300 p->tcf_refcnt--; 301 if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { 302 tcf_police_destroy(p); 303 ret = 1; 304 } 305 } 306 return ret; 307 } 308 309 static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, 310 struct tcf_result *res) 311 { 312 struct tcf_police *police = a->priv; 313 s64 now; 314 s64 toks; 315 s64 ptoks = 0; 316 317 spin_lock(&police->tcf_lock); 318 319 bstats_update(&police->tcf_bstats, skb); 320 321 if (police->tcfp_ewma_rate && 322 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 323 police->tcf_qstats.overlimits++; 324 if (police->tcf_action == TC_ACT_SHOT) 325 police->tcf_qstats.drops++; 326 spin_unlock(&police->tcf_lock); 327 return police->tcf_action; 328 } 329 330 if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { 331 if (!police->rate_present) { 332 spin_unlock(&police->tcf_lock); 333 return police->tcfp_result; 334 } 335 336 now = ktime_to_ns(ktime_get()); 337 toks = min_t(s64, now - police->tcfp_t_c, 338 police->tcfp_burst); 339 if (police->peak_present) { 340 ptoks = toks + police->tcfp_ptoks; 341 if (ptoks > police->tcfp_mtu_ptoks) 342 ptoks = police->tcfp_mtu_ptoks; 343 ptoks -= (s64) psched_l2t_ns(&police->peak, 344 qdisc_pkt_len(skb)); 345 } 346 toks += police->tcfp_toks; 347 if (toks > police->tcfp_burst) 348 toks = police->tcfp_burst; 349 toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); 350 if ((toks|ptoks) >= 0) { 351 police->tcfp_t_c = now; 352 police->tcfp_toks = toks; 353 police->tcfp_ptoks = ptoks; 354 spin_unlock(&police->tcf_lock); 355 return police->tcfp_result; 356 } 357 } 358 359 police->tcf_qstats.overlimits++; 360 if (police->tcf_action == TC_ACT_SHOT) 361 police->tcf_qstats.drops++; 362 spin_unlock(&police->tcf_lock); 363 return police->tcf_action; 364 } 365 366 static int 367 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 368 { 369 unsigned char *b = skb_tail_pointer(skb); 370 struct tcf_police *police = a->priv; 371 struct tc_police opt = { 372 .index = police->tcf_index, 373 .action = police->tcf_action, 374 .mtu = police->tcfp_mtu, 375 .burst = PSCHED_NS2TICKS(police->tcfp_burst), 376 .refcnt = police->tcf_refcnt - ref, 377 .bindcnt = police->tcf_bindcnt - bind, 378 }; 379 380 if (police->rate_present) 381 psched_ratecfg_getrate(&opt.rate, &police->rate); 382 if (police->peak_present) 383 psched_ratecfg_getrate(&opt.peakrate, &police->peak); 384 if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) 385 goto nla_put_failure; 386 if (police->tcfp_result && 387 nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result)) 388 goto nla_put_failure; 389 if (police->tcfp_ewma_rate && 390 nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate)) 391 goto nla_put_failure; 392 return skb->len; 393 394 nla_put_failure: 395 nlmsg_trim(skb, b); 396 return -1; 397 } 398 399 MODULE_AUTHOR("Alexey Kuznetsov"); 400 MODULE_DESCRIPTION("Policing actions"); 401 MODULE_LICENSE("GPL"); 402 403 static struct tc_action_ops act_police_ops = { 404 .kind = "police", 405 .hinfo = &police_hash_info, 406 .type = TCA_ID_POLICE, 407 .capab = TCA_CAP_NONE, 408 .owner = THIS_MODULE, 409 .act = tcf_act_police, 410 .dump = tcf_act_police_dump, 411 .cleanup = tcf_act_police_cleanup, 412 .init = tcf_act_police_locate, 413 .walk = tcf_act_police_walker 414 }; 415 416 static int __init 417 police_init_module(void) 418 { 419 return tcf_register_action(&act_police_ops); 420 } 421 422 static void __exit 423 police_cleanup_module(void) 424 { 425 tcf_unregister_action(&act_police_ops); 426 } 427 428 module_init(police_init_module); 429 module_exit(police_cleanup_module); 430