1 /* Event cache for netfilter. */ 2 3 /* 4 * (C) 2005 Harald Welte <laforge@gnumonks.org> 5 * (C) 2005 Patrick McHardy <kaber@trash.net> 6 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> 7 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 16 #include <linux/types.h> 17 #include <linux/netfilter.h> 18 #include <linux/skbuff.h> 19 #include <linux/vmalloc.h> 20 #include <linux/stddef.h> 21 #include <linux/err.h> 22 #include <linux/percpu.h> 23 #include <linux/kernel.h> 24 #include <linux/netdevice.h> 25 #include <linux/slab.h> 26 #include <linux/export.h> 27 28 #include <net/netfilter/nf_conntrack.h> 29 #include <net/netfilter/nf_conntrack_core.h> 30 #include <net/netfilter/nf_conntrack_extend.h> 31 32 static DEFINE_MUTEX(nf_ct_ecache_mutex); 33 34 #define ECACHE_RETRY_WAIT (HZ/10) 35 36 enum retry_state { 37 STATE_CONGESTED, 38 STATE_RESTART, 39 STATE_DONE, 40 }; 41 42 static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) 43 { 44 struct nf_conn *refs[16]; 45 struct nf_conntrack_tuple_hash *h; 46 struct hlist_nulls_node *n; 47 unsigned int evicted = 0; 48 enum retry_state ret = STATE_DONE; 49 50 spin_lock(&pcpu->lock); 51 52 hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { 53 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 54 struct nf_conntrack_ecache *e; 55 56 if (!nf_ct_is_confirmed(ct)) 57 continue; 58 59 e = nf_ct_ecache_find(ct); 60 if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) 61 continue; 62 63 if (nf_conntrack_event(IPCT_DESTROY, ct)) { 64 ret = STATE_CONGESTED; 65 break; 66 } 67 68 e->state = NFCT_ECACHE_DESTROY_SENT; 69 refs[evicted] = ct; 70 71 if (++evicted >= ARRAY_SIZE(refs)) { 72 ret = STATE_RESTART; 73 break; 74 } 75 } 76 77 spin_unlock(&pcpu->lock); 78 79 /* can't _put while holding lock */ 80 while (evicted) 81 nf_ct_put(refs[--evicted]); 82 83 return ret; 84 } 85 86 static void ecache_work(struct work_struct *work) 87 { 88 struct netns_ct *ctnet = 89 container_of(work, struct netns_ct, ecache_dwork.work); 90 int cpu, delay = -1; 91 struct ct_pcpu *pcpu; 92 93 local_bh_disable(); 94 95 for_each_possible_cpu(cpu) { 96 enum retry_state ret; 97 98 pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); 99 100 ret = ecache_work_evict_list(pcpu); 101 102 switch (ret) { 103 case STATE_CONGESTED: 104 delay = ECACHE_RETRY_WAIT; 105 goto out; 106 case STATE_RESTART: 107 delay = 0; 108 break; 109 case STATE_DONE: 110 break; 111 } 112 } 113 114 out: 115 local_bh_enable(); 116 117 ctnet->ecache_dwork_pending = delay > 0; 118 if (delay >= 0) 119 schedule_delayed_work(&ctnet->ecache_dwork, delay); 120 } 121 122 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 123 u32 portid, int report) 124 { 125 int ret = 0; 126 struct net *net = nf_ct_net(ct); 127 struct nf_ct_event_notifier *notify; 128 struct nf_conntrack_ecache *e; 129 130 rcu_read_lock(); 131 notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 132 if (!notify) 133 goto out_unlock; 134 135 e = nf_ct_ecache_find(ct); 136 if (!e) 137 goto out_unlock; 138 139 if (nf_ct_is_confirmed(ct)) { 140 struct nf_ct_event item = { 141 .ct = ct, 142 .portid = e->portid ? e->portid : portid, 143 .report = report 144 }; 145 /* This is a resent of a destroy event? If so, skip missed */ 146 unsigned long missed = e->portid ? 0 : e->missed; 147 148 if (!((eventmask | missed) & e->ctmask)) 149 goto out_unlock; 150 151 ret = notify->fcn(eventmask | missed, &item); 152 if (unlikely(ret < 0 || missed)) { 153 spin_lock_bh(&ct->lock); 154 if (ret < 0) { 155 /* This is a destroy event that has been 156 * triggered by a process, we store the PORTID 157 * to include it in the retransmission. 158 */ 159 if (eventmask & (1 << IPCT_DESTROY)) { 160 if (e->portid == 0 && portid != 0) 161 e->portid = portid; 162 e->state = NFCT_ECACHE_DESTROY_FAIL; 163 } else { 164 e->missed |= eventmask; 165 } 166 } else { 167 e->missed &= ~missed; 168 } 169 spin_unlock_bh(&ct->lock); 170 } 171 } 172 out_unlock: 173 rcu_read_unlock(); 174 return ret; 175 } 176 EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 177 178 /* deliver cached events and clear cache entry - must be called with locally 179 * disabled softirqs */ 180 void nf_ct_deliver_cached_events(struct nf_conn *ct) 181 { 182 struct net *net = nf_ct_net(ct); 183 unsigned long events, missed; 184 struct nf_ct_event_notifier *notify; 185 struct nf_conntrack_ecache *e; 186 struct nf_ct_event item; 187 int ret; 188 189 rcu_read_lock(); 190 notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 191 if (notify == NULL) 192 goto out_unlock; 193 194 e = nf_ct_ecache_find(ct); 195 if (e == NULL) 196 goto out_unlock; 197 198 events = xchg(&e->cache, 0); 199 200 if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 201 goto out_unlock; 202 203 /* We make a copy of the missed event cache without taking 204 * the lock, thus we may send missed events twice. However, 205 * this does not harm and it happens very rarely. */ 206 missed = e->missed; 207 208 if (!((events | missed) & e->ctmask)) 209 goto out_unlock; 210 211 item.ct = ct; 212 item.portid = 0; 213 item.report = 0; 214 215 ret = notify->fcn(events | missed, &item); 216 217 if (likely(ret == 0 && !missed)) 218 goto out_unlock; 219 220 spin_lock_bh(&ct->lock); 221 if (ret < 0) 222 e->missed |= events; 223 else 224 e->missed &= ~missed; 225 spin_unlock_bh(&ct->lock); 226 227 out_unlock: 228 rcu_read_unlock(); 229 } 230 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); 231 232 void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 233 struct nf_conntrack_expect *exp, 234 u32 portid, int report) 235 236 { 237 struct net *net = nf_ct_exp_net(exp); 238 struct nf_exp_event_notifier *notify; 239 struct nf_conntrack_ecache *e; 240 241 rcu_read_lock(); 242 notify = rcu_dereference(net->ct.nf_expect_event_cb); 243 if (!notify) 244 goto out_unlock; 245 246 e = nf_ct_ecache_find(exp->master); 247 if (!e) 248 goto out_unlock; 249 250 if (e->expmask & (1 << event)) { 251 struct nf_exp_event item = { 252 .exp = exp, 253 .portid = portid, 254 .report = report 255 }; 256 notify->fcn(1 << event, &item); 257 } 258 out_unlock: 259 rcu_read_unlock(); 260 } 261 262 int nf_conntrack_register_notifier(struct net *net, 263 struct nf_ct_event_notifier *new) 264 { 265 int ret; 266 struct nf_ct_event_notifier *notify; 267 268 mutex_lock(&nf_ct_ecache_mutex); 269 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 270 lockdep_is_held(&nf_ct_ecache_mutex)); 271 if (notify != NULL) { 272 ret = -EBUSY; 273 goto out_unlock; 274 } 275 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 276 ret = 0; 277 278 out_unlock: 279 mutex_unlock(&nf_ct_ecache_mutex); 280 return ret; 281 } 282 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 283 284 void nf_conntrack_unregister_notifier(struct net *net, 285 struct nf_ct_event_notifier *new) 286 { 287 struct nf_ct_event_notifier *notify; 288 289 mutex_lock(&nf_ct_ecache_mutex); 290 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 291 lockdep_is_held(&nf_ct_ecache_mutex)); 292 BUG_ON(notify != new); 293 RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); 294 mutex_unlock(&nf_ct_ecache_mutex); 295 /* synchronize_rcu() is called from ctnetlink_exit. */ 296 } 297 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 298 299 int nf_ct_expect_register_notifier(struct net *net, 300 struct nf_exp_event_notifier *new) 301 { 302 int ret; 303 struct nf_exp_event_notifier *notify; 304 305 mutex_lock(&nf_ct_ecache_mutex); 306 notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 307 lockdep_is_held(&nf_ct_ecache_mutex)); 308 if (notify != NULL) { 309 ret = -EBUSY; 310 goto out_unlock; 311 } 312 rcu_assign_pointer(net->ct.nf_expect_event_cb, new); 313 ret = 0; 314 315 out_unlock: 316 mutex_unlock(&nf_ct_ecache_mutex); 317 return ret; 318 } 319 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); 320 321 void nf_ct_expect_unregister_notifier(struct net *net, 322 struct nf_exp_event_notifier *new) 323 { 324 struct nf_exp_event_notifier *notify; 325 326 mutex_lock(&nf_ct_ecache_mutex); 327 notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 328 lockdep_is_held(&nf_ct_ecache_mutex)); 329 BUG_ON(notify != new); 330 RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); 331 mutex_unlock(&nf_ct_ecache_mutex); 332 /* synchronize_rcu() is called from ctnetlink_exit. */ 333 } 334 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 335 336 #define NF_CT_EVENTS_DEFAULT 1 337 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; 338 339 #ifdef CONFIG_SYSCTL 340 static struct ctl_table event_sysctl_table[] = { 341 { 342 .procname = "nf_conntrack_events", 343 .data = &init_net.ct.sysctl_events, 344 .maxlen = sizeof(unsigned int), 345 .mode = 0644, 346 .proc_handler = proc_dointvec, 347 }, 348 {} 349 }; 350 #endif /* CONFIG_SYSCTL */ 351 352 static const struct nf_ct_ext_type event_extend = { 353 .len = sizeof(struct nf_conntrack_ecache), 354 .align = __alignof__(struct nf_conntrack_ecache), 355 .id = NF_CT_EXT_ECACHE, 356 }; 357 358 #ifdef CONFIG_SYSCTL 359 static int nf_conntrack_event_init_sysctl(struct net *net) 360 { 361 struct ctl_table *table; 362 363 table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), 364 GFP_KERNEL); 365 if (!table) 366 goto out; 367 368 table[0].data = &net->ct.sysctl_events; 369 370 /* Don't export sysctls to unprivileged users */ 371 if (net->user_ns != &init_user_ns) 372 table[0].procname = NULL; 373 374 net->ct.event_sysctl_header = 375 register_net_sysctl(net, "net/netfilter", table); 376 if (!net->ct.event_sysctl_header) { 377 pr_err("can't register to sysctl\n"); 378 goto out_register; 379 } 380 return 0; 381 382 out_register: 383 kfree(table); 384 out: 385 return -ENOMEM; 386 } 387 388 static void nf_conntrack_event_fini_sysctl(struct net *net) 389 { 390 struct ctl_table *table; 391 392 table = net->ct.event_sysctl_header->ctl_table_arg; 393 unregister_net_sysctl_table(net->ct.event_sysctl_header); 394 kfree(table); 395 } 396 #else 397 static int nf_conntrack_event_init_sysctl(struct net *net) 398 { 399 return 0; 400 } 401 402 static void nf_conntrack_event_fini_sysctl(struct net *net) 403 { 404 } 405 #endif /* CONFIG_SYSCTL */ 406 407 int nf_conntrack_ecache_pernet_init(struct net *net) 408 { 409 net->ct.sysctl_events = nf_ct_events; 410 INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); 411 return nf_conntrack_event_init_sysctl(net); 412 } 413 414 void nf_conntrack_ecache_pernet_fini(struct net *net) 415 { 416 cancel_delayed_work_sync(&net->ct.ecache_dwork); 417 nf_conntrack_event_fini_sysctl(net); 418 } 419 420 int nf_conntrack_ecache_init(void) 421 { 422 int ret = nf_ct_extend_register(&event_extend); 423 if (ret < 0) 424 pr_err("Unable to register event extension\n"); 425 426 BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ 427 428 return ret; 429 } 430 431 void nf_conntrack_ecache_fini(void) 432 { 433 nf_ct_extend_unregister(&event_extend); 434 } 435