1 /* Event cache for netfilter. */ 2 3 /* 4 * (C) 2005 Harald Welte <laforge@gnumonks.org> 5 * (C) 2005 Patrick McHardy <kaber@trash.net> 6 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> 7 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14 #include <linux/types.h> 15 #include <linux/netfilter.h> 16 #include <linux/skbuff.h> 17 #include <linux/vmalloc.h> 18 #include <linux/stddef.h> 19 #include <linux/err.h> 20 #include <linux/percpu.h> 21 #include <linux/kernel.h> 22 #include <linux/netdevice.h> 23 #include <linux/slab.h> 24 #include <linux/export.h> 25 26 #include <net/netfilter/nf_conntrack.h> 27 #include <net/netfilter/nf_conntrack_core.h> 28 #include <net/netfilter/nf_conntrack_extend.h> 29 30 static DEFINE_MUTEX(nf_ct_ecache_mutex); 31 32 #define ECACHE_RETRY_WAIT (HZ/10) 33 34 enum retry_state { 35 STATE_CONGESTED, 36 STATE_RESTART, 37 STATE_DONE, 38 }; 39 40 static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) 41 { 42 struct nf_conn *refs[16]; 43 struct nf_conntrack_tuple_hash *h; 44 struct hlist_nulls_node *n; 45 unsigned int evicted = 0; 46 enum retry_state ret = STATE_DONE; 47 48 spin_lock(&pcpu->lock); 49 50 hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { 51 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 52 struct nf_conntrack_ecache *e; 53 54 if (!nf_ct_is_confirmed(ct)) 55 continue; 56 57 e = nf_ct_ecache_find(ct); 58 if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) 59 continue; 60 61 if (nf_conntrack_event(IPCT_DESTROY, ct)) { 62 ret = STATE_CONGESTED; 63 break; 64 } 65 66 e->state = NFCT_ECACHE_DESTROY_SENT; 67 refs[evicted] = ct; 68 69 if (++evicted >= ARRAY_SIZE(refs)) { 70 ret = STATE_RESTART; 71 break; 72 } 73 } 74 75 spin_unlock(&pcpu->lock); 76 77 /* can't _put while holding lock */ 78 while (evicted) 79 nf_ct_put(refs[--evicted]); 80 81 return ret; 82 } 83 84 static void ecache_work(struct work_struct *work) 85 { 86 struct netns_ct *ctnet = 87 container_of(work, struct netns_ct, ecache_dwork.work); 88 int cpu, delay = -1; 89 struct ct_pcpu *pcpu; 90 91 local_bh_disable(); 92 93 for_each_possible_cpu(cpu) { 94 enum retry_state ret; 95 96 pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); 97 98 ret = ecache_work_evict_list(pcpu); 99 100 switch (ret) { 101 case STATE_CONGESTED: 102 delay = ECACHE_RETRY_WAIT; 103 goto out; 104 case STATE_RESTART: 105 delay = 0; 106 break; 107 case STATE_DONE: 108 break; 109 } 110 } 111 112 out: 113 local_bh_enable(); 114 115 ctnet->ecache_dwork_pending = delay > 0; 116 if (delay >= 0) 117 schedule_delayed_work(&ctnet->ecache_dwork, delay); 118 } 119 120 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 121 u32 portid, int report) 122 { 123 int ret = 0; 124 struct net *net = nf_ct_net(ct); 125 struct nf_ct_event_notifier *notify; 126 struct nf_conntrack_ecache *e; 127 128 rcu_read_lock(); 129 notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 130 if (!notify) 131 goto out_unlock; 132 133 e = nf_ct_ecache_find(ct); 134 if (!e) 135 goto out_unlock; 136 137 if (nf_ct_is_confirmed(ct)) { 138 struct nf_ct_event item = { 139 .ct = ct, 140 .portid = e->portid ? e->portid : portid, 141 .report = report 142 }; 143 /* This is a resent of a destroy event? If so, skip missed */ 144 unsigned long missed = e->portid ? 0 : e->missed; 145 146 if (!((eventmask | missed) & e->ctmask)) 147 goto out_unlock; 148 149 ret = notify->fcn(eventmask | missed, &item); 150 if (unlikely(ret < 0 || missed)) { 151 spin_lock_bh(&ct->lock); 152 if (ret < 0) { 153 /* This is a destroy event that has been 154 * triggered by a process, we store the PORTID 155 * to include it in the retransmission. 156 */ 157 if (eventmask & (1 << IPCT_DESTROY)) { 158 if (e->portid == 0 && portid != 0) 159 e->portid = portid; 160 e->state = NFCT_ECACHE_DESTROY_FAIL; 161 } else { 162 e->missed |= eventmask; 163 } 164 } else { 165 e->missed &= ~missed; 166 } 167 spin_unlock_bh(&ct->lock); 168 } 169 } 170 out_unlock: 171 rcu_read_unlock(); 172 return ret; 173 } 174 EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 175 176 /* deliver cached events and clear cache entry - must be called with locally 177 * disabled softirqs */ 178 void nf_ct_deliver_cached_events(struct nf_conn *ct) 179 { 180 struct net *net = nf_ct_net(ct); 181 unsigned long events, missed; 182 struct nf_ct_event_notifier *notify; 183 struct nf_conntrack_ecache *e; 184 struct nf_ct_event item; 185 int ret; 186 187 rcu_read_lock(); 188 notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 189 if (notify == NULL) 190 goto out_unlock; 191 192 e = nf_ct_ecache_find(ct); 193 if (e == NULL) 194 goto out_unlock; 195 196 events = xchg(&e->cache, 0); 197 198 if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 199 goto out_unlock; 200 201 /* We make a copy of the missed event cache without taking 202 * the lock, thus we may send missed events twice. However, 203 * this does not harm and it happens very rarely. */ 204 missed = e->missed; 205 206 if (!((events | missed) & e->ctmask)) 207 goto out_unlock; 208 209 item.ct = ct; 210 item.portid = 0; 211 item.report = 0; 212 213 ret = notify->fcn(events | missed, &item); 214 215 if (likely(ret == 0 && !missed)) 216 goto out_unlock; 217 218 spin_lock_bh(&ct->lock); 219 if (ret < 0) 220 e->missed |= events; 221 else 222 e->missed &= ~missed; 223 spin_unlock_bh(&ct->lock); 224 225 out_unlock: 226 rcu_read_unlock(); 227 } 228 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); 229 230 void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 231 struct nf_conntrack_expect *exp, 232 u32 portid, int report) 233 234 { 235 struct net *net = nf_ct_exp_net(exp); 236 struct nf_exp_event_notifier *notify; 237 struct nf_conntrack_ecache *e; 238 239 rcu_read_lock(); 240 notify = rcu_dereference(net->ct.nf_expect_event_cb); 241 if (!notify) 242 goto out_unlock; 243 244 e = nf_ct_ecache_find(exp->master); 245 if (!e) 246 goto out_unlock; 247 248 if (e->expmask & (1 << event)) { 249 struct nf_exp_event item = { 250 .exp = exp, 251 .portid = portid, 252 .report = report 253 }; 254 notify->fcn(1 << event, &item); 255 } 256 out_unlock: 257 rcu_read_unlock(); 258 } 259 260 int nf_conntrack_register_notifier(struct net *net, 261 struct nf_ct_event_notifier *new) 262 { 263 int ret; 264 struct nf_ct_event_notifier *notify; 265 266 mutex_lock(&nf_ct_ecache_mutex); 267 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 268 lockdep_is_held(&nf_ct_ecache_mutex)); 269 if (notify != NULL) { 270 ret = -EBUSY; 271 goto out_unlock; 272 } 273 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 274 ret = 0; 275 276 out_unlock: 277 mutex_unlock(&nf_ct_ecache_mutex); 278 return ret; 279 } 280 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 281 282 void nf_conntrack_unregister_notifier(struct net *net, 283 struct nf_ct_event_notifier *new) 284 { 285 struct nf_ct_event_notifier *notify; 286 287 mutex_lock(&nf_ct_ecache_mutex); 288 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 289 lockdep_is_held(&nf_ct_ecache_mutex)); 290 BUG_ON(notify != new); 291 RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); 292 mutex_unlock(&nf_ct_ecache_mutex); 293 /* synchronize_rcu() is called from ctnetlink_exit. */ 294 } 295 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 296 297 int nf_ct_expect_register_notifier(struct net *net, 298 struct nf_exp_event_notifier *new) 299 { 300 int ret; 301 struct nf_exp_event_notifier *notify; 302 303 mutex_lock(&nf_ct_ecache_mutex); 304 notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 305 lockdep_is_held(&nf_ct_ecache_mutex)); 306 if (notify != NULL) { 307 ret = -EBUSY; 308 goto out_unlock; 309 } 310 rcu_assign_pointer(net->ct.nf_expect_event_cb, new); 311 ret = 0; 312 313 out_unlock: 314 mutex_unlock(&nf_ct_ecache_mutex); 315 return ret; 316 } 317 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); 318 319 void nf_ct_expect_unregister_notifier(struct net *net, 320 struct nf_exp_event_notifier *new) 321 { 322 struct nf_exp_event_notifier *notify; 323 324 mutex_lock(&nf_ct_ecache_mutex); 325 notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 326 lockdep_is_held(&nf_ct_ecache_mutex)); 327 BUG_ON(notify != new); 328 RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); 329 mutex_unlock(&nf_ct_ecache_mutex); 330 /* synchronize_rcu() is called from ctnetlink_exit. */ 331 } 332 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 333 334 #define NF_CT_EVENTS_DEFAULT 1 335 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; 336 337 #ifdef CONFIG_SYSCTL 338 static struct ctl_table event_sysctl_table[] = { 339 { 340 .procname = "nf_conntrack_events", 341 .data = &init_net.ct.sysctl_events, 342 .maxlen = sizeof(unsigned int), 343 .mode = 0644, 344 .proc_handler = proc_dointvec, 345 }, 346 {} 347 }; 348 #endif /* CONFIG_SYSCTL */ 349 350 static const struct nf_ct_ext_type event_extend = { 351 .len = sizeof(struct nf_conntrack_ecache), 352 .align = __alignof__(struct nf_conntrack_ecache), 353 .id = NF_CT_EXT_ECACHE, 354 }; 355 356 #ifdef CONFIG_SYSCTL 357 static int nf_conntrack_event_init_sysctl(struct net *net) 358 { 359 struct ctl_table *table; 360 361 table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), 362 GFP_KERNEL); 363 if (!table) 364 goto out; 365 366 table[0].data = &net->ct.sysctl_events; 367 368 /* Don't export sysctls to unprivileged users */ 369 if (net->user_ns != &init_user_ns) 370 table[0].procname = NULL; 371 372 net->ct.event_sysctl_header = 373 register_net_sysctl(net, "net/netfilter", table); 374 if (!net->ct.event_sysctl_header) { 375 printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); 376 goto out_register; 377 } 378 return 0; 379 380 out_register: 381 kfree(table); 382 out: 383 return -ENOMEM; 384 } 385 386 static void nf_conntrack_event_fini_sysctl(struct net *net) 387 { 388 struct ctl_table *table; 389 390 table = net->ct.event_sysctl_header->ctl_table_arg; 391 unregister_net_sysctl_table(net->ct.event_sysctl_header); 392 kfree(table); 393 } 394 #else 395 static int nf_conntrack_event_init_sysctl(struct net *net) 396 { 397 return 0; 398 } 399 400 static void nf_conntrack_event_fini_sysctl(struct net *net) 401 { 402 } 403 #endif /* CONFIG_SYSCTL */ 404 405 int nf_conntrack_ecache_pernet_init(struct net *net) 406 { 407 net->ct.sysctl_events = nf_ct_events; 408 INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); 409 return nf_conntrack_event_init_sysctl(net); 410 } 411 412 void nf_conntrack_ecache_pernet_fini(struct net *net) 413 { 414 cancel_delayed_work_sync(&net->ct.ecache_dwork); 415 nf_conntrack_event_fini_sysctl(net); 416 } 417 418 int nf_conntrack_ecache_init(void) 419 { 420 int ret = nf_ct_extend_register(&event_extend); 421 if (ret < 0) 422 pr_err("nf_ct_event: Unable to register event extension.\n"); 423 424 BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ 425 426 return ret; 427 } 428 429 void nf_conntrack_ecache_fini(void) 430 { 431 nf_ct_extend_unregister(&event_extend); 432 } 433