/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

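/* Delay before a failed event delivery is retried: HZ/10 jiffies, i.e. 100ms. */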
#define ECACHE_RETRY_WAIT (HZ/10)

enum retry_state {
	STATE_CONGESTED,
	STATE_RESTART,
	STATE_DONE,
};

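/* Walk this cpu's dying list and retry delivery of destroy events that
 * could not be sent earlier (NFCT_ECACHE_DESTROY_FAIL).  Returns
 * STATE_CONGESTED if the notifier is still congested, STATE_RESTART if
 * the refs[] batch filled up and another pass is needed, or STATE_DONE
 * once the whole list has been handled.
 */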
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
	struct nf_conn *refs[16];
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int evicted = 0;
	enum retry_state ret = STATE_DONE;

	spin_lock(&pcpu->lock);

	hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
		struct nf_conntrack_ecache *e;

		if (!nf_ct_is_confirmed(ct))
			continue;

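		/* This ecache access is safe because the ct is on the
		 * pcpu dying list and we hold the spinlock -- the entry
		 * cannot be freed until after the lock is released.
		 */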
		e = nf_ct_ecache_find(ct);
		if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
			continue;

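		/* ct is in NFCT_ECACHE_DESTROY_FAIL state, which means the
		 * worker owns this entry: it stays valid until the worker
		 * drops its reference via nf_ct_put() below.
		 */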
		if (nf_conntrack_event(IPCT_DESTROY, ct)) {
			ret = STATE_CONGESTED;
			break;
		}

		e->state = NFCT_ECACHE_DESTROY_SENT;
		refs[evicted] = ct;

		if (++evicted >= ARRAY_SIZE(refs)) {
			ret = STATE_RESTART;
			break;
		}
	}

	spin_unlock(&pcpu->lock);

	/* can't _put while holding lock */
	while (evicted)
		nf_ct_put(refs[--evicted]);

	return ret;
}

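/* Delayed work that retries failed destroy-event deliveries.  It walks the
 * dying list of every possible cpu and reschedules itself: immediately
 * after a full batch (STATE_RESTART), after ECACHE_RETRY_WAIT when the
 * notifier is congested, and not at all once every list has been drained.
 */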
static void ecache_work(struct work_struct *work)
{
	struct netns_ct *ctnet =
		container_of(work, struct netns_ct, ecache_dwork.work);
	int cpu, delay = -1;
	struct ct_pcpu *pcpu;

	local_bh_disable();

	for_each_possible_cpu(cpu) {
		enum retry_state ret;

		pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);

		ret = ecache_work_evict_list(pcpu);

		switch (ret) {
		case STATE_CONGESTED:
			delay = ECACHE_RETRY_WAIT;
			goto out;
		case STATE_RESTART:
			delay = 0;
			break;
		case STATE_DONE:
			break;
		}
	}

 out:
	local_bh_enable();

	ctnet->ecache_dwork_pending = delay > 0;
	if (delay >= 0)
		schedule_delayed_work(&ctnet->ecache_dwork, delay);
}

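/* Deliver the events in @eventmask, plus any previously missed events, for
 * @ct to the registered notifier.  If delivery of a destroy event fails,
 * the ecache entry is flagged NFCT_ECACHE_DESTROY_FAIL so that ecache_work
 * can retransmit it later; other failed events accumulate in e->missed.
 */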
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
				  u32 portid, int report)
{
	int ret = 0;
	struct net *net = nf_ct_net(ct);
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (!e)
		goto out_unlock;

	if (nf_ct_is_confirmed(ct)) {
		struct nf_ct_event item = {
			.ct	= ct,
			.portid	= e->portid ? e->portid : portid,
			.report = report
		};
		/* Is this a resend of a destroy event? If so, skip the
		 * missed events. */
		unsigned long missed = e->portid ? 0 : e->missed;

		if (!((eventmask | missed) & e->ctmask))
			goto out_unlock;

		ret = notify->fcn(eventmask | missed, &item);
		if (unlikely(ret < 0 || missed)) {
			spin_lock_bh(&ct->lock);
			if (ret < 0) {
				/* This is a destroy event that has been
				 * triggered by a process; store the portid
				 * so it can be included in the
				 * retransmission.
				 */
				if (eventmask & (1 << IPCT_DESTROY)) {
					if (e->portid == 0 && portid != 0)
						e->portid = portid;
					e->state = NFCT_ECACHE_DESTROY_FAIL;
				} else {
					e->missed |= eventmask;
				}
			} else {
				e->missed &= ~missed;
			}
			spin_unlock_bh(&ct->lock);
		}
	}
out_unlock:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);

/* Deliver cached events and clear the cache entry. Must be called with
 * softirqs disabled locally. */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned long events, missed;
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	int ret;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (notify == NULL)
		goto out_unlock;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		goto out_unlock;

	events = xchg(&e->cache, 0);

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
		goto out_unlock;

	/* We make a copy of the missed event cache without taking
	 * the lock, so we may send missed events twice. However,
	 * this is harmless and happens very rarely. */
	missed = e->missed;

	if (!((events | missed) & e->ctmask))
		goto out_unlock;

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	ret = notify->fcn(events | missed, &item);

	if (likely(ret == 0 && !missed))
		goto out_unlock;

	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);

out_unlock:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

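/* Deliver an expectation event to the registered expectation notifier.  The
 * master conntrack's ecache expmask decides whether the event is reported.
 */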
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
			       struct nf_conntrack_expect *exp,
			       u32 portid, int report)
{
	struct net *net = nf_ct_exp_net(exp);
	struct nf_exp_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_expect_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(exp->master);
	if (!e)
		goto out_unlock;

	if (e->expmask & (1 << event)) {
		struct nf_exp_event item = {
			.exp	= exp,
			.portid	= portid,
			.report = report
		};
		notify->fcn(1 << event, &item);
	}
out_unlock:
	rcu_read_unlock();
}

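/* Only one conntrack event notifier may be registered per network namespace;
 * a second registration fails with -EBUSY.  A minimal usage sketch follows
 * (the callback and notifier names are illustrative, not part of this file;
 * ctnetlink is the in-tree user):
 *
 *	static int my_ct_event(unsigned int events, struct nf_ct_event *item)
 *	{
 *		return 0;	// 0 on success, < 0 to request redelivery
 *	}
 *
 *	static struct nf_ct_event_notifier my_notifier = {
 *		.fcn	= my_ct_event,
 *	};
 *
 *	err = nf_conntrack_register_notifier(net, &my_notifier);
 *	...
 *	nf_conntrack_unregister_notifier(net, &my_notifier);
 */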
int nf_conntrack_register_notifier(struct net *net,
				   struct nf_ct_event_notifier *new)
{
	int ret;
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	if (notify != NULL) {
		ret = -EBUSY;
		goto out_unlock;
	}
	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
	ret = 0;

out_unlock:
	mutex_unlock(&nf_ct_ecache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net,
				      struct nf_ct_event_notifier *new)
{
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	BUG_ON(notify != new);
	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

int nf_ct_expect_register_notifier(struct net *net,
				   struct nf_exp_event_notifier *new)
{
	int ret;
	struct nf_exp_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	if (notify != NULL) {
		ret = -EBUSY;
		goto out_unlock;
	}
	rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
	ret = 0;

out_unlock:
	mutex_unlock(&nf_ct_ecache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);

void nf_ct_expect_unregister_notifier(struct net *net,
				      struct nf_exp_event_notifier *new)
{
	struct nf_exp_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	BUG_ON(notify != new);
	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);

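/* Default for the per-netns sysctl_events switch; 1 means conntrack event
 * delivery is enabled by default.
 */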
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

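/* Extension type descriptor: registering it lets each conntrack entry carry
 * a struct nf_conntrack_ecache in its extension area (NF_CT_EXT_ECACHE).
 */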
static const struct nf_ct_ext_type event_extend = {
	.len	= sizeof(struct nf_conntrack_ecache),
	.align	= __alignof__(struct nf_conntrack_ecache),
	.id	= NF_CT_EXT_ECACHE,
};

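/* Per-netns setup: seed the events sysctl from the module default and
 * initialize the delayed work used to retransmit failed destroy events.
 */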
void nf_conntrack_ecache_pernet_init(struct net *net)
{
	net->ct.sysctl_events = nf_ct_events;
	INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
	cancel_delayed_work_sync(&net->ct.ecache_dwork);
}

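/* Register the ecache extension type so conntrack entries can carry an
 * event cache.
 */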
int nf_conntrack_ecache_init(void)
{
	int ret = nf_ct_extend_register(&event_extend);

	if (ret < 0)
		pr_err("Unable to register event extension\n");

	BUILD_BUG_ON(__IPCT_MAX >= 16);	/* ctmask, missed use u16 */

	return ret;
}

void nf_conntrack_ecache_fini(void)
{
	nf_ct_extend_unregister(&event_extend);
}