// SPDX-License-Identifier: GPL-2.0-only
/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

#define ECACHE_RETRY_WAIT (HZ/10)
#define ECACHE_STACK_ALLOC (256 / sizeof(void *))

enum retry_state {
	STATE_CONGESTED,
	STATE_RESTART,
	STATE_DONE,
};

static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
	struct nf_conn *refs[ECACHE_STACK_ALLOC];
	enum retry_state ret = STATE_DONE;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int evicted = 0;

	spin_lock(&pcpu->lock);

	hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
		struct nf_conntrack_ecache *e;

		if (!nf_ct_is_confirmed(ct))
			continue;

		/* This ecache access is safe because the ct is on the
		 * pcpu dying list and we hold the spinlock -- the entry
		 * cannot be free'd until after the lock is released.
		 *
		 * This is true even if ct has a refcount of 0: the
		 * cpu that is about to free the entry must remove it
		 * from the dying list and needs the lock to do so.
		 */
		e = nf_ct_ecache_find(ct);
		if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
			continue;

		/* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
		 * the worker owns this entry: the ct will remain valid
		 * until the worker puts its ct reference.
		 */
		if (nf_conntrack_event(IPCT_DESTROY, ct)) {
			ret = STATE_CONGESTED;
			break;
		}

		e->state = NFCT_ECACHE_DESTROY_SENT;
		refs[evicted] = ct;

		if (++evicted >= ARRAY_SIZE(refs)) {
			ret = STATE_RESTART;
			break;
		}
	}

	spin_unlock(&pcpu->lock);

	/* can't _put while holding lock */
	while (evicted)
		nf_ct_put(refs[--evicted]);

	return ret;
}
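/* Overview (editorial comment, not upstream text): redelivery of failed
 * IPCT_DESTROY events roughly follows this sequence:
 *
 *	listener congested, delivery fails
 *	    -> e->state = NFCT_ECACHE_DESTROY_FAIL (set in
 *	       nf_conntrack_eventmask_report() below), worker scheduled
 *	ecache_work() walks the per-cpu dying lists and retries
 *	    -> on success, e->state = NFCT_ECACHE_DESTROY_SENT and the
 *	       ct reference is dropped
 *	    -> on congestion, the worker backs off for ECACHE_RETRY_WAIT
 */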
static void ecache_work(struct work_struct *work)
{
	struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
	struct netns_ct *ctnet = cnet->ct_net;
	int cpu, delay = -1;
	struct ct_pcpu *pcpu;

	local_bh_disable();

	for_each_possible_cpu(cpu) {
		enum retry_state ret;

		pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);

		ret = ecache_work_evict_list(pcpu);

		switch (ret) {
		case STATE_CONGESTED:
			delay = ECACHE_RETRY_WAIT;
			goto out;
		case STATE_RESTART:
			delay = 0;
			break;
		case STATE_DONE:
			break;
		}
	}

out:
	local_bh_enable();

	ctnet->ecache_dwork_pending = delay > 0;
	if (delay >= 0)
		schedule_delayed_work(&cnet->ecache_dwork, delay);
}

static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
					   const unsigned int events,
					   const unsigned long missed,
					   const struct nf_ct_event *item)
{
	struct nf_conn *ct = item->ct;
	struct net *net = nf_ct_net(item->ct);
	struct nf_ct_event_notifier *notify;
	int ret;

	if (!((events | missed) & e->ctmask))
		return 0;

	rcu_read_lock();

	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify) {
		rcu_read_unlock();
		return 0;
	}

	ret = notify->ct_event(events | missed, item);
	rcu_read_unlock();

	if (likely(ret >= 0 && missed == 0))
		return 0;

	spin_lock_bh(&ct->lock);
	if (ret < 0)
		e->missed |= events;
	else
		e->missed &= ~missed;
	spin_unlock_bh(&ct->lock);

	return ret;
}

int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
				  u32 portid, int report)
{
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	unsigned long missed;
	int ret;

	if (!nf_ct_is_confirmed(ct))
		return 0;

	e = nf_ct_ecache_find(ct);
	if (!e)
		return 0;

	memset(&item, 0, sizeof(item));

	item.ct = ct;
	item.portid = e->portid ? e->portid : portid;
	item.report = report;

	/* Is this a resend of a destroy event? If so, skip the missed events. */
	missed = e->portid ? 0 : e->missed;

	ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
	if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
		/* This is a destroy event that has been triggered by a
		 * process; store the PORTID so it can be included in the
		 * retransmission.
		 */
		if (e->portid == 0 && portid != 0)
			e->portid = portid;
		e->state = NFCT_ECACHE_DESTROY_FAIL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
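/* Usage sketch (illustrative, not code from this file): most callers queue
 * events into the per-conntrack cache and let the confirm/update path flush
 * them via nf_ct_deliver_cached_events() below, e.g.:
 *
 *	nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 *	...
 *	nf_ct_deliver_cached_events(ct);
 *
 * Events that need an explicit portid/report, such as an IPCT_DESTROY
 * requested via ctnetlink, go through nf_conntrack_eventmask_report()
 * directly.
 */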
/* Deliver cached events and clear the cache entry - must be called with
 * locally disabled softirqs.
 */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	unsigned long events;

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
		return;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		return;

	events = xchg(&e->cache, 0);

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does no harm and it happens very rarely.
	 */
	__nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
			       struct nf_conntrack_expect *exp,
			       u32 portid, int report)
{
	struct net *net = nf_ct_exp_net(exp);
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(exp->master);
	if (!e)
		goto out_unlock;

	if (e->expmask & (1 << event)) {
		struct nf_exp_event item = {
			.exp = exp,
			.portid = portid,
			.report = report
		};
		notify->exp_event(1 << event, &item);
	}
out_unlock:
	rcu_read_unlock();
}

void nf_conntrack_register_notifier(struct net *net,
				    const struct nf_ct_event_notifier *new)
{
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	WARN_ON_ONCE(notify);
	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
	mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net)
{
	mutex_lock(&nf_ct_ecache_mutex);
	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
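/* Registration sketch (hedged example; the callback names are hypothetical,
 * the pattern mirrors what ctnetlink does): only one notifier per netns is
 * supported, so a subscriber supplies both callbacks and registers once:
 *
 *	static const struct nf_ct_event_notifier my_notifier = {
 *		.ct_event  = my_ct_event_cb,
 *		.exp_event = my_exp_event_cb,
 *	};
 *
 *	nf_conntrack_register_notifier(net, &my_notifier);
 *	...
 *	nf_conntrack_unregister_notifier(net);
 *
 * A negative return value from ->ct_event signals congestion; for
 * IPCT_DESTROY this arms the redelivery machinery above.
 */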
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	if (state == NFCT_ECACHE_DESTROY_FAIL &&
	    !delayed_work_pending(&cnet->ecache_dwork)) {
		schedule_delayed_work(&cnet->ecache_dwork, HZ);
		net->ct.ecache_dwork_pending = true;
	} else if (state == NFCT_ECACHE_DESTROY_SENT) {
		net->ct.ecache_dwork_pending = false;
		mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
	}
}

#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

static const struct nf_ct_ext_type event_extend = {
	.len	= sizeof(struct nf_conntrack_ecache),
	.align	= __alignof__(struct nf_conntrack_ecache),
	.id	= NF_CT_EXT_ECACHE,
};

void nf_conntrack_ecache_pernet_init(struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	net->ct.sysctl_events = nf_ct_events;
	cnet->ct_net = &net->ct;
	INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	cancel_delayed_work_sync(&cnet->ecache_dwork);
}

int nf_conntrack_ecache_init(void)
{
	int ret = nf_ct_extend_register(&event_extend);

	if (ret < 0)
		pr_err("Unable to register event extension\n");

	BUILD_BUG_ON(__IPCT_MAX >= 16);	/* ctmask, missed use u16 */

	return ret;
}

void nf_conntrack_ecache_fini(void)
{
	nf_ct_extend_unregister(&event_extend);
}
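/* Wiring note (editorial; see nf_conntrack_core.c for the authoritative
 * call sites): nf_conntrack_ecache_init()/_fini() run once at conntrack
 * start/cleanup to (un)register the ecache extension, while the pernet
 * helpers above set up and tear down the per-netns redelivery worker.
 */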