1 /* 2 * inet fragments management 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Pavel Emelyanov <xemul@openvz.org> 10 * Started as consolidation of ipv4/ip_fragment.c, 11 * ipv6/reassembly. and ipv6 nf conntrack reassembly 12 */ 13 14 #include <linux/list.h> 15 #include <linux/spinlock.h> 16 #include <linux/module.h> 17 #include <linux/timer.h> 18 #include <linux/mm.h> 19 #include <linux/random.h> 20 #include <linux/skbuff.h> 21 #include <linux/rtnetlink.h> 22 #include <linux/slab.h> 23 24 #include <net/sock.h> 25 #include <net/inet_frag.h> 26 #include <net/inet_ecn.h> 27 28 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements 29 * Value : 0xff if frame should be dropped. 30 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field 31 */ 32 const u8 ip_frag_ecn_table[16] = { 33 /* at least one fragment had CE, and others ECT_0 or ECT_1 */ 34 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, 35 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 36 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 37 38 /* invalid combinations : drop frame */ 39 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, 40 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, 41 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, 42 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 43 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, 44 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, 45 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 46 }; 47 EXPORT_SYMBOL(ip_frag_ecn_table); 48 49 static void inet_frag_secret_rebuild(unsigned long dummy) 50 { 51 struct inet_frags *f = (struct inet_frags *)dummy; 52 unsigned long now = jiffies; 53 int i; 54 55 /* Per bucket lock NOT needed here, due to write lock protection */ 56 write_lock(&f->lock); 57 58 get_random_bytes(&f->rnd, sizeof(u32)); 59 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 60 struct inet_frag_bucket *hb; 61 struct inet_frag_queue *q; 62 struct hlist_node *n; 63 64 hb = &f->hash[i]; 65 hlist_for_each_entry_safe(q, n, &hb->chain, list) { 66 unsigned int hval = f->hashfn(q); 67 68 if (hval != i) { 69 struct inet_frag_bucket *hb_dest; 70 71 hlist_del(&q->list); 72 73 /* Relink to new hash chain. */ 74 hb_dest = &f->hash[hval]; 75 hlist_add_head(&q->list, &hb_dest->chain); 76 } 77 } 78 } 79 write_unlock(&f->lock); 80 81 mod_timer(&f->secret_timer, now + f->secret_interval); 82 } 83 84 void inet_frags_init(struct inet_frags *f) 85 { 86 int i; 87 88 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 89 struct inet_frag_bucket *hb = &f->hash[i]; 90 91 spin_lock_init(&hb->chain_lock); 92 INIT_HLIST_HEAD(&hb->chain); 93 } 94 rwlock_init(&f->lock); 95 96 setup_timer(&f->secret_timer, inet_frag_secret_rebuild, 97 (unsigned long)f); 98 f->secret_timer.expires = jiffies + f->secret_interval; 99 add_timer(&f->secret_timer); 100 } 101 EXPORT_SYMBOL(inet_frags_init); 102 103 void inet_frags_init_net(struct netns_frags *nf) 104 { 105 nf->nqueues = 0; 106 init_frag_mem_limit(nf); 107 INIT_LIST_HEAD(&nf->lru_list); 108 spin_lock_init(&nf->lru_lock); 109 } 110 EXPORT_SYMBOL(inet_frags_init_net); 111 112 void inet_frags_fini(struct inet_frags *f) 113 { 114 del_timer(&f->secret_timer); 115 } 116 EXPORT_SYMBOL(inet_frags_fini); 117 118 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) 119 { 120 nf->low_thresh = 0; 121 122 local_bh_disable(); 123 inet_frag_evictor(nf, f, true); 124 local_bh_enable(); 125 126 percpu_counter_destroy(&nf->mem); 127 } 128 EXPORT_SYMBOL(inet_frags_exit_net); 129 130 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 131 { 132 struct inet_frag_bucket *hb; 133 unsigned int hash; 134 135 read_lock(&f->lock); 136 hash = f->hashfn(fq); 137 hb = &f->hash[hash]; 138 139 spin_lock(&hb->chain_lock); 140 hlist_del(&fq->list); 141 spin_unlock(&hb->chain_lock); 142 143 read_unlock(&f->lock); 144 inet_frag_lru_del(fq); 145 } 146 147 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) 148 { 149 if (del_timer(&fq->timer)) 150 atomic_dec(&fq->refcnt); 151 152 if (!(fq->last_in & INET_FRAG_COMPLETE)) { 153 fq_unlink(fq, f); 154 atomic_dec(&fq->refcnt); 155 fq->last_in |= INET_FRAG_COMPLETE; 156 } 157 } 158 EXPORT_SYMBOL(inet_frag_kill); 159 160 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, 161 struct sk_buff *skb) 162 { 163 if (f->skb_free) 164 f->skb_free(skb); 165 kfree_skb(skb); 166 } 167 168 void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, 169 int *work) 170 { 171 struct sk_buff *fp; 172 struct netns_frags *nf; 173 unsigned int sum, sum_truesize = 0; 174 175 WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); 176 WARN_ON(del_timer(&q->timer) != 0); 177 178 /* Release all fragment data. */ 179 fp = q->fragments; 180 nf = q->net; 181 while (fp) { 182 struct sk_buff *xp = fp->next; 183 184 sum_truesize += fp->truesize; 185 frag_kfree_skb(nf, f, fp); 186 fp = xp; 187 } 188 sum = sum_truesize + f->qsize; 189 if (work) 190 *work -= sum; 191 sub_frag_mem_limit(q, sum); 192 193 if (f->destructor) 194 f->destructor(q); 195 kfree(q); 196 197 } 198 EXPORT_SYMBOL(inet_frag_destroy); 199 200 int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) 201 { 202 struct inet_frag_queue *q; 203 int work, evicted = 0; 204 205 if (!force) { 206 if (frag_mem_limit(nf) <= nf->high_thresh) 207 return 0; 208 } 209 210 work = frag_mem_limit(nf) - nf->low_thresh; 211 while (work > 0 || force) { 212 spin_lock(&nf->lru_lock); 213 214 if (list_empty(&nf->lru_list)) { 215 spin_unlock(&nf->lru_lock); 216 break; 217 } 218 219 q = list_first_entry(&nf->lru_list, 220 struct inet_frag_queue, lru_list); 221 atomic_inc(&q->refcnt); 222 /* Remove q from list to avoid several CPUs grabbing it */ 223 list_del_init(&q->lru_list); 224 225 spin_unlock(&nf->lru_lock); 226 227 spin_lock(&q->lock); 228 if (!(q->last_in & INET_FRAG_COMPLETE)) 229 inet_frag_kill(q, f); 230 spin_unlock(&q->lock); 231 232 if (atomic_dec_and_test(&q->refcnt)) 233 inet_frag_destroy(q, f, &work); 234 evicted++; 235 } 236 237 return evicted; 238 } 239 EXPORT_SYMBOL(inet_frag_evictor); 240 241 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, 242 struct inet_frag_queue *qp_in, struct inet_frags *f, 243 void *arg) 244 { 245 struct inet_frag_bucket *hb; 246 struct inet_frag_queue *qp; 247 unsigned int hash; 248 249 read_lock(&f->lock); /* Protects against hash rebuild */ 250 /* 251 * While we stayed w/o the lock other CPU could update 252 * the rnd seed, so we need to re-calculate the hash 253 * chain. Fortunatelly the qp_in can be used to get one. 254 */ 255 hash = f->hashfn(qp_in); 256 hb = &f->hash[hash]; 257 spin_lock(&hb->chain_lock); 258 259 #ifdef CONFIG_SMP 260 /* With SMP race we have to recheck hash table, because 261 * such entry could be created on other cpu, while we 262 * released the hash bucket lock. 263 */ 264 hlist_for_each_entry(qp, &hb->chain, list) { 265 if (qp->net == nf && f->match(qp, arg)) { 266 atomic_inc(&qp->refcnt); 267 spin_unlock(&hb->chain_lock); 268 read_unlock(&f->lock); 269 qp_in->last_in |= INET_FRAG_COMPLETE; 270 inet_frag_put(qp_in, f); 271 return qp; 272 } 273 } 274 #endif 275 qp = qp_in; 276 if (!mod_timer(&qp->timer, jiffies + nf->timeout)) 277 atomic_inc(&qp->refcnt); 278 279 atomic_inc(&qp->refcnt); 280 hlist_add_head(&qp->list, &hb->chain); 281 inet_frag_lru_add(nf, qp); 282 spin_unlock(&hb->chain_lock); 283 read_unlock(&f->lock); 284 285 return qp; 286 } 287 288 static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, 289 struct inet_frags *f, void *arg) 290 { 291 struct inet_frag_queue *q; 292 293 q = kzalloc(f->qsize, GFP_ATOMIC); 294 if (q == NULL) 295 return NULL; 296 297 q->net = nf; 298 f->constructor(q, arg); 299 add_frag_mem_limit(q, f->qsize); 300 301 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 302 spin_lock_init(&q->lock); 303 atomic_set(&q->refcnt, 1); 304 INIT_LIST_HEAD(&q->lru_list); 305 306 return q; 307 } 308 309 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, 310 struct inet_frags *f, void *arg) 311 { 312 struct inet_frag_queue *q; 313 314 q = inet_frag_alloc(nf, f, arg); 315 if (q == NULL) 316 return NULL; 317 318 return inet_frag_intern(nf, q, f, arg); 319 } 320 321 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 322 struct inet_frags *f, void *key, unsigned int hash) 323 __releases(&f->lock) 324 { 325 struct inet_frag_bucket *hb; 326 struct inet_frag_queue *q; 327 int depth = 0; 328 329 hb = &f->hash[hash]; 330 331 spin_lock(&hb->chain_lock); 332 hlist_for_each_entry(q, &hb->chain, list) { 333 if (q->net == nf && f->match(q, key)) { 334 atomic_inc(&q->refcnt); 335 spin_unlock(&hb->chain_lock); 336 read_unlock(&f->lock); 337 return q; 338 } 339 depth++; 340 } 341 spin_unlock(&hb->chain_lock); 342 read_unlock(&f->lock); 343 344 if (depth <= INETFRAGS_MAXDEPTH) 345 return inet_frag_create(nf, f, key); 346 else 347 return ERR_PTR(-ENOBUFS); 348 } 349 EXPORT_SYMBOL(inet_frag_find); 350 351 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, 352 const char *prefix) 353 { 354 static const char msg[] = "inet_frag_find: Fragment hash bucket" 355 " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) 356 ". Dropping fragment.\n"; 357 358 if (PTR_ERR(q) == -ENOBUFS) 359 LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); 360 } 361 EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); 362