1 /* 2 * inet fragments management 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Pavel Emelyanov <xemul@openvz.org> 10 * Started as consolidation of ipv4/ip_fragment.c, 11 * ipv6/reassembly. and ipv6 nf conntrack reassembly 12 */ 13 14 #include <linux/list.h> 15 #include <linux/spinlock.h> 16 #include <linux/module.h> 17 #include <linux/timer.h> 18 #include <linux/mm.h> 19 #include <linux/random.h> 20 #include <linux/skbuff.h> 21 #include <linux/rtnetlink.h> 22 #include <linux/slab.h> 23 #include <linux/rhashtable.h> 24 25 #include <net/sock.h> 26 #include <net/inet_frag.h> 27 #include <net/inet_ecn.h> 28 29 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements 30 * Value : 0xff if frame should be dropped. 31 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field 32 */ 33 const u8 ip_frag_ecn_table[16] = { 34 /* at least one fragment had CE, and others ECT_0 or ECT_1 */ 35 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, 36 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 37 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 38 39 /* invalid combinations : drop frame */ 40 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, 41 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, 42 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, 43 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 44 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, 45 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, 46 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 47 }; 48 EXPORT_SYMBOL(ip_frag_ecn_table); 49 50 int inet_frags_init(struct inet_frags *f) 51 { 52 f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, 53 NULL); 54 if (!f->frags_cachep) 55 return -ENOMEM; 56 57 return 0; 58 } 59 EXPORT_SYMBOL(inet_frags_init); 60 61 void inet_frags_fini(struct inet_frags *f) 62 { 63 /* We must wait that all inet_frag_destroy_rcu() have completed. */ 64 rcu_barrier(); 65 66 kmem_cache_destroy(f->frags_cachep); 67 f->frags_cachep = NULL; 68 } 69 EXPORT_SYMBOL(inet_frags_fini); 70 71 static void inet_frags_free_cb(void *ptr, void *arg) 72 { 73 struct inet_frag_queue *fq = ptr; 74 75 /* If we can not cancel the timer, it means this frag_queue 76 * is already disappearing, we have nothing to do. 77 * Otherwise, we own a refcount until the end of this function. 78 */ 79 if (!del_timer(&fq->timer)) 80 return; 81 82 spin_lock_bh(&fq->lock); 83 if (!(fq->flags & INET_FRAG_COMPLETE)) { 84 fq->flags |= INET_FRAG_COMPLETE; 85 refcount_dec(&fq->refcnt); 86 } 87 spin_unlock_bh(&fq->lock); 88 89 inet_frag_put(fq); 90 } 91 92 void inet_frags_exit_net(struct netns_frags *nf) 93 { 94 nf->high_thresh = 0; /* prevent creation of new frags */ 95 96 rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); 97 } 98 EXPORT_SYMBOL(inet_frags_exit_net); 99 100 void inet_frag_kill(struct inet_frag_queue *fq) 101 { 102 if (del_timer(&fq->timer)) 103 refcount_dec(&fq->refcnt); 104 105 if (!(fq->flags & INET_FRAG_COMPLETE)) { 106 struct netns_frags *nf = fq->net; 107 108 fq->flags |= INET_FRAG_COMPLETE; 109 rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); 110 refcount_dec(&fq->refcnt); 111 } 112 } 113 EXPORT_SYMBOL(inet_frag_kill); 114 115 static void inet_frag_destroy_rcu(struct rcu_head *head) 116 { 117 struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, 118 rcu); 119 struct inet_frags *f = q->net->f; 120 121 if (f->destructor) 122 f->destructor(q); 123 kmem_cache_free(f->frags_cachep, q); 124 } 125 126 void inet_frag_destroy(struct inet_frag_queue *q) 127 { 128 struct sk_buff *fp; 129 struct netns_frags *nf; 130 unsigned int sum, sum_truesize = 0; 131 struct inet_frags *f; 132 133 WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); 134 WARN_ON(del_timer(&q->timer) != 0); 135 136 /* Release all fragment data. */ 137 fp = q->fragments; 138 nf = q->net; 139 f = nf->f; 140 if (fp) { 141 do { 142 struct sk_buff *xp = fp->next; 143 144 sum_truesize += fp->truesize; 145 kfree_skb(fp); 146 fp = xp; 147 } while (fp); 148 } else { 149 sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); 150 } 151 sum = sum_truesize + f->qsize; 152 153 call_rcu(&q->rcu, inet_frag_destroy_rcu); 154 155 sub_frag_mem_limit(nf, sum); 156 } 157 EXPORT_SYMBOL(inet_frag_destroy); 158 159 static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, 160 struct inet_frags *f, 161 void *arg) 162 { 163 struct inet_frag_queue *q; 164 165 q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); 166 if (!q) 167 return NULL; 168 169 q->net = nf; 170 f->constructor(q, arg); 171 add_frag_mem_limit(nf, f->qsize); 172 173 timer_setup(&q->timer, f->frag_expire, 0); 174 spin_lock_init(&q->lock); 175 refcount_set(&q->refcnt, 3); 176 177 return q; 178 } 179 180 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, 181 void *arg, 182 struct inet_frag_queue **prev) 183 { 184 struct inet_frags *f = nf->f; 185 struct inet_frag_queue *q; 186 187 q = inet_frag_alloc(nf, f, arg); 188 if (!q) { 189 *prev = ERR_PTR(-ENOMEM); 190 return NULL; 191 } 192 mod_timer(&q->timer, jiffies + nf->timeout); 193 194 *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, 195 &q->node, f->rhash_params); 196 if (*prev) { 197 q->flags |= INET_FRAG_COMPLETE; 198 inet_frag_kill(q); 199 inet_frag_destroy(q); 200 return NULL; 201 } 202 return q; 203 } 204 205 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ 206 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) 207 { 208 struct inet_frag_queue *fq = NULL, *prev; 209 210 if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) 211 return NULL; 212 213 rcu_read_lock(); 214 215 prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); 216 if (!prev) 217 fq = inet_frag_create(nf, key, &prev); 218 if (prev && !IS_ERR(prev)) { 219 fq = prev; 220 if (!refcount_inc_not_zero(&fq->refcnt)) 221 fq = NULL; 222 } 223 rcu_read_unlock(); 224 return fq; 225 } 226 EXPORT_SYMBOL(inet_frag_find); 227