/*
 * inet fragments management
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly.c and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
 *	   0 or INET_ECN_CE value, to be ORed in to final iph->tos field
 */
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

int inet_frags_init(struct inet_frags *f)
{
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
	/* We must wait until all inet_frag_destroy_rcu() callbacks
	 * have completed before destroying the cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;

	/* If we cannot cancel the timer, it means this frag_queue
	 * is already disappearing, and we have nothing to do.
	 * Otherwise, we own a refcount until the end of this function.
	 */
	if (!del_timer(&fq->timer))
		return;

	spin_lock_bh(&fq->lock);
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq->flags |= INET_FRAG_COMPLETE;
		refcount_dec(&fq->refcnt);
	}
	spin_unlock_bh(&fq->lock);

	inet_frag_put(fq);
}

void inet_frags_exit_net(struct netns_frags *nf)
{
	nf->low_thresh = 0; /* prevent creation of new frags */

	rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
EXPORT_SYMBOL(inet_frags_exit_net);

void inet_frag_kill(struct inet_frag_queue *fq)
{
	/* Drop the timer's reference, if the timer was still pending. */
	if (del_timer(&fq->timer))
		refcount_dec(&fq->refcnt);

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct netns_frags *nf = fq->net;

		fq->flags |= INET_FRAG_COMPLETE;
		rhashtable_remove_fast(&nf->rhashtable, &fq->node,
				       nf->f->rhash_params);
		/* Drop the hash table's reference. */
		refcount_dec(&fq->refcnt);
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
						 rcu);
	struct inet_frags *f = q->net->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

void inet_frag_destroy(struct inet_frag_queue *q)
{
	struct sk_buff *fp;
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;
	struct inet_frags *f;

	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fp = q->fragments;
	nf = q->net;
	f = nf->f;
	while (fp) {
		struct sk_buff *xp = fp->next;

		sum_truesize += fp->truesize;
		kfree_skb(fp);
		fp = xp;
	}
	sum = sum_truesize + f->qsize;

	call_rcu(&q->rcu, inet_frag_destroy_rcu);

	sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
		return NULL;

	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (!q)
		return NULL;

	q->net = nf;
	f->constructor(q, arg);
	add_frag_mem_limit(nf, f->qsize);

	timer_setup(&q->timer, f->frag_expire, 0);
	spin_lock_init(&q->lock);
	/* One reference for the timer, one for the hash table,
	 * one for the caller.
	 */
	refcount_set(&q->refcnt, 3);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
						void *arg)
{
	struct inet_frags *f = nf->f;
	struct inet_frag_queue *q;
	int err;

	q = inet_frag_alloc(nf, f, arg);
	if (!q)
		return NULL;

	mod_timer(&q->timer, jiffies + nf->timeout);

	err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
				     f->rhash_params);
	if (err < 0) {
		q->flags |= INET_FRAG_COMPLETE;
		inet_frag_kill(q);
		inet_frag_destroy(q);
		return NULL;
	}
	return q;
}

/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
	struct inet_frag_queue *fq;

	rcu_read_lock();

	fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
	if (fq) {
		if (!refcount_inc_not_zero(&fq->refcnt))
			fq = NULL;
		rcu_read_unlock();
		return fq;
	}
	rcu_read_unlock();

	return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
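
/* A minimal, illustrative sketch (not part of this file) of how a
 * protocol typically plugs into the API above, loosely modeled on
 * ipv4/ip_fragment.c. All names prefixed with "example_" are
 * hypothetical; callback signatures follow <net/inet_frag.h>.
 * Guarded by #if 0 so it is never built.
 */
#if 0
struct example_frag_queue {
	struct inet_frag_queue	q;	/* kept first so container_of() is cheap */
	u8			ecn;	/* ORed IPFRAG_ECN_* bits of all fragments */
};

static struct inet_frags example_frags;

static void example_constructor(struct inet_frag_queue *q, const void *arg)
{
	/* arg is the key passed to inet_frag_find(); record it here so
	 * rhashtable lookups (via example_frags.rhash_params) can match it.
	 */
}

static void example_frag_expire(struct timer_list *t)
{
	struct inet_frag_queue *q = from_timer(q, t, timer);

	spin_lock(&q->lock);
	if (!(q->flags & INET_FRAG_COMPLETE))
		inet_frag_kill(q);	/* drops the hash table's reference */
	spin_unlock(&q->lock);
	inet_frag_put(q);		/* drop the timer's reference */
}

static int __init example_module_init(void)
{
	example_frags.constructor	= example_constructor;
	example_frags.destructor	= NULL;	/* optional private cleanup */
	example_frags.qsize		= sizeof(struct example_frag_queue);
	example_frags.frag_expire	= example_frag_expire;
	example_frags.frags_cache_name	= "example-frags";
	/* example_frags.rhash_params would describe the key layout/hash */
	return inet_frags_init(&example_frags);
}

/* Receive path: find or create the queue for this fragment's key.
 * inet_frag_find() returns with a reference held; the caller drops it
 * with inet_frag_put() when done with the queue.
 */
static struct example_frag_queue *example_find(struct netns_frags *nf,
					       void *key)
{
	struct inet_frag_queue *q = inet_frag_find(nf, key);

	return q ? container_of(q, struct example_frag_queue, q) : NULL;
}

/* On reassembly completion, apply ip_frag_ecn_table to the ORed
 * per-fragment ECN bits: 0xff means the datagram must be dropped,
 * any other value is ORed into the final header's tos field.
 */
static int example_reasm_ecn(const struct example_frag_queue *efq, u8 *tos)
{
	u8 ecn = ip_frag_ecn_table[efq->ecn];

	if (ecn == 0xff)	/* invalid mix, e.g. NOT_ECT with CE */
		return -EINVAL;
	*tos |= ecn;
	return 0;
}
#endif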