inetpeer.c: diff between commits 1cc26bada9f6807814806db2f0d78792eecdac71 (old) and 7b46ac4e77f3224a1befe032c77f1df31d1b42c4 (new). Lines present only in the old version are prefixed with "- ", lines present only in the new version with "+ "; unmarked lines are common to both.
1/*
2 * INETPEER - A storage for permanent information about peers
3 *
4 * This source is covered by the GNU GPL, the same as all kernel sources.
5 *
6 * Authors: Andrey V. Savochkin <saw@msu.ru>
7 */
8

--- 67 unchanged lines hidden (view full) ---

76static const struct inet_peer peer_fake_node = {
77 .avl_left = peer_avl_empty_rcu,
78 .avl_right = peer_avl_empty_rcu,
79 .avl_height = 0
80};
81
82struct inet_peer_base {
83 struct inet_peer __rcu *root;
- 84	spinlock_t lock;
+ 84	seqlock_t lock;
85 int total;
86};
87
88static struct inet_peer_base v4_peers = {
89 .root = peer_avl_empty_rcu,
- 90	.lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock),
+ 90	.lock = __SEQLOCK_UNLOCKED(v4_peers.lock),
91 .total = 0,
92};
93
94static struct inet_peer_base v6_peers = {
95 .root = peer_avl_empty_rcu,
- 96	.lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock),
+ 96	.lock = __SEQLOCK_UNLOCKED(v6_peers.lock),
97 .total = 0,
98};
99
100#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
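(A rough check of the "about 2^27 nodes" figure: the sparsest AVL tree of height h has F(h+2) - 1 nodes, with F the Fibonacci sequence and F(1) = F(2) = 1. A pool of 2^27, about 1.3e8, peers therefore never exceeds height 38, while a height-39 tree already needs F(41) - 1, about 1.66e8, nodes; the 40-entry lookup stack covers any root-to-leaf walk with room to spare.)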
101
102/* Exported for sysctl_net_ipv4. */
103int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
104 * aggressively at this stage */

--- 57 unchanged lines hidden (view full) ---

162}
163
164static int addr_compare(const struct inetpeer_addr *a,
165 const struct inetpeer_addr *b)
166{
167 int i, n = (a->family == AF_INET ? 1 : 4);
168
169 for (i = 0; i < n; i++) {
- 170		if (a->a6[i] == b->a6[i])
+ 170		if (a->addr.a6[i] == b->addr.a6[i])
171			continue;
- 172		if (a->a6[i] < b->a6[i])
+ 172		if (a->addr.a6[i] < b->addr.a6[i])
173 return -1;
174 return 1;
175 }
176
177 return 0;
178}
179
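The only change in addr_compare() is that the raw address words now sit behind a nested addr member of struct inetpeer_addr; that structure is defined in include/net/inetpeer.h and is not part of this diff, so the layout below is an illustrative approximation, not the kernel's definition. A standalone sketch of the same comparison rule:

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>		/* AF_INET, AF_INET6 */

/* Illustrative stand-in for the reworked inetpeer_addr; the real kernel
 * struct (with __be32 words) lives in include/net/inetpeer.h. */
struct demo_peer_addr {
	union {
		uint32_t a4;
		uint32_t a6[4];
	} addr;
	uint16_t family;
};

/* Same rule as addr_compare(): one 32-bit word for IPv4, four for IPv6,
 * compared most-significant word first. */
static int demo_addr_compare(const struct demo_peer_addr *a,
			     const struct demo_peer_addr *b)
{
	int i, n = (a->family == AF_INET ? 1 : 4);

	for (i = 0; i < n; i++) {
		if (a->addr.a6[i] == b->addr.a6[i])
			continue;
		return a->addr.a6[i] < b->addr.a6[i] ? -1 : 1;
	}
	return 0;
}

int main(void)
{
	struct demo_peer_addr x = { .addr.a4 = 0x0a000001, .family = AF_INET };
	struct demo_peer_addr y = { .addr.a4 = 0x0a000002, .family = AF_INET };

	printf("%d\n", demo_addr_compare(&x, &y));	/* prints -1 */
	return 0;
}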
+ 180	#define rcu_deref_locked(X, BASE)				\
+ 181		rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
+ 182
180/*
181 * Called with local BH disabled and the pool lock held.
182 */
183#define lookup(_daddr, _stack, _base) \
184({ \
185 struct inet_peer *u; \
186 struct inet_peer __rcu **v; \
187 \
188 stackptr = _stack; \
189 *stackptr++ = &_base->root; \
- 190		for (u = rcu_dereference_protected(_base->root,	\
- 191			lockdep_is_held(&_base->lock));			\
+ 193		for (u = rcu_deref_locked(_base->root, _base);		\
192 u != peer_avl_empty; ) { \
193 int cmp = addr_compare(_daddr, &u->daddr); \
194 if (cmp == 0) \
195 break; \
196 if (cmp == -1) \
197 v = &u->avl_left; \
198 else \
199 v = &u->avl_right; \
200 *stackptr++ = v; \
- 201			u = rcu_dereference_protected(*v,		\
- 202				lockdep_is_held(&_base->lock));		\
+ 203			u = rcu_deref_locked(*v, _base);		\
203 } \
204 u; \
205})
206
207/*
- 208 * Called with rcu_read_lock_bh()
+ 209 * Called with rcu_read_lock()
209 * Because we hold no lock against a writer, its quite possible we fall
210 * in an endless loop.
211 * But every pointer we follow is guaranteed to be valid thanks to RCU.
212 * We exit from this function if number of links exceeds PEER_MAXDEPTH
213 */
- 214static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
- 215				       struct inet_peer_base *base)
+ 215static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
+ 216				    struct inet_peer_base *base)
216{
- 217	struct inet_peer *u = rcu_dereference_bh(base->root);
+ 218	struct inet_peer *u = rcu_dereference(base->root);
218 int count = 0;
219
220 while (u != peer_avl_empty) {
221 int cmp = addr_compare(daddr, &u->daddr);
222 if (cmp == 0) {
223 /* Before taking a reference, check if this entry was
224 * deleted, unlink_from_pool() sets refcnt=-1 to make
225 * distinction between an unused entry (refcnt=0) and
226 * a freed one.
227 */
228 if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
229 u = NULL;
230 return u;
231 }
232 if (cmp == -1)
- 233			u = rcu_dereference_bh(u->avl_left);
+ 234			u = rcu_dereference(u->avl_left);
234		else
- 235			u = rcu_dereference_bh(u->avl_right);
+ 236			u = rcu_dereference(u->avl_right);
236 if (unlikely(++count == PEER_MAXDEPTH))
237 break;
238 }
239 return NULL;
240}
241
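The refcnt convention used by lookup_rcu() above, and by unlink_from_pool() further down, is worth spelling out: readers take a reference only while the count is not -1 (atomic_add_unless(&u->refcnt, 1, -1)), and the deleter claims the node by atomically moving the count from 1 to -1 (atomic_cmpxchg(&p->refcnt, 1, -1)). A standalone C11 sketch of that protocol, with illustrative demo_* names and plain C atomics rather than the kernel's atomic_t API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* -1 means "deleted, do not touch"; 0 means unused; >0 means referenced. */
struct demo_peer {
	atomic_int refcnt;
};

/* Reader side: mirrors atomic_add_unless(&u->refcnt, 1, -1), i.e. take a
 * reference unless the node is already marked deleted. */
static bool demo_try_get(struct demo_peer *p)
{
	int old = atomic_load(&p->refcnt);

	do {
		if (old == -1)
			return false;	/* concurrently deleted */
	} while (!atomic_compare_exchange_weak(&p->refcnt, &old, old + 1));
	return true;
}

/* Deleter side: mirrors atomic_cmpxchg(&p->refcnt, 1, -1), i.e. claim the
 * last reference and mark the node deleted in one atomic step. */
static bool demo_try_delete(struct demo_peer *p)
{
	int expected = 1;

	return atomic_compare_exchange_strong(&p->refcnt, &expected, -1);
}

int main(void)
{
	struct demo_peer p = { .refcnt = 1 };

	printf("get:    %d\n", demo_try_get(&p));	/* 1, refcnt now 2 */
	printf("delete: %d\n", demo_try_delete(&p));	/* 0, still referenced */
	atomic_fetch_sub(&p.refcnt, 1);			/* drop our reference */
	printf("delete: %d\n", demo_try_delete(&p));	/* 1, refcnt now -1 */
	printf("get:    %d\n", demo_try_get(&p));	/* 0, node is dead */
	return 0;
}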
242/* Called with local BH disabled and the pool lock held. */
243#define lookup_rightempty(start, base) \
244({ \
245 struct inet_peer *u; \
246 struct inet_peer __rcu **v; \
247 *stackptr++ = &start->avl_left; \
248 v = &start->avl_left; \
- 249		for (u = rcu_dereference_protected(*v,		\
- 250			lockdep_is_held(&base->lock));		\
+ 250		for (u = rcu_deref_locked(*v, base);		\
251 u->avl_right != peer_avl_empty_rcu; ) { \
252 v = &u->avl_right; \
253 *stackptr++ = v; \
- 254		u = rcu_dereference_protected(*v,		\
- 255			lockdep_is_held(&base->lock));		\
+ 254		u = rcu_deref_locked(*v, base);			\
256 } \
257 u; \
258})
259
260/* Called with local BH disabled and the pool lock held.
261 * Variable names are the proof of operation correctness.
262 * Look into mm/map_avl.c for more detail description of the ideas.
263 */
264static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
265 struct inet_peer __rcu ***stackend,
266 struct inet_peer_base *base)
267{
268 struct inet_peer __rcu **nodep;
269 struct inet_peer *node, *l, *r;
270 int lh, rh;
271
272 while (stackend > stack) {
273 nodep = *--stackend;
- 274		node = rcu_dereference_protected(*nodep,
- 275			lockdep_is_held(&base->lock));
- 276		l = rcu_dereference_protected(node->avl_left,
- 277			lockdep_is_held(&base->lock));
- 278		r = rcu_dereference_protected(node->avl_right,
- 279			lockdep_is_held(&base->lock));
+ 273		node = rcu_deref_locked(*nodep, base);
+ 274		l = rcu_deref_locked(node->avl_left, base);
+ 275		r = rcu_deref_locked(node->avl_right, base);
280 lh = node_height(l);
281 rh = node_height(r);
282 if (lh > rh + 1) { /* l: RH+2 */
283 struct inet_peer *ll, *lr, *lrl, *lrr;
284 int lrh;
- 285			ll = rcu_dereference_protected(l->avl_left,
- 286				lockdep_is_held(&base->lock));
- 287			lr = rcu_dereference_protected(l->avl_right,
- 288				lockdep_is_held(&base->lock));
+ 281			ll = rcu_deref_locked(l->avl_left, base);
+ 282			lr = rcu_deref_locked(l->avl_right, base);
289 lrh = node_height(lr);
290 if (lrh <= node_height(ll)) { /* ll: RH+1 */
291 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
292 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
293 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
294 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
295 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
296 l->avl_height = node->avl_height + 1;
297 RCU_INIT_POINTER(*nodep, l);
298 } else { /* ll: RH, lr: RH+1 */
- 299				lrl = rcu_dereference_protected(lr->avl_left,
- 300					lockdep_is_held(&base->lock));	/* lrl: RH or RH-1 */
- 301				lrr = rcu_dereference_protected(lr->avl_right,
- 302					lockdep_is_held(&base->lock));	/* lrr: RH or RH-1 */
+ 293				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
+ 294				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
303 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
304 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
305 node->avl_height = rh + 1; /* node: RH+1 */
306 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
307 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
308 l->avl_height = rh + 1; /* l: RH+1 */
309 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
310 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
311 lr->avl_height = rh + 2;
312 RCU_INIT_POINTER(*nodep, lr);
313 }
314 } else if (rh > lh + 1) { /* r: LH+2 */
315 struct inet_peer *rr, *rl, *rlr, *rll;
316 int rlh;
- 317			rr = rcu_dereference_protected(r->avl_right,
- 318				lockdep_is_held(&base->lock));
- 319			rl = rcu_dereference_protected(r->avl_left,
- 320				lockdep_is_held(&base->lock));
+ 309			rr = rcu_deref_locked(r->avl_right, base);
+ 310			rl = rcu_deref_locked(r->avl_left, base);
321 rlh = node_height(rl);
322 if (rlh <= node_height(rr)) { /* rr: LH+1 */
323 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
324 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
325 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
326 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
327 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
328 r->avl_height = node->avl_height + 1;
329 RCU_INIT_POINTER(*nodep, r);
330 } else { /* rr: RH, rl: RH+1 */
- 331				rlr = rcu_dereference_protected(rl->avl_right,
- 332					lockdep_is_held(&base->lock));	/* rlr: LH or LH-1 */
- 333				rll = rcu_dereference_protected(rl->avl_left,
- 334					lockdep_is_held(&base->lock));	/* rll: LH or LH-1 */
+ 321				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
+ 322				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
335 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
336 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
337 node->avl_height = lh + 1; /* node: LH+1 */
338 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
339 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
340 r->avl_height = lh + 1; /* r: LH+1 */
341 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
342 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */

--- 24 unchanged lines hidden (view full) ---

367
368/* May be called with local BH enabled. */
369static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
370{
371 int do_free;
372
373 do_free = 0;
374
- 375	spin_lock_bh(&base->lock);
+ 363	write_seqlock_bh(&base->lock);
376 /* Check the reference counter. It was artificially incremented by 1
377 * in cleanup() function to prevent sudden disappearing. If we can
378 * atomically (because of lockless readers) take this last reference,
379 * it's safe to remove the node and free it later.
380 * We use refcnt=-1 to alert lockless readers this entry is deleted.
381 */
382 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
383 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
384 struct inet_peer __rcu ***stackptr, ***delp;
385 if (lookup(&p->daddr, stack, base) != p)
386 BUG();
387 delp = stackptr - 1; /* *delp[0] == p */
388 if (p->avl_left == peer_avl_empty_rcu) {
389 *delp[0] = p->avl_right;
390 --stackptr;
391 } else {
392 /* look for a node to insert instead of p */
393 struct inet_peer *t;
394 t = lookup_rightempty(p, base);
- 395			BUG_ON(rcu_dereference_protected(*stackptr[-1],
- 396				lockdep_is_held(&base->lock)) != t);
+ 383			BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
397 **--stackptr = t->avl_left;
398 /* t is removed, t->daddr > x->daddr for any
399 * x in p->avl_left subtree.
400 * Put t in the old place of p. */
401 RCU_INIT_POINTER(*delp[0], t);
402 t->avl_left = p->avl_left;
403 t->avl_right = p->avl_right;
404 t->avl_height = p->avl_height;
405 BUG_ON(delp[1] != &p->avl_left);
406 delp[1] = &t->avl_left; /* was &p->avl_left */
407 }
408 peer_avl_rebalance(stack, stackptr, base);
409 base->total--;
410 do_free = 1;
411 }
- 412	spin_unlock_bh(&base->lock);
+ 399	write_sequnlock_bh(&base->lock);
413
414 if (do_free)
415 call_rcu_bh(&p->rcu, inetpeer_free_rcu);
416 else
417 /* The node is used again. Decrease the reference counter
418 * back. The loop "cleanup -> unlink_from_unused
419 * -> unlink_from_pool -> putpeer -> link_to_unused
420 * -> cleanup (for the same node)"

--- 51 unchanged lines hidden (view full) ---

472}
473
474/* Called with or without local BH being disabled. */
475struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
476{
477 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
478 struct inet_peer_base *base = family_to_base(daddr->family);
479 struct inet_peer *p;
+ 467	unsigned int sequence;
+ 468	int invalidated;
480
481 /* Look up for the address quickly, lockless.
482 * Because of a concurrent writer, we might not find an existing entry.
483 */
- 484	rcu_read_lock_bh();
- 485	p = lookup_rcu_bh(daddr, base);
- 486	rcu_read_unlock_bh();
+ 473	rcu_read_lock();
+ 474	sequence = read_seqbegin(&base->lock);
+ 475	p = lookup_rcu(daddr, base);
+ 476	invalidated = read_seqretry(&base->lock, sequence);
+ 477	rcu_read_unlock();
487
488 if (p) {
489 /* The existing node has been found.
490 * Remove the entry from unused list if it was there.
491 */
492 unlink_from_unused(p);
493 return p;
494 }
495
+ 487	/* If no writer did a change during our lookup, we can return early. */
+ 488	if (!create && !invalidated)
+ 489		return NULL;
+ 490
496 /* retry an exact lookup, taking the lock before.
497 * At least, nodes should be hot in our cache.
498 */
- 499	spin_lock_bh(&base->lock);
+ 494	write_seqlock_bh(&base->lock);
500 p = lookup(daddr, stack, base);
501 if (p != peer_avl_empty) {
502 atomic_inc(&p->refcnt);
- 503		spin_unlock_bh(&base->lock);
+ 498		write_sequnlock_bh(&base->lock);
504 /* Remove the entry from unused list if it was there. */
505 unlink_from_unused(p);
506 return p;
507 }
508 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
509 if (p) {
510 p->daddr = *daddr;
511 atomic_set(&p->refcnt, 1);
512 atomic_set(&p->rid, 0);
- 513		atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
+ 508		atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
514 p->tcp_ts_stamp = 0;
+ 510		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+ 511		p->rate_tokens = 0;
+ 512		p->rate_last = 0;
+ 513		p->pmtu_expires = 0;
+ 514		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
515 INIT_LIST_HEAD(&p->unused);
516
517
518 /* Link the node. */
519 link_to_pool(p, base);
520 base->total++;
521 }
- 522	spin_unlock_bh(&base->lock);
+ 522	write_sequnlock_bh(&base->lock);
523
524 if (base->total >= inet_peer_threshold)
525 /* Remove one less-recently-used entry. */
526 cleanup_once(0);
527
528 return p;
529}
530
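The pattern introduced in inet_getpeer() above (an RCU-style lockless lookup bracketed by read_seqbegin()/read_seqretry(), where a miss is trusted only if no writer ran concurrently) is the core of the spinlock-to-seqlock conversion in this diff; seqlock_t pairs a sequence counter with an internal spinlock, which is also why the new rcu_deref_locked() helper points lockdep at base->lock.lock. Below is a minimal userspace sketch of that read-retry idea, using C11 atomics and a pthread mutex in place of the kernel's seqlock_t; all demo_* names are illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy seqlock: writers take the mutex and bump 'sequence' to odd while
 * modifying, back to even when done; readers never block. */
struct demo_seqlock {
	atomic_uint	sequence;
	pthread_mutex_t	lock;
};

static unsigned demo_read_seqbegin(struct demo_seqlock *sl)
{
	unsigned seq;

	while ((seq = atomic_load(&sl->sequence)) & 1)
		;				/* writer in progress, spin */
	return seq;
}

static bool demo_read_seqretry(struct demo_seqlock *sl, unsigned seq)
{
	return atomic_load(&sl->sequence) != seq;	/* true => result is suspect */
}

static void demo_write_seqlock(struct demo_seqlock *sl)
{
	pthread_mutex_lock(&sl->lock);
	atomic_fetch_add(&sl->sequence, 1);		/* now odd */
}

static void demo_write_sequnlock(struct demo_seqlock *sl)
{
	atomic_fetch_add(&sl->sequence, 1);		/* even again */
	pthread_mutex_unlock(&sl->lock);
}

/* Shape of the new inet_getpeer() fast path: a lockless lookup whose
 * negative result is only trusted when no writer was active. */
static int demo_lookup(struct demo_seqlock *sl, int key, const int *table, int len)
{
	unsigned seq = demo_read_seqbegin(sl);
	int i, found = 0;

	for (i = 0; i < len; i++)
		if (table[i] == key)
			found = 1;

	if (!found && demo_read_seqretry(sl, seq))
		return -1;	/* miss may be bogus: fall back to the locked slow path */
	return found;
}

int main(void)
{
	struct demo_seqlock sl = { .sequence = 0, .lock = PTHREAD_MUTEX_INITIALIZER };
	int table[3] = { 1, 2, 3 };

	printf("%d\n", demo_lookup(&sl, 9, table, 3));	/* 0: trustworthy miss */

	demo_write_seqlock(&sl);
	table[2] = 9;			/* a concurrent reader would now see a bumped sequence */
	demo_write_sequnlock(&sl);

	printf("%d\n", demo_lookup(&sl, 9, table, 3));	/* 1: found after the update */
	return 0;
}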

--- 43 unchanged lines hidden (view full) ---

574 list_add_tail(&p->unused, &unused_peers.list);
575 p->dtime = (__u32)jiffies;
576 spin_unlock(&unused_peers.lock);
577 }
578
579 local_bh_enable();
580}
581EXPORT_SYMBOL_GPL(inet_putpeer);
+ 582
+ 583/*
+ 584 * Check transmit rate limitation for given message.
+ 585 * The rate information is held in the inet_peer entries now.
+ 586 * This function is generic and could be used for other purposes
+ 587 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
+ 588 *
+ 589 * Note that the same inet_peer fields are modified by functions in
+ 590 * route.c too, but these work for packet destinations while xrlim_allow
+ 591 * works for icmp destinations. This means the rate limiting information
+ 592 * for one "ip object" is shared - and these ICMPs are twice limited:
+ 593 * by source and by destination.
+ 594 *
+ 595 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
+ 596 *	SHOULD allow setting of rate limits
+ 597 *
+ 598 * Shared between ICMPv4 and ICMPv6.
+ 599 */
+ 600#define XRLIM_BURST_FACTOR 6
+ 601bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
+ 602{
+ 603	unsigned long now, token;
+ 604	bool rc = false;
+ 605
+ 606	if (!peer)
+ 607		return true;
+ 608
+ 609	token = peer->rate_tokens;
+ 610	now = jiffies;
+ 611	token += now - peer->rate_last;
+ 612	peer->rate_last = now;
+ 613	if (token > XRLIM_BURST_FACTOR * timeout)
+ 614		token = XRLIM_BURST_FACTOR * timeout;
+ 615	if (token >= timeout) {
+ 616		token -= timeout;
+ 617		rc = true;
+ 618	}
+ 619	peer->rate_tokens = token;
+ 620	return rc;
+ 621}
+ 622EXPORT_SYMBOL(inet_peer_xrlim_allow);
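A standalone sketch of the same token-bucket arithmetic, with jiffies replaced by a plain tick counter so the burst behaviour is easy to see; the demo_* names are illustrative, not kernel API:

#include <stdbool.h>
#include <stdio.h>

#define DEMO_BURST_FACTOR 6	/* mirrors XRLIM_BURST_FACTOR */

struct demo_bucket {
	unsigned long rate_tokens;	/* accumulated credit, in ticks */
	unsigned long rate_last;	/* last time we were consulted */
};

/* Same logic as inet_peer_xrlim_allow(): credit grows with elapsed time,
 * is capped at BURST_FACTOR * timeout, and one message costs 'timeout'. */
static bool demo_xrlim_allow(struct demo_bucket *b, unsigned long now,
			     unsigned long timeout)
{
	unsigned long token = b->rate_tokens + (now - b->rate_last);
	bool rc = false;

	b->rate_last = now;
	if (token > DEMO_BURST_FACTOR * timeout)
		token = DEMO_BURST_FACTOR * timeout;
	if (token >= timeout) {
		token -= timeout;
		rc = true;
	}
	b->rate_tokens = token;
	return rc;
}

int main(void)
{
	struct demo_bucket b = { 0, 0 };
	unsigned long t;
	int allowed = 0;

	/* After a long quiet period the bucket is full: a burst of up to
	 * DEMO_BURST_FACTOR (6) messages goes through, then roughly one
	 * per 'timeout' ticks afterwards. */
	for (t = 1000; t < 1010; t++)
		allowed += demo_xrlim_allow(&b, t, 100);
	printf("allowed %d of 10 back-to-back requests\n", allowed);	/* 6 of 10 */
	return 0;
}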