ip6_fib.c (7cc482634f1f1e1db5401007658c8e8d6cf1617d) ip6_fib.c (c376222960ae91d5ffb9197ee36771aaed1d9f90)
1/*
1/*
2 * Linux INET6 implementation
2 * Linux INET6 implementation
3 * Forwarding Information Database
4 *
5 * Authors:
3 * Forwarding Information Database
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */

--- 77 unchanged lines hidden (view full) ---

92 */
93
94static __u32 rt_sernum;
95
96static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
97
98static struct fib6_walker_t fib6_walker_list = {
99 .prev = &fib6_walker_list,
7 *
8 * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */

--- 77 unchanged lines hidden (view full) ---

92 */
93
94static __u32 rt_sernum;
95
96static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
97
98static struct fib6_walker_t fib6_walker_list = {
99 .prev = &fib6_walker_list,
100 .next = &fib6_walker_list,
100 .next = &fib6_walker_list,
101};
102
103#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
104
105static inline void fib6_walker_link(struct fib6_walker_t *w)
106{
107 write_lock_bh(&fib6_walker_lock);
108 w->next = fib6_walker_list.next;

--- 17 unchanged lines hidden (view full) ---

126 if ((__s32)n <= 0)
127 rt_sernum = n = 1;
128 return n;
129}
130
131/*
132 * Auxiliary address test functions for the radix tree.
133 *
101};
102
103#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
104
105static inline void fib6_walker_link(struct fib6_walker_t *w)
106{
107 write_lock_bh(&fib6_walker_lock);
108 w->next = fib6_walker_list.next;

--- 17 unchanged lines hidden (view full) ---

126 if ((__s32)n <= 0)
127 rt_sernum = n = 1;
128 return n;
129}
130
131/*
132 * Auxiliary address test functions for the radix tree.
133 *
134 * These assume a 32bit processor (although it will work on
134 * These assume a 32bit processor (although it will work on
135 * 64bit processors)
136 */
137
138/*
139 * test bit
140 */
141
142static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
143{
144 __be32 *addr = token;
145
146 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
147}
148
149static __inline__ struct fib6_node * node_alloc(void)
150{
151 struct fib6_node *fn;
152
135 * 64bit processors)
136 */
137
138/*
139 * test bit
140 */
141
142static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
143{
144 __be32 *addr = token;
145
146 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
147}
148
149static __inline__ struct fib6_node * node_alloc(void)
150{
151 struct fib6_node *fn;
152
153 if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL)
154 memset(fn, 0, sizeof(struct fib6_node));
153 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
155
156 return fn;
157}
158
159static __inline__ void node_free(struct fib6_node * fn)
160{
161 kmem_cache_free(fib6_node_kmem, fn);
162}

--- 130 unchanged lines hidden (view full) ---

293
294#endif
295
296static int fib6_dump_node(struct fib6_walker_t *w)
297{
298 int res;
299 struct rt6_info *rt;
300
154
155 return fn;
156}
157
158static __inline__ void node_free(struct fib6_node * fn)
159{
160 kmem_cache_free(fib6_node_kmem, fn);
161}

--- 130 unchanged lines hidden (view full) ---

292
293#endif
294
295static int fib6_dump_node(struct fib6_walker_t *w)
296{
297 int res;
298 struct rt6_info *rt;
299
301 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
300 for (rt = w->leaf; rt; rt = rt->u.next) {
302 res = rt6_dump_route(rt, w->args);
303 if (res < 0) {
304 /* Frame is full, suspend walking */
305 w->leaf = rt;
306 return 1;
307 }
308 BUG_TRAP(res!=0);
309 }

--- 119 unchanged lines hidden (view full) ---

429static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
430 int addrlen, int plen,
431 int offset)
432{
433 struct fib6_node *fn, *in, *ln;
434 struct fib6_node *pn = NULL;
435 struct rt6key *key;
436 int bit;
301 res = rt6_dump_route(rt, w->args);
302 if (res < 0) {
303 /* Frame is full, suspend walking */
304 w->leaf = rt;
305 return 1;
306 }
307 BUG_TRAP(res!=0);
308 }

--- 119 unchanged lines hidden (view full) ---

428static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
429 int addrlen, int plen,
430 int offset)
431{
432 struct fib6_node *fn, *in, *ln;
433 struct fib6_node *pn = NULL;
434 struct rt6key *key;
435 int bit;
437 __be32 dir = 0;
436 __be32 dir = 0;
438 __u32 sernum = fib6_new_sernum();
439
440 RT6_TRACE("fib6_add_1\n");
441
442 /* insert node in tree */
443
444 fn = root;
445
446 do {
447 key = (struct rt6key *)((u8 *)fn->leaf + offset);
448
449 /*
450 * Prefix match
451 */
452 if (plen < fn->fn_bit ||
453 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
454 goto insert_above;
437 __u32 sernum = fib6_new_sernum();
438
439 RT6_TRACE("fib6_add_1\n");
440
441 /* insert node in tree */
442
443 fn = root;
444
445 do {
446 key = (struct rt6key *)((u8 *)fn->leaf + offset);
447
448 /*
449 * Prefix match
450 */
451 if (plen < fn->fn_bit ||
452 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
453 goto insert_above;
455
454
456 /*
457 * Exact match ?
458 */
455 /*
456 * Exact match ?
457 */
459
458
460 if (plen == fn->fn_bit) {
461 /* clean up an intermediate node */
462 if ((fn->fn_flags & RTN_RTINFO) == 0) {
463 rt6_release(fn->leaf);
464 fn->leaf = NULL;
465 }
459 if (plen == fn->fn_bit) {
460 /* clean up an intermediate node */
461 if ((fn->fn_flags & RTN_RTINFO) == 0) {
462 rt6_release(fn->leaf);
463 fn->leaf = NULL;
464 }
466
465
467 fn->fn_sernum = sernum;
466 fn->fn_sernum = sernum;
468
467
469 return fn;
470 }
471
472 /*
473 * We have more bits to go
474 */
468 return fn;
469 }
470
471 /*
472 * We have more bits to go
473 */
475
474
476 /* Try to walk down on tree. */
477 fn->fn_sernum = sernum;
478 dir = addr_bit_set(addr, fn->fn_bit);
479 pn = fn;
480 fn = dir ? fn->right: fn->left;
481 } while (fn);
482
483 /*
484 * We walked to the bottom of tree.
485 * Create new leaf node without children.
486 */
487
488 ln = node_alloc();
489
490 if (ln == NULL)
491 return NULL;
492 ln->fn_bit = plen;
475 /* Try to walk down on tree. */
476 fn->fn_sernum = sernum;
477 dir = addr_bit_set(addr, fn->fn_bit);
478 pn = fn;
479 fn = dir ? fn->right: fn->left;
480 } while (fn);
481
482 /*
483 * We walked to the bottom of tree.
484 * Create new leaf node without children.
485 */
486
487 ln = node_alloc();
488
489 if (ln == NULL)
490 return NULL;
491 ln->fn_bit = plen;
493
492
494 ln->parent = pn;
495 ln->fn_sernum = sernum;
496
497 if (dir)
498 pn->right = ln;
499 else
500 pn->left = ln;
501
502 return ln;
503
504
505insert_above:
506 /*
493 ln->parent = pn;
494 ln->fn_sernum = sernum;
495
496 if (dir)
497 pn->right = ln;
498 else
499 pn->left = ln;
500
501 return ln;
502
503
504insert_above:
505 /*
507 * split since we don't have a common prefix anymore or
506 * split since we don't have a common prefix anymore or
508 * we have a less significant route.
509 * we've to insert an intermediate node on the list
510 * this new node will point to the one we need to create
511 * and the current
512 */
513
514 pn = fn->parent;
515
516 /* find 1st bit in difference between the 2 addrs.
517
518 See comment in __ipv6_addr_diff: bit may be an invalid value,
519 but if it is >= plen, the value is ignored in any case.
520 */
507 * we have a less significant route.
508 * we've to insert an intermediate node on the list
509 * this new node will point to the one we need to create
510 * and the current
511 */
512
513 pn = fn->parent;
514
515 /* find 1st bit in difference between the 2 addrs.
516
517 See comment in __ipv6_addr_diff: bit may be an invalid value,
518 but if it is >= plen, the value is ignored in any case.
519 */
521
520
522 bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
523
521 bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
522
524 /*
525 * (intermediate)[in]
523 /*
524 * (intermediate)[in]
526 * / \
527 * (new leaf node)[ln] (old node)[fn]
528 */
529 if (plen > bit) {
530 in = node_alloc();
531 ln = node_alloc();
525 * / \
526 * (new leaf node)[ln] (old node)[fn]
527 */
528 if (plen > bit) {
529 in = node_alloc();
530 ln = node_alloc();
532
531
533 if (in == NULL || ln == NULL) {
534 if (in)
535 node_free(in);
536 if (ln)
537 node_free(ln);
538 return NULL;
539 }
540
532 if (in == NULL || ln == NULL) {
533 if (in)
534 node_free(in);
535 if (ln)
536 node_free(ln);
537 return NULL;
538 }
539
541 /*
542 * new intermediate node.
540 /*
541 * new intermediate node.
543 * RTN_RTINFO will
544 * be off since that an address that chooses one of
545 * the branches would not match less specific routes
546 * in the other branch
547 */
548
549 in->fn_bit = bit;
550

--- 20 unchanged lines hidden (view full) ---

571 in->right = ln;
572 in->left = fn;
573 } else {
574 in->left = ln;
575 in->right = fn;
576 }
577 } else { /* plen <= bit */
578
542 * RTN_RTINFO will
543 * be off since that an address that chooses one of
544 * the branches would not match less specific routes
545 * in the other branch
546 */
547
548 in->fn_bit = bit;
549

--- 20 unchanged lines hidden (view full) ---

570 in->right = ln;
571 in->left = fn;
572 } else {
573 in->left = ln;
574 in->right = fn;
575 }
576 } else { /* plen <= bit */
577
579 /*
578 /*
580 * (new leaf node)[ln]
581 * / \
582 * (old node)[fn] NULL
583 */
584
585 ln = node_alloc();
586
587 if (ln == NULL)
588 return NULL;
589
590 ln->fn_bit = plen;
591
592 ln->parent = pn;
593
594 ln->fn_sernum = sernum;
579 * (new leaf node)[ln]
580 * / \
581 * (old node)[fn] NULL
582 */
583
584 ln = node_alloc();
585
586 if (ln == NULL)
587 return NULL;
588
589 ln->fn_bit = plen;
590
591 ln->parent = pn;
592
593 ln->fn_sernum = sernum;
595
594
596 if (dir)
597 pn->right = ln;
598 else
599 pn->left = ln;
600
601 if (addr_bit_set(&key->addr, plen))
602 ln->right = fn;
603 else

--- 15 unchanged lines hidden (view full) ---

619 struct rt6_info **ins;
620
621 ins = &fn->leaf;
622
623 if (fn->fn_flags&RTN_TL_ROOT &&
624 fn->leaf == &ip6_null_entry &&
625 !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
626 fn->leaf = rt;
595 if (dir)
596 pn->right = ln;
597 else
598 pn->left = ln;
599
600 if (addr_bit_set(&key->addr, plen))
601 ln->right = fn;
602 else

--- 15 unchanged lines hidden (view full) ---

618 struct rt6_info **ins;
619
620 ins = &fn->leaf;
621
622 if (fn->fn_flags&RTN_TL_ROOT &&
623 fn->leaf == &ip6_null_entry &&
624 !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
625 fn->leaf = rt;
627 rt->u.dst.rt6_next = NULL;
626 rt->u.next = NULL;
628 goto out;
629 }
630
627 goto out;
628 }
629
631 for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) {
630 for (iter = fn->leaf; iter; iter=iter->u.next) {
632 /*
633 * Search for duplicates
634 */
635
636 if (iter->rt6i_metric == rt->rt6i_metric) {
637 /*
638 * Same priority level
639 */

--- 11 unchanged lines hidden (view full) ---

651 }
652 return -EEXIST;
653 }
654 }
655
656 if (iter->rt6i_metric > rt->rt6i_metric)
657 break;
658
631 /*
632 * Search for duplicates
633 */
634
635 if (iter->rt6i_metric == rt->rt6i_metric) {
636 /*
637 * Same priority level
638 */

--- 11 unchanged lines hidden (view full) ---

650 }
651 return -EEXIST;
652 }
653 }
654
655 if (iter->rt6i_metric > rt->rt6i_metric)
656 break;
657
659 ins = &iter->u.dst.rt6_next;
658 ins = &iter->u.next;
660 }
661
662 /*
663 * insert node
664 */
665
666out:
659 }
660
661 /*
662 * insert node
663 */
664
665out:
667 rt->u.dst.rt6_next = iter;
666 rt->u.next = iter;
668 *ins = rt;
669 rt->rt6i_node = fn;
670 atomic_inc(&rt->rt6i_ref);
671 inet6_rt_notify(RTM_NEWROUTE, rt, info);
672 rt6_stats.fib_rt_entries++;
673
674 if ((fn->fn_flags & RTN_RTINFO) == 0) {
675 rt6_stats.fib_route_nodes++;

--- 424 unchanged lines hidden (view full) ---

1100 struct nl_info *info)
1101{
1102 struct fib6_walker_t *w;
1103 struct rt6_info *rt = *rtp;
1104
1105 RT6_TRACE("fib6_del_route\n");
1106
1107 /* Unlink it */
667 *ins = rt;
668 rt->rt6i_node = fn;
669 atomic_inc(&rt->rt6i_ref);
670 inet6_rt_notify(RTM_NEWROUTE, rt, info);
671 rt6_stats.fib_rt_entries++;
672
673 if ((fn->fn_flags & RTN_RTINFO) == 0) {
674 rt6_stats.fib_route_nodes++;

--- 424 unchanged lines hidden (view full) ---

1099 struct nl_info *info)
1100{
1101 struct fib6_walker_t *w;
1102 struct rt6_info *rt = *rtp;
1103
1104 RT6_TRACE("fib6_del_route\n");
1105
1106 /* Unlink it */
1108 *rtp = rt->u.dst.rt6_next;
1107 *rtp = rt->u.next;
1109 rt->rt6i_node = NULL;
1110 rt6_stats.fib_rt_entries--;
1111 rt6_stats.fib_discarded_routes++;
1112
1113 /* Adjust walkers */
1114 read_lock(&fib6_walker_lock);
1115 FOR_WALKERS(w) {
1116 if (w->state == FWS_C && w->leaf == rt) {
1117 RT6_TRACE("walker %p adjusted by delroute\n", w);
1108 rt->rt6i_node = NULL;
1109 rt6_stats.fib_rt_entries--;
1110 rt6_stats.fib_discarded_routes++;
1111
1112 /* Adjust walkers */
1113 read_lock(&fib6_walker_lock);
1114 FOR_WALKERS(w) {
1115 if (w->state == FWS_C && w->leaf == rt) {
1116 RT6_TRACE("walker %p adjusted by delroute\n", w);
1118 w->leaf = rt->u.dst.rt6_next;
1117 w->leaf = rt->u.next;
1119 if (w->leaf == NULL)
1120 w->state = FWS_U;
1121 }
1122 }
1123 read_unlock(&fib6_walker_lock);
1124
1118 if (w->leaf == NULL)
1119 w->state = FWS_U;
1120 }
1121 }
1122 read_unlock(&fib6_walker_lock);
1123
1125 rt->u.dst.rt6_next = NULL;
1124 rt->u.next = NULL;
1126
1127 if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
1128 fn->leaf = &ip6_null_entry;
1129
1130 /* If it was last route, expunge its radix tree node */
1131 if (fn->leaf == NULL) {
1132 fn->fn_flags &= ~RTN_RTINFO;
1133 rt6_stats.fib_route_nodes--;

--- 51 unchanged lines hidden (view full) ---

1185#endif
1186 fib6_prune_clones(pn, rt);
1187 }
1188
1189 /*
1190 * Walk the leaf entries looking for ourself
1191 */
1192
1125
1126 if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
1127 fn->leaf = &ip6_null_entry;
1128
1129 /* If it was last route, expunge its radix tree node */
1130 if (fn->leaf == NULL) {
1131 fn->fn_flags &= ~RTN_RTINFO;
1132 rt6_stats.fib_route_nodes--;

--- 51 unchanged lines hidden (view full) ---

1184#endif
1185 fib6_prune_clones(pn, rt);
1186 }
1187
1188 /*
1189 * Walk the leaf entries looking for ourself
1190 */
1191
1193 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) {
1192 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
1194 if (*rtp == rt) {
1195 fib6_del_route(fn, rtp, info);
1196 return 0;
1197 }
1198 }
1199 return -ENOENT;
1200}
1201
1202/*
1203 * Tree traversal function.
1204 *
1205 * Certainly, it is not interrupt safe.
1206 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1207 * It means, that we can modify tree during walking
1208 * and use this function for garbage collection, clone pruning,
1193 if (*rtp == rt) {
1194 fib6_del_route(fn, rtp, info);
1195 return 0;
1196 }
1197 }
1198 return -ENOENT;
1199}
1200
1201/*
1202 * Tree traversal function.
1203 *
1204 * Certainly, it is not interrupt safe.
1205 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1206 * It means, that we can modify tree during walking
1207 * and use this function for garbage collection, clone pruning,
1209 * cleaning tree when a device goes down etc. etc.
1208 * cleaning tree when a device goes down etc. etc.
1210 *
1211 * It guarantees that every node will be traversed,
1212 * and that it will be traversed only once.
1213 *
1214 * Callback function w->func may return:
1215 * 0 -> continue walking.
1216 * positive value -> walking is suspended (used by tree dumps,
1217 * and probably by gc, if it will be split to several slices)

--- 22 unchanged lines hidden (view full) ---

1240 switch (w->state) {
1241#ifdef CONFIG_IPV6_SUBTREES
1242 case FWS_S:
1243 if (FIB6_SUBTREE(fn)) {
1244 w->node = FIB6_SUBTREE(fn);
1245 continue;
1246 }
1247 w->state = FWS_L;
1209 *
1210 * It guarantees that every node will be traversed,
1211 * and that it will be traversed only once.
1212 *
1213 * Callback function w->func may return:
1214 * 0 -> continue walking.
1215 * positive value -> walking is suspended (used by tree dumps,
1216 * and probably by gc, if it will be split to several slices)

--- 22 unchanged lines hidden (view full) ---

1239 switch (w->state) {
1240#ifdef CONFIG_IPV6_SUBTREES
1241 case FWS_S:
1242 if (FIB6_SUBTREE(fn)) {
1243 w->node = FIB6_SUBTREE(fn);
1244 continue;
1245 }
1246 w->state = FWS_L;
1248#endif
1247#endif
1249 case FWS_L:
1250 if (fn->left) {
1251 w->node = fn->left;
1252 w->state = FWS_INIT;
1253 continue;
1254 }
1255 w->state = FWS_R;
1256 case FWS_R:

--- 55 unchanged lines hidden (view full) ---

1312}
1313
1314static int fib6_clean_node(struct fib6_walker_t *w)
1315{
1316 int res;
1317 struct rt6_info *rt;
1318 struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
1319
1248 case FWS_L:
1249 if (fn->left) {
1250 w->node = fn->left;
1251 w->state = FWS_INIT;
1252 continue;
1253 }
1254 w->state = FWS_R;
1255 case FWS_R:

--- 55 unchanged lines hidden (view full) ---

1311}
1312
1313static int fib6_clean_node(struct fib6_walker_t *w)
1314{
1315 int res;
1316 struct rt6_info *rt;
1317 struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
1318
1320 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
1319 for (rt = w->leaf; rt; rt = rt->u.next) {
1321 res = c->func(rt, c->arg);
1322 if (res < 0) {
1323 w->leaf = rt;
1324 res = fib6_del(rt, NULL);
1325 if (res) {
1326#if RT6_DEBUG >= 2
1327 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1328#endif

--- 4 unchanged lines hidden (view full) ---

1333 BUG_TRAP(res==0);
1334 }
1335 w->leaf = rt;
1336 return 0;
1337}
1338
1339/*
1340 * Convenient frontend to tree walker.
1320 res = c->func(rt, c->arg);
1321 if (res < 0) {
1322 w->leaf = rt;
1323 res = fib6_del(rt, NULL);
1324 if (res) {
1325#if RT6_DEBUG >= 2
1326 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1327#endif

--- 4 unchanged lines hidden (view full) ---

1332 BUG_TRAP(res==0);
1333 }
1334 w->leaf = rt;
1335 return 0;
1336}
1337
1338/*
1339 * Convenient frontend to tree walker.
1341 *
1340 *
1342 * func is called on each route.
1343 * It may return -1 -> delete this route.
1344 * 0 -> continue walking
1345 *
1346 * prune==1 -> only immediate children of node (certainly,
1347 * ignoring pure split nodes) will be scanned.
1348 */
1349

--- 139 unchanged lines hidden ---
1341 * func is called on each route.
1342 * It may return -1 -> delete this route.
1343 * 0 -> continue walking
1344 *
1345 * prune==1 -> only immediate children of node (certainly,
1346 * ignoring pure split nodes) will be scanned.
1347 */
1348

--- 139 unchanged lines hidden ---