xref: /openbmc/linux/net/ipv4/fib_trie.c (revision b59cfbf7)
119baf839SRobert Olsson /*
219baf839SRobert Olsson  *   This program is free software; you can redistribute it and/or
319baf839SRobert Olsson  *   modify it under the terms of the GNU General Public License
419baf839SRobert Olsson  *   as published by the Free Software Foundation; either version
519baf839SRobert Olsson  *   2 of the License, or (at your option) any later version.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
819baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
1119baf839SRobert Olsson  *     Agricultural Sciences.
1219baf839SRobert Olsson  *
1319baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1419baf839SRobert Olsson  *
1519baf839SRobert Olsson  * This work is based on the LPC-trie which is originally descibed in:
1619baf839SRobert Olsson  *
1719baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1819baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
1919baf839SRobert Olsson  * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  *
2219baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2419baf839SRobert Olsson  *
2519baf839SRobert Olsson  * Version:	$Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
2619baf839SRobert Olsson  *
2719baf839SRobert Olsson  *
2819baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2919baf839SRobert Olsson  *
3019baf839SRobert Olsson  *
3119baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3219baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
3319baf839SRobert Olsson  *		interface as the means of communication with the user level.
3419baf839SRobert Olsson  *
3519baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
3619baf839SRobert Olsson  *
3719baf839SRobert Olsson  *
3819baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3919baf839SRobert Olsson  *
4019baf839SRobert Olsson  *		This program is free software; you can redistribute it and/or
4119baf839SRobert Olsson  *		modify it under the terms of the GNU General Public License
4219baf839SRobert Olsson  *		as published by the Free Software Foundation; either version
4319baf839SRobert Olsson  *		2 of the License, or (at your option) any later version.
44fd966255SRobert Olsson  *
45fd966255SRobert Olsson  * Substantial contributions to this work comes from:
46fd966255SRobert Olsson  *
47fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
48fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
49fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
50fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
5119baf839SRobert Olsson  */
5219baf839SRobert Olsson 
5305eee48cSRobert Olsson #define VERSION "0.408"
5419baf839SRobert Olsson 
5519baf839SRobert Olsson #include <asm/uaccess.h>
5619baf839SRobert Olsson #include <asm/system.h>
571977f032SJiri Slaby #include <linux/bitops.h>
5819baf839SRobert Olsson #include <linux/types.h>
5919baf839SRobert Olsson #include <linux/kernel.h>
6019baf839SRobert Olsson #include <linux/mm.h>
6119baf839SRobert Olsson #include <linux/string.h>
6219baf839SRobert Olsson #include <linux/socket.h>
6319baf839SRobert Olsson #include <linux/sockios.h>
6419baf839SRobert Olsson #include <linux/errno.h>
6519baf839SRobert Olsson #include <linux/in.h>
6619baf839SRobert Olsson #include <linux/inet.h>
67cd8787abSStephen Hemminger #include <linux/inetdevice.h>
6819baf839SRobert Olsson #include <linux/netdevice.h>
6919baf839SRobert Olsson #include <linux/if_arp.h>
7019baf839SRobert Olsson #include <linux/proc_fs.h>
712373ce1cSRobert Olsson #include <linux/rcupdate.h>
7219baf839SRobert Olsson #include <linux/skbuff.h>
7319baf839SRobert Olsson #include <linux/netlink.h>
7419baf839SRobert Olsson #include <linux/init.h>
7519baf839SRobert Olsson #include <linux/list.h>
76457c4cbcSEric W. Biederman #include <net/net_namespace.h>
7719baf839SRobert Olsson #include <net/ip.h>
7819baf839SRobert Olsson #include <net/protocol.h>
7919baf839SRobert Olsson #include <net/route.h>
8019baf839SRobert Olsson #include <net/tcp.h>
8119baf839SRobert Olsson #include <net/sock.h>
8219baf839SRobert Olsson #include <net/ip_fib.h>
8319baf839SRobert Olsson #include "fib_lookup.h"
8419baf839SRobert Olsson 
8506ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
8619baf839SRobert Olsson 
8719baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key))
8819baf839SRobert Olsson 
8919baf839SRobert Olsson typedef unsigned int t_key;
9019baf839SRobert Olsson 
9119baf839SRobert Olsson #define T_TNODE 0
9219baf839SRobert Olsson #define T_LEAF  1
9319baf839SRobert Olsson #define NODE_TYPE_MASK	0x1UL
942373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
952373ce1cSRobert Olsson 
9691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF))
9791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF)
9819baf839SRobert Olsson 
9919baf839SRobert Olsson struct node {
10091b9a277SOlof Johansson 	unsigned long parent;
1018d965444SEric Dumazet 	t_key key;
10219baf839SRobert Olsson };
10319baf839SRobert Olsson 
10419baf839SRobert Olsson struct leaf {
10591b9a277SOlof Johansson 	unsigned long parent;
1068d965444SEric Dumazet 	t_key key;
10719baf839SRobert Olsson 	struct hlist_head list;
1082373ce1cSRobert Olsson 	struct rcu_head rcu;
10919baf839SRobert Olsson };
11019baf839SRobert Olsson 
11119baf839SRobert Olsson struct leaf_info {
11219baf839SRobert Olsson 	struct hlist_node hlist;
1132373ce1cSRobert Olsson 	struct rcu_head rcu;
11419baf839SRobert Olsson 	int plen;
11519baf839SRobert Olsson 	struct list_head falh;
11619baf839SRobert Olsson };
11719baf839SRobert Olsson 
11819baf839SRobert Olsson struct tnode {
11991b9a277SOlof Johansson 	unsigned long parent;
1208d965444SEric Dumazet 	t_key key;
121112d8cfcSEric Dumazet 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
122112d8cfcSEric Dumazet 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
1238d965444SEric Dumazet 	unsigned int full_children;	/* KEYLENGTH bits needed */
1248d965444SEric Dumazet 	unsigned int empty_children;	/* KEYLENGTH bits needed */
1252373ce1cSRobert Olsson 	struct rcu_head rcu;
12619baf839SRobert Olsson 	struct node *child[0];
12719baf839SRobert Olsson };
12819baf839SRobert Olsson 
12919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
13019baf839SRobert Olsson struct trie_use_stats {
13119baf839SRobert Olsson 	unsigned int gets;
13219baf839SRobert Olsson 	unsigned int backtrack;
13319baf839SRobert Olsson 	unsigned int semantic_match_passed;
13419baf839SRobert Olsson 	unsigned int semantic_match_miss;
13519baf839SRobert Olsson 	unsigned int null_node_hit;
1362f36895aSRobert Olsson 	unsigned int resize_node_skipped;
13719baf839SRobert Olsson };
13819baf839SRobert Olsson #endif
13919baf839SRobert Olsson 
14019baf839SRobert Olsson struct trie_stat {
14119baf839SRobert Olsson 	unsigned int totdepth;
14219baf839SRobert Olsson 	unsigned int maxdepth;
14319baf839SRobert Olsson 	unsigned int tnodes;
14419baf839SRobert Olsson 	unsigned int leaves;
14519baf839SRobert Olsson 	unsigned int nullpointers;
14606ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
14719baf839SRobert Olsson };
14819baf839SRobert Olsson 
14919baf839SRobert Olsson struct trie {
15019baf839SRobert Olsson 	struct node *trie;
151d717a9a6SStephen Hemminger 	unsigned int size;
15219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15319baf839SRobert Olsson 	struct trie_use_stats stats;
15419baf839SRobert Olsson #endif
15519baf839SRobert Olsson };
15619baf839SRobert Olsson 
15719baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
15819baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
15919baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn);
1602f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn);
1612f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn);
16219baf839SRobert Olsson static void tnode_free(struct tnode *tn);
16319baf839SRobert Olsson 
164e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly;
16519baf839SRobert Olsson 
16606801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node)
16706801916SStephen Hemminger {
168b59cfbf7SEric Dumazet 	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
169b59cfbf7SEric Dumazet }
17006801916SStephen Hemminger 
171b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node)
172b59cfbf7SEric Dumazet {
173b59cfbf7SEric Dumazet 	struct tnode *ret = node_parent(node);
174b59cfbf7SEric Dumazet 
17506801916SStephen Hemminger 	return rcu_dereference(ret);
17606801916SStephen Hemminger }
17706801916SStephen Hemminger 
17806801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr)
17906801916SStephen Hemminger {
18006801916SStephen Hemminger 	rcu_assign_pointer(node->parent,
18106801916SStephen Hemminger 			   (unsigned long)ptr | NODE_TYPE(node));
18206801916SStephen Hemminger }
1832373ce1cSRobert Olsson 
184b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
18519baf839SRobert Olsson {
186b59cfbf7SEric Dumazet 	BUG_ON(i >= 1U << tn->bits);
18719baf839SRobert Olsson 
188b59cfbf7SEric Dumazet 	return tn->child[i];
189b59cfbf7SEric Dumazet }
190b59cfbf7SEric Dumazet 
191b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
192b59cfbf7SEric Dumazet {
193b59cfbf7SEric Dumazet 	struct node *ret = tnode_get_child(tn, i);
194b59cfbf7SEric Dumazet 
195b59cfbf7SEric Dumazet 	return rcu_dereference(ret);
19619baf839SRobert Olsson }
19719baf839SRobert Olsson 
198bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn)
19919baf839SRobert Olsson {
20019baf839SRobert Olsson 	return 1 << tn->bits;
20119baf839SRobert Olsson }
20219baf839SRobert Olsson 
203ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l)
204ab66b4a7SStephen Hemminger {
205ab66b4a7SStephen Hemminger 	return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
206ab66b4a7SStephen Hemminger }
207ab66b4a7SStephen Hemminger 
20819baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
20919baf839SRobert Olsson {
21019baf839SRobert Olsson 	if (offset < KEYLENGTH)
21119baf839SRobert Olsson 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
21219baf839SRobert Olsson 	else
21319baf839SRobert Olsson 		return 0;
21419baf839SRobert Olsson }
21519baf839SRobert Olsson 
21619baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b)
21719baf839SRobert Olsson {
21819baf839SRobert Olsson 	return a == b;
21919baf839SRobert Olsson }
22019baf839SRobert Olsson 
22119baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
22219baf839SRobert Olsson {
22319baf839SRobert Olsson 	if (bits == 0 || offset >= KEYLENGTH)
22419baf839SRobert Olsson 		return 1;
22519baf839SRobert Olsson 	bits = bits > KEYLENGTH ? KEYLENGTH : bits;
22619baf839SRobert Olsson 	return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
22719baf839SRobert Olsson }
22819baf839SRobert Olsson 
22919baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b)
23019baf839SRobert Olsson {
23119baf839SRobert Olsson 	t_key diff = a ^ b;
23219baf839SRobert Olsson 	int i = offset;
23319baf839SRobert Olsson 
23419baf839SRobert Olsson 	if (!diff)
23519baf839SRobert Olsson 		return 0;
23619baf839SRobert Olsson 	while ((diff << i) >> (KEYLENGTH-1) == 0)
23719baf839SRobert Olsson 		i++;
23819baf839SRobert Olsson 	return i;
23919baf839SRobert Olsson }
24019baf839SRobert Olsson 
24119baf839SRobert Olsson /*
24219baf839SRobert Olsson   To understand this stuff, an understanding of keys and all their bits is
24319baf839SRobert Olsson   necessary. Every node in the trie has a key associated with it, but not
24419baf839SRobert Olsson   all of the bits in that key are significant.
24519baf839SRobert Olsson 
24619baf839SRobert Olsson   Consider a node 'n' and its parent 'tp'.
24719baf839SRobert Olsson 
24819baf839SRobert Olsson   If n is a leaf, every bit in its key is significant. Its presence is
249772cb712SRobert Olsson   necessitated by path compression, since during a tree traversal (when
25019baf839SRobert Olsson   searching for a leaf - unless we are doing an insertion) we will completely
25119baf839SRobert Olsson   ignore all skipped bits we encounter. Thus we need to verify, at the end of
25219baf839SRobert Olsson   a potentially successful search, that we have indeed been walking the
25319baf839SRobert Olsson   correct key path.
25419baf839SRobert Olsson 
25519baf839SRobert Olsson   Note that we can never "miss" the correct key in the tree if present by
25619baf839SRobert Olsson   following the wrong path. Path compression ensures that segments of the key
25719baf839SRobert Olsson   that are the same for all keys with a given prefix are skipped, but the
25819baf839SRobert Olsson   skipped part *is* identical for each node in the subtrie below the skipped
25919baf839SRobert Olsson   bit! trie_insert() in this implementation takes care of that - note the
26019baf839SRobert Olsson   call to tkey_sub_equals() in trie_insert().
26119baf839SRobert Olsson 
26219baf839SRobert Olsson   if n is an internal node - a 'tnode' here, the various parts of its key
26319baf839SRobert Olsson   have many different meanings.
26419baf839SRobert Olsson 
26519baf839SRobert Olsson   Example:
26619baf839SRobert Olsson   _________________________________________________________________
26719baf839SRobert Olsson   | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
26819baf839SRobert Olsson   -----------------------------------------------------------------
26919baf839SRobert Olsson     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
27019baf839SRobert Olsson 
27119baf839SRobert Olsson   _________________________________________________________________
27219baf839SRobert Olsson   | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
27319baf839SRobert Olsson   -----------------------------------------------------------------
27419baf839SRobert Olsson    16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
27519baf839SRobert Olsson 
27619baf839SRobert Olsson   tp->pos = 7
27719baf839SRobert Olsson   tp->bits = 3
27819baf839SRobert Olsson   n->pos = 15
27919baf839SRobert Olsson   n->bits = 4
28019baf839SRobert Olsson 
28119baf839SRobert Olsson   First, let's just ignore the bits that come before the parent tp, that is
28219baf839SRobert Olsson   the bits from 0 to (tp->pos-1). They are *known* but at this point we do
28319baf839SRobert Olsson   not use them for anything.
28419baf839SRobert Olsson 
28519baf839SRobert Olsson   The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
28619baf839SRobert Olsson   index into the parent's child array. That is, they will be used to find
28719baf839SRobert Olsson   'n' among tp's children.
28819baf839SRobert Olsson 
28919baf839SRobert Olsson   The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
29019baf839SRobert Olsson   for the node n.
29119baf839SRobert Olsson 
29219baf839SRobert Olsson   All the bits we have seen so far are significant to the node n. The rest
29319baf839SRobert Olsson   of the bits are really not needed or indeed known in n->key.
29419baf839SRobert Olsson 
29519baf839SRobert Olsson   The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
29619baf839SRobert Olsson   n's child array, and will of course be different for each child.
29719baf839SRobert Olsson 
298c877efb2SStephen Hemminger 
29919baf839SRobert Olsson   The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
30019baf839SRobert Olsson   at this point.
30119baf839SRobert Olsson 
30219baf839SRobert Olsson */
30319baf839SRobert Olsson 
3040c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn)
30519baf839SRobert Olsson {
3060c7770c7SStephen Hemminger 	WARN_ON(tn && tn->pos+tn->bits > 32);
30719baf839SRobert Olsson }
30819baf839SRobert Olsson 
309f5026fabSDenis V. Lunev static const int halve_threshold = 25;
310f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
311f5026fabSDenis V. Lunev static const int halve_threshold_root = 8;
312f5026fabSDenis V. Lunev static const int inflate_threshold_root = 15;
31319baf839SRobert Olsson 
3142373ce1cSRobert Olsson 
3152373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
3162373ce1cSRobert Olsson {
3172373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
3182373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
3192373ce1cSRobert Olsson }
3202373ce1cSRobert Olsson 
3212373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3222373ce1cSRobert Olsson {
3232373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3242373ce1cSRobert Olsson }
3252373ce1cSRobert Olsson 
3262373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head)
3272373ce1cSRobert Olsson {
3282373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf, rcu));
3292373ce1cSRobert Olsson }
3302373ce1cSRobert Olsson 
3312373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head)
3322373ce1cSRobert Olsson {
3332373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf_info, rcu));
3342373ce1cSRobert Olsson }
3352373ce1cSRobert Olsson 
3362373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf)
3372373ce1cSRobert Olsson {
3382373ce1cSRobert Olsson 	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
3392373ce1cSRobert Olsson }
3402373ce1cSRobert Olsson 
3418d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size)
3422373ce1cSRobert Olsson {
3432373ce1cSRobert Olsson 	struct page *pages;
3442373ce1cSRobert Olsson 
3452373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3468d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
3472373ce1cSRobert Olsson 
3482373ce1cSRobert Olsson 	pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
3492373ce1cSRobert Olsson 	if (!pages)
3502373ce1cSRobert Olsson 		return NULL;
3512373ce1cSRobert Olsson 
3522373ce1cSRobert Olsson 	return page_address(pages);
3532373ce1cSRobert Olsson }
3542373ce1cSRobert Olsson 
3552373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head)
3562373ce1cSRobert Olsson {
3572373ce1cSRobert Olsson 	struct tnode *tn = container_of(head, struct tnode, rcu);
3588d965444SEric Dumazet 	size_t size = sizeof(struct tnode) +
3598d965444SEric Dumazet 		      (sizeof(struct node *) << tn->bits);
3602373ce1cSRobert Olsson 
3612373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3622373ce1cSRobert Olsson 		kfree(tn);
3632373ce1cSRobert Olsson 	else
3642373ce1cSRobert Olsson 		free_pages((unsigned long)tn, get_order(size));
3652373ce1cSRobert Olsson }
3662373ce1cSRobert Olsson 
3672373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn)
3682373ce1cSRobert Olsson {
369550e29bcSRobert Olsson 	if (IS_LEAF(tn)) {
370550e29bcSRobert Olsson 		struct leaf *l = (struct leaf *) tn;
371550e29bcSRobert Olsson 		call_rcu_bh(&l->rcu, __leaf_free_rcu);
372132adf54SStephen Hemminger 	} else
3732373ce1cSRobert Olsson 		call_rcu(&tn->rcu, __tnode_free_rcu);
3742373ce1cSRobert Olsson }
3752373ce1cSRobert Olsson 
37619baf839SRobert Olsson static struct leaf *leaf_new(void)
37719baf839SRobert Olsson {
37819baf839SRobert Olsson 	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
37919baf839SRobert Olsson 	if (l) {
3802373ce1cSRobert Olsson 		l->parent = T_LEAF;
38119baf839SRobert Olsson 		INIT_HLIST_HEAD(&l->list);
38219baf839SRobert Olsson 	}
38319baf839SRobert Olsson 	return l;
38419baf839SRobert Olsson }
38519baf839SRobert Olsson 
38619baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen)
38719baf839SRobert Olsson {
38819baf839SRobert Olsson 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
3892373ce1cSRobert Olsson 	if (li) {
39019baf839SRobert Olsson 		li->plen = plen;
39119baf839SRobert Olsson 		INIT_LIST_HEAD(&li->falh);
3922373ce1cSRobert Olsson 	}
39319baf839SRobert Olsson 	return li;
39419baf839SRobert Olsson }
39519baf839SRobert Olsson 
39619baf839SRobert Olsson static struct tnode* tnode_new(t_key key, int pos, int bits)
39719baf839SRobert Olsson {
3988d965444SEric Dumazet 	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
399f0e36f8cSPatrick McHardy 	struct tnode *tn = tnode_alloc(sz);
40019baf839SRobert Olsson 
40119baf839SRobert Olsson 	if (tn) {
4022373ce1cSRobert Olsson 		tn->parent = T_TNODE;
40319baf839SRobert Olsson 		tn->pos = pos;
40419baf839SRobert Olsson 		tn->bits = bits;
40519baf839SRobert Olsson 		tn->key = key;
40619baf839SRobert Olsson 		tn->full_children = 0;
40719baf839SRobert Olsson 		tn->empty_children = 1<<bits;
40819baf839SRobert Olsson 	}
409c877efb2SStephen Hemminger 
4108d965444SEric Dumazet 	pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
4118d965444SEric Dumazet 		 (unsigned long) (sizeof(struct node) << bits));
41219baf839SRobert Olsson 	return tn;
41319baf839SRobert Olsson }
41419baf839SRobert Olsson 
41519baf839SRobert Olsson /*
41619baf839SRobert Olsson  * Check whether a tnode 'n' is "full", i.e. it is an internal node
41719baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
41819baf839SRobert Olsson  */
41919baf839SRobert Olsson 
420bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n)
42119baf839SRobert Olsson {
42219baf839SRobert Olsson 	if (n == NULL || IS_LEAF(n))
42319baf839SRobert Olsson 		return 0;
42419baf839SRobert Olsson 
42519baf839SRobert Olsson 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
42619baf839SRobert Olsson }
42719baf839SRobert Olsson 
42819baf839SRobert Olsson static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
42919baf839SRobert Olsson {
43019baf839SRobert Olsson 	tnode_put_child_reorg(tn, i, n, -1);
43119baf839SRobert Olsson }
43219baf839SRobert Olsson 
43319baf839SRobert Olsson  /*
43419baf839SRobert Olsson   * Add a child at position i overwriting the old value.
43519baf839SRobert Olsson   * Update the value of full_children and empty_children.
43619baf839SRobert Olsson   */
43719baf839SRobert Olsson 
43819baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
43919baf839SRobert Olsson {
4402373ce1cSRobert Olsson 	struct node *chi = tn->child[i];
44119baf839SRobert Olsson 	int isfull;
44219baf839SRobert Olsson 
4430c7770c7SStephen Hemminger 	BUG_ON(i >= 1<<tn->bits);
4440c7770c7SStephen Hemminger 
44519baf839SRobert Olsson 
44619baf839SRobert Olsson 	/* update emptyChildren */
44719baf839SRobert Olsson 	if (n == NULL && chi != NULL)
44819baf839SRobert Olsson 		tn->empty_children++;
44919baf839SRobert Olsson 	else if (n != NULL && chi == NULL)
45019baf839SRobert Olsson 		tn->empty_children--;
45119baf839SRobert Olsson 
45219baf839SRobert Olsson 	/* update fullChildren */
45319baf839SRobert Olsson 	if (wasfull == -1)
45419baf839SRobert Olsson 		wasfull = tnode_full(tn, chi);
45519baf839SRobert Olsson 
45619baf839SRobert Olsson 	isfull = tnode_full(tn, n);
45719baf839SRobert Olsson 	if (wasfull && !isfull)
45819baf839SRobert Olsson 		tn->full_children--;
45919baf839SRobert Olsson 	else if (!wasfull && isfull)
46019baf839SRobert Olsson 		tn->full_children++;
46191b9a277SOlof Johansson 
46219baf839SRobert Olsson 	if (n)
46306801916SStephen Hemminger 		node_set_parent(n, tn);
46419baf839SRobert Olsson 
4652373ce1cSRobert Olsson 	rcu_assign_pointer(tn->child[i], n);
46619baf839SRobert Olsson }
46719baf839SRobert Olsson 
46819baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn)
46919baf839SRobert Olsson {
47019baf839SRobert Olsson 	int i;
4712f36895aSRobert Olsson 	int err = 0;
4722f80b3c8SRobert Olsson 	struct tnode *old_tn;
473e6308be8SRobert Olsson 	int inflate_threshold_use;
474e6308be8SRobert Olsson 	int halve_threshold_use;
47505eee48cSRobert Olsson 	int max_resize;
47619baf839SRobert Olsson 
47719baf839SRobert Olsson 	if (!tn)
47819baf839SRobert Olsson 		return NULL;
47919baf839SRobert Olsson 
4800c7770c7SStephen Hemminger 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
48119baf839SRobert Olsson 		 tn, inflate_threshold, halve_threshold);
48219baf839SRobert Olsson 
48319baf839SRobert Olsson 	/* No children */
48419baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn)) {
48519baf839SRobert Olsson 		tnode_free(tn);
48619baf839SRobert Olsson 		return NULL;
48719baf839SRobert Olsson 	}
48819baf839SRobert Olsson 	/* One child */
48919baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
49019baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
49191b9a277SOlof Johansson 			struct node *n;
49219baf839SRobert Olsson 
49391b9a277SOlof Johansson 			n = tn->child[i];
4942373ce1cSRobert Olsson 			if (!n)
49591b9a277SOlof Johansson 				continue;
49619baf839SRobert Olsson 
49719baf839SRobert Olsson 			/* compress one level */
49806801916SStephen Hemminger 			node_set_parent(n, NULL);
49919baf839SRobert Olsson 			tnode_free(tn);
50019baf839SRobert Olsson 			return n;
50119baf839SRobert Olsson 		}
50219baf839SRobert Olsson 	/*
50319baf839SRobert Olsson 	 * Double as long as the resulting node has a number of
50419baf839SRobert Olsson 	 * nonempty nodes that are above the threshold.
50519baf839SRobert Olsson 	 */
50619baf839SRobert Olsson 
50719baf839SRobert Olsson 	/*
50819baf839SRobert Olsson 	 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
50919baf839SRobert Olsson 	 * the Helsinki University of Technology and Matti Tikkanen of Nokia
51019baf839SRobert Olsson 	 * Telecommunications, page 6:
51119baf839SRobert Olsson 	 * "A node is doubled if the ratio of non-empty children to all
51219baf839SRobert Olsson 	 * children in the *doubled* node is at least 'high'."
51319baf839SRobert Olsson 	 *
51419baf839SRobert Olsson 	 * 'high' in this instance is the variable 'inflate_threshold'. It
51519baf839SRobert Olsson 	 * is expressed as a percentage, so we multiply it with
51619baf839SRobert Olsson 	 * tnode_child_length() and instead of multiplying by 2 (since the
51719baf839SRobert Olsson 	 * child array will be doubled by inflate()) and multiplying
51819baf839SRobert Olsson 	 * the left-hand side by 100 (to handle the percentage thing) we
51919baf839SRobert Olsson 	 * multiply the left-hand side by 50.
52019baf839SRobert Olsson 	 *
52119baf839SRobert Olsson 	 * The left-hand side may look a bit weird: tnode_child_length(tn)
52219baf839SRobert Olsson 	 * - tn->empty_children is of course the number of non-null children
52319baf839SRobert Olsson 	 * in the current node. tn->full_children is the number of "full"
52419baf839SRobert Olsson 	 * children, that is non-null tnodes with a skip value of 0.
52519baf839SRobert Olsson 	 * All of those will be doubled in the resulting inflated tnode, so
52619baf839SRobert Olsson 	 * we just count them one extra time here.
52719baf839SRobert Olsson 	 *
52819baf839SRobert Olsson 	 * A clearer way to write this would be:
52919baf839SRobert Olsson 	 *
53019baf839SRobert Olsson 	 * to_be_doubled = tn->full_children;
53119baf839SRobert Olsson 	 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
53219baf839SRobert Olsson 	 *     tn->full_children;
53319baf839SRobert Olsson 	 *
53419baf839SRobert Olsson 	 * new_child_length = tnode_child_length(tn) * 2;
53519baf839SRobert Olsson 	 *
53619baf839SRobert Olsson 	 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
53719baf839SRobert Olsson 	 *      new_child_length;
53819baf839SRobert Olsson 	 * if (new_fill_factor >= inflate_threshold)
53919baf839SRobert Olsson 	 *
54019baf839SRobert Olsson 	 * ...and so on, tho it would mess up the while () loop.
54119baf839SRobert Olsson 	 *
54219baf839SRobert Olsson 	 * anyway,
54319baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
54419baf839SRobert Olsson 	 *      inflate_threshold
54519baf839SRobert Olsson 	 *
54619baf839SRobert Olsson 	 * avoid a division:
54719baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
54819baf839SRobert Olsson 	 *      inflate_threshold * new_child_length
54919baf839SRobert Olsson 	 *
55019baf839SRobert Olsson 	 * expand not_to_be_doubled and to_be_doubled, and shorten:
55119baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
55219baf839SRobert Olsson 	 *    tn->full_children) >= inflate_threshold * new_child_length
55319baf839SRobert Olsson 	 *
55419baf839SRobert Olsson 	 * expand new_child_length:
55519baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
55619baf839SRobert Olsson 	 *    tn->full_children) >=
55719baf839SRobert Olsson 	 *      inflate_threshold * tnode_child_length(tn) * 2
55819baf839SRobert Olsson 	 *
55919baf839SRobert Olsson 	 * shorten again:
56019baf839SRobert Olsson 	 * 50 * (tn->full_children + tnode_child_length(tn) -
56119baf839SRobert Olsson 	 *    tn->empty_children) >= inflate_threshold *
56219baf839SRobert Olsson 	 *    tnode_child_length(tn)
56319baf839SRobert Olsson 	 *
56419baf839SRobert Olsson 	 */
56519baf839SRobert Olsson 
56619baf839SRobert Olsson 	check_tnode(tn);
56719baf839SRobert Olsson 
568e6308be8SRobert Olsson 	/* Keep root node larger  */
569e6308be8SRobert Olsson 
570e6308be8SRobert Olsson 	if (!tn->parent)
571e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold_root;
572e6308be8SRobert Olsson 	else
573e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold;
574e6308be8SRobert Olsson 
5752f36895aSRobert Olsson 	err = 0;
57605eee48cSRobert Olsson 	max_resize = 10;
57705eee48cSRobert Olsson 	while ((tn->full_children > 0 &&  max_resize-- &&
57819baf839SRobert Olsson 	       50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
579e6308be8SRobert Olsson 				inflate_threshold_use * tnode_child_length(tn))) {
58019baf839SRobert Olsson 
5812f80b3c8SRobert Olsson 		old_tn = tn;
5822f80b3c8SRobert Olsson 		tn = inflate(t, tn);
5832f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
5842f80b3c8SRobert Olsson 			tn = old_tn;
5852f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
5862f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
5872f36895aSRobert Olsson #endif
5882f36895aSRobert Olsson 			break;
5892f36895aSRobert Olsson 		}
59019baf839SRobert Olsson 	}
59119baf839SRobert Olsson 
59205eee48cSRobert Olsson 	if (max_resize < 0) {
59305eee48cSRobert Olsson 		if (!tn->parent)
59405eee48cSRobert Olsson 			printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
59505eee48cSRobert Olsson 			       inflate_threshold_root, tn->bits);
59605eee48cSRobert Olsson 		else
59705eee48cSRobert Olsson 			printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
59805eee48cSRobert Olsson 			       inflate_threshold, tn->bits);
59905eee48cSRobert Olsson 	}
60005eee48cSRobert Olsson 
60119baf839SRobert Olsson 	check_tnode(tn);
60219baf839SRobert Olsson 
60319baf839SRobert Olsson 	/*
60419baf839SRobert Olsson 	 * Halve as long as the number of empty children in this
60519baf839SRobert Olsson 	 * node is above threshold.
60619baf839SRobert Olsson 	 */
6072f36895aSRobert Olsson 
608e6308be8SRobert Olsson 
609e6308be8SRobert Olsson 	/* Keep root node larger  */
610e6308be8SRobert Olsson 
611e6308be8SRobert Olsson 	if (!tn->parent)
612e6308be8SRobert Olsson 		halve_threshold_use = halve_threshold_root;
613e6308be8SRobert Olsson 	else
614e6308be8SRobert Olsson 		halve_threshold_use = halve_threshold;
615e6308be8SRobert Olsson 
6162f36895aSRobert Olsson 	err = 0;
61705eee48cSRobert Olsson 	max_resize = 10;
61805eee48cSRobert Olsson 	while (tn->bits > 1 &&  max_resize-- &&
61919baf839SRobert Olsson 	       100 * (tnode_child_length(tn) - tn->empty_children) <
620e6308be8SRobert Olsson 	       halve_threshold_use * tnode_child_length(tn)) {
62119baf839SRobert Olsson 
6222f80b3c8SRobert Olsson 		old_tn = tn;
6232f80b3c8SRobert Olsson 		tn = halve(t, tn);
6242f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6252f80b3c8SRobert Olsson 			tn = old_tn;
6262f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6272f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6282f36895aSRobert Olsson #endif
6292f36895aSRobert Olsson 			break;
6302f36895aSRobert Olsson 		}
6312f36895aSRobert Olsson 	}
6322f36895aSRobert Olsson 
63305eee48cSRobert Olsson 	if (max_resize < 0) {
63405eee48cSRobert Olsson 		if (!tn->parent)
63505eee48cSRobert Olsson 			printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
63605eee48cSRobert Olsson 			       halve_threshold_root, tn->bits);
63705eee48cSRobert Olsson 		else
63805eee48cSRobert Olsson 			printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
63905eee48cSRobert Olsson 			       halve_threshold, tn->bits);
64005eee48cSRobert Olsson 	}
64119baf839SRobert Olsson 
64219baf839SRobert Olsson 	/* Only one child remains */
64319baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
64419baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
64591b9a277SOlof Johansson 			struct node *n;
64619baf839SRobert Olsson 
64791b9a277SOlof Johansson 			n = tn->child[i];
6482373ce1cSRobert Olsson 			if (!n)
64991b9a277SOlof Johansson 				continue;
65091b9a277SOlof Johansson 
65191b9a277SOlof Johansson 			/* compress one level */
65291b9a277SOlof Johansson 
65306801916SStephen Hemminger 			node_set_parent(n, NULL);
65419baf839SRobert Olsson 			tnode_free(tn);
65519baf839SRobert Olsson 			return n;
65619baf839SRobert Olsson 		}
65719baf839SRobert Olsson 
65819baf839SRobert Olsson 	return (struct node *) tn;
65919baf839SRobert Olsson }
66019baf839SRobert Olsson 
6612f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn)
66219baf839SRobert Olsson {
66319baf839SRobert Olsson 	struct tnode *oldtnode = tn;
66419baf839SRobert Olsson 	int olen = tnode_child_length(tn);
66519baf839SRobert Olsson 	int i;
66619baf839SRobert Olsson 
6670c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
66819baf839SRobert Olsson 
66919baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
67019baf839SRobert Olsson 
6712f80b3c8SRobert Olsson 	if (!tn)
6722f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
6732f36895aSRobert Olsson 
6742f36895aSRobert Olsson 	/*
6752f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
6762f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
6772f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and  inflate
6782f36895aSRobert Olsson 	 * of tnode is ignored.
6792f36895aSRobert Olsson 	 */
6802f36895aSRobert Olsson 
6812f36895aSRobert Olsson 	for (i = 0; i < olen; i++) {
6822f36895aSRobert Olsson 		struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
6832f36895aSRobert Olsson 
6842f36895aSRobert Olsson 		if (inode &&
6852f36895aSRobert Olsson 		    IS_TNODE(inode) &&
6862f36895aSRobert Olsson 		    inode->pos == oldtnode->pos + oldtnode->bits &&
6872f36895aSRobert Olsson 		    inode->bits > 1) {
6882f36895aSRobert Olsson 			struct tnode *left, *right;
689ab66b4a7SStephen Hemminger 			t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
6902f36895aSRobert Olsson 
6912f36895aSRobert Olsson 			left = tnode_new(inode->key&(~m), inode->pos + 1,
6922f36895aSRobert Olsson 					 inode->bits - 1);
6932f80b3c8SRobert Olsson 			if (!left)
6942f80b3c8SRobert Olsson 				goto nomem;
6952f36895aSRobert Olsson 
6962f36895aSRobert Olsson 			right = tnode_new(inode->key|m, inode->pos + 1,
6972f36895aSRobert Olsson 					  inode->bits - 1);
6982f36895aSRobert Olsson 
6992f36895aSRobert Olsson 			if (!right) {
7002f80b3c8SRobert Olsson 				tnode_free(left);
7012f80b3c8SRobert Olsson 				goto nomem;
7022f36895aSRobert Olsson 			}
7032f36895aSRobert Olsson 
7042f36895aSRobert Olsson 			put_child(t, tn, 2*i, (struct node *) left);
7052f36895aSRobert Olsson 			put_child(t, tn, 2*i+1, (struct node *) right);
7062f36895aSRobert Olsson 		}
7072f36895aSRobert Olsson 	}
7082f36895aSRobert Olsson 
70919baf839SRobert Olsson 	for (i = 0; i < olen; i++) {
710c95aaf9aSStephen Hemminger 		struct tnode *inode;
71119baf839SRobert Olsson 		struct node *node = tnode_get_child(oldtnode, i);
71291b9a277SOlof Johansson 		struct tnode *left, *right;
71391b9a277SOlof Johansson 		int size, j;
71419baf839SRobert Olsson 
71519baf839SRobert Olsson 		/* An empty child */
71619baf839SRobert Olsson 		if (node == NULL)
71719baf839SRobert Olsson 			continue;
71819baf839SRobert Olsson 
71919baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
72019baf839SRobert Olsson 
72119baf839SRobert Olsson 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
72219baf839SRobert Olsson 		   tn->pos + tn->bits - 1) {
7232f36895aSRobert Olsson 			if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
72419baf839SRobert Olsson 					     1) == 0)
72519baf839SRobert Olsson 				put_child(t, tn, 2*i, node);
72619baf839SRobert Olsson 			else
72719baf839SRobert Olsson 				put_child(t, tn, 2*i+1, node);
72819baf839SRobert Olsson 			continue;
72919baf839SRobert Olsson 		}
73019baf839SRobert Olsson 
73119baf839SRobert Olsson 		/* An internal node with two children */
73219baf839SRobert Olsson 		inode = (struct tnode *) node;
73319baf839SRobert Olsson 
73419baf839SRobert Olsson 		if (inode->bits == 1) {
73519baf839SRobert Olsson 			put_child(t, tn, 2*i, inode->child[0]);
73619baf839SRobert Olsson 			put_child(t, tn, 2*i+1, inode->child[1]);
73719baf839SRobert Olsson 
73819baf839SRobert Olsson 			tnode_free(inode);
73991b9a277SOlof Johansson 			continue;
74019baf839SRobert Olsson 		}
74119baf839SRobert Olsson 
74219baf839SRobert Olsson 		/* An internal node with more than two children */
74319baf839SRobert Olsson 
74419baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
74519baf839SRobert Olsson 		 * ones, 'left' and 'right', each with half of the
74619baf839SRobert Olsson 		 * original children. The two new nodes will have
74719baf839SRobert Olsson 		 * a position one bit further down the key and this
74819baf839SRobert Olsson 		 * means that the "significant" part of their keys
74919baf839SRobert Olsson 		 * (see the discussion near the top of this file)
75019baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
75119baf839SRobert Olsson 		 * left's key and "1" in right's key. Since we are
75219baf839SRobert Olsson 		 * moving the key position by one step, the bit that
75319baf839SRobert Olsson 		 * we are moving away from - the bit at position
75419baf839SRobert Olsson 		 * (inode->pos) - is the one that will differ between
75519baf839SRobert Olsson 		 * left and right. So... we synthesize that bit in the
75619baf839SRobert Olsson 		 * two  new keys.
75719baf839SRobert Olsson 		 * The mask 'm' below will be a single "one" bit at
75819baf839SRobert Olsson 		 * the position (inode->pos)
75919baf839SRobert Olsson 		 */
76019baf839SRobert Olsson 
76119baf839SRobert Olsson 		/* Use the old key, but set the new significant
76219baf839SRobert Olsson 		 *   bit to zero.
76319baf839SRobert Olsson 		 */
7642f36895aSRobert Olsson 
7652f36895aSRobert Olsson 		left = (struct tnode *) tnode_get_child(tn, 2*i);
7662f36895aSRobert Olsson 		put_child(t, tn, 2*i, NULL);
76719baf839SRobert Olsson 
76891b9a277SOlof Johansson 		BUG_ON(!left);
76919baf839SRobert Olsson 
7702f36895aSRobert Olsson 		right = (struct tnode *) tnode_get_child(tn, 2*i+1);
7712f36895aSRobert Olsson 		put_child(t, tn, 2*i+1, NULL);
77219baf839SRobert Olsson 
77391b9a277SOlof Johansson 		BUG_ON(!right);
77419baf839SRobert Olsson 
77519baf839SRobert Olsson 		size = tnode_child_length(left);
77619baf839SRobert Olsson 		for (j = 0; j < size; j++) {
77719baf839SRobert Olsson 			put_child(t, left, j, inode->child[j]);
77819baf839SRobert Olsson 			put_child(t, right, j, inode->child[j + size]);
77919baf839SRobert Olsson 		}
78019baf839SRobert Olsson 		put_child(t, tn, 2*i, resize(t, left));
78119baf839SRobert Olsson 		put_child(t, tn, 2*i+1, resize(t, right));
78219baf839SRobert Olsson 
78319baf839SRobert Olsson 		tnode_free(inode);
78419baf839SRobert Olsson 	}
78519baf839SRobert Olsson 	tnode_free(oldtnode);
78619baf839SRobert Olsson 	return tn;
7872f80b3c8SRobert Olsson nomem:
7882f80b3c8SRobert Olsson 	{
7892f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
7902f80b3c8SRobert Olsson 		int j;
7912f80b3c8SRobert Olsson 
7922f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
7932f80b3c8SRobert Olsson 			if (tn->child[j])
7942f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
7952f80b3c8SRobert Olsson 
7962f80b3c8SRobert Olsson 		tnode_free(tn);
7972f80b3c8SRobert Olsson 
7982f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
7992f80b3c8SRobert Olsson 	}
80019baf839SRobert Olsson }
80119baf839SRobert Olsson 
8022f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn)
80319baf839SRobert Olsson {
80419baf839SRobert Olsson 	struct tnode *oldtnode = tn;
80519baf839SRobert Olsson 	struct node *left, *right;
80619baf839SRobert Olsson 	int i;
80719baf839SRobert Olsson 	int olen = tnode_child_length(tn);
80819baf839SRobert Olsson 
8090c7770c7SStephen Hemminger 	pr_debug("In halve\n");
81019baf839SRobert Olsson 
81119baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
81219baf839SRobert Olsson 
8132f80b3c8SRobert Olsson 	if (!tn)
8142f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8152f36895aSRobert Olsson 
8162f36895aSRobert Olsson 	/*
8172f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
8182f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
8192f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and halve
8202f36895aSRobert Olsson 	 * of tnode is ignored.
8212f36895aSRobert Olsson 	 */
8222f36895aSRobert Olsson 
8232f36895aSRobert Olsson 	for (i = 0; i < olen; i += 2) {
8242f36895aSRobert Olsson 		left = tnode_get_child(oldtnode, i);
8252f36895aSRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
8262f36895aSRobert Olsson 
8272f36895aSRobert Olsson 		/* Two nonempty children */
8282f36895aSRobert Olsson 		if (left && right) {
8292f80b3c8SRobert Olsson 			struct tnode *newn;
8302f36895aSRobert Olsson 
8312f80b3c8SRobert Olsson 			newn = tnode_new(left->key, tn->pos + tn->bits, 1);
8322f80b3c8SRobert Olsson 
8332f80b3c8SRobert Olsson 			if (!newn)
8342f80b3c8SRobert Olsson 				goto nomem;
8352f80b3c8SRobert Olsson 
8362f80b3c8SRobert Olsson 			put_child(t, tn, i/2, (struct node *)newn);
8372f36895aSRobert Olsson 		}
8382f36895aSRobert Olsson 
8392f36895aSRobert Olsson 	}
84019baf839SRobert Olsson 
84119baf839SRobert Olsson 	for (i = 0; i < olen; i += 2) {
84291b9a277SOlof Johansson 		struct tnode *newBinNode;
84391b9a277SOlof Johansson 
84419baf839SRobert Olsson 		left = tnode_get_child(oldtnode, i);
84519baf839SRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
84619baf839SRobert Olsson 
84719baf839SRobert Olsson 		/* At least one of the children is empty */
84819baf839SRobert Olsson 		if (left == NULL) {
84919baf839SRobert Olsson 			if (right == NULL)    /* Both are empty */
85019baf839SRobert Olsson 				continue;
85119baf839SRobert Olsson 			put_child(t, tn, i/2, right);
85291b9a277SOlof Johansson 			continue;
85391b9a277SOlof Johansson 		}
85491b9a277SOlof Johansson 
85591b9a277SOlof Johansson 		if (right == NULL) {
85619baf839SRobert Olsson 			put_child(t, tn, i/2, left);
85791b9a277SOlof Johansson 			continue;
85891b9a277SOlof Johansson 		}
85919baf839SRobert Olsson 
86019baf839SRobert Olsson 		/* Two nonempty children */
86191b9a277SOlof Johansson 		newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
8622f36895aSRobert Olsson 		put_child(t, tn, i/2, NULL);
86319baf839SRobert Olsson 		put_child(t, newBinNode, 0, left);
86419baf839SRobert Olsson 		put_child(t, newBinNode, 1, right);
86519baf839SRobert Olsson 		put_child(t, tn, i/2, resize(t, newBinNode));
86619baf839SRobert Olsson 	}
86719baf839SRobert Olsson 	tnode_free(oldtnode);
86819baf839SRobert Olsson 	return tn;
8692f80b3c8SRobert Olsson nomem:
8702f80b3c8SRobert Olsson 	{
8712f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8722f80b3c8SRobert Olsson 		int j;
8732f80b3c8SRobert Olsson 
8742f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8752f80b3c8SRobert Olsson 			if (tn->child[j])
8762f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
8772f80b3c8SRobert Olsson 
8782f80b3c8SRobert Olsson 		tnode_free(tn);
8792f80b3c8SRobert Olsson 
8802f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8812f80b3c8SRobert Olsson 	}
88219baf839SRobert Olsson }
88319baf839SRobert Olsson 
884772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines
8852373ce1cSRobert Olsson  via get_fa_head and dump */
8862373ce1cSRobert Olsson 
887772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
88819baf839SRobert Olsson {
889772cb712SRobert Olsson 	struct hlist_head *head = &l->list;
89019baf839SRobert Olsson 	struct hlist_node *node;
89119baf839SRobert Olsson 	struct leaf_info *li;
89219baf839SRobert Olsson 
8932373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, head, hlist)
89419baf839SRobert Olsson 		if (li->plen == plen)
89519baf839SRobert Olsson 			return li;
89691b9a277SOlof Johansson 
89719baf839SRobert Olsson 	return NULL;
89819baf839SRobert Olsson }
89919baf839SRobert Olsson 
90019baf839SRobert Olsson static inline struct list_head * get_fa_head(struct leaf *l, int plen)
90119baf839SRobert Olsson {
902772cb712SRobert Olsson 	struct leaf_info *li = find_leaf_info(l, plen);
90319baf839SRobert Olsson 
90491b9a277SOlof Johansson 	if (!li)
90591b9a277SOlof Johansson 		return NULL;
90619baf839SRobert Olsson 
90791b9a277SOlof Johansson 	return &li->falh;
90819baf839SRobert Olsson }
90919baf839SRobert Olsson 
91019baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
91119baf839SRobert Olsson {
91219baf839SRobert Olsson 	struct leaf_info *li = NULL, *last = NULL;
91391b9a277SOlof Johansson 	struct hlist_node *node;
91419baf839SRobert Olsson 
91591b9a277SOlof Johansson 	if (hlist_empty(head)) {
9162373ce1cSRobert Olsson 		hlist_add_head_rcu(&new->hlist, head);
91791b9a277SOlof Johansson 	} else {
91891b9a277SOlof Johansson 		hlist_for_each_entry(li, node, head, hlist) {
91919baf839SRobert Olsson 			if (new->plen > li->plen)
92019baf839SRobert Olsson 				break;
92119baf839SRobert Olsson 
92219baf839SRobert Olsson 			last = li;
92319baf839SRobert Olsson 		}
92419baf839SRobert Olsson 		if (last)
9252373ce1cSRobert Olsson 			hlist_add_after_rcu(&last->hlist, &new->hlist);
92619baf839SRobert Olsson 		else
9272373ce1cSRobert Olsson 			hlist_add_before_rcu(&new->hlist, &li->hlist);
92819baf839SRobert Olsson 	}
92919baf839SRobert Olsson }
93019baf839SRobert Olsson 
9312373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
9322373ce1cSRobert Olsson 
93319baf839SRobert Olsson static struct leaf *
93419baf839SRobert Olsson fib_find_node(struct trie *t, u32 key)
93519baf839SRobert Olsson {
93619baf839SRobert Olsson 	int pos;
93719baf839SRobert Olsson 	struct tnode *tn;
93819baf839SRobert Olsson 	struct node *n;
93919baf839SRobert Olsson 
94019baf839SRobert Olsson 	pos = 0;
9412373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
94219baf839SRobert Olsson 
94319baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
94419baf839SRobert Olsson 		tn = (struct tnode *) n;
94519baf839SRobert Olsson 
94619baf839SRobert Olsson 		check_tnode(tn);
94719baf839SRobert Olsson 
94819baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
94919baf839SRobert Olsson 			pos = tn->pos + tn->bits;
950b59cfbf7SEric Dumazet 			n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
95191b9a277SOlof Johansson 		} else
95219baf839SRobert Olsson 			break;
95319baf839SRobert Olsson 	}
95419baf839SRobert Olsson 	/* Case we have found a leaf. Compare prefixes */
95519baf839SRobert Olsson 
95691b9a277SOlof Johansson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
95791b9a277SOlof Johansson 		return (struct leaf *)n;
95891b9a277SOlof Johansson 
95919baf839SRobert Olsson 	return NULL;
96019baf839SRobert Olsson }
96119baf839SRobert Olsson 
96219baf839SRobert Olsson static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
96319baf839SRobert Olsson {
96419baf839SRobert Olsson 	int wasfull;
96506801916SStephen Hemminger 	t_key cindex, key = tn->key;
96606801916SStephen Hemminger 	struct tnode *tp;
96719baf839SRobert Olsson 
96806801916SStephen Hemminger 	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
96919baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
97019baf839SRobert Olsson 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
97119baf839SRobert Olsson 		tn = (struct tnode *) resize (t, (struct tnode *)tn);
97219baf839SRobert Olsson 		tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
97319baf839SRobert Olsson 
97406801916SStephen Hemminger 		tp = node_parent((struct node *) tn);
97506801916SStephen Hemminger 		if (!tp)
97619baf839SRobert Olsson 			break;
97706801916SStephen Hemminger 		tn = tp;
97819baf839SRobert Olsson 	}
97906801916SStephen Hemminger 
98019baf839SRobert Olsson 	/* Handle last (top) tnode */
98119baf839SRobert Olsson 	if (IS_TNODE(tn))
98219baf839SRobert Olsson 		tn = (struct tnode*) resize(t, (struct tnode *)tn);
98319baf839SRobert Olsson 
98419baf839SRobert Olsson 	return (struct node*) tn;
98519baf839SRobert Olsson }
98619baf839SRobert Olsson 
9872373ce1cSRobert Olsson /* only used from updater-side */
9882373ce1cSRobert Olsson 
989fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
99019baf839SRobert Olsson {
99119baf839SRobert Olsson 	int pos, newpos;
99219baf839SRobert Olsson 	struct tnode *tp = NULL, *tn = NULL;
99319baf839SRobert Olsson 	struct node *n;
99419baf839SRobert Olsson 	struct leaf *l;
99519baf839SRobert Olsson 	int missbit;
99619baf839SRobert Olsson 	struct list_head *fa_head = NULL;
99719baf839SRobert Olsson 	struct leaf_info *li;
99819baf839SRobert Olsson 	t_key cindex;
99919baf839SRobert Olsson 
100019baf839SRobert Olsson 	pos = 0;
100119baf839SRobert Olsson 	n = t->trie;
100219baf839SRobert Olsson 
100319baf839SRobert Olsson 	/* If we point to NULL, stop. Either the tree is empty and we should
100419baf839SRobert Olsson 	 * just put a new leaf in if, or we have reached an empty child slot,
100519baf839SRobert Olsson 	 * and we should just put our new leaf in that.
100619baf839SRobert Olsson 	 * If we point to a T_TNODE, check if it matches our key. Note that
100719baf839SRobert Olsson 	 * a T_TNODE might be skipping any number of bits - its 'pos' need
100819baf839SRobert Olsson 	 * not be the parent's 'pos'+'bits'!
100919baf839SRobert Olsson 	 *
101019baf839SRobert Olsson 	 * If it does match the current key, get pos/bits from it, extract
101119baf839SRobert Olsson 	 * the index from our key, push the T_TNODE and walk the tree.
101219baf839SRobert Olsson 	 *
101319baf839SRobert Olsson 	 * If it doesn't, we have to replace it with a new T_TNODE.
101419baf839SRobert Olsson 	 *
101519baf839SRobert Olsson 	 * If we point to a T_LEAF, it might or might not have the same key
101619baf839SRobert Olsson 	 * as we do. If it does, just change the value, update the T_LEAF's
101719baf839SRobert Olsson 	 * value, and return it.
101819baf839SRobert Olsson 	 * If it doesn't, we need to replace it with a T_TNODE.
101919baf839SRobert Olsson 	 */
102019baf839SRobert Olsson 
102119baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
102219baf839SRobert Olsson 		tn = (struct tnode *) n;
102319baf839SRobert Olsson 
102419baf839SRobert Olsson 		check_tnode(tn);
102519baf839SRobert Olsson 
102619baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
102719baf839SRobert Olsson 			tp = tn;
102819baf839SRobert Olsson 			pos = tn->pos + tn->bits;
102919baf839SRobert Olsson 			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
103019baf839SRobert Olsson 
103106801916SStephen Hemminger 			BUG_ON(n && node_parent(n) != tn);
103291b9a277SOlof Johansson 		} else
103319baf839SRobert Olsson 			break;
103419baf839SRobert Olsson 	}
103519baf839SRobert Olsson 
103619baf839SRobert Olsson 	/*
103719baf839SRobert Olsson 	 * n  ----> NULL, LEAF or TNODE
103819baf839SRobert Olsson 	 *
103919baf839SRobert Olsson 	 * tp is n's (parent) ----> NULL or TNODE
104019baf839SRobert Olsson 	 */
104119baf839SRobert Olsson 
104291b9a277SOlof Johansson 	BUG_ON(tp && IS_LEAF(tp));
104319baf839SRobert Olsson 
104419baf839SRobert Olsson 	/* Case 1: n is a leaf. Compare prefixes */
104519baf839SRobert Olsson 
104619baf839SRobert Olsson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1047c95aaf9aSStephen Hemminger 		l = (struct leaf *) n;
104819baf839SRobert Olsson 		li = leaf_info_new(plen);
104919baf839SRobert Olsson 
1050fea86ad8SStephen Hemminger 		if (!li)
1051fea86ad8SStephen Hemminger 			return NULL;
105219baf839SRobert Olsson 
105319baf839SRobert Olsson 		fa_head = &li->falh;
105419baf839SRobert Olsson 		insert_leaf_info(&l->list, li);
105519baf839SRobert Olsson 		goto done;
105619baf839SRobert Olsson 	}
105719baf839SRobert Olsson 	l = leaf_new();
105819baf839SRobert Olsson 
1059fea86ad8SStephen Hemminger 	if (!l)
1060fea86ad8SStephen Hemminger 		return NULL;
106119baf839SRobert Olsson 
106219baf839SRobert Olsson 	l->key = key;
106319baf839SRobert Olsson 	li = leaf_info_new(plen);
106419baf839SRobert Olsson 
1065f835e471SRobert Olsson 	if (!li) {
1066f835e471SRobert Olsson 		tnode_free((struct tnode *) l);
1067fea86ad8SStephen Hemminger 		return NULL;
1068f835e471SRobert Olsson 	}
106919baf839SRobert Olsson 
107019baf839SRobert Olsson 	fa_head = &li->falh;
107119baf839SRobert Olsson 	insert_leaf_info(&l->list, li);
107219baf839SRobert Olsson 
107319baf839SRobert Olsson 	if (t->trie && n == NULL) {
107491b9a277SOlof Johansson 		/* Case 2: n is NULL, and will just insert a new leaf */
107519baf839SRobert Olsson 
107606801916SStephen Hemminger 		node_set_parent((struct node *)l, tp);
107719baf839SRobert Olsson 
107819baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
107919baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
108091b9a277SOlof Johansson 	} else {
108119baf839SRobert Olsson 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
108219baf839SRobert Olsson 		/*
108319baf839SRobert Olsson 		 *  Add a new tnode here
108419baf839SRobert Olsson 		 *  first tnode need some special handling
108519baf839SRobert Olsson 		 */
108619baf839SRobert Olsson 
108719baf839SRobert Olsson 		if (tp)
108819baf839SRobert Olsson 			pos = tp->pos+tp->bits;
108919baf839SRobert Olsson 		else
109019baf839SRobert Olsson 			pos = 0;
109191b9a277SOlof Johansson 
109219baf839SRobert Olsson 		if (n) {
109319baf839SRobert Olsson 			newpos = tkey_mismatch(key, pos, n->key);
109419baf839SRobert Olsson 			tn = tnode_new(n->key, newpos, 1);
109591b9a277SOlof Johansson 		} else {
109619baf839SRobert Olsson 			newpos = 0;
109719baf839SRobert Olsson 			tn = tnode_new(key, newpos, 1); /* First tnode */
109819baf839SRobert Olsson 		}
1099f835e471SRobert Olsson 
1100f835e471SRobert Olsson 		if (!tn) {
1101f835e471SRobert Olsson 			free_leaf_info(li);
1102f835e471SRobert Olsson 			tnode_free((struct tnode *) l);
1103fea86ad8SStephen Hemminger 			return NULL;
1104f835e471SRobert Olsson 		}
110519baf839SRobert Olsson 
110606801916SStephen Hemminger 		node_set_parent((struct node *)tn, tp);
110719baf839SRobert Olsson 
110819baf839SRobert Olsson 		missbit = tkey_extract_bits(key, newpos, 1);
110919baf839SRobert Olsson 		put_child(t, tn, missbit, (struct node *)l);
111019baf839SRobert Olsson 		put_child(t, tn, 1-missbit, n);
111119baf839SRobert Olsson 
111219baf839SRobert Olsson 		if (tp) {
111319baf839SRobert Olsson 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
111419baf839SRobert Olsson 			put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
111591b9a277SOlof Johansson 		} else {
11162373ce1cSRobert Olsson 			rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
111719baf839SRobert Olsson 			tp = tn;
111819baf839SRobert Olsson 		}
111919baf839SRobert Olsson 	}
112091b9a277SOlof Johansson 
112191b9a277SOlof Johansson 	if (tp && tp->pos + tp->bits > 32)
112278c6671aSStephen Hemminger 		printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
112319baf839SRobert Olsson 		       tp, tp->pos, tp->bits, key, plen);
112491b9a277SOlof Johansson 
112519baf839SRobert Olsson 	/* Rebalance the trie */
11262373ce1cSRobert Olsson 
11272373ce1cSRobert Olsson 	rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1128f835e471SRobert Olsson done:
112919baf839SRobert Olsson 	return fa_head;
113019baf839SRobert Olsson }
113119baf839SRobert Olsson 
1132d562f1f8SRobert Olsson /*
1133d562f1f8SRobert Olsson  * Caller must hold RTNL.
1134d562f1f8SRobert Olsson  */
11354e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
113619baf839SRobert Olsson {
113719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
113819baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
113919baf839SRobert Olsson 	struct list_head *fa_head = NULL;
114019baf839SRobert Olsson 	struct fib_info *fi;
11414e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
11424e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
114319baf839SRobert Olsson 	u32 key, mask;
114419baf839SRobert Olsson 	int err;
114519baf839SRobert Olsson 	struct leaf *l;
114619baf839SRobert Olsson 
114719baf839SRobert Olsson 	if (plen > 32)
114819baf839SRobert Olsson 		return -EINVAL;
114919baf839SRobert Olsson 
11504e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
115119baf839SRobert Olsson 
11522dfe55b4SPatrick McHardy 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
115319baf839SRobert Olsson 
115419baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
115519baf839SRobert Olsson 
115619baf839SRobert Olsson 	if (key & ~mask)
115719baf839SRobert Olsson 		return -EINVAL;
115819baf839SRobert Olsson 
115919baf839SRobert Olsson 	key = key & mask;
116019baf839SRobert Olsson 
11614e902c57SThomas Graf 	fi = fib_create_info(cfg);
11624e902c57SThomas Graf 	if (IS_ERR(fi)) {
11634e902c57SThomas Graf 		err = PTR_ERR(fi);
116419baf839SRobert Olsson 		goto err;
11654e902c57SThomas Graf 	}
116619baf839SRobert Olsson 
116719baf839SRobert Olsson 	l = fib_find_node(t, key);
116819baf839SRobert Olsson 	fa = NULL;
116919baf839SRobert Olsson 
117019baf839SRobert Olsson 	if (l) {
117119baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
117219baf839SRobert Olsson 		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
117319baf839SRobert Olsson 	}
117419baf839SRobert Olsson 
117519baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
117619baf839SRobert Olsson 	 * with the same keys [prefix,tos,priority], if such key already
117719baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
117819baf839SRobert Olsson 	 *
117919baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
118019baf839SRobert Olsson 	 * insert to the head of f.
118119baf839SRobert Olsson 	 *
118219baf839SRobert Olsson 	 * If f is NULL, no fib node matched the destination key
118319baf839SRobert Olsson 	 * and we need to allocate a new one of those as well.
118419baf839SRobert Olsson 	 */
118519baf839SRobert Olsson 
118691b9a277SOlof Johansson 	if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
118719baf839SRobert Olsson 		struct fib_alias *fa_orig;
118819baf839SRobert Olsson 
118919baf839SRobert Olsson 		err = -EEXIST;
11904e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
119119baf839SRobert Olsson 			goto out;
119219baf839SRobert Olsson 
11934e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
119419baf839SRobert Olsson 			struct fib_info *fi_drop;
119519baf839SRobert Olsson 			u8 state;
119619baf839SRobert Olsson 
11976725033fSJoonwoo Park 			if (fi->fib_treeref > 1)
11986725033fSJoonwoo Park 				goto out;
11996725033fSJoonwoo Park 
12002373ce1cSRobert Olsson 			err = -ENOBUFS;
1201e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
12022373ce1cSRobert Olsson 			if (new_fa == NULL)
12032373ce1cSRobert Olsson 				goto out;
120419baf839SRobert Olsson 
120519baf839SRobert Olsson 			fi_drop = fa->fa_info;
12062373ce1cSRobert Olsson 			new_fa->fa_tos = fa->fa_tos;
12072373ce1cSRobert Olsson 			new_fa->fa_info = fi;
12084e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
12094e902c57SThomas Graf 			new_fa->fa_scope = cfg->fc_scope;
121019baf839SRobert Olsson 			state = fa->fa_state;
12112373ce1cSRobert Olsson 			new_fa->fa_state &= ~FA_S_ACCESSED;
121219baf839SRobert Olsson 
12132373ce1cSRobert Olsson 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12142373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
121519baf839SRobert Olsson 
121619baf839SRobert Olsson 			fib_release_info(fi_drop);
121719baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
121819baf839SRobert Olsson 				rt_cache_flush(-1);
1219b8f55831SMilan Kocian 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1220b8f55831SMilan Kocian 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
122119baf839SRobert Olsson 
122219baf839SRobert Olsson 			goto succeeded;
122319baf839SRobert Olsson 		}
122419baf839SRobert Olsson 		/* Error if we find a perfect match which
122519baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
122619baf839SRobert Olsson 		 * information.
122719baf839SRobert Olsson 		 */
122819baf839SRobert Olsson 		fa_orig = fa;
122919baf839SRobert Olsson 		list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
123019baf839SRobert Olsson 			if (fa->fa_tos != tos)
123119baf839SRobert Olsson 				break;
123219baf839SRobert Olsson 			if (fa->fa_info->fib_priority != fi->fib_priority)
123319baf839SRobert Olsson 				break;
12344e902c57SThomas Graf 			if (fa->fa_type == cfg->fc_type &&
12354e902c57SThomas Graf 			    fa->fa_scope == cfg->fc_scope &&
123619baf839SRobert Olsson 			    fa->fa_info == fi) {
123719baf839SRobert Olsson 				goto out;
123819baf839SRobert Olsson 			}
123919baf839SRobert Olsson 		}
12404e902c57SThomas Graf 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
124119baf839SRobert Olsson 			fa = fa_orig;
124219baf839SRobert Olsson 	}
124319baf839SRobert Olsson 	err = -ENOENT;
12444e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
124519baf839SRobert Olsson 		goto out;
124619baf839SRobert Olsson 
124719baf839SRobert Olsson 	err = -ENOBUFS;
1248e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
124919baf839SRobert Olsson 	if (new_fa == NULL)
125019baf839SRobert Olsson 		goto out;
125119baf839SRobert Olsson 
125219baf839SRobert Olsson 	new_fa->fa_info = fi;
125319baf839SRobert Olsson 	new_fa->fa_tos = tos;
12544e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
12554e902c57SThomas Graf 	new_fa->fa_scope = cfg->fc_scope;
125619baf839SRobert Olsson 	new_fa->fa_state = 0;
125719baf839SRobert Olsson 	/*
125819baf839SRobert Olsson 	 * Insert new entry to the list.
125919baf839SRobert Olsson 	 */
126019baf839SRobert Olsson 
1261f835e471SRobert Olsson 	if (!fa_head) {
1262fea86ad8SStephen Hemminger 		fa_head = fib_insert_node(t, key, plen);
1263fea86ad8SStephen Hemminger 		if (unlikely(!fa_head)) {
1264fea86ad8SStephen Hemminger 			err = -ENOMEM;
1265f835e471SRobert Olsson 			goto out_free_new_fa;
1266f835e471SRobert Olsson 		}
1267fea86ad8SStephen Hemminger 	}
126819baf839SRobert Olsson 
12692373ce1cSRobert Olsson 	list_add_tail_rcu(&new_fa->fa_list,
12702373ce1cSRobert Olsson 			  (fa ? &fa->fa_list : fa_head));
127119baf839SRobert Olsson 
1272d717a9a6SStephen Hemminger 	t->size++;
1273d717a9a6SStephen Hemminger 
127419baf839SRobert Olsson 	rt_cache_flush(-1);
12754e902c57SThomas Graf 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1276b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
127719baf839SRobert Olsson succeeded:
127819baf839SRobert Olsson 	return 0;
1279f835e471SRobert Olsson 
1280f835e471SRobert Olsson out_free_new_fa:
1281f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
128219baf839SRobert Olsson out:
128319baf839SRobert Olsson 	fib_release_info(fi);
128491b9a277SOlof Johansson err:
128519baf839SRobert Olsson 	return err;
128619baf839SRobert Olsson }
128719baf839SRobert Olsson 
12882373ce1cSRobert Olsson 
1289772cb712SRobert Olsson /* should be called with rcu_read_lock */
12900c7770c7SStephen Hemminger static inline int check_leaf(struct trie *t, struct leaf *l,
12910c7770c7SStephen Hemminger 			     t_key key, int *plen, const struct flowi *flp,
129206c74270SPatrick McHardy 			     struct fib_result *res)
129319baf839SRobert Olsson {
129406c74270SPatrick McHardy 	int err, i;
1295888454c5SAl Viro 	__be32 mask;
129619baf839SRobert Olsson 	struct leaf_info *li;
129719baf839SRobert Olsson 	struct hlist_head *hhead = &l->list;
129819baf839SRobert Olsson 	struct hlist_node *node;
129919baf839SRobert Olsson 
13002373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
130119baf839SRobert Olsson 		i = li->plen;
1302888454c5SAl Viro 		mask = inet_make_mask(i);
1303888454c5SAl Viro 		if (l->key != (key & ntohl(mask)))
130419baf839SRobert Olsson 			continue;
130519baf839SRobert Olsson 
1306888454c5SAl Viro 		if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
130719baf839SRobert Olsson 			*plen = i;
130819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
130919baf839SRobert Olsson 			t->stats.semantic_match_passed++;
131019baf839SRobert Olsson #endif
131106c74270SPatrick McHardy 			return err;
131219baf839SRobert Olsson 		}
131319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
131419baf839SRobert Olsson 		t->stats.semantic_match_miss++;
131519baf839SRobert Olsson #endif
131619baf839SRobert Olsson 	}
131706c74270SPatrick McHardy 	return 1;
131819baf839SRobert Olsson }
131919baf839SRobert Olsson 
132019baf839SRobert Olsson static int
132119baf839SRobert Olsson fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
132219baf839SRobert Olsson {
132319baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
132419baf839SRobert Olsson 	int plen, ret = 0;
132519baf839SRobert Olsson 	struct node *n;
132619baf839SRobert Olsson 	struct tnode *pn;
132719baf839SRobert Olsson 	int pos, bits;
132819baf839SRobert Olsson 	t_key key = ntohl(flp->fl4_dst);
132919baf839SRobert Olsson 	int chopped_off;
133019baf839SRobert Olsson 	t_key cindex = 0;
133119baf839SRobert Olsson 	int current_prefix_length = KEYLENGTH;
133291b9a277SOlof Johansson 	struct tnode *cn;
133391b9a277SOlof Johansson 	t_key node_prefix, key_prefix, pref_mismatch;
133491b9a277SOlof Johansson 	int mp;
133591b9a277SOlof Johansson 
13362373ce1cSRobert Olsson 	rcu_read_lock();
133719baf839SRobert Olsson 
13382373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
133919baf839SRobert Olsson 	if (!n)
134019baf839SRobert Olsson 		goto failed;
134119baf839SRobert Olsson 
134219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
134319baf839SRobert Olsson 	t->stats.gets++;
134419baf839SRobert Olsson #endif
134519baf839SRobert Olsson 
134619baf839SRobert Olsson 	/* Just a leaf? */
134719baf839SRobert Olsson 	if (IS_LEAF(n)) {
134806c74270SPatrick McHardy 		if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
134919baf839SRobert Olsson 			goto found;
135019baf839SRobert Olsson 		goto failed;
135119baf839SRobert Olsson 	}
135219baf839SRobert Olsson 	pn = (struct tnode *) n;
135319baf839SRobert Olsson 	chopped_off = 0;
135419baf839SRobert Olsson 
135519baf839SRobert Olsson 	while (pn) {
135619baf839SRobert Olsson 		pos = pn->pos;
135719baf839SRobert Olsson 		bits = pn->bits;
135819baf839SRobert Olsson 
135919baf839SRobert Olsson 		if (!chopped_off)
1360ab66b4a7SStephen Hemminger 			cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1361ab66b4a7SStephen Hemminger 						   pos, bits);
136219baf839SRobert Olsson 
136319baf839SRobert Olsson 		n = tnode_get_child(pn, cindex);
136419baf839SRobert Olsson 
136519baf839SRobert Olsson 		if (n == NULL) {
136619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
136719baf839SRobert Olsson 			t->stats.null_node_hit++;
136819baf839SRobert Olsson #endif
136919baf839SRobert Olsson 			goto backtrace;
137019baf839SRobert Olsson 		}
137119baf839SRobert Olsson 
137291b9a277SOlof Johansson 		if (IS_LEAF(n)) {
137391b9a277SOlof Johansson 			if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
137491b9a277SOlof Johansson 				goto found;
137591b9a277SOlof Johansson 			else
137691b9a277SOlof Johansson 				goto backtrace;
137791b9a277SOlof Johansson 		}
137891b9a277SOlof Johansson 
137919baf839SRobert Olsson #define HL_OPTIMIZE
138019baf839SRobert Olsson #ifdef HL_OPTIMIZE
138191b9a277SOlof Johansson 		cn = (struct tnode *)n;
138219baf839SRobert Olsson 
138319baf839SRobert Olsson 		/*
138419baf839SRobert Olsson 		 * It's a tnode, and we can do some extra checks here if we
138519baf839SRobert Olsson 		 * like, to avoid descending into a dead-end branch.
138619baf839SRobert Olsson 		 * This tnode is in the parent's child array at index
138719baf839SRobert Olsson 		 * key[p_pos..p_pos+p_bits] but potentially with some bits
138819baf839SRobert Olsson 		 * chopped off, so in reality the index may be just a
138919baf839SRobert Olsson 		 * subprefix, padded with zero at the end.
139019baf839SRobert Olsson 		 * We can also take a look at any skipped bits in this
139119baf839SRobert Olsson 		 * tnode - everything up to p_pos is supposed to be ok,
139219baf839SRobert Olsson 		 * and the non-chopped bits of the index (se previous
139319baf839SRobert Olsson 		 * paragraph) are also guaranteed ok, but the rest is
139419baf839SRobert Olsson 		 * considered unknown.
139519baf839SRobert Olsson 		 *
139619baf839SRobert Olsson 		 * The skipped bits are key[pos+bits..cn->pos].
139719baf839SRobert Olsson 		 */
139819baf839SRobert Olsson 
139919baf839SRobert Olsson 		/* If current_prefix_length < pos+bits, we are already doing
140019baf839SRobert Olsson 		 * actual prefix  matching, which means everything from
140119baf839SRobert Olsson 		 * pos+(bits-chopped_off) onward must be zero along some
140219baf839SRobert Olsson 		 * branch of this subtree - otherwise there is *no* valid
140319baf839SRobert Olsson 		 * prefix present. Here we can only check the skipped
140419baf839SRobert Olsson 		 * bits. Remember, since we have already indexed into the
140519baf839SRobert Olsson 		 * parent's child array, we know that the bits we chopped of
140619baf839SRobert Olsson 		 * *are* zero.
140719baf839SRobert Olsson 		 */
140819baf839SRobert Olsson 
140919baf839SRobert Olsson 		/* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
141019baf839SRobert Olsson 
141119baf839SRobert Olsson 		if (current_prefix_length < pos+bits) {
141219baf839SRobert Olsson 			if (tkey_extract_bits(cn->key, current_prefix_length,
141319baf839SRobert Olsson 						cn->pos - current_prefix_length) != 0 ||
141419baf839SRobert Olsson 			    !(cn->child[0]))
141519baf839SRobert Olsson 				goto backtrace;
141619baf839SRobert Olsson 		}
141719baf839SRobert Olsson 
141819baf839SRobert Olsson 		/*
141919baf839SRobert Olsson 		 * If chopped_off=0, the index is fully validated and we
142019baf839SRobert Olsson 		 * only need to look at the skipped bits for this, the new,
142119baf839SRobert Olsson 		 * tnode. What we actually want to do is to find out if
142219baf839SRobert Olsson 		 * these skipped bits match our key perfectly, or if we will
142319baf839SRobert Olsson 		 * have to count on finding a matching prefix further down,
142419baf839SRobert Olsson 		 * because if we do, we would like to have some way of
142519baf839SRobert Olsson 		 * verifying the existence of such a prefix at this point.
142619baf839SRobert Olsson 		 */
142719baf839SRobert Olsson 
142819baf839SRobert Olsson 		/* The only thing we can do at this point is to verify that
142919baf839SRobert Olsson 		 * any such matching prefix can indeed be a prefix to our
143019baf839SRobert Olsson 		 * key, and if the bits in the node we are inspecting that
143119baf839SRobert Olsson 		 * do not match our key are not ZERO, this cannot be true.
143219baf839SRobert Olsson 		 * Thus, find out where there is a mismatch (before cn->pos)
143319baf839SRobert Olsson 		 * and verify that all the mismatching bits are zero in the
143419baf839SRobert Olsson 		 * new tnode's key.
143519baf839SRobert Olsson 		 */
143619baf839SRobert Olsson 
143719baf839SRobert Olsson 		/* Note: We aren't very concerned about the piece of the key
143819baf839SRobert Olsson 		 * that precede pn->pos+pn->bits, since these have already been
143919baf839SRobert Olsson 		 * checked. The bits after cn->pos aren't checked since these are
144019baf839SRobert Olsson 		 * by definition "unknown" at this point. Thus, what we want to
144119baf839SRobert Olsson 		 * see is if we are about to enter the "prefix matching" state,
144219baf839SRobert Olsson 		 * and in that case verify that the skipped bits that will prevail
144319baf839SRobert Olsson 		 * throughout this subtree are zero, as they have to be if we are
144419baf839SRobert Olsson 		 * to find a matching prefix.
144519baf839SRobert Olsson 		 */
144619baf839SRobert Olsson 
1447ab66b4a7SStephen Hemminger 		node_prefix = mask_pfx(cn->key, cn->pos);
1448ab66b4a7SStephen Hemminger 		key_prefix = mask_pfx(key, cn->pos);
144919baf839SRobert Olsson 		pref_mismatch = key_prefix^node_prefix;
145019baf839SRobert Olsson 		mp = 0;
145119baf839SRobert Olsson 
145219baf839SRobert Olsson 		/* In short: If skipped bits in this node do not match the search
145319baf839SRobert Olsson 		 * key, enter the "prefix matching" state.directly.
145419baf839SRobert Olsson 		 */
145519baf839SRobert Olsson 		if (pref_mismatch) {
145619baf839SRobert Olsson 			while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
145719baf839SRobert Olsson 				mp++;
145819baf839SRobert Olsson 				pref_mismatch = pref_mismatch <<1;
145919baf839SRobert Olsson 			}
146019baf839SRobert Olsson 			key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
146119baf839SRobert Olsson 
146219baf839SRobert Olsson 			if (key_prefix != 0)
146319baf839SRobert Olsson 				goto backtrace;
146419baf839SRobert Olsson 
146519baf839SRobert Olsson 			if (current_prefix_length >= cn->pos)
146619baf839SRobert Olsson 				current_prefix_length = mp;
146719baf839SRobert Olsson 		}
146819baf839SRobert Olsson #endif
146919baf839SRobert Olsson 		pn = (struct tnode *)n; /* Descend */
147019baf839SRobert Olsson 		chopped_off = 0;
147119baf839SRobert Olsson 		continue;
147291b9a277SOlof Johansson 
147319baf839SRobert Olsson backtrace:
147419baf839SRobert Olsson 		chopped_off++;
147519baf839SRobert Olsson 
147619baf839SRobert Olsson 		/* As zero don't change the child key (cindex) */
147791b9a277SOlof Johansson 		while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
147819baf839SRobert Olsson 			chopped_off++;
147919baf839SRobert Olsson 
148019baf839SRobert Olsson 		/* Decrease current_... with bits chopped off */
148119baf839SRobert Olsson 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
148219baf839SRobert Olsson 			current_prefix_length = pn->pos + pn->bits - chopped_off;
148319baf839SRobert Olsson 
148419baf839SRobert Olsson 		/*
148519baf839SRobert Olsson 		 * Either we do the actual chop off according or if we have
148619baf839SRobert Olsson 		 * chopped off all bits in this tnode walk up to our parent.
148719baf839SRobert Olsson 		 */
148819baf839SRobert Olsson 
148991b9a277SOlof Johansson 		if (chopped_off <= pn->bits) {
149019baf839SRobert Olsson 			cindex &= ~(1 << (chopped_off-1));
149191b9a277SOlof Johansson 		} else {
149206801916SStephen Hemminger 			struct tnode *parent = node_parent((struct node *) pn);
149306801916SStephen Hemminger 			if (!parent)
149419baf839SRobert Olsson 				goto failed;
149519baf839SRobert Olsson 
149619baf839SRobert Olsson 			/* Get Child's index */
149706801916SStephen Hemminger 			cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
149806801916SStephen Hemminger 			pn = parent;
149919baf839SRobert Olsson 			chopped_off = 0;
150019baf839SRobert Olsson 
150119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
150219baf839SRobert Olsson 			t->stats.backtrack++;
150319baf839SRobert Olsson #endif
150419baf839SRobert Olsson 			goto backtrace;
150519baf839SRobert Olsson 		}
150619baf839SRobert Olsson 	}
150719baf839SRobert Olsson failed:
150819baf839SRobert Olsson 	ret = 1;
150919baf839SRobert Olsson found:
15102373ce1cSRobert Olsson 	rcu_read_unlock();
151119baf839SRobert Olsson 	return ret;
151219baf839SRobert Olsson }
151319baf839SRobert Olsson 
15142373ce1cSRobert Olsson /* only called from updater side */
151519baf839SRobert Olsson static int trie_leaf_remove(struct trie *t, t_key key)
151619baf839SRobert Olsson {
151719baf839SRobert Olsson 	t_key cindex;
151819baf839SRobert Olsson 	struct tnode *tp = NULL;
151919baf839SRobert Olsson 	struct node *n = t->trie;
152019baf839SRobert Olsson 	struct leaf *l;
152119baf839SRobert Olsson 
15220c7770c7SStephen Hemminger 	pr_debug("entering trie_leaf_remove(%p)\n", n);
152319baf839SRobert Olsson 
152419baf839SRobert Olsson 	/* Note that in the case skipped bits, those bits are *not* checked!
152519baf839SRobert Olsson 	 * When we finish this, we will have NULL or a T_LEAF, and the
152619baf839SRobert Olsson 	 * T_LEAF may or may not match our key.
152719baf839SRobert Olsson 	 */
152819baf839SRobert Olsson 
152919baf839SRobert Olsson 	while (n != NULL && IS_TNODE(n)) {
153019baf839SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
153119baf839SRobert Olsson 		check_tnode(tn);
153219baf839SRobert Olsson 		n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
153319baf839SRobert Olsson 
153406801916SStephen Hemminger 		BUG_ON(n && node_parent(n) != tn);
153519baf839SRobert Olsson 	}
153619baf839SRobert Olsson 	l = (struct leaf *) n;
153719baf839SRobert Olsson 
153819baf839SRobert Olsson 	if (!n || !tkey_equals(l->key, key))
153919baf839SRobert Olsson 		return 0;
154019baf839SRobert Olsson 
154119baf839SRobert Olsson 	/*
154219baf839SRobert Olsson 	 * Key found.
154319baf839SRobert Olsson 	 * Remove the leaf and rebalance the tree
154419baf839SRobert Olsson 	 */
154519baf839SRobert Olsson 
154619baf839SRobert Olsson 	t->size--;
154719baf839SRobert Olsson 
154806801916SStephen Hemminger 	tp = node_parent(n);
154919baf839SRobert Olsson 	tnode_free((struct tnode *) n);
155019baf839SRobert Olsson 
155119baf839SRobert Olsson 	if (tp) {
155219baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
155319baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, NULL);
15542373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
155591b9a277SOlof Johansson 	} else
15562373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, NULL);
155719baf839SRobert Olsson 
155819baf839SRobert Olsson 	return 1;
155919baf839SRobert Olsson }
156019baf839SRobert Olsson 
1561d562f1f8SRobert Olsson /*
1562d562f1f8SRobert Olsson  * Caller must hold RTNL.
1563d562f1f8SRobert Olsson  */
15644e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
156519baf839SRobert Olsson {
156619baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
156719baf839SRobert Olsson 	u32 key, mask;
15684e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
15694e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
157019baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
157119baf839SRobert Olsson 	struct list_head *fa_head;
157219baf839SRobert Olsson 	struct leaf *l;
157391b9a277SOlof Johansson 	struct leaf_info *li;
157491b9a277SOlof Johansson 
157519baf839SRobert Olsson 	if (plen > 32)
157619baf839SRobert Olsson 		return -EINVAL;
157719baf839SRobert Olsson 
15784e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
157919baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
158019baf839SRobert Olsson 
158119baf839SRobert Olsson 	if (key & ~mask)
158219baf839SRobert Olsson 		return -EINVAL;
158319baf839SRobert Olsson 
158419baf839SRobert Olsson 	key = key & mask;
158519baf839SRobert Olsson 	l = fib_find_node(t, key);
158619baf839SRobert Olsson 
158719baf839SRobert Olsson 	if (!l)
158819baf839SRobert Olsson 		return -ESRCH;
158919baf839SRobert Olsson 
159019baf839SRobert Olsson 	fa_head = get_fa_head(l, plen);
159119baf839SRobert Olsson 	fa = fib_find_alias(fa_head, tos, 0);
159219baf839SRobert Olsson 
159319baf839SRobert Olsson 	if (!fa)
159419baf839SRobert Olsson 		return -ESRCH;
159519baf839SRobert Olsson 
15960c7770c7SStephen Hemminger 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
159719baf839SRobert Olsson 
159819baf839SRobert Olsson 	fa_to_delete = NULL;
159919baf839SRobert Olsson 	fa_head = fa->fa_list.prev;
16002373ce1cSRobert Olsson 
160119baf839SRobert Olsson 	list_for_each_entry(fa, fa_head, fa_list) {
160219baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
160319baf839SRobert Olsson 
160419baf839SRobert Olsson 		if (fa->fa_tos != tos)
160519baf839SRobert Olsson 			break;
160619baf839SRobert Olsson 
16074e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
16084e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
16094e902c57SThomas Graf 		     fa->fa_scope == cfg->fc_scope) &&
16104e902c57SThomas Graf 		    (!cfg->fc_protocol ||
16114e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
16124e902c57SThomas Graf 		    fib_nh_match(cfg, fi) == 0) {
161319baf839SRobert Olsson 			fa_to_delete = fa;
161419baf839SRobert Olsson 			break;
161519baf839SRobert Olsson 		}
161619baf839SRobert Olsson 	}
161719baf839SRobert Olsson 
161891b9a277SOlof Johansson 	if (!fa_to_delete)
161991b9a277SOlof Johansson 		return -ESRCH;
162019baf839SRobert Olsson 
162119baf839SRobert Olsson 	fa = fa_to_delete;
16224e902c57SThomas Graf 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1623b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
162419baf839SRobert Olsson 
162519baf839SRobert Olsson 	l = fib_find_node(t, key);
1626772cb712SRobert Olsson 	li = find_leaf_info(l, plen);
162719baf839SRobert Olsson 
16282373ce1cSRobert Olsson 	list_del_rcu(&fa->fa_list);
162919baf839SRobert Olsson 
163019baf839SRobert Olsson 	if (list_empty(fa_head)) {
16312373ce1cSRobert Olsson 		hlist_del_rcu(&li->hlist);
163219baf839SRobert Olsson 		free_leaf_info(li);
16332373ce1cSRobert Olsson 	}
163419baf839SRobert Olsson 
163519baf839SRobert Olsson 	if (hlist_empty(&l->list))
163619baf839SRobert Olsson 		trie_leaf_remove(t, key);
163719baf839SRobert Olsson 
163819baf839SRobert Olsson 	if (fa->fa_state & FA_S_ACCESSED)
163919baf839SRobert Olsson 		rt_cache_flush(-1);
164019baf839SRobert Olsson 
16412373ce1cSRobert Olsson 	fib_release_info(fa->fa_info);
16422373ce1cSRobert Olsson 	alias_free_mem_rcu(fa);
164319baf839SRobert Olsson 	return 0;
164419baf839SRobert Olsson }
164519baf839SRobert Olsson 
164619baf839SRobert Olsson static int trie_flush_list(struct trie *t, struct list_head *head)
164719baf839SRobert Olsson {
164819baf839SRobert Olsson 	struct fib_alias *fa, *fa_node;
164919baf839SRobert Olsson 	int found = 0;
165019baf839SRobert Olsson 
165119baf839SRobert Olsson 	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
165219baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
165319baf839SRobert Olsson 
165419baf839SRobert Olsson 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
16552373ce1cSRobert Olsson 			list_del_rcu(&fa->fa_list);
16562373ce1cSRobert Olsson 			fib_release_info(fa->fa_info);
16572373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
165819baf839SRobert Olsson 			found++;
165919baf839SRobert Olsson 		}
166019baf839SRobert Olsson 	}
166119baf839SRobert Olsson 	return found;
166219baf839SRobert Olsson }
166319baf839SRobert Olsson 
166419baf839SRobert Olsson static int trie_flush_leaf(struct trie *t, struct leaf *l)
166519baf839SRobert Olsson {
166619baf839SRobert Olsson 	int found = 0;
166719baf839SRobert Olsson 	struct hlist_head *lih = &l->list;
166819baf839SRobert Olsson 	struct hlist_node *node, *tmp;
166919baf839SRobert Olsson 	struct leaf_info *li = NULL;
167019baf839SRobert Olsson 
167119baf839SRobert Olsson 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
167219baf839SRobert Olsson 		found += trie_flush_list(t, &li->falh);
167319baf839SRobert Olsson 
167419baf839SRobert Olsson 		if (list_empty(&li->falh)) {
16752373ce1cSRobert Olsson 			hlist_del_rcu(&li->hlist);
167619baf839SRobert Olsson 			free_leaf_info(li);
167719baf839SRobert Olsson 		}
167819baf839SRobert Olsson 	}
167919baf839SRobert Olsson 	return found;
168019baf839SRobert Olsson }
168119baf839SRobert Olsson 
16822373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
16832373ce1cSRobert Olsson 
168419baf839SRobert Olsson static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
168519baf839SRobert Olsson {
168619baf839SRobert Olsson 	struct node *c = (struct node *) thisleaf;
168719baf839SRobert Olsson 	struct tnode *p;
168819baf839SRobert Olsson 	int idx;
16892373ce1cSRobert Olsson 	struct node *trie = rcu_dereference(t->trie);
169019baf839SRobert Olsson 
169119baf839SRobert Olsson 	if (c == NULL) {
16922373ce1cSRobert Olsson 		if (trie == NULL)
169319baf839SRobert Olsson 			return NULL;
169419baf839SRobert Olsson 
16952373ce1cSRobert Olsson 		if (IS_LEAF(trie))          /* trie w. just a leaf */
16962373ce1cSRobert Olsson 			return (struct leaf *) trie;
169719baf839SRobert Olsson 
16982373ce1cSRobert Olsson 		p = (struct tnode*) trie;  /* Start */
169991b9a277SOlof Johansson 	} else
1700b59cfbf7SEric Dumazet 		p = node_parent_rcu(c);
1701c877efb2SStephen Hemminger 
170219baf839SRobert Olsson 	while (p) {
170319baf839SRobert Olsson 		int pos, last;
170419baf839SRobert Olsson 
170519baf839SRobert Olsson 		/*  Find the next child of the parent */
170619baf839SRobert Olsson 		if (c)
170719baf839SRobert Olsson 			pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
170819baf839SRobert Olsson 		else
170919baf839SRobert Olsson 			pos = 0;
171019baf839SRobert Olsson 
171119baf839SRobert Olsson 		last = 1 << p->bits;
171219baf839SRobert Olsson 		for (idx = pos; idx < last ; idx++) {
17132373ce1cSRobert Olsson 			c = rcu_dereference(p->child[idx]);
17142373ce1cSRobert Olsson 
17152373ce1cSRobert Olsson 			if (!c)
171691b9a277SOlof Johansson 				continue;
171719baf839SRobert Olsson 
171819baf839SRobert Olsson 			/* Decend if tnode */
17192373ce1cSRobert Olsson 			while (IS_TNODE(c)) {
17202373ce1cSRobert Olsson 				p = (struct tnode *) c;
172119baf839SRobert Olsson 				idx = 0;
172219baf839SRobert Olsson 
172319baf839SRobert Olsson 				/* Rightmost non-NULL branch */
172419baf839SRobert Olsson 				if (p && IS_TNODE(p))
17252373ce1cSRobert Olsson 					while (!(c = rcu_dereference(p->child[idx]))
17262373ce1cSRobert Olsson 					       && idx < (1<<p->bits)) idx++;
172719baf839SRobert Olsson 
172819baf839SRobert Olsson 				/* Done with this tnode? */
17292373ce1cSRobert Olsson 				if (idx >= (1 << p->bits) || !c)
173019baf839SRobert Olsson 					goto up;
173119baf839SRobert Olsson 			}
17322373ce1cSRobert Olsson 			return (struct leaf *) c;
173319baf839SRobert Olsson 		}
173419baf839SRobert Olsson up:
173519baf839SRobert Olsson 		/* No more children go up one step  */
173619baf839SRobert Olsson 		c = (struct node *) p;
1737b59cfbf7SEric Dumazet 		p = node_parent_rcu(c);
173819baf839SRobert Olsson 	}
173919baf839SRobert Olsson 	return NULL; /* Ready. Root of trie */
174019baf839SRobert Olsson }
174119baf839SRobert Olsson 
1742d562f1f8SRobert Olsson /*
1743d562f1f8SRobert Olsson  * Caller must hold RTNL.
1744d562f1f8SRobert Olsson  */
174519baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb)
174619baf839SRobert Olsson {
174719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
174819baf839SRobert Olsson 	struct leaf *ll = NULL, *l = NULL;
174919baf839SRobert Olsson 	int found = 0, h;
175019baf839SRobert Olsson 
175119baf839SRobert Olsson 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
175219baf839SRobert Olsson 		found += trie_flush_leaf(t, l);
175319baf839SRobert Olsson 
175419baf839SRobert Olsson 		if (ll && hlist_empty(&ll->list))
175519baf839SRobert Olsson 			trie_leaf_remove(t, ll->key);
175619baf839SRobert Olsson 		ll = l;
175719baf839SRobert Olsson 	}
175819baf839SRobert Olsson 
175919baf839SRobert Olsson 	if (ll && hlist_empty(&ll->list))
176019baf839SRobert Olsson 		trie_leaf_remove(t, ll->key);
176119baf839SRobert Olsson 
17620c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
176319baf839SRobert Olsson 	return found;
176419baf839SRobert Olsson }
176519baf839SRobert Olsson 
176619baf839SRobert Olsson static void
176719baf839SRobert Olsson fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
176819baf839SRobert Olsson {
176919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
177019baf839SRobert Olsson 	int order, last_idx;
177119baf839SRobert Olsson 	struct fib_info *fi = NULL;
177219baf839SRobert Olsson 	struct fib_info *last_resort;
177319baf839SRobert Olsson 	struct fib_alias *fa = NULL;
177419baf839SRobert Olsson 	struct list_head *fa_head;
177519baf839SRobert Olsson 	struct leaf *l;
177619baf839SRobert Olsson 
177719baf839SRobert Olsson 	last_idx = -1;
177819baf839SRobert Olsson 	last_resort = NULL;
177919baf839SRobert Olsson 	order = -1;
178019baf839SRobert Olsson 
17812373ce1cSRobert Olsson 	rcu_read_lock();
178219baf839SRobert Olsson 
178319baf839SRobert Olsson 	l = fib_find_node(t, 0);
178419baf839SRobert Olsson 	if (!l)
178519baf839SRobert Olsson 		goto out;
178619baf839SRobert Olsson 
178719baf839SRobert Olsson 	fa_head = get_fa_head(l, 0);
178819baf839SRobert Olsson 	if (!fa_head)
178919baf839SRobert Olsson 		goto out;
179019baf839SRobert Olsson 
179119baf839SRobert Olsson 	if (list_empty(fa_head))
179219baf839SRobert Olsson 		goto out;
179319baf839SRobert Olsson 
17942373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
179519baf839SRobert Olsson 		struct fib_info *next_fi = fa->fa_info;
179619baf839SRobert Olsson 
179719baf839SRobert Olsson 		if (fa->fa_scope != res->scope ||
179819baf839SRobert Olsson 		    fa->fa_type != RTN_UNICAST)
179919baf839SRobert Olsson 			continue;
180019baf839SRobert Olsson 
180119baf839SRobert Olsson 		if (next_fi->fib_priority > res->fi->fib_priority)
180219baf839SRobert Olsson 			break;
180319baf839SRobert Olsson 		if (!next_fi->fib_nh[0].nh_gw ||
180419baf839SRobert Olsson 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
180519baf839SRobert Olsson 			continue;
180619baf839SRobert Olsson 		fa->fa_state |= FA_S_ACCESSED;
180719baf839SRobert Olsson 
180819baf839SRobert Olsson 		if (fi == NULL) {
180919baf839SRobert Olsson 			if (next_fi != res->fi)
181019baf839SRobert Olsson 				break;
181119baf839SRobert Olsson 		} else if (!fib_detect_death(fi, order, &last_resort,
1812971b893eSDenis V. Lunev 					     &last_idx, tb->tb_default)) {
1813a2bbe682SDenis V. Lunev 			fib_result_assign(res, fi);
1814971b893eSDenis V. Lunev 			tb->tb_default = order;
181519baf839SRobert Olsson 			goto out;
181619baf839SRobert Olsson 		}
181719baf839SRobert Olsson 		fi = next_fi;
181819baf839SRobert Olsson 		order++;
181919baf839SRobert Olsson 	}
182019baf839SRobert Olsson 	if (order <= 0 || fi == NULL) {
1821971b893eSDenis V. Lunev 		tb->tb_default = -1;
182219baf839SRobert Olsson 		goto out;
182319baf839SRobert Olsson 	}
182419baf839SRobert Olsson 
1825971b893eSDenis V. Lunev 	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1826971b893eSDenis V. Lunev 				tb->tb_default)) {
1827a2bbe682SDenis V. Lunev 		fib_result_assign(res, fi);
1828971b893eSDenis V. Lunev 		tb->tb_default = order;
182919baf839SRobert Olsson 		goto out;
183019baf839SRobert Olsson 	}
1831a2bbe682SDenis V. Lunev 	if (last_idx >= 0)
1832a2bbe682SDenis V. Lunev 		fib_result_assign(res, last_resort);
1833971b893eSDenis V. Lunev 	tb->tb_default = last_idx;
1834971b893eSDenis V. Lunev out:
18352373ce1cSRobert Olsson 	rcu_read_unlock();
183619baf839SRobert Olsson }
183719baf839SRobert Olsson 
183819baf839SRobert Olsson static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
183919baf839SRobert Olsson 			   struct sk_buff *skb, struct netlink_callback *cb)
184019baf839SRobert Olsson {
184119baf839SRobert Olsson 	int i, s_i;
184219baf839SRobert Olsson 	struct fib_alias *fa;
184319baf839SRobert Olsson 
184432ab5f80SAl Viro 	__be32 xkey = htonl(key);
184519baf839SRobert Olsson 
18461af5a8c4SPatrick McHardy 	s_i = cb->args[4];
184719baf839SRobert Olsson 	i = 0;
184819baf839SRobert Olsson 
18492373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
18502373ce1cSRobert Olsson 
18512373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fah, fa_list) {
185219baf839SRobert Olsson 		if (i < s_i) {
185319baf839SRobert Olsson 			i++;
185419baf839SRobert Olsson 			continue;
185519baf839SRobert Olsson 		}
185678c6671aSStephen Hemminger 		BUG_ON(!fa->fa_info);
185719baf839SRobert Olsson 
185819baf839SRobert Olsson 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
185919baf839SRobert Olsson 				  cb->nlh->nlmsg_seq,
186019baf839SRobert Olsson 				  RTM_NEWROUTE,
186119baf839SRobert Olsson 				  tb->tb_id,
186219baf839SRobert Olsson 				  fa->fa_type,
186319baf839SRobert Olsson 				  fa->fa_scope,
1864be403ea1SThomas Graf 				  xkey,
186519baf839SRobert Olsson 				  plen,
186619baf839SRobert Olsson 				  fa->fa_tos,
186790f66914SDavid S. Miller 				  fa->fa_info, 0) < 0) {
18681af5a8c4SPatrick McHardy 			cb->args[4] = i;
186919baf839SRobert Olsson 			return -1;
187019baf839SRobert Olsson 		}
187119baf839SRobert Olsson 		i++;
187219baf839SRobert Olsson 	}
18731af5a8c4SPatrick McHardy 	cb->args[4] = i;
187419baf839SRobert Olsson 	return skb->len;
187519baf839SRobert Olsson }
187619baf839SRobert Olsson 
187719baf839SRobert Olsson static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
187819baf839SRobert Olsson 			     struct netlink_callback *cb)
187919baf839SRobert Olsson {
188019baf839SRobert Olsson 	int h, s_h;
188119baf839SRobert Olsson 	struct list_head *fa_head;
188219baf839SRobert Olsson 	struct leaf *l = NULL;
188391b9a277SOlof Johansson 
18841af5a8c4SPatrick McHardy 	s_h = cb->args[3];
188519baf839SRobert Olsson 
188619baf839SRobert Olsson 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
188719baf839SRobert Olsson 		if (h < s_h)
188819baf839SRobert Olsson 			continue;
188919baf839SRobert Olsson 		if (h > s_h)
18901af5a8c4SPatrick McHardy 			memset(&cb->args[4], 0,
18911af5a8c4SPatrick McHardy 			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
189219baf839SRobert Olsson 
189319baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
189419baf839SRobert Olsson 
189519baf839SRobert Olsson 		if (!fa_head)
189619baf839SRobert Olsson 			continue;
189719baf839SRobert Olsson 
189819baf839SRobert Olsson 		if (list_empty(fa_head))
189919baf839SRobert Olsson 			continue;
190019baf839SRobert Olsson 
190119baf839SRobert Olsson 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
19021af5a8c4SPatrick McHardy 			cb->args[3] = h;
190319baf839SRobert Olsson 			return -1;
190419baf839SRobert Olsson 		}
190519baf839SRobert Olsson 	}
19061af5a8c4SPatrick McHardy 	cb->args[3] = h;
190719baf839SRobert Olsson 	return skb->len;
190819baf839SRobert Olsson }
190919baf839SRobert Olsson 
191019baf839SRobert Olsson static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
191119baf839SRobert Olsson {
191219baf839SRobert Olsson 	int m, s_m;
191319baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
191419baf839SRobert Olsson 
19151af5a8c4SPatrick McHardy 	s_m = cb->args[2];
191619baf839SRobert Olsson 
19172373ce1cSRobert Olsson 	rcu_read_lock();
191819baf839SRobert Olsson 	for (m = 0; m <= 32; m++) {
191919baf839SRobert Olsson 		if (m < s_m)
192019baf839SRobert Olsson 			continue;
192119baf839SRobert Olsson 		if (m > s_m)
19221af5a8c4SPatrick McHardy 			memset(&cb->args[3], 0,
19231af5a8c4SPatrick McHardy 				sizeof(cb->args) - 3*sizeof(cb->args[0]));
192419baf839SRobert Olsson 
192519baf839SRobert Olsson 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
19261af5a8c4SPatrick McHardy 			cb->args[2] = m;
192719baf839SRobert Olsson 			goto out;
192819baf839SRobert Olsson 		}
192919baf839SRobert Olsson 	}
19302373ce1cSRobert Olsson 	rcu_read_unlock();
19311af5a8c4SPatrick McHardy 	cb->args[2] = m;
193219baf839SRobert Olsson 	return skb->len;
193319baf839SRobert Olsson out:
19342373ce1cSRobert Olsson 	rcu_read_unlock();
193519baf839SRobert Olsson 	return -1;
193619baf839SRobert Olsson }
193719baf839SRobert Olsson 
19387f9b8052SStephen Hemminger void __init fib_hash_init(void)
19397f9b8052SStephen Hemminger {
19407f9b8052SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
19417f9b8052SStephen Hemminger 					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
19427f9b8052SStephen Hemminger }
194319baf839SRobert Olsson 
19447f9b8052SStephen Hemminger 
19457f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */
19467f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id)
194719baf839SRobert Olsson {
194819baf839SRobert Olsson 	struct fib_table *tb;
194919baf839SRobert Olsson 	struct trie *t;
195019baf839SRobert Olsson 
195119baf839SRobert Olsson 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
195219baf839SRobert Olsson 		     GFP_KERNEL);
195319baf839SRobert Olsson 	if (tb == NULL)
195419baf839SRobert Olsson 		return NULL;
195519baf839SRobert Olsson 
195619baf839SRobert Olsson 	tb->tb_id = id;
1957971b893eSDenis V. Lunev 	tb->tb_default = -1;
195819baf839SRobert Olsson 	tb->tb_lookup = fn_trie_lookup;
195919baf839SRobert Olsson 	tb->tb_insert = fn_trie_insert;
196019baf839SRobert Olsson 	tb->tb_delete = fn_trie_delete;
196119baf839SRobert Olsson 	tb->tb_flush = fn_trie_flush;
196219baf839SRobert Olsson 	tb->tb_select_default = fn_trie_select_default;
196319baf839SRobert Olsson 	tb->tb_dump = fn_trie_dump;
196419baf839SRobert Olsson 
196519baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
1966c28a1cf4SStephen Hemminger 	memset(t, 0, sizeof(*t));
196719baf839SRobert Olsson 
196819baf839SRobert Olsson 	if (id == RT_TABLE_LOCAL)
196978c6671aSStephen Hemminger 		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
197019baf839SRobert Olsson 
197119baf839SRobert Olsson 	return tb;
197219baf839SRobert Olsson }
197319baf839SRobert Olsson 
1974cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
1975cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
1976cb7b593cSStephen Hemminger struct fib_trie_iter {
19771c340b2fSDenis V. Lunev 	struct seq_net_private p;
1978877a9bffSEric W. Biederman 	struct trie *trie_local, *trie_main;
1979cb7b593cSStephen Hemminger 	struct tnode *tnode;
1980cb7b593cSStephen Hemminger 	struct trie *trie;
1981cb7b593cSStephen Hemminger 	unsigned index;
1982cb7b593cSStephen Hemminger 	unsigned depth;
1983cb7b593cSStephen Hemminger };
198419baf839SRobert Olsson 
1985cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
198619baf839SRobert Olsson {
1987cb7b593cSStephen Hemminger 	struct tnode *tn = iter->tnode;
1988cb7b593cSStephen Hemminger 	unsigned cindex = iter->index;
1989cb7b593cSStephen Hemminger 	struct tnode *p;
199019baf839SRobert Olsson 
19916640e697SEric W. Biederman 	/* A single entry routing table */
19926640e697SEric W. Biederman 	if (!tn)
19936640e697SEric W. Biederman 		return NULL;
19946640e697SEric W. Biederman 
1995cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1996cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
1997cb7b593cSStephen Hemminger rescan:
1998cb7b593cSStephen Hemminger 	while (cindex < (1<<tn->bits)) {
1999b59cfbf7SEric Dumazet 		struct node *n = tnode_get_child_rcu(tn, cindex);
200019baf839SRobert Olsson 
2001cb7b593cSStephen Hemminger 		if (n) {
200219baf839SRobert Olsson 			if (IS_LEAF(n)) {
2003cb7b593cSStephen Hemminger 				iter->tnode = tn;
2004cb7b593cSStephen Hemminger 				iter->index = cindex + 1;
200591b9a277SOlof Johansson 			} else {
2006cb7b593cSStephen Hemminger 				/* push down one level */
2007cb7b593cSStephen Hemminger 				iter->tnode = (struct tnode *) n;
2008cb7b593cSStephen Hemminger 				iter->index = 0;
2009cb7b593cSStephen Hemminger 				++iter->depth;
201019baf839SRobert Olsson 			}
2011cb7b593cSStephen Hemminger 			return n;
201219baf839SRobert Olsson 		}
201319baf839SRobert Olsson 
2014cb7b593cSStephen Hemminger 		++cindex;
2015cb7b593cSStephen Hemminger 	}
2016cb7b593cSStephen Hemminger 
2017cb7b593cSStephen Hemminger 	/* Current node exhausted, pop back up */
2018b59cfbf7SEric Dumazet 	p = node_parent_rcu((struct node *)tn);
2019cb7b593cSStephen Hemminger 	if (p) {
2020cb7b593cSStephen Hemminger 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2021cb7b593cSStephen Hemminger 		tn = p;
2022cb7b593cSStephen Hemminger 		--iter->depth;
2023cb7b593cSStephen Hemminger 		goto rescan;
2024cb7b593cSStephen Hemminger 	}
2025cb7b593cSStephen Hemminger 
2026cb7b593cSStephen Hemminger 	/* got root? */
2027cb7b593cSStephen Hemminger 	return NULL;
2028cb7b593cSStephen Hemminger }
2029cb7b593cSStephen Hemminger 
2030cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2031cb7b593cSStephen Hemminger 				       struct trie *t)
2032cb7b593cSStephen Hemminger {
20335ddf0eb2SRobert Olsson 	struct node *n ;
20345ddf0eb2SRobert Olsson 
20355ddf0eb2SRobert Olsson 	if (!t)
20365ddf0eb2SRobert Olsson 		return NULL;
20375ddf0eb2SRobert Olsson 
20385ddf0eb2SRobert Olsson 	n = rcu_dereference(t->trie);
20395ddf0eb2SRobert Olsson 
20405ddf0eb2SRobert Olsson 	if (!iter)
20415ddf0eb2SRobert Olsson 		return NULL;
2042cb7b593cSStephen Hemminger 
20436640e697SEric W. Biederman 	if (n) {
20446640e697SEric W. Biederman 		if (IS_TNODE(n)) {
2045cb7b593cSStephen Hemminger 			iter->tnode = (struct tnode *) n;
2046cb7b593cSStephen Hemminger 			iter->trie = t;
2047cb7b593cSStephen Hemminger 			iter->index = 0;
20481d25cd6cSRobert Olsson 			iter->depth = 1;
20496640e697SEric W. Biederman 		} else {
20506640e697SEric W. Biederman 			iter->tnode = NULL;
20516640e697SEric W. Biederman 			iter->trie  = t;
20526640e697SEric W. Biederman 			iter->index = 0;
20536640e697SEric W. Biederman 			iter->depth = 0;
20546640e697SEric W. Biederman 		}
2055cb7b593cSStephen Hemminger 		return n;
2056cb7b593cSStephen Hemminger 	}
2057cb7b593cSStephen Hemminger 	return NULL;
2058cb7b593cSStephen Hemminger }
2059cb7b593cSStephen Hemminger 
2060cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
206119baf839SRobert Olsson {
20622373ce1cSRobert Olsson 	struct node *n;
2063cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2064cb7b593cSStephen Hemminger 
2065cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
206619baf839SRobert Olsson 
20672373ce1cSRobert Olsson 	rcu_read_lock();
2068cb7b593cSStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n;
2069cb7b593cSStephen Hemminger 	     n = fib_trie_get_next(&iter)) {
2070cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
207119baf839SRobert Olsson 			s->leaves++;
2072cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2073cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2074cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
207591b9a277SOlof Johansson 		} else {
2076cb7b593cSStephen Hemminger 			const struct tnode *tn = (const struct tnode *) n;
2077cb7b593cSStephen Hemminger 			int i;
207819baf839SRobert Olsson 
207919baf839SRobert Olsson 			s->tnodes++;
208006ef921dSRobert Olsson 			if (tn->bits < MAX_STAT_DEPTH)
208119baf839SRobert Olsson 				s->nodesizes[tn->bits]++;
208206ef921dSRobert Olsson 
2083cb7b593cSStephen Hemminger 			for (i = 0; i < (1<<tn->bits); i++)
2084cb7b593cSStephen Hemminger 				if (!tn->child[i])
208519baf839SRobert Olsson 					s->nullpointers++;
208619baf839SRobert Olsson 		}
208719baf839SRobert Olsson 	}
20882373ce1cSRobert Olsson 	rcu_read_unlock();
208919baf839SRobert Olsson }
209019baf839SRobert Olsson 
209119baf839SRobert Olsson /*
209219baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
209319baf839SRobert Olsson  */
2094cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
209519baf839SRobert Olsson {
2096cb7b593cSStephen Hemminger 	unsigned i, max, pointers, bytes, avdepth;
209719baf839SRobert Olsson 
209819baf839SRobert Olsson 	if (stat->leaves)
209919baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
210019baf839SRobert Olsson 	else
210119baf839SRobert Olsson 		avdepth = 0;
210219baf839SRobert Olsson 
2103187b5188SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n", avdepth / 100, avdepth % 100 );
2104cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2105cb7b593cSStephen Hemminger 
2106cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
2107cb7b593cSStephen Hemminger 
2108cb7b593cSStephen Hemminger 	bytes = sizeof(struct leaf) * stat->leaves;
2109187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
211019baf839SRobert Olsson 	bytes += sizeof(struct tnode) * stat->tnodes;
211119baf839SRobert Olsson 
211206ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
211306ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
211419baf839SRobert Olsson 		max--;
211519baf839SRobert Olsson 
2116cb7b593cSStephen Hemminger 	pointers = 0;
211719baf839SRobert Olsson 	for (i = 1; i <= max; i++)
211819baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2119187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
212019baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
212119baf839SRobert Olsson 		}
2122cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2123187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2124cb7b593cSStephen Hemminger 
212519baf839SRobert Olsson 	bytes += sizeof(struct node *) * pointers;
2126187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2127187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
212866a2f7fdSStephen Hemminger }
212919baf839SRobert Olsson 
213019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
213166a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
213266a2f7fdSStephen Hemminger 			    const struct trie_use_stats *stats)
213366a2f7fdSStephen Hemminger {
213466a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
213566a2f7fdSStephen Hemminger 	seq_printf(seq,"gets = %u\n", stats->gets);
213666a2f7fdSStephen Hemminger 	seq_printf(seq,"backtracks = %u\n", stats->backtrack);
213766a2f7fdSStephen Hemminger 	seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed);
213866a2f7fdSStephen Hemminger 	seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss);
213966a2f7fdSStephen Hemminger 	seq_printf(seq,"null node hit= %u\n", stats->null_node_hit);
214066a2f7fdSStephen Hemminger 	seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped);
214119baf839SRobert Olsson }
214266a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
214366a2f7fdSStephen Hemminger 
2144d717a9a6SStephen Hemminger static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
2145d717a9a6SStephen Hemminger {
2146d717a9a6SStephen Hemminger 	struct trie_stat stat;
2147d717a9a6SStephen Hemminger 
2148d717a9a6SStephen Hemminger 	seq_printf(seq, "%s: %d\n", name, trie->size);
2149d717a9a6SStephen Hemminger 	trie_collect_stats(trie, &stat);
2150d717a9a6SStephen Hemminger 	trie_show_stats(seq, &stat);
2151d717a9a6SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
2152d717a9a6SStephen Hemminger 	trie_show_usage(seq, &trie->stats);
2153d717a9a6SStephen Hemminger #endif
2154d717a9a6SStephen Hemminger }
215519baf839SRobert Olsson 
215619baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
215719baf839SRobert Olsson {
21581c340b2fSDenis V. Lunev 	struct net *net = (struct net *)seq->private;
2159877a9bffSEric W. Biederman 	struct fib_table *tb;
2160877a9bffSEric W. Biederman 
2161d717a9a6SStephen Hemminger 	seq_printf(seq,
2162d717a9a6SStephen Hemminger 		   "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
216319baf839SRobert Olsson 		   sizeof(struct leaf), sizeof(struct tnode));
216419baf839SRobert Olsson 
2165d717a9a6SStephen Hemminger 	tb = fib_get_table(net, RT_TABLE_LOCAL);
2166d717a9a6SStephen Hemminger 	if (tb)
2167d717a9a6SStephen Hemminger 		fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
2168cb7b593cSStephen Hemminger 
2169d717a9a6SStephen Hemminger 	tb = fib_get_table(net, RT_TABLE_MAIN);
2170d717a9a6SStephen Hemminger 	if (tb)
2171d717a9a6SStephen Hemminger 		fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
2172cb7b593cSStephen Hemminger 
217319baf839SRobert Olsson 	return 0;
217419baf839SRobert Olsson }
217519baf839SRobert Olsson 
217619baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file)
217719baf839SRobert Olsson {
21781c340b2fSDenis V. Lunev 	int err;
21791c340b2fSDenis V. Lunev 	struct net *net;
21801c340b2fSDenis V. Lunev 
21811c340b2fSDenis V. Lunev 	net = get_proc_net(inode);
21821c340b2fSDenis V. Lunev 	if (net == NULL)
21831c340b2fSDenis V. Lunev 		return -ENXIO;
21841c340b2fSDenis V. Lunev 	err = single_open(file, fib_triestat_seq_show, net);
21851c340b2fSDenis V. Lunev 	if (err < 0) {
21861c340b2fSDenis V. Lunev 		put_net(net);
21871c340b2fSDenis V. Lunev 		return err;
21881c340b2fSDenis V. Lunev 	}
21891c340b2fSDenis V. Lunev 	return 0;
21901c340b2fSDenis V. Lunev }
21911c340b2fSDenis V. Lunev 
21921c340b2fSDenis V. Lunev static int fib_triestat_seq_release(struct inode *ino, struct file *f)
21931c340b2fSDenis V. Lunev {
21941c340b2fSDenis V. Lunev 	struct seq_file *seq = f->private_data;
21951c340b2fSDenis V. Lunev 	put_net(seq->private);
21961c340b2fSDenis V. Lunev 	return single_release(ino, f);
219719baf839SRobert Olsson }
219819baf839SRobert Olsson 
21999a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = {
220019baf839SRobert Olsson 	.owner	= THIS_MODULE,
220119baf839SRobert Olsson 	.open	= fib_triestat_seq_open,
220219baf839SRobert Olsson 	.read	= seq_read,
220319baf839SRobert Olsson 	.llseek	= seq_lseek,
22041c340b2fSDenis V. Lunev 	.release = fib_triestat_seq_release,
220519baf839SRobert Olsson };
220619baf839SRobert Olsson 
2207cb7b593cSStephen Hemminger static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2208cb7b593cSStephen Hemminger 				      loff_t pos)
220919baf839SRobert Olsson {
2210cb7b593cSStephen Hemminger 	loff_t idx = 0;
2211cb7b593cSStephen Hemminger 	struct node *n;
2212cb7b593cSStephen Hemminger 
2213877a9bffSEric W. Biederman 	for (n = fib_trie_get_first(iter, iter->trie_local);
2214cb7b593cSStephen Hemminger 	     n; ++idx, n = fib_trie_get_next(iter)) {
2215cb7b593cSStephen Hemminger 		if (pos == idx)
2216cb7b593cSStephen Hemminger 			return n;
221719baf839SRobert Olsson 	}
221819baf839SRobert Olsson 
2219877a9bffSEric W. Biederman 	for (n = fib_trie_get_first(iter, iter->trie_main);
2220cb7b593cSStephen Hemminger 	     n; ++idx, n = fib_trie_get_next(iter)) {
2221cb7b593cSStephen Hemminger 		if (pos == idx)
2222cb7b593cSStephen Hemminger 			return n;
222319baf839SRobert Olsson 	}
222419baf839SRobert Olsson 	return NULL;
222519baf839SRobert Olsson }
222619baf839SRobert Olsson 
222719baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2228c95aaf9aSStephen Hemminger 	__acquires(RCU)
222919baf839SRobert Olsson {
2230877a9bffSEric W. Biederman 	struct fib_trie_iter *iter = seq->private;
2231877a9bffSEric W. Biederman 	struct fib_table *tb;
2232877a9bffSEric W. Biederman 
2233877a9bffSEric W. Biederman 	if (!iter->trie_local) {
22341c340b2fSDenis V. Lunev 		tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
2235877a9bffSEric W. Biederman 		if (tb)
2236877a9bffSEric W. Biederman 			iter->trie_local = (struct trie *) tb->tb_data;
2237877a9bffSEric W. Biederman 	}
2238877a9bffSEric W. Biederman 	if (!iter->trie_main) {
22391c340b2fSDenis V. Lunev 		tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
2240877a9bffSEric W. Biederman 		if (tb)
2241877a9bffSEric W. Biederman 			iter->trie_main = (struct trie *) tb->tb_data;
2242877a9bffSEric W. Biederman 	}
2243cb7b593cSStephen Hemminger 	rcu_read_lock();
2244cb7b593cSStephen Hemminger 	if (*pos == 0)
224591b9a277SOlof Johansson 		return SEQ_START_TOKEN;
2246877a9bffSEric W. Biederman 	return fib_trie_get_idx(iter, *pos - 1);
224719baf839SRobert Olsson }
224819baf839SRobert Olsson 
224919baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
225019baf839SRobert Olsson {
2251cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
2252cb7b593cSStephen Hemminger 	void *l = v;
2253cb7b593cSStephen Hemminger 
225419baf839SRobert Olsson 	++*pos;
225591b9a277SOlof Johansson 	if (v == SEQ_START_TOKEN)
2256cb7b593cSStephen Hemminger 		return fib_trie_get_idx(iter, 0);
225791b9a277SOlof Johansson 
2258cb7b593cSStephen Hemminger 	v = fib_trie_get_next(iter);
2259cb7b593cSStephen Hemminger 	BUG_ON(v == l);
2260cb7b593cSStephen Hemminger 	if (v)
2261cb7b593cSStephen Hemminger 		return v;
2262cb7b593cSStephen Hemminger 
2263cb7b593cSStephen Hemminger 	/* continue scan in next trie */
2264877a9bffSEric W. Biederman 	if (iter->trie == iter->trie_local)
2265877a9bffSEric W. Biederman 		return fib_trie_get_first(iter, iter->trie_main);
2266cb7b593cSStephen Hemminger 
2267cb7b593cSStephen Hemminger 	return NULL;
226819baf839SRobert Olsson }
226919baf839SRobert Olsson 
227019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2271c95aaf9aSStephen Hemminger 	__releases(RCU)
227219baf839SRobert Olsson {
2273cb7b593cSStephen Hemminger 	rcu_read_unlock();
227419baf839SRobert Olsson }
227519baf839SRobert Olsson 
2276cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2277cb7b593cSStephen Hemminger {
2278cb7b593cSStephen Hemminger 	while (n-- > 0) seq_puts(seq, "   ");
2279cb7b593cSStephen Hemminger }
228019baf839SRobert Olsson 
228128d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2282cb7b593cSStephen Hemminger {
2283cb7b593cSStephen Hemminger 	switch (s) {
2284cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2285cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2286cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2287cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2288cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2289cb7b593cSStephen Hemminger 	default:
229028d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2291cb7b593cSStephen Hemminger 		return buf;
2292cb7b593cSStephen Hemminger 	}
2293cb7b593cSStephen Hemminger }
2294cb7b593cSStephen Hemminger 
2295cb7b593cSStephen Hemminger static const char *rtn_type_names[__RTN_MAX] = {
2296cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2297cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2298cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2299cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2300cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2301cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2302cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2303cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2304cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2305cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2306cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2307cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2308cb7b593cSStephen Hemminger };
2309cb7b593cSStephen Hemminger 
231028d36e37SEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned t)
2311cb7b593cSStephen Hemminger {
2312cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2313cb7b593cSStephen Hemminger 		return rtn_type_names[t];
231428d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2315cb7b593cSStephen Hemminger 	return buf;
2316cb7b593cSStephen Hemminger }
2317cb7b593cSStephen Hemminger 
2318cb7b593cSStephen Hemminger /* Pretty print the trie */
231919baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
232019baf839SRobert Olsson {
2321cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
2322cb7b593cSStephen Hemminger 	struct node *n = v;
232319baf839SRobert Olsson 
2324cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN)
2325cb7b593cSStephen Hemminger 		return 0;
232619baf839SRobert Olsson 
2327b59cfbf7SEric Dumazet 	if (!node_parent_rcu(n)) {
2328877a9bffSEric W. Biederman 		if (iter->trie == iter->trie_local)
2329cb7b593cSStephen Hemminger 			seq_puts(seq, "<local>:\n");
2330cb7b593cSStephen Hemminger 		else
2331cb7b593cSStephen Hemminger 			seq_puts(seq, "<main>:\n");
2332cb7b593cSStephen Hemminger 	}
2333095b8501SRobert Olsson 
2334095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2335095b8501SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
2336ab66b4a7SStephen Hemminger 		__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
2337095b8501SRobert Olsson 
23381d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
23391d25cd6cSRobert Olsson 		seq_printf(seq, "  +-- %d.%d.%d.%d/%d %d %d %d\n",
23401d25cd6cSRobert Olsson 			   NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
23411d25cd6cSRobert Olsson 			   tn->empty_children);
23421d25cd6cSRobert Olsson 
2343cb7b593cSStephen Hemminger 	} else {
2344cb7b593cSStephen Hemminger 		struct leaf *l = (struct leaf *) n;
2345cb7b593cSStephen Hemminger 		int i;
234632ab5f80SAl Viro 		__be32 val = htonl(l->key);
2347cb7b593cSStephen Hemminger 
2348cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2349cb7b593cSStephen Hemminger 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
2350cb7b593cSStephen Hemminger 		for (i = 32; i >= 0; i--) {
2351772cb712SRobert Olsson 			struct leaf_info *li = find_leaf_info(l, i);
235228d36e37SEric Dumazet 
2353cb7b593cSStephen Hemminger 			if (li) {
2354cb7b593cSStephen Hemminger 				struct fib_alias *fa;
235528d36e37SEric Dumazet 
2356cb7b593cSStephen Hemminger 				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
235728d36e37SEric Dumazet 					char buf1[32], buf2[32];
235828d36e37SEric Dumazet 
2359cb7b593cSStephen Hemminger 					seq_indent(seq, iter->depth+1);
2360cb7b593cSStephen Hemminger 					seq_printf(seq, "  /%d %s %s", i,
236128d36e37SEric Dumazet 						   rtn_scope(buf1, sizeof(buf1),
236228d36e37SEric Dumazet 							     fa->fa_scope),
236328d36e37SEric Dumazet 						   rtn_type(buf2, sizeof(buf2),
236428d36e37SEric Dumazet 							     fa->fa_type));
2365cb7b593cSStephen Hemminger 					if (fa->fa_tos)
2366cb7b593cSStephen Hemminger 						seq_printf(seq, "tos =%d\n",
2367cb7b593cSStephen Hemminger 							   fa->fa_tos);
2368cb7b593cSStephen Hemminger 					seq_putc(seq, '\n');
2369cb7b593cSStephen Hemminger 				}
2370cb7b593cSStephen Hemminger 			}
2371cb7b593cSStephen Hemminger 		}
237219baf839SRobert Olsson 	}
237319baf839SRobert Olsson 
237419baf839SRobert Olsson 	return 0;
237519baf839SRobert Olsson }
237619baf839SRobert Olsson 
2377f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
237819baf839SRobert Olsson 	.start  = fib_trie_seq_start,
237919baf839SRobert Olsson 	.next   = fib_trie_seq_next,
238019baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
238119baf839SRobert Olsson 	.show   = fib_trie_seq_show,
238219baf839SRobert Olsson };
238319baf839SRobert Olsson 
238419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file)
238519baf839SRobert Olsson {
23861c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_trie_seq_ops,
2387cf7732e4SPavel Emelyanov 			    sizeof(struct fib_trie_iter));
238819baf839SRobert Olsson }
238919baf839SRobert Olsson 
23909a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = {
239119baf839SRobert Olsson 	.owner  = THIS_MODULE,
239219baf839SRobert Olsson 	.open   = fib_trie_seq_open,
239319baf839SRobert Olsson 	.read   = seq_read,
239419baf839SRobert Olsson 	.llseek = seq_lseek,
23951c340b2fSDenis V. Lunev 	.release = seq_release_net,
239619baf839SRobert Olsson };
239719baf839SRobert Olsson 
239832ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2399cb7b593cSStephen Hemminger {
2400cb7b593cSStephen Hemminger 	static unsigned type2flags[RTN_MAX + 1] = {
2401cb7b593cSStephen Hemminger 		[7] = RTF_REJECT, [8] = RTF_REJECT,
2402cb7b593cSStephen Hemminger 	};
2403cb7b593cSStephen Hemminger 	unsigned flags = type2flags[type];
2404cb7b593cSStephen Hemminger 
2405cb7b593cSStephen Hemminger 	if (fi && fi->fib_nh->nh_gw)
2406cb7b593cSStephen Hemminger 		flags |= RTF_GATEWAY;
240732ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2408cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2409cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2410cb7b593cSStephen Hemminger 	return flags;
2411cb7b593cSStephen Hemminger }
2412cb7b593cSStephen Hemminger 
2413cb7b593cSStephen Hemminger /*
2414cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2415cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2416cb7b593cSStephen Hemminger  * 	and needs to be same as fib_hash output to avoid breaking
2417cb7b593cSStephen Hemminger  *	legacy utilities
2418cb7b593cSStephen Hemminger  */
2419cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2420cb7b593cSStephen Hemminger {
2421c9e53cbeSPatrick McHardy 	const struct fib_trie_iter *iter = seq->private;
2422cb7b593cSStephen Hemminger 	struct leaf *l = v;
2423cb7b593cSStephen Hemminger 	int i;
2424cb7b593cSStephen Hemminger 	char bf[128];
2425cb7b593cSStephen Hemminger 
2426cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2427cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2428cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2429cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2430cb7b593cSStephen Hemminger 		return 0;
2431cb7b593cSStephen Hemminger 	}
2432cb7b593cSStephen Hemminger 
2433877a9bffSEric W. Biederman 	if (iter->trie == iter->trie_local)
2434c9e53cbeSPatrick McHardy 		return 0;
2435cb7b593cSStephen Hemminger 	if (IS_TNODE(l))
2436cb7b593cSStephen Hemminger 		return 0;
2437cb7b593cSStephen Hemminger 
2438cb7b593cSStephen Hemminger 	for (i=32; i>=0; i--) {
2439772cb712SRobert Olsson 		struct leaf_info *li = find_leaf_info(l, i);
2440cb7b593cSStephen Hemminger 		struct fib_alias *fa;
244132ab5f80SAl Viro 		__be32 mask, prefix;
2442cb7b593cSStephen Hemminger 
2443cb7b593cSStephen Hemminger 		if (!li)
2444cb7b593cSStephen Hemminger 			continue;
2445cb7b593cSStephen Hemminger 
2446cb7b593cSStephen Hemminger 		mask = inet_make_mask(li->plen);
2447cb7b593cSStephen Hemminger 		prefix = htonl(l->key);
2448cb7b593cSStephen Hemminger 
2449cb7b593cSStephen Hemminger 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
24501371e37dSHerbert Xu 			const struct fib_info *fi = fa->fa_info;
2451cb7b593cSStephen Hemminger 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2452cb7b593cSStephen Hemminger 
2453cb7b593cSStephen Hemminger 			if (fa->fa_type == RTN_BROADCAST
2454cb7b593cSStephen Hemminger 			    || fa->fa_type == RTN_MULTICAST)
2455cb7b593cSStephen Hemminger 				continue;
2456cb7b593cSStephen Hemminger 
2457cb7b593cSStephen Hemminger 			if (fi)
2458cb7b593cSStephen Hemminger 				snprintf(bf, sizeof(bf),
2459cb7b593cSStephen Hemminger 					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2460cb7b593cSStephen Hemminger 					 fi->fib_dev ? fi->fib_dev->name : "*",
2461cb7b593cSStephen Hemminger 					 prefix,
2462cb7b593cSStephen Hemminger 					 fi->fib_nh->nh_gw, flags, 0, 0,
2463cb7b593cSStephen Hemminger 					 fi->fib_priority,
2464cb7b593cSStephen Hemminger 					 mask,
2465cb7b593cSStephen Hemminger 					 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2466cb7b593cSStephen Hemminger 					 fi->fib_window,
2467cb7b593cSStephen Hemminger 					 fi->fib_rtt >> 3);
2468cb7b593cSStephen Hemminger 			else
2469cb7b593cSStephen Hemminger 				snprintf(bf, sizeof(bf),
2470cb7b593cSStephen Hemminger 					 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2471cb7b593cSStephen Hemminger 					 prefix, 0, flags, 0, 0, 0,
2472cb7b593cSStephen Hemminger 					 mask, 0, 0, 0);
2473cb7b593cSStephen Hemminger 
2474cb7b593cSStephen Hemminger 			seq_printf(seq, "%-127s\n", bf);
2475cb7b593cSStephen Hemminger 		}
2476cb7b593cSStephen Hemminger 	}
2477cb7b593cSStephen Hemminger 
2478cb7b593cSStephen Hemminger 	return 0;
2479cb7b593cSStephen Hemminger }
2480cb7b593cSStephen Hemminger 
2481f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
2482cb7b593cSStephen Hemminger 	.start  = fib_trie_seq_start,
2483cb7b593cSStephen Hemminger 	.next   = fib_trie_seq_next,
2484cb7b593cSStephen Hemminger 	.stop   = fib_trie_seq_stop,
2485cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
2486cb7b593cSStephen Hemminger };
2487cb7b593cSStephen Hemminger 
2488cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file)
2489cb7b593cSStephen Hemminger {
24901c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_route_seq_ops,
2491cf7732e4SPavel Emelyanov 			    sizeof(struct fib_trie_iter));
2492cb7b593cSStephen Hemminger }
2493cb7b593cSStephen Hemminger 
24949a32144eSArjan van de Ven static const struct file_operations fib_route_fops = {
2495cb7b593cSStephen Hemminger 	.owner  = THIS_MODULE,
2496cb7b593cSStephen Hemminger 	.open   = fib_route_seq_open,
2497cb7b593cSStephen Hemminger 	.read   = seq_read,
2498cb7b593cSStephen Hemminger 	.llseek = seq_lseek,
24991c340b2fSDenis V. Lunev 	.release = seq_release_net,
2500cb7b593cSStephen Hemminger };
2501cb7b593cSStephen Hemminger 
250261a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
250319baf839SRobert Olsson {
250461a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2505cb7b593cSStephen Hemminger 		goto out1;
2506cb7b593cSStephen Hemminger 
250761a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
250861a02653SDenis V. Lunev 				  &fib_triestat_fops))
2509cb7b593cSStephen Hemminger 		goto out2;
2510cb7b593cSStephen Hemminger 
251161a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2512cb7b593cSStephen Hemminger 		goto out3;
2513cb7b593cSStephen Hemminger 
251419baf839SRobert Olsson 	return 0;
2515cb7b593cSStephen Hemminger 
2516cb7b593cSStephen Hemminger out3:
251761a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
2518cb7b593cSStephen Hemminger out2:
251961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
2520cb7b593cSStephen Hemminger out1:
2521cb7b593cSStephen Hemminger 	return -ENOMEM;
252219baf839SRobert Olsson }
252319baf839SRobert Olsson 
252461a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
252519baf839SRobert Olsson {
252661a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
252761a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
252861a02653SDenis V. Lunev 	proc_net_remove(net, "route");
252919baf839SRobert Olsson }
253019baf839SRobert Olsson 
253119baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
2532