xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 9a32144e)
119baf839SRobert Olsson /*
219baf839SRobert Olsson  *   This program is free software; you can redistribute it and/or
319baf839SRobert Olsson  *   modify it under the terms of the GNU General Public License
419baf839SRobert Olsson  *   as published by the Free Software Foundation; either version
519baf839SRobert Olsson  *   2 of the License, or (at your option) any later version.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
819baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
1119baf839SRobert Olsson  *     Agricultural Sciences.
1219baf839SRobert Olsson  *
1319baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1419baf839SRobert Olsson  *
1519baf839SRobert Olsson  * This work is based on the LPC-trie which is originally descibed in:
1619baf839SRobert Olsson  *
1719baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1819baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
1919baf839SRobert Olsson  * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  *
2219baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2419baf839SRobert Olsson  *
2519baf839SRobert Olsson  * Version:	$Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
2619baf839SRobert Olsson  *
2719baf839SRobert Olsson  *
2819baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2919baf839SRobert Olsson  *
3019baf839SRobert Olsson  *
3119baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3219baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
3319baf839SRobert Olsson  *		interface as the means of communication with the user level.
3419baf839SRobert Olsson  *
3519baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
3619baf839SRobert Olsson  *
3719baf839SRobert Olsson  *
3819baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3919baf839SRobert Olsson  *
4019baf839SRobert Olsson  *		This program is free software; you can redistribute it and/or
4119baf839SRobert Olsson  *		modify it under the terms of the GNU General Public License
4219baf839SRobert Olsson  *		as published by the Free Software Foundation; either version
4319baf839SRobert Olsson  *		2 of the License, or (at your option) any later version.
44fd966255SRobert Olsson  *
45fd966255SRobert Olsson  * Substantial contributions to this work comes from:
46fd966255SRobert Olsson  *
47fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
48fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
49fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
50fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
5119baf839SRobert Olsson  */
5219baf839SRobert Olsson 
53550e29bcSRobert Olsson #define VERSION "0.407"
5419baf839SRobert Olsson 
5519baf839SRobert Olsson #include <asm/uaccess.h>
5619baf839SRobert Olsson #include <asm/system.h>
5719baf839SRobert Olsson #include <asm/bitops.h>
5819baf839SRobert Olsson #include <linux/types.h>
5919baf839SRobert Olsson #include <linux/kernel.h>
6019baf839SRobert Olsson #include <linux/sched.h>
6119baf839SRobert Olsson #include <linux/mm.h>
6219baf839SRobert Olsson #include <linux/string.h>
6319baf839SRobert Olsson #include <linux/socket.h>
6419baf839SRobert Olsson #include <linux/sockios.h>
6519baf839SRobert Olsson #include <linux/errno.h>
6619baf839SRobert Olsson #include <linux/in.h>
6719baf839SRobert Olsson #include <linux/inet.h>
68cd8787abSStephen Hemminger #include <linux/inetdevice.h>
6919baf839SRobert Olsson #include <linux/netdevice.h>
7019baf839SRobert Olsson #include <linux/if_arp.h>
7119baf839SRobert Olsson #include <linux/proc_fs.h>
722373ce1cSRobert Olsson #include <linux/rcupdate.h>
7319baf839SRobert Olsson #include <linux/skbuff.h>
7419baf839SRobert Olsson #include <linux/netlink.h>
7519baf839SRobert Olsson #include <linux/init.h>
7619baf839SRobert Olsson #include <linux/list.h>
7719baf839SRobert Olsson #include <net/ip.h>
7819baf839SRobert Olsson #include <net/protocol.h>
7919baf839SRobert Olsson #include <net/route.h>
8019baf839SRobert Olsson #include <net/tcp.h>
8119baf839SRobert Olsson #include <net/sock.h>
8219baf839SRobert Olsson #include <net/ip_fib.h>
8319baf839SRobert Olsson #include "fib_lookup.h"
8419baf839SRobert Olsson 
8519baf839SRobert Olsson #undef CONFIG_IP_FIB_TRIE_STATS
8606ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
8719baf839SRobert Olsson 
8819baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key))
8919baf839SRobert Olsson #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
9019baf839SRobert Olsson #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
9119baf839SRobert Olsson 
9219baf839SRobert Olsson typedef unsigned int t_key;
9319baf839SRobert Olsson 
9419baf839SRobert Olsson #define T_TNODE 0
9519baf839SRobert Olsson #define T_LEAF  1
9619baf839SRobert Olsson #define NODE_TYPE_MASK	0x1UL
9791b9a277SOlof Johansson #define NODE_PARENT(node) \
982373ce1cSRobert Olsson 	((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
992373ce1cSRobert Olsson 
1002373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
1012373ce1cSRobert Olsson 
10291b9a277SOlof Johansson #define NODE_SET_PARENT(node, ptr)		\
1032373ce1cSRobert Olsson 	rcu_assign_pointer((node)->parent,	\
1042373ce1cSRobert Olsson 			   ((unsigned long)(ptr)) | NODE_TYPE(node))
10519baf839SRobert Olsson 
10691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF))
10791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF)
10819baf839SRobert Olsson 
10919baf839SRobert Olsson struct node {
11019baf839SRobert Olsson 	t_key key;
11191b9a277SOlof Johansson 	unsigned long parent;
11219baf839SRobert Olsson };
11319baf839SRobert Olsson 
11419baf839SRobert Olsson struct leaf {
11519baf839SRobert Olsson 	t_key key;
11691b9a277SOlof Johansson 	unsigned long parent;
11719baf839SRobert Olsson 	struct hlist_head list;
1182373ce1cSRobert Olsson 	struct rcu_head rcu;
11919baf839SRobert Olsson };
12019baf839SRobert Olsson 
12119baf839SRobert Olsson struct leaf_info {
12219baf839SRobert Olsson 	struct hlist_node hlist;
1232373ce1cSRobert Olsson 	struct rcu_head rcu;
12419baf839SRobert Olsson 	int plen;
12519baf839SRobert Olsson 	struct list_head falh;
12619baf839SRobert Olsson };
12719baf839SRobert Olsson 
12819baf839SRobert Olsson struct tnode {
12919baf839SRobert Olsson 	t_key key;
13091b9a277SOlof Johansson 	unsigned long parent;
13119baf839SRobert Olsson 	unsigned short pos:5;		/* 2log(KEYLENGTH) bits needed */
13219baf839SRobert Olsson 	unsigned short bits:5;		/* 2log(KEYLENGTH) bits needed */
13319baf839SRobert Olsson 	unsigned short full_children;	/* KEYLENGTH bits needed */
13419baf839SRobert Olsson 	unsigned short empty_children;	/* KEYLENGTH bits needed */
1352373ce1cSRobert Olsson 	struct rcu_head rcu;
13619baf839SRobert Olsson 	struct node *child[0];
13719baf839SRobert Olsson };
13819baf839SRobert Olsson 
13919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
14019baf839SRobert Olsson struct trie_use_stats {
14119baf839SRobert Olsson 	unsigned int gets;
14219baf839SRobert Olsson 	unsigned int backtrack;
14319baf839SRobert Olsson 	unsigned int semantic_match_passed;
14419baf839SRobert Olsson 	unsigned int semantic_match_miss;
14519baf839SRobert Olsson 	unsigned int null_node_hit;
1462f36895aSRobert Olsson 	unsigned int resize_node_skipped;
14719baf839SRobert Olsson };
14819baf839SRobert Olsson #endif
14919baf839SRobert Olsson 
15019baf839SRobert Olsson struct trie_stat {
15119baf839SRobert Olsson 	unsigned int totdepth;
15219baf839SRobert Olsson 	unsigned int maxdepth;
15319baf839SRobert Olsson 	unsigned int tnodes;
15419baf839SRobert Olsson 	unsigned int leaves;
15519baf839SRobert Olsson 	unsigned int nullpointers;
15606ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
15719baf839SRobert Olsson };
15819baf839SRobert Olsson 
15919baf839SRobert Olsson struct trie {
16019baf839SRobert Olsson 	struct node *trie;
16119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
16219baf839SRobert Olsson 	struct trie_use_stats stats;
16319baf839SRobert Olsson #endif
16419baf839SRobert Olsson 	int size;
16519baf839SRobert Olsson 	unsigned int revision;
16619baf839SRobert Olsson };
16719baf839SRobert Olsson 
16819baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
16919baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
17019baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn);
1712f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn);
1722f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn);
17319baf839SRobert Olsson static void tnode_free(struct tnode *tn);
17419baf839SRobert Olsson 
175e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly;
17619baf839SRobert Olsson static struct trie *trie_local = NULL, *trie_main = NULL;
17719baf839SRobert Olsson 
1782373ce1cSRobert Olsson 
1792373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
1802373ce1cSRobert Olsson 
18119baf839SRobert Olsson static inline struct node *tnode_get_child(struct tnode *tn, int i)
18219baf839SRobert Olsson {
18391b9a277SOlof Johansson 	BUG_ON(i >= 1 << tn->bits);
18419baf839SRobert Olsson 
1852373ce1cSRobert Olsson 	return rcu_dereference(tn->child[i]);
18619baf839SRobert Olsson }
18719baf839SRobert Olsson 
188bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn)
18919baf839SRobert Olsson {
19019baf839SRobert Olsson 	return 1 << tn->bits;
19119baf839SRobert Olsson }
19219baf839SRobert Olsson 
19319baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
19419baf839SRobert Olsson {
19519baf839SRobert Olsson 	if (offset < KEYLENGTH)
19619baf839SRobert Olsson 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
19719baf839SRobert Olsson 	else
19819baf839SRobert Olsson 		return 0;
19919baf839SRobert Olsson }
20019baf839SRobert Olsson 
20119baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b)
20219baf839SRobert Olsson {
20319baf839SRobert Olsson 	return a == b;
20419baf839SRobert Olsson }
20519baf839SRobert Olsson 
20619baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
20719baf839SRobert Olsson {
20819baf839SRobert Olsson 	if (bits == 0 || offset >= KEYLENGTH)
20919baf839SRobert Olsson 		return 1;
21019baf839SRobert Olsson 	bits = bits > KEYLENGTH ? KEYLENGTH : bits;
21119baf839SRobert Olsson 	return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
21219baf839SRobert Olsson }
21319baf839SRobert Olsson 
21419baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b)
21519baf839SRobert Olsson {
21619baf839SRobert Olsson 	t_key diff = a ^ b;
21719baf839SRobert Olsson 	int i = offset;
21819baf839SRobert Olsson 
21919baf839SRobert Olsson 	if (!diff)
22019baf839SRobert Olsson 		return 0;
22119baf839SRobert Olsson 	while ((diff << i) >> (KEYLENGTH-1) == 0)
22219baf839SRobert Olsson 		i++;
22319baf839SRobert Olsson 	return i;
22419baf839SRobert Olsson }
22519baf839SRobert Olsson 
22619baf839SRobert Olsson /*
22719baf839SRobert Olsson   To understand this stuff, an understanding of keys and all their bits is
22819baf839SRobert Olsson   necessary. Every node in the trie has a key associated with it, but not
22919baf839SRobert Olsson   all of the bits in that key are significant.
23019baf839SRobert Olsson 
23119baf839SRobert Olsson   Consider a node 'n' and its parent 'tp'.
23219baf839SRobert Olsson 
23319baf839SRobert Olsson   If n is a leaf, every bit in its key is significant. Its presence is
234772cb712SRobert Olsson   necessitated by path compression, since during a tree traversal (when
23519baf839SRobert Olsson   searching for a leaf - unless we are doing an insertion) we will completely
23619baf839SRobert Olsson   ignore all skipped bits we encounter. Thus we need to verify, at the end of
23719baf839SRobert Olsson   a potentially successful search, that we have indeed been walking the
23819baf839SRobert Olsson   correct key path.
23919baf839SRobert Olsson 
24019baf839SRobert Olsson   Note that we can never "miss" the correct key in the tree if present by
24119baf839SRobert Olsson   following the wrong path. Path compression ensures that segments of the key
24219baf839SRobert Olsson   that are the same for all keys with a given prefix are skipped, but the
24319baf839SRobert Olsson   skipped part *is* identical for each node in the subtrie below the skipped
24419baf839SRobert Olsson   bit! trie_insert() in this implementation takes care of that - note the
24519baf839SRobert Olsson   call to tkey_sub_equals() in trie_insert().
24619baf839SRobert Olsson 
24719baf839SRobert Olsson   if n is an internal node - a 'tnode' here, the various parts of its key
24819baf839SRobert Olsson   have many different meanings.
24919baf839SRobert Olsson 
25019baf839SRobert Olsson   Example:
25119baf839SRobert Olsson   _________________________________________________________________
25219baf839SRobert Olsson   | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
25319baf839SRobert Olsson   -----------------------------------------------------------------
25419baf839SRobert Olsson     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
25519baf839SRobert Olsson 
25619baf839SRobert Olsson   _________________________________________________________________
25719baf839SRobert Olsson   | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
25819baf839SRobert Olsson   -----------------------------------------------------------------
25919baf839SRobert Olsson    16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
26019baf839SRobert Olsson 
26119baf839SRobert Olsson   tp->pos = 7
26219baf839SRobert Olsson   tp->bits = 3
26319baf839SRobert Olsson   n->pos = 15
26419baf839SRobert Olsson   n->bits = 4
26519baf839SRobert Olsson 
26619baf839SRobert Olsson   First, let's just ignore the bits that come before the parent tp, that is
26719baf839SRobert Olsson   the bits from 0 to (tp->pos-1). They are *known* but at this point we do
26819baf839SRobert Olsson   not use them for anything.
26919baf839SRobert Olsson 
27019baf839SRobert Olsson   The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
27119baf839SRobert Olsson   index into the parent's child array. That is, they will be used to find
27219baf839SRobert Olsson   'n' among tp's children.
27319baf839SRobert Olsson 
27419baf839SRobert Olsson   The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
27519baf839SRobert Olsson   for the node n.
27619baf839SRobert Olsson 
27719baf839SRobert Olsson   All the bits we have seen so far are significant to the node n. The rest
27819baf839SRobert Olsson   of the bits are really not needed or indeed known in n->key.
27919baf839SRobert Olsson 
28019baf839SRobert Olsson   The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
28119baf839SRobert Olsson   n's child array, and will of course be different for each child.
28219baf839SRobert Olsson 
283c877efb2SStephen Hemminger 
28419baf839SRobert Olsson   The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
28519baf839SRobert Olsson   at this point.
28619baf839SRobert Olsson 
28719baf839SRobert Olsson */
28819baf839SRobert Olsson 
2890c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn)
29019baf839SRobert Olsson {
2910c7770c7SStephen Hemminger 	WARN_ON(tn && tn->pos+tn->bits > 32);
29219baf839SRobert Olsson }
29319baf839SRobert Olsson 
29419baf839SRobert Olsson static int halve_threshold = 25;
29519baf839SRobert Olsson static int inflate_threshold = 50;
296e6308be8SRobert Olsson static int halve_threshold_root = 15;
297e6308be8SRobert Olsson static int inflate_threshold_root = 25;
29819baf839SRobert Olsson 
2992373ce1cSRobert Olsson 
3002373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
3012373ce1cSRobert Olsson {
3022373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
3032373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
3042373ce1cSRobert Olsson }
3052373ce1cSRobert Olsson 
3062373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3072373ce1cSRobert Olsson {
3082373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3092373ce1cSRobert Olsson }
3102373ce1cSRobert Olsson 
3112373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head)
3122373ce1cSRobert Olsson {
3132373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf, rcu));
3142373ce1cSRobert Olsson }
3152373ce1cSRobert Olsson 
3162373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head)
3172373ce1cSRobert Olsson {
3182373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf_info, rcu));
3192373ce1cSRobert Olsson }
3202373ce1cSRobert Olsson 
3212373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf)
3222373ce1cSRobert Olsson {
3232373ce1cSRobert Olsson 	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
3242373ce1cSRobert Olsson }
3252373ce1cSRobert Olsson 
3262373ce1cSRobert Olsson static struct tnode *tnode_alloc(unsigned int size)
3272373ce1cSRobert Olsson {
3282373ce1cSRobert Olsson 	struct page *pages;
3292373ce1cSRobert Olsson 
3302373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3312373ce1cSRobert Olsson 		return kcalloc(size, 1, GFP_KERNEL);
3322373ce1cSRobert Olsson 
3332373ce1cSRobert Olsson 	pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
3342373ce1cSRobert Olsson 	if (!pages)
3352373ce1cSRobert Olsson 		return NULL;
3362373ce1cSRobert Olsson 
3372373ce1cSRobert Olsson 	return page_address(pages);
3382373ce1cSRobert Olsson }
3392373ce1cSRobert Olsson 
3402373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head)
3412373ce1cSRobert Olsson {
3422373ce1cSRobert Olsson 	struct tnode *tn = container_of(head, struct tnode, rcu);
3432373ce1cSRobert Olsson 	unsigned int size = sizeof(struct tnode) +
3442373ce1cSRobert Olsson 		(1 << tn->bits) * sizeof(struct node *);
3452373ce1cSRobert Olsson 
3462373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3472373ce1cSRobert Olsson 		kfree(tn);
3482373ce1cSRobert Olsson 	else
3492373ce1cSRobert Olsson 		free_pages((unsigned long)tn, get_order(size));
3502373ce1cSRobert Olsson }
3512373ce1cSRobert Olsson 
3522373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn)
3532373ce1cSRobert Olsson {
354550e29bcSRobert Olsson 	if(IS_LEAF(tn)) {
355550e29bcSRobert Olsson 		struct leaf *l = (struct leaf *) tn;
356550e29bcSRobert Olsson 		call_rcu_bh(&l->rcu, __leaf_free_rcu);
357550e29bcSRobert Olsson 	}
358550e29bcSRobert Olsson 	else
3592373ce1cSRobert Olsson 		call_rcu(&tn->rcu, __tnode_free_rcu);
3602373ce1cSRobert Olsson }
3612373ce1cSRobert Olsson 
36219baf839SRobert Olsson static struct leaf *leaf_new(void)
36319baf839SRobert Olsson {
36419baf839SRobert Olsson 	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
36519baf839SRobert Olsson 	if (l) {
3662373ce1cSRobert Olsson 		l->parent = T_LEAF;
36719baf839SRobert Olsson 		INIT_HLIST_HEAD(&l->list);
36819baf839SRobert Olsson 	}
36919baf839SRobert Olsson 	return l;
37019baf839SRobert Olsson }
37119baf839SRobert Olsson 
37219baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen)
37319baf839SRobert Olsson {
37419baf839SRobert Olsson 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
3752373ce1cSRobert Olsson 	if (li) {
37619baf839SRobert Olsson 		li->plen = plen;
37719baf839SRobert Olsson 		INIT_LIST_HEAD(&li->falh);
3782373ce1cSRobert Olsson 	}
37919baf839SRobert Olsson 	return li;
38019baf839SRobert Olsson }
38119baf839SRobert Olsson 
38219baf839SRobert Olsson static struct tnode* tnode_new(t_key key, int pos, int bits)
38319baf839SRobert Olsson {
38419baf839SRobert Olsson 	int nchildren = 1<<bits;
38519baf839SRobert Olsson 	int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
386f0e36f8cSPatrick McHardy 	struct tnode *tn = tnode_alloc(sz);
38719baf839SRobert Olsson 
38819baf839SRobert Olsson 	if (tn) {
38919baf839SRobert Olsson 		memset(tn, 0, sz);
3902373ce1cSRobert Olsson 		tn->parent = T_TNODE;
39119baf839SRobert Olsson 		tn->pos = pos;
39219baf839SRobert Olsson 		tn->bits = bits;
39319baf839SRobert Olsson 		tn->key = key;
39419baf839SRobert Olsson 		tn->full_children = 0;
39519baf839SRobert Olsson 		tn->empty_children = 1<<bits;
39619baf839SRobert Olsson 	}
397c877efb2SStephen Hemminger 
3980c7770c7SStephen Hemminger 	pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
39919baf839SRobert Olsson 		 (unsigned int) (sizeof(struct node) * 1<<bits));
40019baf839SRobert Olsson 	return tn;
40119baf839SRobert Olsson }
40219baf839SRobert Olsson 
40319baf839SRobert Olsson /*
40419baf839SRobert Olsson  * Check whether a tnode 'n' is "full", i.e. it is an internal node
40519baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
40619baf839SRobert Olsson  */
40719baf839SRobert Olsson 
408bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n)
40919baf839SRobert Olsson {
41019baf839SRobert Olsson 	if (n == NULL || IS_LEAF(n))
41119baf839SRobert Olsson 		return 0;
41219baf839SRobert Olsson 
41319baf839SRobert Olsson 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
41419baf839SRobert Olsson }
41519baf839SRobert Olsson 
41619baf839SRobert Olsson static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
41719baf839SRobert Olsson {
41819baf839SRobert Olsson 	tnode_put_child_reorg(tn, i, n, -1);
41919baf839SRobert Olsson }
42019baf839SRobert Olsson 
42119baf839SRobert Olsson  /*
42219baf839SRobert Olsson   * Add a child at position i overwriting the old value.
42319baf839SRobert Olsson   * Update the value of full_children and empty_children.
42419baf839SRobert Olsson   */
42519baf839SRobert Olsson 
42619baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
42719baf839SRobert Olsson {
4282373ce1cSRobert Olsson 	struct node *chi = tn->child[i];
42919baf839SRobert Olsson 	int isfull;
43019baf839SRobert Olsson 
4310c7770c7SStephen Hemminger 	BUG_ON(i >= 1<<tn->bits);
4320c7770c7SStephen Hemminger 
43319baf839SRobert Olsson 
43419baf839SRobert Olsson 	/* update emptyChildren */
43519baf839SRobert Olsson 	if (n == NULL && chi != NULL)
43619baf839SRobert Olsson 		tn->empty_children++;
43719baf839SRobert Olsson 	else if (n != NULL && chi == NULL)
43819baf839SRobert Olsson 		tn->empty_children--;
43919baf839SRobert Olsson 
44019baf839SRobert Olsson 	/* update fullChildren */
44119baf839SRobert Olsson 	if (wasfull == -1)
44219baf839SRobert Olsson 		wasfull = tnode_full(tn, chi);
44319baf839SRobert Olsson 
44419baf839SRobert Olsson 	isfull = tnode_full(tn, n);
44519baf839SRobert Olsson 	if (wasfull && !isfull)
44619baf839SRobert Olsson 		tn->full_children--;
44719baf839SRobert Olsson 	else if (!wasfull && isfull)
44819baf839SRobert Olsson 		tn->full_children++;
44991b9a277SOlof Johansson 
45019baf839SRobert Olsson 	if (n)
45119baf839SRobert Olsson 		NODE_SET_PARENT(n, tn);
45219baf839SRobert Olsson 
4532373ce1cSRobert Olsson 	rcu_assign_pointer(tn->child[i], n);
45419baf839SRobert Olsson }
45519baf839SRobert Olsson 
45619baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn)
45719baf839SRobert Olsson {
45819baf839SRobert Olsson 	int i;
4592f36895aSRobert Olsson 	int err = 0;
4602f80b3c8SRobert Olsson 	struct tnode *old_tn;
461e6308be8SRobert Olsson 	int inflate_threshold_use;
462e6308be8SRobert Olsson 	int halve_threshold_use;
46319baf839SRobert Olsson 
46419baf839SRobert Olsson 	if (!tn)
46519baf839SRobert Olsson 		return NULL;
46619baf839SRobert Olsson 
4670c7770c7SStephen Hemminger 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
46819baf839SRobert Olsson 		 tn, inflate_threshold, halve_threshold);
46919baf839SRobert Olsson 
47019baf839SRobert Olsson 	/* No children */
47119baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn)) {
47219baf839SRobert Olsson 		tnode_free(tn);
47319baf839SRobert Olsson 		return NULL;
47419baf839SRobert Olsson 	}
47519baf839SRobert Olsson 	/* One child */
47619baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
47719baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
47891b9a277SOlof Johansson 			struct node *n;
47919baf839SRobert Olsson 
48091b9a277SOlof Johansson 			n = tn->child[i];
4812373ce1cSRobert Olsson 			if (!n)
48291b9a277SOlof Johansson 				continue;
48319baf839SRobert Olsson 
48419baf839SRobert Olsson 			/* compress one level */
4852373ce1cSRobert Olsson 			NODE_SET_PARENT(n, NULL);
48619baf839SRobert Olsson 			tnode_free(tn);
48719baf839SRobert Olsson 			return n;
48819baf839SRobert Olsson 		}
48919baf839SRobert Olsson 	/*
49019baf839SRobert Olsson 	 * Double as long as the resulting node has a number of
49119baf839SRobert Olsson 	 * nonempty nodes that are above the threshold.
49219baf839SRobert Olsson 	 */
49319baf839SRobert Olsson 
49419baf839SRobert Olsson 	/*
49519baf839SRobert Olsson 	 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
49619baf839SRobert Olsson 	 * the Helsinki University of Technology and Matti Tikkanen of Nokia
49719baf839SRobert Olsson 	 * Telecommunications, page 6:
49819baf839SRobert Olsson 	 * "A node is doubled if the ratio of non-empty children to all
49919baf839SRobert Olsson 	 * children in the *doubled* node is at least 'high'."
50019baf839SRobert Olsson 	 *
50119baf839SRobert Olsson 	 * 'high' in this instance is the variable 'inflate_threshold'. It
50219baf839SRobert Olsson 	 * is expressed as a percentage, so we multiply it with
50319baf839SRobert Olsson 	 * tnode_child_length() and instead of multiplying by 2 (since the
50419baf839SRobert Olsson 	 * child array will be doubled by inflate()) and multiplying
50519baf839SRobert Olsson 	 * the left-hand side by 100 (to handle the percentage thing) we
50619baf839SRobert Olsson 	 * multiply the left-hand side by 50.
50719baf839SRobert Olsson 	 *
50819baf839SRobert Olsson 	 * The left-hand side may look a bit weird: tnode_child_length(tn)
50919baf839SRobert Olsson 	 * - tn->empty_children is of course the number of non-null children
51019baf839SRobert Olsson 	 * in the current node. tn->full_children is the number of "full"
51119baf839SRobert Olsson 	 * children, that is non-null tnodes with a skip value of 0.
51219baf839SRobert Olsson 	 * All of those will be doubled in the resulting inflated tnode, so
51319baf839SRobert Olsson 	 * we just count them one extra time here.
51419baf839SRobert Olsson 	 *
51519baf839SRobert Olsson 	 * A clearer way to write this would be:
51619baf839SRobert Olsson 	 *
51719baf839SRobert Olsson 	 * to_be_doubled = tn->full_children;
51819baf839SRobert Olsson 	 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
51919baf839SRobert Olsson 	 *     tn->full_children;
52019baf839SRobert Olsson 	 *
52119baf839SRobert Olsson 	 * new_child_length = tnode_child_length(tn) * 2;
52219baf839SRobert Olsson 	 *
52319baf839SRobert Olsson 	 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
52419baf839SRobert Olsson 	 *      new_child_length;
52519baf839SRobert Olsson 	 * if (new_fill_factor >= inflate_threshold)
52619baf839SRobert Olsson 	 *
52719baf839SRobert Olsson 	 * ...and so on, tho it would mess up the while () loop.
52819baf839SRobert Olsson 	 *
52919baf839SRobert Olsson 	 * anyway,
53019baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
53119baf839SRobert Olsson 	 *      inflate_threshold
53219baf839SRobert Olsson 	 *
53319baf839SRobert Olsson 	 * avoid a division:
53419baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
53519baf839SRobert Olsson 	 *      inflate_threshold * new_child_length
53619baf839SRobert Olsson 	 *
53719baf839SRobert Olsson 	 * expand not_to_be_doubled and to_be_doubled, and shorten:
53819baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
53919baf839SRobert Olsson 	 *    tn->full_children) >= inflate_threshold * new_child_length
54019baf839SRobert Olsson 	 *
54119baf839SRobert Olsson 	 * expand new_child_length:
54219baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
54319baf839SRobert Olsson 	 *    tn->full_children) >=
54419baf839SRobert Olsson 	 *      inflate_threshold * tnode_child_length(tn) * 2
54519baf839SRobert Olsson 	 *
54619baf839SRobert Olsson 	 * shorten again:
54719baf839SRobert Olsson 	 * 50 * (tn->full_children + tnode_child_length(tn) -
54819baf839SRobert Olsson 	 *    tn->empty_children) >= inflate_threshold *
54919baf839SRobert Olsson 	 *    tnode_child_length(tn)
55019baf839SRobert Olsson 	 *
55119baf839SRobert Olsson 	 */
55219baf839SRobert Olsson 
55319baf839SRobert Olsson 	check_tnode(tn);
55419baf839SRobert Olsson 
555e6308be8SRobert Olsson 	/* Keep root node larger  */
556e6308be8SRobert Olsson 
557e6308be8SRobert Olsson 	if(!tn->parent)
558e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold_root;
559e6308be8SRobert Olsson 	else
560e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold;
561e6308be8SRobert Olsson 
5622f36895aSRobert Olsson 	err = 0;
56319baf839SRobert Olsson 	while ((tn->full_children > 0 &&
56419baf839SRobert Olsson 	       50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
565e6308be8SRobert Olsson 				inflate_threshold_use * tnode_child_length(tn))) {
56619baf839SRobert Olsson 
5672f80b3c8SRobert Olsson 		old_tn = tn;
5682f80b3c8SRobert Olsson 		tn = inflate(t, tn);
5692f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
5702f80b3c8SRobert Olsson 			tn = old_tn;
5712f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
5722f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
5732f36895aSRobert Olsson #endif
5742f36895aSRobert Olsson 			break;
5752f36895aSRobert Olsson 		}
57619baf839SRobert Olsson 	}
57719baf839SRobert Olsson 
57819baf839SRobert Olsson 	check_tnode(tn);
57919baf839SRobert Olsson 
58019baf839SRobert Olsson 	/*
58119baf839SRobert Olsson 	 * Halve as long as the number of empty children in this
58219baf839SRobert Olsson 	 * node is above threshold.
58319baf839SRobert Olsson 	 */
5842f36895aSRobert Olsson 
585e6308be8SRobert Olsson 
586e6308be8SRobert Olsson 	/* Keep root node larger  */
587e6308be8SRobert Olsson 
588e6308be8SRobert Olsson 	if(!tn->parent)
589e6308be8SRobert Olsson 		halve_threshold_use = halve_threshold_root;
590e6308be8SRobert Olsson 	else
591e6308be8SRobert Olsson 		halve_threshold_use = halve_threshold;
592e6308be8SRobert Olsson 
5932f36895aSRobert Olsson 	err = 0;
59419baf839SRobert Olsson 	while (tn->bits > 1 &&
59519baf839SRobert Olsson 	       100 * (tnode_child_length(tn) - tn->empty_children) <
596e6308be8SRobert Olsson 	       halve_threshold_use * tnode_child_length(tn)) {
59719baf839SRobert Olsson 
5982f80b3c8SRobert Olsson 		old_tn = tn;
5992f80b3c8SRobert Olsson 		tn = halve(t, tn);
6002f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6012f80b3c8SRobert Olsson 			tn = old_tn;
6022f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6032f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6042f36895aSRobert Olsson #endif
6052f36895aSRobert Olsson 			break;
6062f36895aSRobert Olsson 		}
6072f36895aSRobert Olsson 	}
6082f36895aSRobert Olsson 
60919baf839SRobert Olsson 
61019baf839SRobert Olsson 	/* Only one child remains */
61119baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
61219baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
61391b9a277SOlof Johansson 			struct node *n;
61419baf839SRobert Olsson 
61591b9a277SOlof Johansson 			n = tn->child[i];
6162373ce1cSRobert Olsson 			if (!n)
61791b9a277SOlof Johansson 				continue;
61891b9a277SOlof Johansson 
61991b9a277SOlof Johansson 			/* compress one level */
62091b9a277SOlof Johansson 
6212373ce1cSRobert Olsson 			NODE_SET_PARENT(n, NULL);
62219baf839SRobert Olsson 			tnode_free(tn);
62319baf839SRobert Olsson 			return n;
62419baf839SRobert Olsson 		}
62519baf839SRobert Olsson 
62619baf839SRobert Olsson 	return (struct node *) tn;
62719baf839SRobert Olsson }
62819baf839SRobert Olsson 
6292f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn)
63019baf839SRobert Olsson {
63119baf839SRobert Olsson 	struct tnode *inode;
63219baf839SRobert Olsson 	struct tnode *oldtnode = tn;
63319baf839SRobert Olsson 	int olen = tnode_child_length(tn);
63419baf839SRobert Olsson 	int i;
63519baf839SRobert Olsson 
6360c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
63719baf839SRobert Olsson 
63819baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
63919baf839SRobert Olsson 
6402f80b3c8SRobert Olsson 	if (!tn)
6412f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
6422f36895aSRobert Olsson 
6432f36895aSRobert Olsson 	/*
6442f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
6452f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
6462f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and  inflate
6472f36895aSRobert Olsson 	 * of tnode is ignored.
6482f36895aSRobert Olsson 	 */
6492f36895aSRobert Olsson 
6502f36895aSRobert Olsson 	for (i = 0; i < olen; i++) {
6512f36895aSRobert Olsson 		struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
6522f36895aSRobert Olsson 
6532f36895aSRobert Olsson 		if (inode &&
6542f36895aSRobert Olsson 		    IS_TNODE(inode) &&
6552f36895aSRobert Olsson 		    inode->pos == oldtnode->pos + oldtnode->bits &&
6562f36895aSRobert Olsson 		    inode->bits > 1) {
6572f36895aSRobert Olsson 			struct tnode *left, *right;
6582f36895aSRobert Olsson 			t_key m = TKEY_GET_MASK(inode->pos, 1);
6592f36895aSRobert Olsson 
6602f36895aSRobert Olsson 			left = tnode_new(inode->key&(~m), inode->pos + 1,
6612f36895aSRobert Olsson 					 inode->bits - 1);
6622f80b3c8SRobert Olsson 			if (!left)
6632f80b3c8SRobert Olsson 				goto nomem;
6642f36895aSRobert Olsson 
6652f36895aSRobert Olsson 			right = tnode_new(inode->key|m, inode->pos + 1,
6662f36895aSRobert Olsson 					  inode->bits - 1);
6672f36895aSRobert Olsson 
6682f36895aSRobert Olsson 			if (!right) {
6692f80b3c8SRobert Olsson 				tnode_free(left);
6702f80b3c8SRobert Olsson 				goto nomem;
6712f36895aSRobert Olsson 			}
6722f36895aSRobert Olsson 
6732f36895aSRobert Olsson 			put_child(t, tn, 2*i, (struct node *) left);
6742f36895aSRobert Olsson 			put_child(t, tn, 2*i+1, (struct node *) right);
6752f36895aSRobert Olsson 		}
6762f36895aSRobert Olsson 	}
6772f36895aSRobert Olsson 
67819baf839SRobert Olsson 	for (i = 0; i < olen; i++) {
67919baf839SRobert Olsson 		struct node *node = tnode_get_child(oldtnode, i);
68091b9a277SOlof Johansson 		struct tnode *left, *right;
68191b9a277SOlof Johansson 		int size, j;
68219baf839SRobert Olsson 
68319baf839SRobert Olsson 		/* An empty child */
68419baf839SRobert Olsson 		if (node == NULL)
68519baf839SRobert Olsson 			continue;
68619baf839SRobert Olsson 
68719baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
68819baf839SRobert Olsson 
68919baf839SRobert Olsson 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
69019baf839SRobert Olsson 		   tn->pos + tn->bits - 1) {
6912f36895aSRobert Olsson 			if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
69219baf839SRobert Olsson 					     1) == 0)
69319baf839SRobert Olsson 				put_child(t, tn, 2*i, node);
69419baf839SRobert Olsson 			else
69519baf839SRobert Olsson 				put_child(t, tn, 2*i+1, node);
69619baf839SRobert Olsson 			continue;
69719baf839SRobert Olsson 		}
69819baf839SRobert Olsson 
69919baf839SRobert Olsson 		/* An internal node with two children */
70019baf839SRobert Olsson 		inode = (struct tnode *) node;
70119baf839SRobert Olsson 
70219baf839SRobert Olsson 		if (inode->bits == 1) {
70319baf839SRobert Olsson 			put_child(t, tn, 2*i, inode->child[0]);
70419baf839SRobert Olsson 			put_child(t, tn, 2*i+1, inode->child[1]);
70519baf839SRobert Olsson 
70619baf839SRobert Olsson 			tnode_free(inode);
70791b9a277SOlof Johansson 			continue;
70819baf839SRobert Olsson 		}
70919baf839SRobert Olsson 
71019baf839SRobert Olsson 		/* An internal node with more than two children */
71119baf839SRobert Olsson 
71219baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
71319baf839SRobert Olsson 		 * ones, 'left' and 'right', each with half of the
71419baf839SRobert Olsson 		 * original children. The two new nodes will have
71519baf839SRobert Olsson 		 * a position one bit further down the key and this
71619baf839SRobert Olsson 		 * means that the "significant" part of their keys
71719baf839SRobert Olsson 		 * (see the discussion near the top of this file)
71819baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
71919baf839SRobert Olsson 		 * left's key and "1" in right's key. Since we are
72019baf839SRobert Olsson 		 * moving the key position by one step, the bit that
72119baf839SRobert Olsson 		 * we are moving away from - the bit at position
72219baf839SRobert Olsson 		 * (inode->pos) - is the one that will differ between
72319baf839SRobert Olsson 		 * left and right. So... we synthesize that bit in the
72419baf839SRobert Olsson 		 * two  new keys.
72519baf839SRobert Olsson 		 * The mask 'm' below will be a single "one" bit at
72619baf839SRobert Olsson 		 * the position (inode->pos)
72719baf839SRobert Olsson 		 */
72819baf839SRobert Olsson 
72919baf839SRobert Olsson 		/* Use the old key, but set the new significant
73019baf839SRobert Olsson 		 *   bit to zero.
73119baf839SRobert Olsson 		 */
7322f36895aSRobert Olsson 
7332f36895aSRobert Olsson 		left = (struct tnode *) tnode_get_child(tn, 2*i);
7342f36895aSRobert Olsson 		put_child(t, tn, 2*i, NULL);
73519baf839SRobert Olsson 
73691b9a277SOlof Johansson 		BUG_ON(!left);
73719baf839SRobert Olsson 
7382f36895aSRobert Olsson 		right = (struct tnode *) tnode_get_child(tn, 2*i+1);
7392f36895aSRobert Olsson 		put_child(t, tn, 2*i+1, NULL);
74019baf839SRobert Olsson 
74191b9a277SOlof Johansson 		BUG_ON(!right);
74219baf839SRobert Olsson 
74319baf839SRobert Olsson 		size = tnode_child_length(left);
74419baf839SRobert Olsson 		for (j = 0; j < size; j++) {
74519baf839SRobert Olsson 			put_child(t, left, j, inode->child[j]);
74619baf839SRobert Olsson 			put_child(t, right, j, inode->child[j + size]);
74719baf839SRobert Olsson 		}
74819baf839SRobert Olsson 		put_child(t, tn, 2*i, resize(t, left));
74919baf839SRobert Olsson 		put_child(t, tn, 2*i+1, resize(t, right));
75019baf839SRobert Olsson 
75119baf839SRobert Olsson 		tnode_free(inode);
75219baf839SRobert Olsson 	}
75319baf839SRobert Olsson 	tnode_free(oldtnode);
75419baf839SRobert Olsson 	return tn;
7552f80b3c8SRobert Olsson nomem:
7562f80b3c8SRobert Olsson 	{
7572f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
7582f80b3c8SRobert Olsson 		int j;
7592f80b3c8SRobert Olsson 
7602f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
7612f80b3c8SRobert Olsson 			if (tn->child[j])
7622f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
7632f80b3c8SRobert Olsson 
7642f80b3c8SRobert Olsson 		tnode_free(tn);
7652f80b3c8SRobert Olsson 
7662f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
7672f80b3c8SRobert Olsson 	}
76819baf839SRobert Olsson }
76919baf839SRobert Olsson 
7702f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn)
77119baf839SRobert Olsson {
77219baf839SRobert Olsson 	struct tnode *oldtnode = tn;
77319baf839SRobert Olsson 	struct node *left, *right;
77419baf839SRobert Olsson 	int i;
77519baf839SRobert Olsson 	int olen = tnode_child_length(tn);
77619baf839SRobert Olsson 
7770c7770c7SStephen Hemminger 	pr_debug("In halve\n");
77819baf839SRobert Olsson 
77919baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
78019baf839SRobert Olsson 
7812f80b3c8SRobert Olsson 	if (!tn)
7822f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
7832f36895aSRobert Olsson 
7842f36895aSRobert Olsson 	/*
7852f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
7862f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
7872f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and halve
7882f36895aSRobert Olsson 	 * of tnode is ignored.
7892f36895aSRobert Olsson 	 */
7902f36895aSRobert Olsson 
7912f36895aSRobert Olsson 	for (i = 0; i < olen; i += 2) {
7922f36895aSRobert Olsson 		left = tnode_get_child(oldtnode, i);
7932f36895aSRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
7942f36895aSRobert Olsson 
7952f36895aSRobert Olsson 		/* Two nonempty children */
7962f36895aSRobert Olsson 		if (left && right) {
7972f80b3c8SRobert Olsson 			struct tnode *newn;
7982f36895aSRobert Olsson 
7992f80b3c8SRobert Olsson 			newn = tnode_new(left->key, tn->pos + tn->bits, 1);
8002f80b3c8SRobert Olsson 
8012f80b3c8SRobert Olsson 			if (!newn)
8022f80b3c8SRobert Olsson 				goto nomem;
8032f80b3c8SRobert Olsson 
8042f80b3c8SRobert Olsson 			put_child(t, tn, i/2, (struct node *)newn);
8052f36895aSRobert Olsson 		}
8062f36895aSRobert Olsson 
8072f36895aSRobert Olsson 	}
80819baf839SRobert Olsson 
80919baf839SRobert Olsson 	for (i = 0; i < olen; i += 2) {
81091b9a277SOlof Johansson 		struct tnode *newBinNode;
81191b9a277SOlof Johansson 
81219baf839SRobert Olsson 		left = tnode_get_child(oldtnode, i);
81319baf839SRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
81419baf839SRobert Olsson 
81519baf839SRobert Olsson 		/* At least one of the children is empty */
81619baf839SRobert Olsson 		if (left == NULL) {
81719baf839SRobert Olsson 			if (right == NULL)    /* Both are empty */
81819baf839SRobert Olsson 				continue;
81919baf839SRobert Olsson 			put_child(t, tn, i/2, right);
82091b9a277SOlof Johansson 			continue;
82191b9a277SOlof Johansson 		}
82291b9a277SOlof Johansson 
82391b9a277SOlof Johansson 		if (right == NULL) {
82419baf839SRobert Olsson 			put_child(t, tn, i/2, left);
82591b9a277SOlof Johansson 			continue;
82691b9a277SOlof Johansson 		}
82719baf839SRobert Olsson 
82819baf839SRobert Olsson 		/* Two nonempty children */
82991b9a277SOlof Johansson 		newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
8302f36895aSRobert Olsson 		put_child(t, tn, i/2, NULL);
83119baf839SRobert Olsson 		put_child(t, newBinNode, 0, left);
83219baf839SRobert Olsson 		put_child(t, newBinNode, 1, right);
83319baf839SRobert Olsson 		put_child(t, tn, i/2, resize(t, newBinNode));
83419baf839SRobert Olsson 	}
83519baf839SRobert Olsson 	tnode_free(oldtnode);
83619baf839SRobert Olsson 	return tn;
8372f80b3c8SRobert Olsson nomem:
8382f80b3c8SRobert Olsson 	{
8392f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8402f80b3c8SRobert Olsson 		int j;
8412f80b3c8SRobert Olsson 
8422f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8432f80b3c8SRobert Olsson 			if (tn->child[j])
8442f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
8452f80b3c8SRobert Olsson 
8462f80b3c8SRobert Olsson 		tnode_free(tn);
8472f80b3c8SRobert Olsson 
8482f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8492f80b3c8SRobert Olsson 	}
85019baf839SRobert Olsson }
85119baf839SRobert Olsson 
85291b9a277SOlof Johansson static void trie_init(struct trie *t)
85319baf839SRobert Olsson {
85491b9a277SOlof Johansson 	if (!t)
85591b9a277SOlof Johansson 		return;
85691b9a277SOlof Johansson 
85719baf839SRobert Olsson 	t->size = 0;
8582373ce1cSRobert Olsson 	rcu_assign_pointer(t->trie, NULL);
85919baf839SRobert Olsson 	t->revision = 0;
86019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
86119baf839SRobert Olsson 	memset(&t->stats, 0, sizeof(struct trie_use_stats));
86219baf839SRobert Olsson #endif
86319baf839SRobert Olsson }
86419baf839SRobert Olsson 
865772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines
8662373ce1cSRobert Olsson  via get_fa_head and dump */
8672373ce1cSRobert Olsson 
868772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
86919baf839SRobert Olsson {
870772cb712SRobert Olsson 	struct hlist_head *head = &l->list;
87119baf839SRobert Olsson 	struct hlist_node *node;
87219baf839SRobert Olsson 	struct leaf_info *li;
87319baf839SRobert Olsson 
8742373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, head, hlist)
87519baf839SRobert Olsson 		if (li->plen == plen)
87619baf839SRobert Olsson 			return li;
87791b9a277SOlof Johansson 
87819baf839SRobert Olsson 	return NULL;
87919baf839SRobert Olsson }
88019baf839SRobert Olsson 
88119baf839SRobert Olsson static inline struct list_head * get_fa_head(struct leaf *l, int plen)
88219baf839SRobert Olsson {
883772cb712SRobert Olsson 	struct leaf_info *li = find_leaf_info(l, plen);
88419baf839SRobert Olsson 
88591b9a277SOlof Johansson 	if (!li)
88691b9a277SOlof Johansson 		return NULL;
88719baf839SRobert Olsson 
88891b9a277SOlof Johansson 	return &li->falh;
88919baf839SRobert Olsson }
89019baf839SRobert Olsson 
89119baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
89219baf839SRobert Olsson {
89319baf839SRobert Olsson 	struct leaf_info *li = NULL, *last = NULL;
89491b9a277SOlof Johansson 	struct hlist_node *node;
89519baf839SRobert Olsson 
89691b9a277SOlof Johansson 	if (hlist_empty(head)) {
8972373ce1cSRobert Olsson 		hlist_add_head_rcu(&new->hlist, head);
89891b9a277SOlof Johansson 	} else {
89991b9a277SOlof Johansson 		hlist_for_each_entry(li, node, head, hlist) {
90019baf839SRobert Olsson 			if (new->plen > li->plen)
90119baf839SRobert Olsson 				break;
90219baf839SRobert Olsson 
90319baf839SRobert Olsson 			last = li;
90419baf839SRobert Olsson 		}
90519baf839SRobert Olsson 		if (last)
9062373ce1cSRobert Olsson 			hlist_add_after_rcu(&last->hlist, &new->hlist);
90719baf839SRobert Olsson 		else
9082373ce1cSRobert Olsson 			hlist_add_before_rcu(&new->hlist, &li->hlist);
90919baf839SRobert Olsson 	}
91019baf839SRobert Olsson }
91119baf839SRobert Olsson 
9122373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
9132373ce1cSRobert Olsson 
91419baf839SRobert Olsson static struct leaf *
91519baf839SRobert Olsson fib_find_node(struct trie *t, u32 key)
91619baf839SRobert Olsson {
91719baf839SRobert Olsson 	int pos;
91819baf839SRobert Olsson 	struct tnode *tn;
91919baf839SRobert Olsson 	struct node *n;
92019baf839SRobert Olsson 
92119baf839SRobert Olsson 	pos = 0;
9222373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
92319baf839SRobert Olsson 
92419baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
92519baf839SRobert Olsson 		tn = (struct tnode *) n;
92619baf839SRobert Olsson 
92719baf839SRobert Olsson 		check_tnode(tn);
92819baf839SRobert Olsson 
92919baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
93019baf839SRobert Olsson 			pos = tn->pos + tn->bits;
93119baf839SRobert Olsson 			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
93291b9a277SOlof Johansson 		} else
93319baf839SRobert Olsson 			break;
93419baf839SRobert Olsson 	}
93519baf839SRobert Olsson 	/* Case we have found a leaf. Compare prefixes */
93619baf839SRobert Olsson 
93791b9a277SOlof Johansson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
93891b9a277SOlof Johansson 		return (struct leaf *)n;
93991b9a277SOlof Johansson 
94019baf839SRobert Olsson 	return NULL;
94119baf839SRobert Olsson }
94219baf839SRobert Olsson 
94319baf839SRobert Olsson static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
94419baf839SRobert Olsson {
94519baf839SRobert Olsson 	int wasfull;
94619baf839SRobert Olsson 	t_key cindex, key;
94719baf839SRobert Olsson 	struct tnode *tp = NULL;
94819baf839SRobert Olsson 
94919baf839SRobert Olsson 	key = tn->key;
95019baf839SRobert Olsson 
95119baf839SRobert Olsson 	while (tn != NULL && NODE_PARENT(tn) != NULL) {
95219baf839SRobert Olsson 
95319baf839SRobert Olsson 		tp = NODE_PARENT(tn);
95419baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
95519baf839SRobert Olsson 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
95619baf839SRobert Olsson 		tn = (struct tnode *) resize (t, (struct tnode *)tn);
95719baf839SRobert Olsson 		tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
95819baf839SRobert Olsson 
95919baf839SRobert Olsson 		if (!NODE_PARENT(tn))
96019baf839SRobert Olsson 			break;
96119baf839SRobert Olsson 
96219baf839SRobert Olsson 		tn = NODE_PARENT(tn);
96319baf839SRobert Olsson 	}
96419baf839SRobert Olsson 	/* Handle last (top) tnode */
96519baf839SRobert Olsson 	if (IS_TNODE(tn))
96619baf839SRobert Olsson 		tn = (struct tnode*) resize(t, (struct tnode *)tn);
96719baf839SRobert Olsson 
96819baf839SRobert Olsson 	return (struct node*) tn;
96919baf839SRobert Olsson }
97019baf839SRobert Olsson 
9712373ce1cSRobert Olsson /* only used from updater-side */
9722373ce1cSRobert Olsson 
97319baf839SRobert Olsson static  struct list_head *
974f835e471SRobert Olsson fib_insert_node(struct trie *t, int *err, u32 key, int plen)
97519baf839SRobert Olsson {
97619baf839SRobert Olsson 	int pos, newpos;
97719baf839SRobert Olsson 	struct tnode *tp = NULL, *tn = NULL;
97819baf839SRobert Olsson 	struct node *n;
97919baf839SRobert Olsson 	struct leaf *l;
98019baf839SRobert Olsson 	int missbit;
98119baf839SRobert Olsson 	struct list_head *fa_head = NULL;
98219baf839SRobert Olsson 	struct leaf_info *li;
98319baf839SRobert Olsson 	t_key cindex;
98419baf839SRobert Olsson 
98519baf839SRobert Olsson 	pos = 0;
98619baf839SRobert Olsson 	n = t->trie;
98719baf839SRobert Olsson 
98819baf839SRobert Olsson 	/* If we point to NULL, stop. Either the tree is empty and we should
98919baf839SRobert Olsson 	 * just put a new leaf in if, or we have reached an empty child slot,
99019baf839SRobert Olsson 	 * and we should just put our new leaf in that.
99119baf839SRobert Olsson 	 * If we point to a T_TNODE, check if it matches our key. Note that
99219baf839SRobert Olsson 	 * a T_TNODE might be skipping any number of bits - its 'pos' need
99319baf839SRobert Olsson 	 * not be the parent's 'pos'+'bits'!
99419baf839SRobert Olsson 	 *
99519baf839SRobert Olsson 	 * If it does match the current key, get pos/bits from it, extract
99619baf839SRobert Olsson 	 * the index from our key, push the T_TNODE and walk the tree.
99719baf839SRobert Olsson 	 *
99819baf839SRobert Olsson 	 * If it doesn't, we have to replace it with a new T_TNODE.
99919baf839SRobert Olsson 	 *
100019baf839SRobert Olsson 	 * If we point to a T_LEAF, it might or might not have the same key
100119baf839SRobert Olsson 	 * as we do. If it does, just change the value, update the T_LEAF's
100219baf839SRobert Olsson 	 * value, and return it.
100319baf839SRobert Olsson 	 * If it doesn't, we need to replace it with a T_TNODE.
100419baf839SRobert Olsson 	 */
100519baf839SRobert Olsson 
100619baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
100719baf839SRobert Olsson 		tn = (struct tnode *) n;
100819baf839SRobert Olsson 
100919baf839SRobert Olsson 		check_tnode(tn);
101019baf839SRobert Olsson 
101119baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
101219baf839SRobert Olsson 			tp = tn;
101319baf839SRobert Olsson 			pos = tn->pos + tn->bits;
101419baf839SRobert Olsson 			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
101519baf839SRobert Olsson 
10160c7770c7SStephen Hemminger 			BUG_ON(n && NODE_PARENT(n) != tn);
101791b9a277SOlof Johansson 		} else
101819baf839SRobert Olsson 			break;
101919baf839SRobert Olsson 	}
102019baf839SRobert Olsson 
102119baf839SRobert Olsson 	/*
102219baf839SRobert Olsson 	 * n  ----> NULL, LEAF or TNODE
102319baf839SRobert Olsson 	 *
102419baf839SRobert Olsson 	 * tp is n's (parent) ----> NULL or TNODE
102519baf839SRobert Olsson 	 */
102619baf839SRobert Olsson 
102791b9a277SOlof Johansson 	BUG_ON(tp && IS_LEAF(tp));
102819baf839SRobert Olsson 
102919baf839SRobert Olsson 	/* Case 1: n is a leaf. Compare prefixes */
103019baf839SRobert Olsson 
103119baf839SRobert Olsson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
103219baf839SRobert Olsson 		struct leaf *l = (struct leaf *) n;
103319baf839SRobert Olsson 
103419baf839SRobert Olsson 		li = leaf_info_new(plen);
103519baf839SRobert Olsson 
1036f835e471SRobert Olsson 		if (!li) {
1037f835e471SRobert Olsson 			*err = -ENOMEM;
1038f835e471SRobert Olsson 			goto err;
1039f835e471SRobert Olsson 		}
104019baf839SRobert Olsson 
104119baf839SRobert Olsson 		fa_head = &li->falh;
104219baf839SRobert Olsson 		insert_leaf_info(&l->list, li);
104319baf839SRobert Olsson 		goto done;
104419baf839SRobert Olsson 	}
104519baf839SRobert Olsson 	t->size++;
104619baf839SRobert Olsson 	l = leaf_new();
104719baf839SRobert Olsson 
1048f835e471SRobert Olsson 	if (!l) {
1049f835e471SRobert Olsson 		*err = -ENOMEM;
1050f835e471SRobert Olsson 		goto err;
1051f835e471SRobert Olsson 	}
105219baf839SRobert Olsson 
105319baf839SRobert Olsson 	l->key = key;
105419baf839SRobert Olsson 	li = leaf_info_new(plen);
105519baf839SRobert Olsson 
1056f835e471SRobert Olsson 	if (!li) {
1057f835e471SRobert Olsson 		tnode_free((struct tnode *) l);
1058f835e471SRobert Olsson 		*err = -ENOMEM;
1059f835e471SRobert Olsson 		goto err;
1060f835e471SRobert Olsson 	}
106119baf839SRobert Olsson 
106219baf839SRobert Olsson 	fa_head = &li->falh;
106319baf839SRobert Olsson 	insert_leaf_info(&l->list, li);
106419baf839SRobert Olsson 
106519baf839SRobert Olsson 	if (t->trie && n == NULL) {
106691b9a277SOlof Johansson 		/* Case 2: n is NULL, and will just insert a new leaf */
106719baf839SRobert Olsson 
106819baf839SRobert Olsson 		NODE_SET_PARENT(l, tp);
106919baf839SRobert Olsson 
107019baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
107119baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
107291b9a277SOlof Johansson 	} else {
107319baf839SRobert Olsson 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
107419baf839SRobert Olsson 		/*
107519baf839SRobert Olsson 		 *  Add a new tnode here
107619baf839SRobert Olsson 		 *  first tnode need some special handling
107719baf839SRobert Olsson 		 */
107819baf839SRobert Olsson 
107919baf839SRobert Olsson 		if (tp)
108019baf839SRobert Olsson 			pos = tp->pos+tp->bits;
108119baf839SRobert Olsson 		else
108219baf839SRobert Olsson 			pos = 0;
108391b9a277SOlof Johansson 
108419baf839SRobert Olsson 		if (n) {
108519baf839SRobert Olsson 			newpos = tkey_mismatch(key, pos, n->key);
108619baf839SRobert Olsson 			tn = tnode_new(n->key, newpos, 1);
108791b9a277SOlof Johansson 		} else {
108819baf839SRobert Olsson 			newpos = 0;
108919baf839SRobert Olsson 			tn = tnode_new(key, newpos, 1); /* First tnode */
109019baf839SRobert Olsson 		}
1091f835e471SRobert Olsson 
1092f835e471SRobert Olsson 		if (!tn) {
1093f835e471SRobert Olsson 			free_leaf_info(li);
1094f835e471SRobert Olsson 			tnode_free((struct tnode *) l);
1095f835e471SRobert Olsson 			*err = -ENOMEM;
1096f835e471SRobert Olsson 			goto err;
1097f835e471SRobert Olsson 		}
109819baf839SRobert Olsson 
109919baf839SRobert Olsson 		NODE_SET_PARENT(tn, tp);
110019baf839SRobert Olsson 
110119baf839SRobert Olsson 		missbit = tkey_extract_bits(key, newpos, 1);
110219baf839SRobert Olsson 		put_child(t, tn, missbit, (struct node *)l);
110319baf839SRobert Olsson 		put_child(t, tn, 1-missbit, n);
110419baf839SRobert Olsson 
110519baf839SRobert Olsson 		if (tp) {
110619baf839SRobert Olsson 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
110719baf839SRobert Olsson 			put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
110891b9a277SOlof Johansson 		} else {
11092373ce1cSRobert Olsson 			rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
111019baf839SRobert Olsson 			tp = tn;
111119baf839SRobert Olsson 		}
111219baf839SRobert Olsson 	}
111391b9a277SOlof Johansson 
111491b9a277SOlof Johansson 	if (tp && tp->pos + tp->bits > 32)
111578c6671aSStephen Hemminger 		printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
111619baf839SRobert Olsson 		       tp, tp->pos, tp->bits, key, plen);
111791b9a277SOlof Johansson 
111819baf839SRobert Olsson 	/* Rebalance the trie */
11192373ce1cSRobert Olsson 
11202373ce1cSRobert Olsson 	rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1121f835e471SRobert Olsson done:
1122f835e471SRobert Olsson 	t->revision++;
112391b9a277SOlof Johansson err:
112419baf839SRobert Olsson 	return fa_head;
112519baf839SRobert Olsson }
112619baf839SRobert Olsson 
11274e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
112819baf839SRobert Olsson {
112919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
113019baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
113119baf839SRobert Olsson 	struct list_head *fa_head = NULL;
113219baf839SRobert Olsson 	struct fib_info *fi;
11334e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
11344e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
113519baf839SRobert Olsson 	u32 key, mask;
113619baf839SRobert Olsson 	int err;
113719baf839SRobert Olsson 	struct leaf *l;
113819baf839SRobert Olsson 
113919baf839SRobert Olsson 	if (plen > 32)
114019baf839SRobert Olsson 		return -EINVAL;
114119baf839SRobert Olsson 
11424e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
114319baf839SRobert Olsson 
11442dfe55b4SPatrick McHardy 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
114519baf839SRobert Olsson 
114619baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
114719baf839SRobert Olsson 
114819baf839SRobert Olsson 	if (key & ~mask)
114919baf839SRobert Olsson 		return -EINVAL;
115019baf839SRobert Olsson 
115119baf839SRobert Olsson 	key = key & mask;
115219baf839SRobert Olsson 
11534e902c57SThomas Graf 	fi = fib_create_info(cfg);
11544e902c57SThomas Graf 	if (IS_ERR(fi)) {
11554e902c57SThomas Graf 		err = PTR_ERR(fi);
115619baf839SRobert Olsson 		goto err;
11574e902c57SThomas Graf 	}
115819baf839SRobert Olsson 
115919baf839SRobert Olsson 	l = fib_find_node(t, key);
116019baf839SRobert Olsson 	fa = NULL;
116119baf839SRobert Olsson 
116219baf839SRobert Olsson 	if (l) {
116319baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
116419baf839SRobert Olsson 		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
116519baf839SRobert Olsson 	}
116619baf839SRobert Olsson 
116719baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
116819baf839SRobert Olsson 	 * with the same keys [prefix,tos,priority], if such key already
116919baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
117019baf839SRobert Olsson 	 *
117119baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
117219baf839SRobert Olsson 	 * insert to the head of f.
117319baf839SRobert Olsson 	 *
117419baf839SRobert Olsson 	 * If f is NULL, no fib node matched the destination key
117519baf839SRobert Olsson 	 * and we need to allocate a new one of those as well.
117619baf839SRobert Olsson 	 */
117719baf839SRobert Olsson 
117891b9a277SOlof Johansson 	if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
117919baf839SRobert Olsson 		struct fib_alias *fa_orig;
118019baf839SRobert Olsson 
118119baf839SRobert Olsson 		err = -EEXIST;
11824e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
118319baf839SRobert Olsson 			goto out;
118419baf839SRobert Olsson 
11854e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
118619baf839SRobert Olsson 			struct fib_info *fi_drop;
118719baf839SRobert Olsson 			u8 state;
118819baf839SRobert Olsson 
11892373ce1cSRobert Olsson 			err = -ENOBUFS;
1190e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
11912373ce1cSRobert Olsson 			if (new_fa == NULL)
11922373ce1cSRobert Olsson 				goto out;
119319baf839SRobert Olsson 
119419baf839SRobert Olsson 			fi_drop = fa->fa_info;
11952373ce1cSRobert Olsson 			new_fa->fa_tos = fa->fa_tos;
11962373ce1cSRobert Olsson 			new_fa->fa_info = fi;
11974e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
11984e902c57SThomas Graf 			new_fa->fa_scope = cfg->fc_scope;
119919baf839SRobert Olsson 			state = fa->fa_state;
12002373ce1cSRobert Olsson 			new_fa->fa_state &= ~FA_S_ACCESSED;
120119baf839SRobert Olsson 
12022373ce1cSRobert Olsson 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12032373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
120419baf839SRobert Olsson 
120519baf839SRobert Olsson 			fib_release_info(fi_drop);
120619baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
120719baf839SRobert Olsson 				rt_cache_flush(-1);
120819baf839SRobert Olsson 
120919baf839SRobert Olsson 			goto succeeded;
121019baf839SRobert Olsson 		}
121119baf839SRobert Olsson 		/* Error if we find a perfect match which
121219baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
121319baf839SRobert Olsson 		 * information.
121419baf839SRobert Olsson 		 */
121519baf839SRobert Olsson 		fa_orig = fa;
121619baf839SRobert Olsson 		list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
121719baf839SRobert Olsson 			if (fa->fa_tos != tos)
121819baf839SRobert Olsson 				break;
121919baf839SRobert Olsson 			if (fa->fa_info->fib_priority != fi->fib_priority)
122019baf839SRobert Olsson 				break;
12214e902c57SThomas Graf 			if (fa->fa_type == cfg->fc_type &&
12224e902c57SThomas Graf 			    fa->fa_scope == cfg->fc_scope &&
122319baf839SRobert Olsson 			    fa->fa_info == fi) {
122419baf839SRobert Olsson 				goto out;
122519baf839SRobert Olsson 			}
122619baf839SRobert Olsson 		}
12274e902c57SThomas Graf 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
122819baf839SRobert Olsson 			fa = fa_orig;
122919baf839SRobert Olsson 	}
123019baf839SRobert Olsson 	err = -ENOENT;
12314e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
123219baf839SRobert Olsson 		goto out;
123319baf839SRobert Olsson 
123419baf839SRobert Olsson 	err = -ENOBUFS;
1235e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
123619baf839SRobert Olsson 	if (new_fa == NULL)
123719baf839SRobert Olsson 		goto out;
123819baf839SRobert Olsson 
123919baf839SRobert Olsson 	new_fa->fa_info = fi;
124019baf839SRobert Olsson 	new_fa->fa_tos = tos;
12414e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
12424e902c57SThomas Graf 	new_fa->fa_scope = cfg->fc_scope;
124319baf839SRobert Olsson 	new_fa->fa_state = 0;
124419baf839SRobert Olsson 	/*
124519baf839SRobert Olsson 	 * Insert new entry to the list.
124619baf839SRobert Olsson 	 */
124719baf839SRobert Olsson 
1248f835e471SRobert Olsson 	if (!fa_head) {
1249f835e471SRobert Olsson 		err = 0;
1250b47b2ec1SHerbert Xu 		fa_head = fib_insert_node(t, &err, key, plen);
1251f835e471SRobert Olsson 		if (err)
1252f835e471SRobert Olsson 			goto out_free_new_fa;
1253f835e471SRobert Olsson 	}
125419baf839SRobert Olsson 
12552373ce1cSRobert Olsson 	list_add_tail_rcu(&new_fa->fa_list,
12562373ce1cSRobert Olsson 			  (fa ? &fa->fa_list : fa_head));
125719baf839SRobert Olsson 
125819baf839SRobert Olsson 	rt_cache_flush(-1);
12594e902c57SThomas Graf 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
12604e902c57SThomas Graf 		  &cfg->fc_nlinfo);
126119baf839SRobert Olsson succeeded:
126219baf839SRobert Olsson 	return 0;
1263f835e471SRobert Olsson 
1264f835e471SRobert Olsson out_free_new_fa:
1265f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
126619baf839SRobert Olsson out:
126719baf839SRobert Olsson 	fib_release_info(fi);
126891b9a277SOlof Johansson err:
126919baf839SRobert Olsson 	return err;
127019baf839SRobert Olsson }
127119baf839SRobert Olsson 
12722373ce1cSRobert Olsson 
1273772cb712SRobert Olsson /* should be called with rcu_read_lock */
12740c7770c7SStephen Hemminger static inline int check_leaf(struct trie *t, struct leaf *l,
12750c7770c7SStephen Hemminger 			     t_key key, int *plen, const struct flowi *flp,
127606c74270SPatrick McHardy 			     struct fib_result *res)
127719baf839SRobert Olsson {
127806c74270SPatrick McHardy 	int err, i;
1279888454c5SAl Viro 	__be32 mask;
128019baf839SRobert Olsson 	struct leaf_info *li;
128119baf839SRobert Olsson 	struct hlist_head *hhead = &l->list;
128219baf839SRobert Olsson 	struct hlist_node *node;
128319baf839SRobert Olsson 
12842373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
128519baf839SRobert Olsson 		i = li->plen;
1286888454c5SAl Viro 		mask = inet_make_mask(i);
1287888454c5SAl Viro 		if (l->key != (key & ntohl(mask)))
128819baf839SRobert Olsson 			continue;
128919baf839SRobert Olsson 
1290888454c5SAl Viro 		if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
129119baf839SRobert Olsson 			*plen = i;
129219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
129319baf839SRobert Olsson 			t->stats.semantic_match_passed++;
129419baf839SRobert Olsson #endif
129506c74270SPatrick McHardy 			return err;
129619baf839SRobert Olsson 		}
129719baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
129819baf839SRobert Olsson 		t->stats.semantic_match_miss++;
129919baf839SRobert Olsson #endif
130019baf839SRobert Olsson 	}
130106c74270SPatrick McHardy 	return 1;
130219baf839SRobert Olsson }
130319baf839SRobert Olsson 
130419baf839SRobert Olsson static int
130519baf839SRobert Olsson fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
130619baf839SRobert Olsson {
130719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
130819baf839SRobert Olsson 	int plen, ret = 0;
130919baf839SRobert Olsson 	struct node *n;
131019baf839SRobert Olsson 	struct tnode *pn;
131119baf839SRobert Olsson 	int pos, bits;
131219baf839SRobert Olsson 	t_key key = ntohl(flp->fl4_dst);
131319baf839SRobert Olsson 	int chopped_off;
131419baf839SRobert Olsson 	t_key cindex = 0;
131519baf839SRobert Olsson 	int current_prefix_length = KEYLENGTH;
131691b9a277SOlof Johansson 	struct tnode *cn;
131791b9a277SOlof Johansson 	t_key node_prefix, key_prefix, pref_mismatch;
131891b9a277SOlof Johansson 	int mp;
131991b9a277SOlof Johansson 
13202373ce1cSRobert Olsson 	rcu_read_lock();
132119baf839SRobert Olsson 
13222373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
132319baf839SRobert Olsson 	if (!n)
132419baf839SRobert Olsson 		goto failed;
132519baf839SRobert Olsson 
132619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
132719baf839SRobert Olsson 	t->stats.gets++;
132819baf839SRobert Olsson #endif
132919baf839SRobert Olsson 
133019baf839SRobert Olsson 	/* Just a leaf? */
133119baf839SRobert Olsson 	if (IS_LEAF(n)) {
133206c74270SPatrick McHardy 		if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
133319baf839SRobert Olsson 			goto found;
133419baf839SRobert Olsson 		goto failed;
133519baf839SRobert Olsson 	}
133619baf839SRobert Olsson 	pn = (struct tnode *) n;
133719baf839SRobert Olsson 	chopped_off = 0;
133819baf839SRobert Olsson 
133919baf839SRobert Olsson 	while (pn) {
134019baf839SRobert Olsson 		pos = pn->pos;
134119baf839SRobert Olsson 		bits = pn->bits;
134219baf839SRobert Olsson 
134319baf839SRobert Olsson 		if (!chopped_off)
134419baf839SRobert Olsson 			cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
134519baf839SRobert Olsson 
134619baf839SRobert Olsson 		n = tnode_get_child(pn, cindex);
134719baf839SRobert Olsson 
134819baf839SRobert Olsson 		if (n == NULL) {
134919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
135019baf839SRobert Olsson 			t->stats.null_node_hit++;
135119baf839SRobert Olsson #endif
135219baf839SRobert Olsson 			goto backtrace;
135319baf839SRobert Olsson 		}
135419baf839SRobert Olsson 
135591b9a277SOlof Johansson 		if (IS_LEAF(n)) {
135691b9a277SOlof Johansson 			if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
135791b9a277SOlof Johansson 				goto found;
135891b9a277SOlof Johansson 			else
135991b9a277SOlof Johansson 				goto backtrace;
136091b9a277SOlof Johansson 		}
136191b9a277SOlof Johansson 
136219baf839SRobert Olsson #define HL_OPTIMIZE
136319baf839SRobert Olsson #ifdef HL_OPTIMIZE
136491b9a277SOlof Johansson 		cn = (struct tnode *)n;
136519baf839SRobert Olsson 
136619baf839SRobert Olsson 		/*
136719baf839SRobert Olsson 		 * It's a tnode, and we can do some extra checks here if we
136819baf839SRobert Olsson 		 * like, to avoid descending into a dead-end branch.
136919baf839SRobert Olsson 		 * This tnode is in the parent's child array at index
137019baf839SRobert Olsson 		 * key[p_pos..p_pos+p_bits] but potentially with some bits
137119baf839SRobert Olsson 		 * chopped off, so in reality the index may be just a
137219baf839SRobert Olsson 		 * subprefix, padded with zero at the end.
137319baf839SRobert Olsson 		 * We can also take a look at any skipped bits in this
137419baf839SRobert Olsson 		 * tnode - everything up to p_pos is supposed to be ok,
137519baf839SRobert Olsson 		 * and the non-chopped bits of the index (se previous
137619baf839SRobert Olsson 		 * paragraph) are also guaranteed ok, but the rest is
137719baf839SRobert Olsson 		 * considered unknown.
137819baf839SRobert Olsson 		 *
137919baf839SRobert Olsson 		 * The skipped bits are key[pos+bits..cn->pos].
138019baf839SRobert Olsson 		 */
138119baf839SRobert Olsson 
138219baf839SRobert Olsson 		/* If current_prefix_length < pos+bits, we are already doing
138319baf839SRobert Olsson 		 * actual prefix  matching, which means everything from
138419baf839SRobert Olsson 		 * pos+(bits-chopped_off) onward must be zero along some
138519baf839SRobert Olsson 		 * branch of this subtree - otherwise there is *no* valid
138619baf839SRobert Olsson 		 * prefix present. Here we can only check the skipped
138719baf839SRobert Olsson 		 * bits. Remember, since we have already indexed into the
138819baf839SRobert Olsson 		 * parent's child array, we know that the bits we chopped of
138919baf839SRobert Olsson 		 * *are* zero.
139019baf839SRobert Olsson 		 */
139119baf839SRobert Olsson 
139219baf839SRobert Olsson 		/* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
139319baf839SRobert Olsson 
139419baf839SRobert Olsson 		if (current_prefix_length < pos+bits) {
139519baf839SRobert Olsson 			if (tkey_extract_bits(cn->key, current_prefix_length,
139619baf839SRobert Olsson 						cn->pos - current_prefix_length) != 0 ||
139719baf839SRobert Olsson 			    !(cn->child[0]))
139819baf839SRobert Olsson 				goto backtrace;
139919baf839SRobert Olsson 		}
140019baf839SRobert Olsson 
140119baf839SRobert Olsson 		/*
140219baf839SRobert Olsson 		 * If chopped_off=0, the index is fully validated and we
140319baf839SRobert Olsson 		 * only need to look at the skipped bits for this, the new,
140419baf839SRobert Olsson 		 * tnode. What we actually want to do is to find out if
140519baf839SRobert Olsson 		 * these skipped bits match our key perfectly, or if we will
140619baf839SRobert Olsson 		 * have to count on finding a matching prefix further down,
140719baf839SRobert Olsson 		 * because if we do, we would like to have some way of
140819baf839SRobert Olsson 		 * verifying the existence of such a prefix at this point.
140919baf839SRobert Olsson 		 */
141019baf839SRobert Olsson 
141119baf839SRobert Olsson 		/* The only thing we can do at this point is to verify that
141219baf839SRobert Olsson 		 * any such matching prefix can indeed be a prefix to our
141319baf839SRobert Olsson 		 * key, and if the bits in the node we are inspecting that
141419baf839SRobert Olsson 		 * do not match our key are not ZERO, this cannot be true.
141519baf839SRobert Olsson 		 * Thus, find out where there is a mismatch (before cn->pos)
141619baf839SRobert Olsson 		 * and verify that all the mismatching bits are zero in the
141719baf839SRobert Olsson 		 * new tnode's key.
141819baf839SRobert Olsson 		 */
141919baf839SRobert Olsson 
142019baf839SRobert Olsson 		/* Note: We aren't very concerned about the piece of the key
142119baf839SRobert Olsson 		 * that precede pn->pos+pn->bits, since these have already been
142219baf839SRobert Olsson 		 * checked. The bits after cn->pos aren't checked since these are
142319baf839SRobert Olsson 		 * by definition "unknown" at this point. Thus, what we want to
142419baf839SRobert Olsson 		 * see is if we are about to enter the "prefix matching" state,
142519baf839SRobert Olsson 		 * and in that case verify that the skipped bits that will prevail
142619baf839SRobert Olsson 		 * throughout this subtree are zero, as they have to be if we are
142719baf839SRobert Olsson 		 * to find a matching prefix.
142819baf839SRobert Olsson 		 */
142919baf839SRobert Olsson 
143019baf839SRobert Olsson 		node_prefix = MASK_PFX(cn->key, cn->pos);
143119baf839SRobert Olsson 		key_prefix = MASK_PFX(key, cn->pos);
143219baf839SRobert Olsson 		pref_mismatch = key_prefix^node_prefix;
143319baf839SRobert Olsson 		mp = 0;
143419baf839SRobert Olsson 
143519baf839SRobert Olsson 		/* In short: If skipped bits in this node do not match the search
143619baf839SRobert Olsson 		 * key, enter the "prefix matching" state.directly.
143719baf839SRobert Olsson 		 */
143819baf839SRobert Olsson 		if (pref_mismatch) {
143919baf839SRobert Olsson 			while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
144019baf839SRobert Olsson 				mp++;
144119baf839SRobert Olsson 				pref_mismatch = pref_mismatch <<1;
144219baf839SRobert Olsson 			}
144319baf839SRobert Olsson 			key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
144419baf839SRobert Olsson 
144519baf839SRobert Olsson 			if (key_prefix != 0)
144619baf839SRobert Olsson 				goto backtrace;
144719baf839SRobert Olsson 
144819baf839SRobert Olsson 			if (current_prefix_length >= cn->pos)
144919baf839SRobert Olsson 				current_prefix_length = mp;
145019baf839SRobert Olsson 		}
145119baf839SRobert Olsson #endif
145219baf839SRobert Olsson 		pn = (struct tnode *)n; /* Descend */
145319baf839SRobert Olsson 		chopped_off = 0;
145419baf839SRobert Olsson 		continue;
145591b9a277SOlof Johansson 
145619baf839SRobert Olsson backtrace:
145719baf839SRobert Olsson 		chopped_off++;
145819baf839SRobert Olsson 
145919baf839SRobert Olsson 		/* As zero don't change the child key (cindex) */
146091b9a277SOlof Johansson 		while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
146119baf839SRobert Olsson 			chopped_off++;
146219baf839SRobert Olsson 
146319baf839SRobert Olsson 		/* Decrease current_... with bits chopped off */
146419baf839SRobert Olsson 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
146519baf839SRobert Olsson 			current_prefix_length = pn->pos + pn->bits - chopped_off;
146619baf839SRobert Olsson 
146719baf839SRobert Olsson 		/*
146819baf839SRobert Olsson 		 * Either we do the actual chop off according or if we have
146919baf839SRobert Olsson 		 * chopped off all bits in this tnode walk up to our parent.
147019baf839SRobert Olsson 		 */
147119baf839SRobert Olsson 
147291b9a277SOlof Johansson 		if (chopped_off <= pn->bits) {
147319baf839SRobert Olsson 			cindex &= ~(1 << (chopped_off-1));
147491b9a277SOlof Johansson 		} else {
147519baf839SRobert Olsson 			if (NODE_PARENT(pn) == NULL)
147619baf839SRobert Olsson 				goto failed;
147719baf839SRobert Olsson 
147819baf839SRobert Olsson 			/* Get Child's index */
147919baf839SRobert Olsson 			cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
148019baf839SRobert Olsson 			pn = NODE_PARENT(pn);
148119baf839SRobert Olsson 			chopped_off = 0;
148219baf839SRobert Olsson 
148319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
148419baf839SRobert Olsson 			t->stats.backtrack++;
148519baf839SRobert Olsson #endif
148619baf839SRobert Olsson 			goto backtrace;
148719baf839SRobert Olsson 		}
148819baf839SRobert Olsson 	}
148919baf839SRobert Olsson failed:
149019baf839SRobert Olsson 	ret = 1;
149119baf839SRobert Olsson found:
14922373ce1cSRobert Olsson 	rcu_read_unlock();
149319baf839SRobert Olsson 	return ret;
149419baf839SRobert Olsson }
149519baf839SRobert Olsson 
14962373ce1cSRobert Olsson /* only called from updater side */
149719baf839SRobert Olsson static int trie_leaf_remove(struct trie *t, t_key key)
149819baf839SRobert Olsson {
149919baf839SRobert Olsson 	t_key cindex;
150019baf839SRobert Olsson 	struct tnode *tp = NULL;
150119baf839SRobert Olsson 	struct node *n = t->trie;
150219baf839SRobert Olsson 	struct leaf *l;
150319baf839SRobert Olsson 
15040c7770c7SStephen Hemminger 	pr_debug("entering trie_leaf_remove(%p)\n", n);
150519baf839SRobert Olsson 
150619baf839SRobert Olsson 	/* Note that in the case skipped bits, those bits are *not* checked!
150719baf839SRobert Olsson 	 * When we finish this, we will have NULL or a T_LEAF, and the
150819baf839SRobert Olsson 	 * T_LEAF may or may not match our key.
150919baf839SRobert Olsson 	 */
151019baf839SRobert Olsson 
151119baf839SRobert Olsson 	while (n != NULL && IS_TNODE(n)) {
151219baf839SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
151319baf839SRobert Olsson 		check_tnode(tn);
151419baf839SRobert Olsson 		n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
151519baf839SRobert Olsson 
15160c7770c7SStephen Hemminger 		BUG_ON(n && NODE_PARENT(n) != tn);
151719baf839SRobert Olsson 	}
151819baf839SRobert Olsson 	l = (struct leaf *) n;
151919baf839SRobert Olsson 
152019baf839SRobert Olsson 	if (!n || !tkey_equals(l->key, key))
152119baf839SRobert Olsson 		return 0;
152219baf839SRobert Olsson 
152319baf839SRobert Olsson 	/*
152419baf839SRobert Olsson 	 * Key found.
152519baf839SRobert Olsson 	 * Remove the leaf and rebalance the tree
152619baf839SRobert Olsson 	 */
152719baf839SRobert Olsson 
152819baf839SRobert Olsson 	t->revision++;
152919baf839SRobert Olsson 	t->size--;
153019baf839SRobert Olsson 
15312373ce1cSRobert Olsson 	preempt_disable();
153219baf839SRobert Olsson 	tp = NODE_PARENT(n);
153319baf839SRobert Olsson 	tnode_free((struct tnode *) n);
153419baf839SRobert Olsson 
153519baf839SRobert Olsson 	if (tp) {
153619baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
153719baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, NULL);
15382373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
153991b9a277SOlof Johansson 	} else
15402373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, NULL);
15412373ce1cSRobert Olsson 	preempt_enable();
154219baf839SRobert Olsson 
154319baf839SRobert Olsson 	return 1;
154419baf839SRobert Olsson }
154519baf839SRobert Olsson 
15464e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
154719baf839SRobert Olsson {
154819baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
154919baf839SRobert Olsson 	u32 key, mask;
15504e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
15514e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
155219baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
155319baf839SRobert Olsson 	struct list_head *fa_head;
155419baf839SRobert Olsson 	struct leaf *l;
155591b9a277SOlof Johansson 	struct leaf_info *li;
155691b9a277SOlof Johansson 
155719baf839SRobert Olsson 	if (plen > 32)
155819baf839SRobert Olsson 		return -EINVAL;
155919baf839SRobert Olsson 
15604e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
156119baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
156219baf839SRobert Olsson 
156319baf839SRobert Olsson 	if (key & ~mask)
156419baf839SRobert Olsson 		return -EINVAL;
156519baf839SRobert Olsson 
156619baf839SRobert Olsson 	key = key & mask;
156719baf839SRobert Olsson 	l = fib_find_node(t, key);
156819baf839SRobert Olsson 
156919baf839SRobert Olsson 	if (!l)
157019baf839SRobert Olsson 		return -ESRCH;
157119baf839SRobert Olsson 
157219baf839SRobert Olsson 	fa_head = get_fa_head(l, plen);
157319baf839SRobert Olsson 	fa = fib_find_alias(fa_head, tos, 0);
157419baf839SRobert Olsson 
157519baf839SRobert Olsson 	if (!fa)
157619baf839SRobert Olsson 		return -ESRCH;
157719baf839SRobert Olsson 
15780c7770c7SStephen Hemminger 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
157919baf839SRobert Olsson 
158019baf839SRobert Olsson 	fa_to_delete = NULL;
158119baf839SRobert Olsson 	fa_head = fa->fa_list.prev;
15822373ce1cSRobert Olsson 
158319baf839SRobert Olsson 	list_for_each_entry(fa, fa_head, fa_list) {
158419baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
158519baf839SRobert Olsson 
158619baf839SRobert Olsson 		if (fa->fa_tos != tos)
158719baf839SRobert Olsson 			break;
158819baf839SRobert Olsson 
15894e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
15904e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
15914e902c57SThomas Graf 		     fa->fa_scope == cfg->fc_scope) &&
15924e902c57SThomas Graf 		    (!cfg->fc_protocol ||
15934e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
15944e902c57SThomas Graf 		    fib_nh_match(cfg, fi) == 0) {
159519baf839SRobert Olsson 			fa_to_delete = fa;
159619baf839SRobert Olsson 			break;
159719baf839SRobert Olsson 		}
159819baf839SRobert Olsson 	}
159919baf839SRobert Olsson 
160091b9a277SOlof Johansson 	if (!fa_to_delete)
160191b9a277SOlof Johansson 		return -ESRCH;
160219baf839SRobert Olsson 
160319baf839SRobert Olsson 	fa = fa_to_delete;
16044e902c57SThomas Graf 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
16054e902c57SThomas Graf 		  &cfg->fc_nlinfo);
160619baf839SRobert Olsson 
160719baf839SRobert Olsson 	l = fib_find_node(t, key);
1608772cb712SRobert Olsson 	li = find_leaf_info(l, plen);
160919baf839SRobert Olsson 
16102373ce1cSRobert Olsson 	list_del_rcu(&fa->fa_list);
161119baf839SRobert Olsson 
161219baf839SRobert Olsson 	if (list_empty(fa_head)) {
16132373ce1cSRobert Olsson 		hlist_del_rcu(&li->hlist);
161419baf839SRobert Olsson 		free_leaf_info(li);
16152373ce1cSRobert Olsson 	}
161619baf839SRobert Olsson 
161719baf839SRobert Olsson 	if (hlist_empty(&l->list))
161819baf839SRobert Olsson 		trie_leaf_remove(t, key);
161919baf839SRobert Olsson 
162019baf839SRobert Olsson 	if (fa->fa_state & FA_S_ACCESSED)
162119baf839SRobert Olsson 		rt_cache_flush(-1);
162219baf839SRobert Olsson 
16232373ce1cSRobert Olsson 	fib_release_info(fa->fa_info);
16242373ce1cSRobert Olsson 	alias_free_mem_rcu(fa);
162519baf839SRobert Olsson 	return 0;
162619baf839SRobert Olsson }
162719baf839SRobert Olsson 
162819baf839SRobert Olsson static int trie_flush_list(struct trie *t, struct list_head *head)
162919baf839SRobert Olsson {
163019baf839SRobert Olsson 	struct fib_alias *fa, *fa_node;
163119baf839SRobert Olsson 	int found = 0;
163219baf839SRobert Olsson 
163319baf839SRobert Olsson 	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
163419baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
163519baf839SRobert Olsson 
163619baf839SRobert Olsson 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
16372373ce1cSRobert Olsson 			list_del_rcu(&fa->fa_list);
16382373ce1cSRobert Olsson 			fib_release_info(fa->fa_info);
16392373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
164019baf839SRobert Olsson 			found++;
164119baf839SRobert Olsson 		}
164219baf839SRobert Olsson 	}
164319baf839SRobert Olsson 	return found;
164419baf839SRobert Olsson }
164519baf839SRobert Olsson 
164619baf839SRobert Olsson static int trie_flush_leaf(struct trie *t, struct leaf *l)
164719baf839SRobert Olsson {
164819baf839SRobert Olsson 	int found = 0;
164919baf839SRobert Olsson 	struct hlist_head *lih = &l->list;
165019baf839SRobert Olsson 	struct hlist_node *node, *tmp;
165119baf839SRobert Olsson 	struct leaf_info *li = NULL;
165219baf839SRobert Olsson 
165319baf839SRobert Olsson 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
165419baf839SRobert Olsson 		found += trie_flush_list(t, &li->falh);
165519baf839SRobert Olsson 
165619baf839SRobert Olsson 		if (list_empty(&li->falh)) {
16572373ce1cSRobert Olsson 			hlist_del_rcu(&li->hlist);
165819baf839SRobert Olsson 			free_leaf_info(li);
165919baf839SRobert Olsson 		}
166019baf839SRobert Olsson 	}
166119baf839SRobert Olsson 	return found;
166219baf839SRobert Olsson }
166319baf839SRobert Olsson 
16642373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
16652373ce1cSRobert Olsson 
166619baf839SRobert Olsson static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
166719baf839SRobert Olsson {
166819baf839SRobert Olsson 	struct node *c = (struct node *) thisleaf;
166919baf839SRobert Olsson 	struct tnode *p;
167019baf839SRobert Olsson 	int idx;
16712373ce1cSRobert Olsson 	struct node *trie = rcu_dereference(t->trie);
167219baf839SRobert Olsson 
167319baf839SRobert Olsson 	if (c == NULL) {
16742373ce1cSRobert Olsson 		if (trie == NULL)
167519baf839SRobert Olsson 			return NULL;
167619baf839SRobert Olsson 
16772373ce1cSRobert Olsson 		if (IS_LEAF(trie))          /* trie w. just a leaf */
16782373ce1cSRobert Olsson 			return (struct leaf *) trie;
167919baf839SRobert Olsson 
16802373ce1cSRobert Olsson 		p = (struct tnode*) trie;  /* Start */
168191b9a277SOlof Johansson 	} else
168219baf839SRobert Olsson 		p = (struct tnode *) NODE_PARENT(c);
1683c877efb2SStephen Hemminger 
168419baf839SRobert Olsson 	while (p) {
168519baf839SRobert Olsson 		int pos, last;
168619baf839SRobert Olsson 
168719baf839SRobert Olsson 		/*  Find the next child of the parent */
168819baf839SRobert Olsson 		if (c)
168919baf839SRobert Olsson 			pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
169019baf839SRobert Olsson 		else
169119baf839SRobert Olsson 			pos = 0;
169219baf839SRobert Olsson 
169319baf839SRobert Olsson 		last = 1 << p->bits;
169419baf839SRobert Olsson 		for (idx = pos; idx < last ; idx++) {
16952373ce1cSRobert Olsson 			c = rcu_dereference(p->child[idx]);
16962373ce1cSRobert Olsson 
16972373ce1cSRobert Olsson 			if (!c)
169891b9a277SOlof Johansson 				continue;
169919baf839SRobert Olsson 
170019baf839SRobert Olsson 			/* Decend if tnode */
17012373ce1cSRobert Olsson 			while (IS_TNODE(c)) {
17022373ce1cSRobert Olsson 				p = (struct tnode *) c;
170319baf839SRobert Olsson 				idx = 0;
170419baf839SRobert Olsson 
170519baf839SRobert Olsson 				/* Rightmost non-NULL branch */
170619baf839SRobert Olsson 				if (p && IS_TNODE(p))
17072373ce1cSRobert Olsson 					while (!(c = rcu_dereference(p->child[idx]))
17082373ce1cSRobert Olsson 					       && idx < (1<<p->bits)) idx++;
170919baf839SRobert Olsson 
171019baf839SRobert Olsson 				/* Done with this tnode? */
17112373ce1cSRobert Olsson 				if (idx >= (1 << p->bits) || !c)
171219baf839SRobert Olsson 					goto up;
171319baf839SRobert Olsson 			}
17142373ce1cSRobert Olsson 			return (struct leaf *) c;
171519baf839SRobert Olsson 		}
171619baf839SRobert Olsson up:
171719baf839SRobert Olsson 		/* No more children go up one step  */
171819baf839SRobert Olsson 		c = (struct node *) p;
171919baf839SRobert Olsson 		p = (struct tnode *) NODE_PARENT(p);
172019baf839SRobert Olsson 	}
172119baf839SRobert Olsson 	return NULL; /* Ready. Root of trie */
172219baf839SRobert Olsson }
172319baf839SRobert Olsson 
172419baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb)
172519baf839SRobert Olsson {
172619baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
172719baf839SRobert Olsson 	struct leaf *ll = NULL, *l = NULL;
172819baf839SRobert Olsson 	int found = 0, h;
172919baf839SRobert Olsson 
173019baf839SRobert Olsson 	t->revision++;
173119baf839SRobert Olsson 
173219baf839SRobert Olsson 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
173319baf839SRobert Olsson 		found += trie_flush_leaf(t, l);
173419baf839SRobert Olsson 
173519baf839SRobert Olsson 		if (ll && hlist_empty(&ll->list))
173619baf839SRobert Olsson 			trie_leaf_remove(t, ll->key);
173719baf839SRobert Olsson 		ll = l;
173819baf839SRobert Olsson 	}
173919baf839SRobert Olsson 
174019baf839SRobert Olsson 	if (ll && hlist_empty(&ll->list))
174119baf839SRobert Olsson 		trie_leaf_remove(t, ll->key);
174219baf839SRobert Olsson 
17430c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
174419baf839SRobert Olsson 	return found;
174519baf839SRobert Olsson }
174619baf839SRobert Olsson 
174719baf839SRobert Olsson static int trie_last_dflt = -1;
174819baf839SRobert Olsson 
174919baf839SRobert Olsson static void
175019baf839SRobert Olsson fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
175119baf839SRobert Olsson {
175219baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
175319baf839SRobert Olsson 	int order, last_idx;
175419baf839SRobert Olsson 	struct fib_info *fi = NULL;
175519baf839SRobert Olsson 	struct fib_info *last_resort;
175619baf839SRobert Olsson 	struct fib_alias *fa = NULL;
175719baf839SRobert Olsson 	struct list_head *fa_head;
175819baf839SRobert Olsson 	struct leaf *l;
175919baf839SRobert Olsson 
176019baf839SRobert Olsson 	last_idx = -1;
176119baf839SRobert Olsson 	last_resort = NULL;
176219baf839SRobert Olsson 	order = -1;
176319baf839SRobert Olsson 
17642373ce1cSRobert Olsson 	rcu_read_lock();
176519baf839SRobert Olsson 
176619baf839SRobert Olsson 	l = fib_find_node(t, 0);
176719baf839SRobert Olsson 	if (!l)
176819baf839SRobert Olsson 		goto out;
176919baf839SRobert Olsson 
177019baf839SRobert Olsson 	fa_head = get_fa_head(l, 0);
177119baf839SRobert Olsson 	if (!fa_head)
177219baf839SRobert Olsson 		goto out;
177319baf839SRobert Olsson 
177419baf839SRobert Olsson 	if (list_empty(fa_head))
177519baf839SRobert Olsson 		goto out;
177619baf839SRobert Olsson 
17772373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
177819baf839SRobert Olsson 		struct fib_info *next_fi = fa->fa_info;
177919baf839SRobert Olsson 
178019baf839SRobert Olsson 		if (fa->fa_scope != res->scope ||
178119baf839SRobert Olsson 		    fa->fa_type != RTN_UNICAST)
178219baf839SRobert Olsson 			continue;
178319baf839SRobert Olsson 
178419baf839SRobert Olsson 		if (next_fi->fib_priority > res->fi->fib_priority)
178519baf839SRobert Olsson 			break;
178619baf839SRobert Olsson 		if (!next_fi->fib_nh[0].nh_gw ||
178719baf839SRobert Olsson 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
178819baf839SRobert Olsson 			continue;
178919baf839SRobert Olsson 		fa->fa_state |= FA_S_ACCESSED;
179019baf839SRobert Olsson 
179119baf839SRobert Olsson 		if (fi == NULL) {
179219baf839SRobert Olsson 			if (next_fi != res->fi)
179319baf839SRobert Olsson 				break;
179419baf839SRobert Olsson 		} else if (!fib_detect_death(fi, order, &last_resort,
179519baf839SRobert Olsson 					     &last_idx, &trie_last_dflt)) {
179619baf839SRobert Olsson 			if (res->fi)
179719baf839SRobert Olsson 				fib_info_put(res->fi);
179819baf839SRobert Olsson 			res->fi = fi;
179919baf839SRobert Olsson 			atomic_inc(&fi->fib_clntref);
180019baf839SRobert Olsson 			trie_last_dflt = order;
180119baf839SRobert Olsson 			goto out;
180219baf839SRobert Olsson 		}
180319baf839SRobert Olsson 		fi = next_fi;
180419baf839SRobert Olsson 		order++;
180519baf839SRobert Olsson 	}
180619baf839SRobert Olsson 	if (order <= 0 || fi == NULL) {
180719baf839SRobert Olsson 		trie_last_dflt = -1;
180819baf839SRobert Olsson 		goto out;
180919baf839SRobert Olsson 	}
181019baf839SRobert Olsson 
181119baf839SRobert Olsson 	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
181219baf839SRobert Olsson 		if (res->fi)
181319baf839SRobert Olsson 			fib_info_put(res->fi);
181419baf839SRobert Olsson 		res->fi = fi;
181519baf839SRobert Olsson 		atomic_inc(&fi->fib_clntref);
181619baf839SRobert Olsson 		trie_last_dflt = order;
181719baf839SRobert Olsson 		goto out;
181819baf839SRobert Olsson 	}
181919baf839SRobert Olsson 	if (last_idx >= 0) {
182019baf839SRobert Olsson 		if (res->fi)
182119baf839SRobert Olsson 			fib_info_put(res->fi);
182219baf839SRobert Olsson 		res->fi = last_resort;
182319baf839SRobert Olsson 		if (last_resort)
182419baf839SRobert Olsson 			atomic_inc(&last_resort->fib_clntref);
182519baf839SRobert Olsson 	}
182619baf839SRobert Olsson 	trie_last_dflt = last_idx;
182719baf839SRobert Olsson  out:;
18282373ce1cSRobert Olsson 	rcu_read_unlock();
182919baf839SRobert Olsson }
183019baf839SRobert Olsson 
183119baf839SRobert Olsson static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
183219baf839SRobert Olsson 			   struct sk_buff *skb, struct netlink_callback *cb)
183319baf839SRobert Olsson {
183419baf839SRobert Olsson 	int i, s_i;
183519baf839SRobert Olsson 	struct fib_alias *fa;
183619baf839SRobert Olsson 
183732ab5f80SAl Viro 	__be32 xkey = htonl(key);
183819baf839SRobert Olsson 
18391af5a8c4SPatrick McHardy 	s_i = cb->args[4];
184019baf839SRobert Olsson 	i = 0;
184119baf839SRobert Olsson 
18422373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
18432373ce1cSRobert Olsson 
18442373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fah, fa_list) {
184519baf839SRobert Olsson 		if (i < s_i) {
184619baf839SRobert Olsson 			i++;
184719baf839SRobert Olsson 			continue;
184819baf839SRobert Olsson 		}
184978c6671aSStephen Hemminger 		BUG_ON(!fa->fa_info);
185019baf839SRobert Olsson 
185119baf839SRobert Olsson 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
185219baf839SRobert Olsson 				  cb->nlh->nlmsg_seq,
185319baf839SRobert Olsson 				  RTM_NEWROUTE,
185419baf839SRobert Olsson 				  tb->tb_id,
185519baf839SRobert Olsson 				  fa->fa_type,
185619baf839SRobert Olsson 				  fa->fa_scope,
1857be403ea1SThomas Graf 				  xkey,
185819baf839SRobert Olsson 				  plen,
185919baf839SRobert Olsson 				  fa->fa_tos,
186090f66914SDavid S. Miller 				  fa->fa_info, 0) < 0) {
18611af5a8c4SPatrick McHardy 			cb->args[4] = i;
186219baf839SRobert Olsson 			return -1;
186319baf839SRobert Olsson 		}
186419baf839SRobert Olsson 		i++;
186519baf839SRobert Olsson 	}
18661af5a8c4SPatrick McHardy 	cb->args[4] = i;
186719baf839SRobert Olsson 	return skb->len;
186819baf839SRobert Olsson }
186919baf839SRobert Olsson 
187019baf839SRobert Olsson static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
187119baf839SRobert Olsson 			     struct netlink_callback *cb)
187219baf839SRobert Olsson {
187319baf839SRobert Olsson 	int h, s_h;
187419baf839SRobert Olsson 	struct list_head *fa_head;
187519baf839SRobert Olsson 	struct leaf *l = NULL;
187691b9a277SOlof Johansson 
18771af5a8c4SPatrick McHardy 	s_h = cb->args[3];
187819baf839SRobert Olsson 
187919baf839SRobert Olsson 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
188019baf839SRobert Olsson 		if (h < s_h)
188119baf839SRobert Olsson 			continue;
188219baf839SRobert Olsson 		if (h > s_h)
18831af5a8c4SPatrick McHardy 			memset(&cb->args[4], 0,
18841af5a8c4SPatrick McHardy 			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
188519baf839SRobert Olsson 
188619baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
188719baf839SRobert Olsson 
188819baf839SRobert Olsson 		if (!fa_head)
188919baf839SRobert Olsson 			continue;
189019baf839SRobert Olsson 
189119baf839SRobert Olsson 		if (list_empty(fa_head))
189219baf839SRobert Olsson 			continue;
189319baf839SRobert Olsson 
189419baf839SRobert Olsson 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
18951af5a8c4SPatrick McHardy 			cb->args[3] = h;
189619baf839SRobert Olsson 			return -1;
189719baf839SRobert Olsson 		}
189819baf839SRobert Olsson 	}
18991af5a8c4SPatrick McHardy 	cb->args[3] = h;
190019baf839SRobert Olsson 	return skb->len;
190119baf839SRobert Olsson }
190219baf839SRobert Olsson 
190319baf839SRobert Olsson static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
190419baf839SRobert Olsson {
190519baf839SRobert Olsson 	int m, s_m;
190619baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
190719baf839SRobert Olsson 
19081af5a8c4SPatrick McHardy 	s_m = cb->args[2];
190919baf839SRobert Olsson 
19102373ce1cSRobert Olsson 	rcu_read_lock();
191119baf839SRobert Olsson 	for (m = 0; m <= 32; m++) {
191219baf839SRobert Olsson 		if (m < s_m)
191319baf839SRobert Olsson 			continue;
191419baf839SRobert Olsson 		if (m > s_m)
19151af5a8c4SPatrick McHardy 			memset(&cb->args[3], 0,
19161af5a8c4SPatrick McHardy 				sizeof(cb->args) - 3*sizeof(cb->args[0]));
191719baf839SRobert Olsson 
191819baf839SRobert Olsson 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
19191af5a8c4SPatrick McHardy 			cb->args[2] = m;
192019baf839SRobert Olsson 			goto out;
192119baf839SRobert Olsson 		}
192219baf839SRobert Olsson 	}
19232373ce1cSRobert Olsson 	rcu_read_unlock();
19241af5a8c4SPatrick McHardy 	cb->args[2] = m;
192519baf839SRobert Olsson 	return skb->len;
192619baf839SRobert Olsson out:
19272373ce1cSRobert Olsson 	rcu_read_unlock();
192819baf839SRobert Olsson 	return -1;
192919baf839SRobert Olsson }
193019baf839SRobert Olsson 
193119baf839SRobert Olsson /* Fix more generic FIB names for init later */
193219baf839SRobert Olsson 
193319baf839SRobert Olsson #ifdef CONFIG_IP_MULTIPLE_TABLES
19342dfe55b4SPatrick McHardy struct fib_table * fib_hash_init(u32 id)
193519baf839SRobert Olsson #else
19362dfe55b4SPatrick McHardy struct fib_table * __init fib_hash_init(u32 id)
193719baf839SRobert Olsson #endif
193819baf839SRobert Olsson {
193919baf839SRobert Olsson 	struct fib_table *tb;
194019baf839SRobert Olsson 	struct trie *t;
194119baf839SRobert Olsson 
194219baf839SRobert Olsson 	if (fn_alias_kmem == NULL)
194319baf839SRobert Olsson 		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
194419baf839SRobert Olsson 						  sizeof(struct fib_alias),
194519baf839SRobert Olsson 						  0, SLAB_HWCACHE_ALIGN,
194619baf839SRobert Olsson 						  NULL, NULL);
194719baf839SRobert Olsson 
194819baf839SRobert Olsson 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
194919baf839SRobert Olsson 		     GFP_KERNEL);
195019baf839SRobert Olsson 	if (tb == NULL)
195119baf839SRobert Olsson 		return NULL;
195219baf839SRobert Olsson 
195319baf839SRobert Olsson 	tb->tb_id = id;
195419baf839SRobert Olsson 	tb->tb_lookup = fn_trie_lookup;
195519baf839SRobert Olsson 	tb->tb_insert = fn_trie_insert;
195619baf839SRobert Olsson 	tb->tb_delete = fn_trie_delete;
195719baf839SRobert Olsson 	tb->tb_flush = fn_trie_flush;
195819baf839SRobert Olsson 	tb->tb_select_default = fn_trie_select_default;
195919baf839SRobert Olsson 	tb->tb_dump = fn_trie_dump;
196019baf839SRobert Olsson 	memset(tb->tb_data, 0, sizeof(struct trie));
196119baf839SRobert Olsson 
196219baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
196319baf839SRobert Olsson 
196419baf839SRobert Olsson 	trie_init(t);
196519baf839SRobert Olsson 
196619baf839SRobert Olsson 	if (id == RT_TABLE_LOCAL)
196719baf839SRobert Olsson 		trie_local = t;
196819baf839SRobert Olsson 	else if (id == RT_TABLE_MAIN)
196919baf839SRobert Olsson 		trie_main = t;
197019baf839SRobert Olsson 
197119baf839SRobert Olsson 	if (id == RT_TABLE_LOCAL)
197278c6671aSStephen Hemminger 		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
197319baf839SRobert Olsson 
197419baf839SRobert Olsson 	return tb;
197519baf839SRobert Olsson }
197619baf839SRobert Olsson 
1977cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
1978cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
1979cb7b593cSStephen Hemminger struct fib_trie_iter {
1980cb7b593cSStephen Hemminger 	struct tnode *tnode;
1981cb7b593cSStephen Hemminger 	struct trie *trie;
1982cb7b593cSStephen Hemminger 	unsigned index;
1983cb7b593cSStephen Hemminger 	unsigned depth;
1984cb7b593cSStephen Hemminger };
198519baf839SRobert Olsson 
1986cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
198719baf839SRobert Olsson {
1988cb7b593cSStephen Hemminger 	struct tnode *tn = iter->tnode;
1989cb7b593cSStephen Hemminger 	unsigned cindex = iter->index;
1990cb7b593cSStephen Hemminger 	struct tnode *p;
199119baf839SRobert Olsson 
19926640e697SEric W. Biederman 	/* A single entry routing table */
19936640e697SEric W. Biederman 	if (!tn)
19946640e697SEric W. Biederman 		return NULL;
19956640e697SEric W. Biederman 
1996cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1997cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
1998cb7b593cSStephen Hemminger rescan:
1999cb7b593cSStephen Hemminger 	while (cindex < (1<<tn->bits)) {
2000cb7b593cSStephen Hemminger 		struct node *n = tnode_get_child(tn, cindex);
200119baf839SRobert Olsson 
2002cb7b593cSStephen Hemminger 		if (n) {
200319baf839SRobert Olsson 			if (IS_LEAF(n)) {
2004cb7b593cSStephen Hemminger 				iter->tnode = tn;
2005cb7b593cSStephen Hemminger 				iter->index = cindex + 1;
200691b9a277SOlof Johansson 			} else {
2007cb7b593cSStephen Hemminger 				/* push down one level */
2008cb7b593cSStephen Hemminger 				iter->tnode = (struct tnode *) n;
2009cb7b593cSStephen Hemminger 				iter->index = 0;
2010cb7b593cSStephen Hemminger 				++iter->depth;
201119baf839SRobert Olsson 			}
2012cb7b593cSStephen Hemminger 			return n;
201319baf839SRobert Olsson 		}
201419baf839SRobert Olsson 
2015cb7b593cSStephen Hemminger 		++cindex;
2016cb7b593cSStephen Hemminger 	}
2017cb7b593cSStephen Hemminger 
2018cb7b593cSStephen Hemminger 	/* Current node exhausted, pop back up */
2019cb7b593cSStephen Hemminger 	p = NODE_PARENT(tn);
2020cb7b593cSStephen Hemminger 	if (p) {
2021cb7b593cSStephen Hemminger 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2022cb7b593cSStephen Hemminger 		tn = p;
2023cb7b593cSStephen Hemminger 		--iter->depth;
2024cb7b593cSStephen Hemminger 		goto rescan;
2025cb7b593cSStephen Hemminger 	}
2026cb7b593cSStephen Hemminger 
2027cb7b593cSStephen Hemminger 	/* got root? */
2028cb7b593cSStephen Hemminger 	return NULL;
2029cb7b593cSStephen Hemminger }
2030cb7b593cSStephen Hemminger 
2031cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2032cb7b593cSStephen Hemminger 				       struct trie *t)
2033cb7b593cSStephen Hemminger {
20345ddf0eb2SRobert Olsson 	struct node *n ;
20355ddf0eb2SRobert Olsson 
20365ddf0eb2SRobert Olsson 	if(!t)
20375ddf0eb2SRobert Olsson 		return NULL;
20385ddf0eb2SRobert Olsson 
20395ddf0eb2SRobert Olsson 	n = rcu_dereference(t->trie);
20405ddf0eb2SRobert Olsson 
20415ddf0eb2SRobert Olsson 	if(!iter)
20425ddf0eb2SRobert Olsson 		return NULL;
2043cb7b593cSStephen Hemminger 
20446640e697SEric W. Biederman 	if (n) {
20456640e697SEric W. Biederman 		if (IS_TNODE(n)) {
2046cb7b593cSStephen Hemminger 			iter->tnode = (struct tnode *) n;
2047cb7b593cSStephen Hemminger 			iter->trie = t;
2048cb7b593cSStephen Hemminger 			iter->index = 0;
20491d25cd6cSRobert Olsson 			iter->depth = 1;
20506640e697SEric W. Biederman 		} else {
20516640e697SEric W. Biederman 			iter->tnode = NULL;
20526640e697SEric W. Biederman 			iter->trie  = t;
20536640e697SEric W. Biederman 			iter->index = 0;
20546640e697SEric W. Biederman 			iter->depth = 0;
20556640e697SEric W. Biederman 		}
2056cb7b593cSStephen Hemminger 		return n;
2057cb7b593cSStephen Hemminger 	}
2058cb7b593cSStephen Hemminger 	return NULL;
2059cb7b593cSStephen Hemminger }
2060cb7b593cSStephen Hemminger 
2061cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
206219baf839SRobert Olsson {
20632373ce1cSRobert Olsson 	struct node *n;
2064cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2065cb7b593cSStephen Hemminger 
2066cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
206719baf839SRobert Olsson 
20682373ce1cSRobert Olsson 	rcu_read_lock();
2069cb7b593cSStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n;
2070cb7b593cSStephen Hemminger 	     n = fib_trie_get_next(&iter)) {
2071cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
207219baf839SRobert Olsson 			s->leaves++;
2073cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2074cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2075cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
207691b9a277SOlof Johansson 		} else {
2077cb7b593cSStephen Hemminger 			const struct tnode *tn = (const struct tnode *) n;
2078cb7b593cSStephen Hemminger 			int i;
207919baf839SRobert Olsson 
208019baf839SRobert Olsson 			s->tnodes++;
208106ef921dSRobert Olsson 			if(tn->bits < MAX_STAT_DEPTH)
208219baf839SRobert Olsson 				s->nodesizes[tn->bits]++;
208306ef921dSRobert Olsson 
2084cb7b593cSStephen Hemminger 			for (i = 0; i < (1<<tn->bits); i++)
2085cb7b593cSStephen Hemminger 				if (!tn->child[i])
208619baf839SRobert Olsson 					s->nullpointers++;
208719baf839SRobert Olsson 		}
208819baf839SRobert Olsson 	}
20892373ce1cSRobert Olsson 	rcu_read_unlock();
209019baf839SRobert Olsson }
209119baf839SRobert Olsson 
209219baf839SRobert Olsson /*
209319baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
209419baf839SRobert Olsson  */
2095cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
209619baf839SRobert Olsson {
2097cb7b593cSStephen Hemminger 	unsigned i, max, pointers, bytes, avdepth;
209819baf839SRobert Olsson 
209919baf839SRobert Olsson 	if (stat->leaves)
210019baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
210119baf839SRobert Olsson 	else
210219baf839SRobert Olsson 		avdepth = 0;
210319baf839SRobert Olsson 
2104cb7b593cSStephen Hemminger 	seq_printf(seq, "\tAver depth:     %d.%02d\n", avdepth / 100, avdepth % 100 );
2105cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2106cb7b593cSStephen Hemminger 
2107cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
2108cb7b593cSStephen Hemminger 
2109cb7b593cSStephen Hemminger 	bytes = sizeof(struct leaf) * stat->leaves;
2110cb7b593cSStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
211119baf839SRobert Olsson 	bytes += sizeof(struct tnode) * stat->tnodes;
211219baf839SRobert Olsson 
211306ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
211406ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
211519baf839SRobert Olsson 		max--;
211619baf839SRobert Olsson 
2117cb7b593cSStephen Hemminger 	pointers = 0;
211819baf839SRobert Olsson 	for (i = 1; i <= max; i++)
211919baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
212019baf839SRobert Olsson 			seq_printf(seq, "  %d: %d",  i, stat->nodesizes[i]);
212119baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
212219baf839SRobert Olsson 		}
2123cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2124cb7b593cSStephen Hemminger 	seq_printf(seq, "\tPointers: %d\n", pointers);
2125cb7b593cSStephen Hemminger 
212619baf839SRobert Olsson 	bytes += sizeof(struct node *) * pointers;
212719baf839SRobert Olsson 	seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2128cb7b593cSStephen Hemminger 	seq_printf(seq, "Total size: %d  kB\n", (bytes + 1023) / 1024);
212919baf839SRobert Olsson 
213019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
213119baf839SRobert Olsson 	seq_printf(seq, "Counters:\n---------\n");
213219baf839SRobert Olsson 	seq_printf(seq,"gets = %d\n", t->stats.gets);
213319baf839SRobert Olsson 	seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
213419baf839SRobert Olsson 	seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
213519baf839SRobert Olsson 	seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
213619baf839SRobert Olsson 	seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
21372f36895aSRobert Olsson 	seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
213819baf839SRobert Olsson #ifdef CLEAR_STATS
213919baf839SRobert Olsson 	memset(&(t->stats), 0, sizeof(t->stats));
214019baf839SRobert Olsson #endif
214119baf839SRobert Olsson #endif /*  CONFIG_IP_FIB_TRIE_STATS */
214219baf839SRobert Olsson }
214319baf839SRobert Olsson 
214419baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
214519baf839SRobert Olsson {
2146cb7b593cSStephen Hemminger 	struct trie_stat *stat;
214719baf839SRobert Olsson 
2148cb7b593cSStephen Hemminger 	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2149cb7b593cSStephen Hemminger 	if (!stat)
2150cb7b593cSStephen Hemminger 		return -ENOMEM;
2151cb7b593cSStephen Hemminger 
215219baf839SRobert Olsson 	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
215319baf839SRobert Olsson 		   sizeof(struct leaf), sizeof(struct tnode));
215419baf839SRobert Olsson 
2155cb7b593cSStephen Hemminger 	if (trie_local) {
2156cb7b593cSStephen Hemminger 		seq_printf(seq, "Local:\n");
2157cb7b593cSStephen Hemminger 		trie_collect_stats(trie_local, stat);
2158cb7b593cSStephen Hemminger 		trie_show_stats(seq, stat);
215919baf839SRobert Olsson 	}
2160cb7b593cSStephen Hemminger 
2161cb7b593cSStephen Hemminger 	if (trie_main) {
2162cb7b593cSStephen Hemminger 		seq_printf(seq, "Main:\n");
2163cb7b593cSStephen Hemminger 		trie_collect_stats(trie_main, stat);
2164cb7b593cSStephen Hemminger 		trie_show_stats(seq, stat);
2165cb7b593cSStephen Hemminger 	}
2166cb7b593cSStephen Hemminger 	kfree(stat);
2167cb7b593cSStephen Hemminger 
216819baf839SRobert Olsson 	return 0;
216919baf839SRobert Olsson }
217019baf839SRobert Olsson 
217119baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file)
217219baf839SRobert Olsson {
2173cb7b593cSStephen Hemminger 	return single_open(file, fib_triestat_seq_show, NULL);
217419baf839SRobert Olsson }
217519baf839SRobert Olsson 
2176*9a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = {
217719baf839SRobert Olsson 	.owner	= THIS_MODULE,
217819baf839SRobert Olsson 	.open	= fib_triestat_seq_open,
217919baf839SRobert Olsson 	.read	= seq_read,
218019baf839SRobert Olsson 	.llseek	= seq_lseek,
2181cb7b593cSStephen Hemminger 	.release = single_release,
218219baf839SRobert Olsson };
218319baf839SRobert Olsson 
2184cb7b593cSStephen Hemminger static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2185cb7b593cSStephen Hemminger 				      loff_t pos)
218619baf839SRobert Olsson {
2187cb7b593cSStephen Hemminger 	loff_t idx = 0;
2188cb7b593cSStephen Hemminger 	struct node *n;
2189cb7b593cSStephen Hemminger 
2190cb7b593cSStephen Hemminger 	for (n = fib_trie_get_first(iter, trie_local);
2191cb7b593cSStephen Hemminger 	     n; ++idx, n = fib_trie_get_next(iter)) {
2192cb7b593cSStephen Hemminger 		if (pos == idx)
2193cb7b593cSStephen Hemminger 			return n;
219419baf839SRobert Olsson 	}
219519baf839SRobert Olsson 
2196cb7b593cSStephen Hemminger 	for (n = fib_trie_get_first(iter, trie_main);
2197cb7b593cSStephen Hemminger 	     n; ++idx, n = fib_trie_get_next(iter)) {
2198cb7b593cSStephen Hemminger 		if (pos == idx)
2199cb7b593cSStephen Hemminger 			return n;
220019baf839SRobert Olsson 	}
220119baf839SRobert Olsson 	return NULL;
220219baf839SRobert Olsson }
220319baf839SRobert Olsson 
220419baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
220519baf839SRobert Olsson {
2206cb7b593cSStephen Hemminger 	rcu_read_lock();
2207cb7b593cSStephen Hemminger 	if (*pos == 0)
220891b9a277SOlof Johansson 		return SEQ_START_TOKEN;
2209cb7b593cSStephen Hemminger 	return fib_trie_get_idx(seq->private, *pos - 1);
221019baf839SRobert Olsson }
221119baf839SRobert Olsson 
221219baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
221319baf839SRobert Olsson {
2214cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
2215cb7b593cSStephen Hemminger 	void *l = v;
2216cb7b593cSStephen Hemminger 
221719baf839SRobert Olsson 	++*pos;
221891b9a277SOlof Johansson 	if (v == SEQ_START_TOKEN)
2219cb7b593cSStephen Hemminger 		return fib_trie_get_idx(iter, 0);
222091b9a277SOlof Johansson 
2221cb7b593cSStephen Hemminger 	v = fib_trie_get_next(iter);
2222cb7b593cSStephen Hemminger 	BUG_ON(v == l);
2223cb7b593cSStephen Hemminger 	if (v)
2224cb7b593cSStephen Hemminger 		return v;
2225cb7b593cSStephen Hemminger 
2226cb7b593cSStephen Hemminger 	/* continue scan in next trie */
2227cb7b593cSStephen Hemminger 	if (iter->trie == trie_local)
2228cb7b593cSStephen Hemminger 		return fib_trie_get_first(iter, trie_main);
2229cb7b593cSStephen Hemminger 
2230cb7b593cSStephen Hemminger 	return NULL;
223119baf839SRobert Olsson }
223219baf839SRobert Olsson 
223319baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
223419baf839SRobert Olsson {
2235cb7b593cSStephen Hemminger 	rcu_read_unlock();
223619baf839SRobert Olsson }
223719baf839SRobert Olsson 
2238cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2239cb7b593cSStephen Hemminger {
2240cb7b593cSStephen Hemminger 	while (n-- > 0) seq_puts(seq, "   ");
2241cb7b593cSStephen Hemminger }
224219baf839SRobert Olsson 
2243cb7b593cSStephen Hemminger static inline const char *rtn_scope(enum rt_scope_t s)
2244cb7b593cSStephen Hemminger {
2245cb7b593cSStephen Hemminger 	static char buf[32];
2246cb7b593cSStephen Hemminger 
2247cb7b593cSStephen Hemminger 	switch(s) {
2248cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2249cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2250cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2251cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2252cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2253cb7b593cSStephen Hemminger 	default:
2254cb7b593cSStephen Hemminger 		snprintf(buf, sizeof(buf), "scope=%d", s);
2255cb7b593cSStephen Hemminger 		return buf;
2256cb7b593cSStephen Hemminger 	}
2257cb7b593cSStephen Hemminger }
2258cb7b593cSStephen Hemminger 
2259cb7b593cSStephen Hemminger static const char *rtn_type_names[__RTN_MAX] = {
2260cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2261cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2262cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2263cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2264cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2265cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2266cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2267cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2268cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2269cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2270cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2271cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2272cb7b593cSStephen Hemminger };
2273cb7b593cSStephen Hemminger 
2274cb7b593cSStephen Hemminger static inline const char *rtn_type(unsigned t)
2275cb7b593cSStephen Hemminger {
2276cb7b593cSStephen Hemminger 	static char buf[32];
2277cb7b593cSStephen Hemminger 
2278cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2279cb7b593cSStephen Hemminger 		return rtn_type_names[t];
2280cb7b593cSStephen Hemminger 	snprintf(buf, sizeof(buf), "type %d", t);
2281cb7b593cSStephen Hemminger 	return buf;
2282cb7b593cSStephen Hemminger }
2283cb7b593cSStephen Hemminger 
2284cb7b593cSStephen Hemminger /* Pretty print the trie */
228519baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
228619baf839SRobert Olsson {
2287cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
2288cb7b593cSStephen Hemminger 	struct node *n = v;
228919baf839SRobert Olsson 
2290cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN)
2291cb7b593cSStephen Hemminger 		return 0;
229219baf839SRobert Olsson 
2293cb7b593cSStephen Hemminger 	if (!NODE_PARENT(n)) {
2294cb7b593cSStephen Hemminger 		if (iter->trie == trie_local)
2295cb7b593cSStephen Hemminger 			seq_puts(seq, "<local>:\n");
2296cb7b593cSStephen Hemminger 		else
2297cb7b593cSStephen Hemminger 			seq_puts(seq, "<main>:\n");
2298cb7b593cSStephen Hemminger 	}
2299095b8501SRobert Olsson 
2300095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2301095b8501SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
2302095b8501SRobert Olsson 		__be32 prf = htonl(MASK_PFX(tn->key, tn->pos));
2303095b8501SRobert Olsson 
23041d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
23051d25cd6cSRobert Olsson 		seq_printf(seq, "  +-- %d.%d.%d.%d/%d %d %d %d\n",
23061d25cd6cSRobert Olsson 			   NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
23071d25cd6cSRobert Olsson 			   tn->empty_children);
23081d25cd6cSRobert Olsson 
2309cb7b593cSStephen Hemminger 	} else {
2310cb7b593cSStephen Hemminger 		struct leaf *l = (struct leaf *) n;
2311cb7b593cSStephen Hemminger 		int i;
231232ab5f80SAl Viro 		__be32 val = htonl(l->key);
2313cb7b593cSStephen Hemminger 
2314cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2315cb7b593cSStephen Hemminger 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
2316cb7b593cSStephen Hemminger 		for (i = 32; i >= 0; i--) {
2317772cb712SRobert Olsson 			struct leaf_info *li = find_leaf_info(l, i);
2318cb7b593cSStephen Hemminger 			if (li) {
2319cb7b593cSStephen Hemminger 				struct fib_alias *fa;
2320cb7b593cSStephen Hemminger 				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2321cb7b593cSStephen Hemminger 					seq_indent(seq, iter->depth+1);
2322cb7b593cSStephen Hemminger 					seq_printf(seq, "  /%d %s %s", i,
2323cb7b593cSStephen Hemminger 						   rtn_scope(fa->fa_scope),
2324cb7b593cSStephen Hemminger 						   rtn_type(fa->fa_type));
2325cb7b593cSStephen Hemminger 					if (fa->fa_tos)
2326cb7b593cSStephen Hemminger 						seq_printf(seq, "tos =%d\n",
2327cb7b593cSStephen Hemminger 							   fa->fa_tos);
2328cb7b593cSStephen Hemminger 					seq_putc(seq, '\n');
2329cb7b593cSStephen Hemminger 				}
2330cb7b593cSStephen Hemminger 			}
2331cb7b593cSStephen Hemminger 		}
233219baf839SRobert Olsson 	}
233319baf839SRobert Olsson 
233419baf839SRobert Olsson 	return 0;
233519baf839SRobert Olsson }
233619baf839SRobert Olsson 
233719baf839SRobert Olsson static struct seq_operations fib_trie_seq_ops = {
233819baf839SRobert Olsson 	.start  = fib_trie_seq_start,
233919baf839SRobert Olsson 	.next   = fib_trie_seq_next,
234019baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
234119baf839SRobert Olsson 	.show   = fib_trie_seq_show,
234219baf839SRobert Olsson };
234319baf839SRobert Olsson 
234419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file)
234519baf839SRobert Olsson {
234619baf839SRobert Olsson 	struct seq_file *seq;
234719baf839SRobert Olsson 	int rc = -ENOMEM;
2348cb7b593cSStephen Hemminger 	struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2349cb7b593cSStephen Hemminger 
2350cb7b593cSStephen Hemminger 	if (!s)
2351cb7b593cSStephen Hemminger 		goto out;
235219baf839SRobert Olsson 
235319baf839SRobert Olsson 	rc = seq_open(file, &fib_trie_seq_ops);
235419baf839SRobert Olsson 	if (rc)
235519baf839SRobert Olsson 		goto out_kfree;
235619baf839SRobert Olsson 
235719baf839SRobert Olsson 	seq	     = file->private_data;
2358cb7b593cSStephen Hemminger 	seq->private = s;
2359cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
236019baf839SRobert Olsson out:
236119baf839SRobert Olsson 	return rc;
236219baf839SRobert Olsson out_kfree:
2363cb7b593cSStephen Hemminger 	kfree(s);
236419baf839SRobert Olsson 	goto out;
236519baf839SRobert Olsson }
236619baf839SRobert Olsson 
2367*9a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = {
236819baf839SRobert Olsson 	.owner  = THIS_MODULE,
236919baf839SRobert Olsson 	.open   = fib_trie_seq_open,
237019baf839SRobert Olsson 	.read   = seq_read,
237119baf839SRobert Olsson 	.llseek = seq_lseek,
237219baf839SRobert Olsson 	.release = seq_release_private,
237319baf839SRobert Olsson };
237419baf839SRobert Olsson 
237532ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2376cb7b593cSStephen Hemminger {
2377cb7b593cSStephen Hemminger 	static unsigned type2flags[RTN_MAX + 1] = {
2378cb7b593cSStephen Hemminger 		[7] = RTF_REJECT, [8] = RTF_REJECT,
2379cb7b593cSStephen Hemminger 	};
2380cb7b593cSStephen Hemminger 	unsigned flags = type2flags[type];
2381cb7b593cSStephen Hemminger 
2382cb7b593cSStephen Hemminger 	if (fi && fi->fib_nh->nh_gw)
2383cb7b593cSStephen Hemminger 		flags |= RTF_GATEWAY;
238432ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2385cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2386cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2387cb7b593cSStephen Hemminger 	return flags;
2388cb7b593cSStephen Hemminger }
2389cb7b593cSStephen Hemminger 
2390cb7b593cSStephen Hemminger /*
2391cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2392cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2393cb7b593cSStephen Hemminger  * 	and needs to be same as fib_hash output to avoid breaking
2394cb7b593cSStephen Hemminger  *	legacy utilities
2395cb7b593cSStephen Hemminger  */
2396cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2397cb7b593cSStephen Hemminger {
2398c9e53cbeSPatrick McHardy 	const struct fib_trie_iter *iter = seq->private;
2399cb7b593cSStephen Hemminger 	struct leaf *l = v;
2400cb7b593cSStephen Hemminger 	int i;
2401cb7b593cSStephen Hemminger 	char bf[128];
2402cb7b593cSStephen Hemminger 
2403cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2404cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2405cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2406cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2407cb7b593cSStephen Hemminger 		return 0;
2408cb7b593cSStephen Hemminger 	}
2409cb7b593cSStephen Hemminger 
2410c9e53cbeSPatrick McHardy 	if (iter->trie == trie_local)
2411c9e53cbeSPatrick McHardy 		return 0;
2412cb7b593cSStephen Hemminger 	if (IS_TNODE(l))
2413cb7b593cSStephen Hemminger 		return 0;
2414cb7b593cSStephen Hemminger 
2415cb7b593cSStephen Hemminger 	for (i=32; i>=0; i--) {
2416772cb712SRobert Olsson 		struct leaf_info *li = find_leaf_info(l, i);
2417cb7b593cSStephen Hemminger 		struct fib_alias *fa;
241832ab5f80SAl Viro 		__be32 mask, prefix;
2419cb7b593cSStephen Hemminger 
2420cb7b593cSStephen Hemminger 		if (!li)
2421cb7b593cSStephen Hemminger 			continue;
2422cb7b593cSStephen Hemminger 
2423cb7b593cSStephen Hemminger 		mask = inet_make_mask(li->plen);
2424cb7b593cSStephen Hemminger 		prefix = htonl(l->key);
2425cb7b593cSStephen Hemminger 
2426cb7b593cSStephen Hemminger 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
24271371e37dSHerbert Xu 			const struct fib_info *fi = fa->fa_info;
2428cb7b593cSStephen Hemminger 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2429cb7b593cSStephen Hemminger 
2430cb7b593cSStephen Hemminger 			if (fa->fa_type == RTN_BROADCAST
2431cb7b593cSStephen Hemminger 			    || fa->fa_type == RTN_MULTICAST)
2432cb7b593cSStephen Hemminger 				continue;
2433cb7b593cSStephen Hemminger 
2434cb7b593cSStephen Hemminger 			if (fi)
2435cb7b593cSStephen Hemminger 				snprintf(bf, sizeof(bf),
2436cb7b593cSStephen Hemminger 					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2437cb7b593cSStephen Hemminger 					 fi->fib_dev ? fi->fib_dev->name : "*",
2438cb7b593cSStephen Hemminger 					 prefix,
2439cb7b593cSStephen Hemminger 					 fi->fib_nh->nh_gw, flags, 0, 0,
2440cb7b593cSStephen Hemminger 					 fi->fib_priority,
2441cb7b593cSStephen Hemminger 					 mask,
2442cb7b593cSStephen Hemminger 					 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2443cb7b593cSStephen Hemminger 					 fi->fib_window,
2444cb7b593cSStephen Hemminger 					 fi->fib_rtt >> 3);
2445cb7b593cSStephen Hemminger 			else
2446cb7b593cSStephen Hemminger 				snprintf(bf, sizeof(bf),
2447cb7b593cSStephen Hemminger 					 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2448cb7b593cSStephen Hemminger 					 prefix, 0, flags, 0, 0, 0,
2449cb7b593cSStephen Hemminger 					 mask, 0, 0, 0);
2450cb7b593cSStephen Hemminger 
2451cb7b593cSStephen Hemminger 			seq_printf(seq, "%-127s\n", bf);
2452cb7b593cSStephen Hemminger 		}
2453cb7b593cSStephen Hemminger 	}
2454cb7b593cSStephen Hemminger 
2455cb7b593cSStephen Hemminger 	return 0;
2456cb7b593cSStephen Hemminger }
2457cb7b593cSStephen Hemminger 
2458cb7b593cSStephen Hemminger static struct seq_operations fib_route_seq_ops = {
2459cb7b593cSStephen Hemminger 	.start  = fib_trie_seq_start,
2460cb7b593cSStephen Hemminger 	.next   = fib_trie_seq_next,
2461cb7b593cSStephen Hemminger 	.stop   = fib_trie_seq_stop,
2462cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
2463cb7b593cSStephen Hemminger };
2464cb7b593cSStephen Hemminger 
2465cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file)
2466cb7b593cSStephen Hemminger {
2467cb7b593cSStephen Hemminger 	struct seq_file *seq;
2468cb7b593cSStephen Hemminger 	int rc = -ENOMEM;
2469cb7b593cSStephen Hemminger 	struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2470cb7b593cSStephen Hemminger 
2471cb7b593cSStephen Hemminger 	if (!s)
2472cb7b593cSStephen Hemminger 		goto out;
2473cb7b593cSStephen Hemminger 
2474cb7b593cSStephen Hemminger 	rc = seq_open(file, &fib_route_seq_ops);
2475cb7b593cSStephen Hemminger 	if (rc)
2476cb7b593cSStephen Hemminger 		goto out_kfree;
2477cb7b593cSStephen Hemminger 
2478cb7b593cSStephen Hemminger 	seq	     = file->private_data;
2479cb7b593cSStephen Hemminger 	seq->private = s;
2480cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
2481cb7b593cSStephen Hemminger out:
2482cb7b593cSStephen Hemminger 	return rc;
2483cb7b593cSStephen Hemminger out_kfree:
2484cb7b593cSStephen Hemminger 	kfree(s);
2485cb7b593cSStephen Hemminger 	goto out;
2486cb7b593cSStephen Hemminger }
2487cb7b593cSStephen Hemminger 
2488*9a32144eSArjan van de Ven static const struct file_operations fib_route_fops = {
2489cb7b593cSStephen Hemminger 	.owner  = THIS_MODULE,
2490cb7b593cSStephen Hemminger 	.open   = fib_route_seq_open,
2491cb7b593cSStephen Hemminger 	.read   = seq_read,
2492cb7b593cSStephen Hemminger 	.llseek = seq_lseek,
2493cb7b593cSStephen Hemminger 	.release = seq_release_private,
2494cb7b593cSStephen Hemminger };
2495cb7b593cSStephen Hemminger 
249619baf839SRobert Olsson int __init fib_proc_init(void)
249719baf839SRobert Olsson {
2498cb7b593cSStephen Hemminger 	if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2499cb7b593cSStephen Hemminger 		goto out1;
2500cb7b593cSStephen Hemminger 
2501cb7b593cSStephen Hemminger 	if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2502cb7b593cSStephen Hemminger 		goto out2;
2503cb7b593cSStephen Hemminger 
2504cb7b593cSStephen Hemminger 	if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2505cb7b593cSStephen Hemminger 		goto out3;
2506cb7b593cSStephen Hemminger 
250719baf839SRobert Olsson 	return 0;
2508cb7b593cSStephen Hemminger 
2509cb7b593cSStephen Hemminger out3:
2510cb7b593cSStephen Hemminger 	proc_net_remove("fib_triestat");
2511cb7b593cSStephen Hemminger out2:
2512cb7b593cSStephen Hemminger 	proc_net_remove("fib_trie");
2513cb7b593cSStephen Hemminger out1:
2514cb7b593cSStephen Hemminger 	return -ENOMEM;
251519baf839SRobert Olsson }
251619baf839SRobert Olsson 
251719baf839SRobert Olsson void __init fib_proc_exit(void)
251819baf839SRobert Olsson {
251919baf839SRobert Olsson 	proc_net_remove("fib_trie");
2520cb7b593cSStephen Hemminger 	proc_net_remove("fib_triestat");
2521cb7b593cSStephen Hemminger 	proc_net_remove("route");
252219baf839SRobert Olsson }
252319baf839SRobert Olsson 
252419baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
2525