xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 5c74501f)
119baf839SRobert Olsson /*
219baf839SRobert Olsson  *   This program is free software; you can redistribute it and/or
319baf839SRobert Olsson  *   modify it under the terms of the GNU General Public License
419baf839SRobert Olsson  *   as published by the Free Software Foundation; either version
519baf839SRobert Olsson  *   2 of the License, or (at your option) any later version.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
819baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
1119baf839SRobert Olsson  *     Agricultural Sciences.
1219baf839SRobert Olsson  *
1319baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1419baf839SRobert Olsson  *
1525985edcSLucas De Marchi  * This work is based on the LPC-trie which is originally described in:
1619baf839SRobert Olsson  *
1719baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1819baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19631dd1a8SJustin P. Mattock  * http://www.csc.kth.se/~snilsson/software/dyntrie2/
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  *
2219baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2419baf839SRobert Olsson  *
2519baf839SRobert Olsson  *
2619baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2719baf839SRobert Olsson  *
2819baf839SRobert Olsson  *
2919baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3019baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
3119baf839SRobert Olsson  *		interface as the means of communication with the user level.
3219baf839SRobert Olsson  *
3319baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
3419baf839SRobert Olsson  *
3519baf839SRobert Olsson  *
3619baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3719baf839SRobert Olsson  *
3819baf839SRobert Olsson  *		This program is free software; you can redistribute it and/or
3919baf839SRobert Olsson  *		modify it under the terms of the GNU General Public License
4019baf839SRobert Olsson  *		as published by the Free Software Foundation; either version
4119baf839SRobert Olsson  *		2 of the License, or (at your option) any later version.
42fd966255SRobert Olsson  *
43fd966255SRobert Olsson  * Substantial contributions to this work comes from:
44fd966255SRobert Olsson  *
45fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
46fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
47fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
48fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
4919baf839SRobert Olsson  */
5019baf839SRobert Olsson 
5180b71b80SJens Låås #define VERSION "0.409"
5219baf839SRobert Olsson 
5319baf839SRobert Olsson #include <asm/uaccess.h>
5419baf839SRobert Olsson #include <asm/system.h>
551977f032SJiri Slaby #include <linux/bitops.h>
5619baf839SRobert Olsson #include <linux/types.h>
5719baf839SRobert Olsson #include <linux/kernel.h>
5819baf839SRobert Olsson #include <linux/mm.h>
5919baf839SRobert Olsson #include <linux/string.h>
6019baf839SRobert Olsson #include <linux/socket.h>
6119baf839SRobert Olsson #include <linux/sockios.h>
6219baf839SRobert Olsson #include <linux/errno.h>
6319baf839SRobert Olsson #include <linux/in.h>
6419baf839SRobert Olsson #include <linux/inet.h>
65cd8787abSStephen Hemminger #include <linux/inetdevice.h>
6619baf839SRobert Olsson #include <linux/netdevice.h>
6719baf839SRobert Olsson #include <linux/if_arp.h>
6819baf839SRobert Olsson #include <linux/proc_fs.h>
692373ce1cSRobert Olsson #include <linux/rcupdate.h>
7019baf839SRobert Olsson #include <linux/skbuff.h>
7119baf839SRobert Olsson #include <linux/netlink.h>
7219baf839SRobert Olsson #include <linux/init.h>
7319baf839SRobert Olsson #include <linux/list.h>
745a0e3ad6STejun Heo #include <linux/slab.h>
7570c71606SPaul Gortmaker #include <linux/prefetch.h>
76457c4cbcSEric W. Biederman #include <net/net_namespace.h>
7719baf839SRobert Olsson #include <net/ip.h>
7819baf839SRobert Olsson #include <net/protocol.h>
7919baf839SRobert Olsson #include <net/route.h>
8019baf839SRobert Olsson #include <net/tcp.h>
8119baf839SRobert Olsson #include <net/sock.h>
8219baf839SRobert Olsson #include <net/ip_fib.h>
8319baf839SRobert Olsson #include "fib_lookup.h"
8419baf839SRobert Olsson 
8506ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
8619baf839SRobert Olsson 
8719baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key))
8819baf839SRobert Olsson 
8919baf839SRobert Olsson typedef unsigned int t_key;
9019baf839SRobert Olsson 
9119baf839SRobert Olsson #define T_TNODE 0
9219baf839SRobert Olsson #define T_LEAF  1
9319baf839SRobert Olsson #define NODE_TYPE_MASK	0x1UL
942373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
952373ce1cSRobert Olsson 
9691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF))
9791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF)
9819baf839SRobert Olsson 
99b299e4f0SDavid S. Miller struct rt_trie_node {
10091b9a277SOlof Johansson 	unsigned long parent;
1018d965444SEric Dumazet 	t_key key;
10219baf839SRobert Olsson };
10319baf839SRobert Olsson 
10419baf839SRobert Olsson struct leaf {
10591b9a277SOlof Johansson 	unsigned long parent;
1068d965444SEric Dumazet 	t_key key;
10719baf839SRobert Olsson 	struct hlist_head list;
1082373ce1cSRobert Olsson 	struct rcu_head rcu;
10919baf839SRobert Olsson };
11019baf839SRobert Olsson 
11119baf839SRobert Olsson struct leaf_info {
11219baf839SRobert Olsson 	struct hlist_node hlist;
11319baf839SRobert Olsson 	int plen;
1145c74501fSEric Dumazet 	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
11519baf839SRobert Olsson 	struct list_head falh;
1165c74501fSEric Dumazet 	struct rcu_head rcu;
11719baf839SRobert Olsson };
11819baf839SRobert Olsson 
11919baf839SRobert Olsson struct tnode {
12091b9a277SOlof Johansson 	unsigned long parent;
1218d965444SEric Dumazet 	t_key key;
122112d8cfcSEric Dumazet 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
123112d8cfcSEric Dumazet 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
1248d965444SEric Dumazet 	unsigned int full_children;	/* KEYLENGTH bits needed */
1258d965444SEric Dumazet 	unsigned int empty_children;	/* KEYLENGTH bits needed */
12615be75cdSStephen Hemminger 	union {
1272373ce1cSRobert Olsson 		struct rcu_head rcu;
12815be75cdSStephen Hemminger 		struct work_struct work;
129e0f7cb8cSJarek Poplawski 		struct tnode *tnode_free;
13015be75cdSStephen Hemminger 	};
1310a5c0475SEric Dumazet 	struct rt_trie_node __rcu *child[0];
13219baf839SRobert Olsson };
13319baf839SRobert Olsson 
13419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
13519baf839SRobert Olsson struct trie_use_stats {
13619baf839SRobert Olsson 	unsigned int gets;
13719baf839SRobert Olsson 	unsigned int backtrack;
13819baf839SRobert Olsson 	unsigned int semantic_match_passed;
13919baf839SRobert Olsson 	unsigned int semantic_match_miss;
14019baf839SRobert Olsson 	unsigned int null_node_hit;
1412f36895aSRobert Olsson 	unsigned int resize_node_skipped;
14219baf839SRobert Olsson };
14319baf839SRobert Olsson #endif
14419baf839SRobert Olsson 
14519baf839SRobert Olsson struct trie_stat {
14619baf839SRobert Olsson 	unsigned int totdepth;
14719baf839SRobert Olsson 	unsigned int maxdepth;
14819baf839SRobert Olsson 	unsigned int tnodes;
14919baf839SRobert Olsson 	unsigned int leaves;
15019baf839SRobert Olsson 	unsigned int nullpointers;
15193672292SStephen Hemminger 	unsigned int prefixes;
15206ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
15319baf839SRobert Olsson };
15419baf839SRobert Olsson 
15519baf839SRobert Olsson struct trie {
1560a5c0475SEric Dumazet 	struct rt_trie_node __rcu *trie;
15719baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15819baf839SRobert Olsson 	struct trie_use_stats stats;
15919baf839SRobert Olsson #endif
16019baf839SRobert Olsson };
16119baf839SRobert Olsson 
162b299e4f0SDavid S. Miller static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
163b299e4f0SDavid S. Miller static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
164a07f5f50SStephen Hemminger 				  int wasfull);
165b299e4f0SDavid S. Miller static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
1662f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn);
1672f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn);
168e0f7cb8cSJarek Poplawski /* tnodes to free after resize(); protected by RTNL */
169e0f7cb8cSJarek Poplawski static struct tnode *tnode_free_head;
170c3059477SJarek Poplawski static size_t tnode_free_size;
171c3059477SJarek Poplawski 
172c3059477SJarek Poplawski /*
173c3059477SJarek Poplawski  * synchronize_rcu after call_rcu for that many pages; it should be especially
174c3059477SJarek Poplawski  * useful before resizing the root node with PREEMPT_NONE configs; the value was
175c3059477SJarek Poplawski  * obtained experimentally, aiming to avoid visible slowdown.
176c3059477SJarek Poplawski  */
177c3059477SJarek Poplawski static const int sync_pages = 128;
17819baf839SRobert Olsson 
179e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly;
180bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly;
18119baf839SRobert Olsson 
1820a5c0475SEric Dumazet /*
1830a5c0475SEric Dumazet  * caller must hold RTNL
1840a5c0475SEric Dumazet  */
1850a5c0475SEric Dumazet static inline struct tnode *node_parent(const struct rt_trie_node *node)
18606801916SStephen Hemminger {
1870a5c0475SEric Dumazet 	unsigned long parent;
1880a5c0475SEric Dumazet 
1890a5c0475SEric Dumazet 	parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
1900a5c0475SEric Dumazet 
1910a5c0475SEric Dumazet 	return (struct tnode *)(parent & ~NODE_TYPE_MASK);
192b59cfbf7SEric Dumazet }
19306801916SStephen Hemminger 
1940a5c0475SEric Dumazet /*
1950a5c0475SEric Dumazet  * caller must hold RCU read lock or RTNL
1960a5c0475SEric Dumazet  */
1970a5c0475SEric Dumazet static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
198b59cfbf7SEric Dumazet {
1990a5c0475SEric Dumazet 	unsigned long parent;
200b59cfbf7SEric Dumazet 
2010a5c0475SEric Dumazet 	parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
2020a5c0475SEric Dumazet 							   lockdep_rtnl_is_held());
2030a5c0475SEric Dumazet 
2040a5c0475SEric Dumazet 	return (struct tnode *)(parent & ~NODE_TYPE_MASK);
20506801916SStephen Hemminger }
20606801916SStephen Hemminger 
2076440cc9eSStephen Hemminger /* Same as rcu_assign_pointer
2086440cc9eSStephen Hemminger  * but that macro() assumes that value is a pointer.
2096440cc9eSStephen Hemminger  */
210b299e4f0SDavid S. Miller static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
21106801916SStephen Hemminger {
2126440cc9eSStephen Hemminger 	smp_wmb();
2136440cc9eSStephen Hemminger 	node->parent = (unsigned long)ptr | NODE_TYPE(node);
21406801916SStephen Hemminger }
2152373ce1cSRobert Olsson 
2160a5c0475SEric Dumazet /*
2170a5c0475SEric Dumazet  * caller must hold RTNL
2180a5c0475SEric Dumazet  */
2190a5c0475SEric Dumazet static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
22019baf839SRobert Olsson {
221b59cfbf7SEric Dumazet 	BUG_ON(i >= 1U << tn->bits);
22219baf839SRobert Olsson 
2230a5c0475SEric Dumazet 	return rtnl_dereference(tn->child[i]);
224b59cfbf7SEric Dumazet }
225b59cfbf7SEric Dumazet 
2260a5c0475SEric Dumazet /*
2270a5c0475SEric Dumazet  * caller must hold RCU read lock or RTNL
2280a5c0475SEric Dumazet  */
2290a5c0475SEric Dumazet static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
230b59cfbf7SEric Dumazet {
2310a5c0475SEric Dumazet 	BUG_ON(i >= 1U << tn->bits);
232b59cfbf7SEric Dumazet 
2330a5c0475SEric Dumazet 	return rcu_dereference_rtnl(tn->child[i]);
23419baf839SRobert Olsson }
23519baf839SRobert Olsson 
236bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn)
23719baf839SRobert Olsson {
23819baf839SRobert Olsson 	return 1 << tn->bits;
23919baf839SRobert Olsson }
24019baf839SRobert Olsson 
2413b004569SDavid S. Miller static inline t_key mask_pfx(t_key k, unsigned int l)
242ab66b4a7SStephen Hemminger {
243ab66b4a7SStephen Hemminger 	return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
244ab66b4a7SStephen Hemminger }
245ab66b4a7SStephen Hemminger 
2463b004569SDavid S. Miller static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
24719baf839SRobert Olsson {
24819baf839SRobert Olsson 	if (offset < KEYLENGTH)
24919baf839SRobert Olsson 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
25019baf839SRobert Olsson 	else
25119baf839SRobert Olsson 		return 0;
25219baf839SRobert Olsson }
25319baf839SRobert Olsson 
25419baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b)
25519baf839SRobert Olsson {
25619baf839SRobert Olsson 	return a == b;
25719baf839SRobert Olsson }
25819baf839SRobert Olsson 
25919baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
26019baf839SRobert Olsson {
26119baf839SRobert Olsson 	if (bits == 0 || offset >= KEYLENGTH)
26219baf839SRobert Olsson 		return 1;
26319baf839SRobert Olsson 	bits = bits > KEYLENGTH ? KEYLENGTH : bits;
26419baf839SRobert Olsson 	return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
26519baf839SRobert Olsson }
26619baf839SRobert Olsson 
26719baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b)
26819baf839SRobert Olsson {
26919baf839SRobert Olsson 	t_key diff = a ^ b;
27019baf839SRobert Olsson 	int i = offset;
27119baf839SRobert Olsson 
27219baf839SRobert Olsson 	if (!diff)
27319baf839SRobert Olsson 		return 0;
27419baf839SRobert Olsson 	while ((diff << i) >> (KEYLENGTH-1) == 0)
27519baf839SRobert Olsson 		i++;
27619baf839SRobert Olsson 	return i;
27719baf839SRobert Olsson }
27819baf839SRobert Olsson 
27919baf839SRobert Olsson /*
28019baf839SRobert Olsson   To understand this stuff, an understanding of keys and all their bits is
28119baf839SRobert Olsson   necessary. Every node in the trie has a key associated with it, but not
28219baf839SRobert Olsson   all of the bits in that key are significant.
28319baf839SRobert Olsson 
28419baf839SRobert Olsson   Consider a node 'n' and its parent 'tp'.
28519baf839SRobert Olsson 
28619baf839SRobert Olsson   If n is a leaf, every bit in its key is significant. Its presence is
287772cb712SRobert Olsson   necessitated by path compression, since during a tree traversal (when
28819baf839SRobert Olsson   searching for a leaf - unless we are doing an insertion) we will completely
28919baf839SRobert Olsson   ignore all skipped bits we encounter. Thus we need to verify, at the end of
29019baf839SRobert Olsson   a potentially successful search, that we have indeed been walking the
29119baf839SRobert Olsson   correct key path.
29219baf839SRobert Olsson 
29319baf839SRobert Olsson   Note that we can never "miss" the correct key in the tree if present by
29419baf839SRobert Olsson   following the wrong path. Path compression ensures that segments of the key
29519baf839SRobert Olsson   that are the same for all keys with a given prefix are skipped, but the
29619baf839SRobert Olsson   skipped part *is* identical for each node in the subtrie below the skipped
29719baf839SRobert Olsson   bit! trie_insert() in this implementation takes care of that - note the
29819baf839SRobert Olsson   call to tkey_sub_equals() in trie_insert().
29919baf839SRobert Olsson 
30019baf839SRobert Olsson   if n is an internal node - a 'tnode' here, the various parts of its key
30119baf839SRobert Olsson   have many different meanings.
30219baf839SRobert Olsson 
30319baf839SRobert Olsson   Example:
30419baf839SRobert Olsson   _________________________________________________________________
30519baf839SRobert Olsson   | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
30619baf839SRobert Olsson   -----------------------------------------------------------------
30719baf839SRobert Olsson     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
30819baf839SRobert Olsson 
30919baf839SRobert Olsson   _________________________________________________________________
31019baf839SRobert Olsson   | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
31119baf839SRobert Olsson   -----------------------------------------------------------------
31219baf839SRobert Olsson    16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
31319baf839SRobert Olsson 
31419baf839SRobert Olsson   tp->pos = 7
31519baf839SRobert Olsson   tp->bits = 3
31619baf839SRobert Olsson   n->pos = 15
31719baf839SRobert Olsson   n->bits = 4
31819baf839SRobert Olsson 
31919baf839SRobert Olsson   First, let's just ignore the bits that come before the parent tp, that is
32019baf839SRobert Olsson   the bits from 0 to (tp->pos-1). They are *known* but at this point we do
32119baf839SRobert Olsson   not use them for anything.
32219baf839SRobert Olsson 
32319baf839SRobert Olsson   The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
32419baf839SRobert Olsson   index into the parent's child array. That is, they will be used to find
32519baf839SRobert Olsson   'n' among tp's children.
32619baf839SRobert Olsson 
32719baf839SRobert Olsson   The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
32819baf839SRobert Olsson   for the node n.
32919baf839SRobert Olsson 
33019baf839SRobert Olsson   All the bits we have seen so far are significant to the node n. The rest
33119baf839SRobert Olsson   of the bits are really not needed or indeed known in n->key.
33219baf839SRobert Olsson 
33319baf839SRobert Olsson   The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
33419baf839SRobert Olsson   n's child array, and will of course be different for each child.
33519baf839SRobert Olsson 
336c877efb2SStephen Hemminger 
33719baf839SRobert Olsson   The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
33819baf839SRobert Olsson   at this point.
33919baf839SRobert Olsson 
34019baf839SRobert Olsson */
34119baf839SRobert Olsson 
3420c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn)
34319baf839SRobert Olsson {
3440c7770c7SStephen Hemminger 	WARN_ON(tn && tn->pos+tn->bits > 32);
34519baf839SRobert Olsson }
34619baf839SRobert Olsson 
347f5026fabSDenis V. Lunev static const int halve_threshold = 25;
348f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
349345aa031SJarek Poplawski static const int halve_threshold_root = 15;
35080b71b80SJens Låås static const int inflate_threshold_root = 30;
3512373ce1cSRobert Olsson 
3522373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
3532373ce1cSRobert Olsson {
3542373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
3552373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
3562373ce1cSRobert Olsson }
3572373ce1cSRobert Olsson 
3582373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3592373ce1cSRobert Olsson {
3602373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3612373ce1cSRobert Olsson }
3622373ce1cSRobert Olsson 
3632373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head)
3642373ce1cSRobert Olsson {
365bc3c8c1eSStephen Hemminger 	struct leaf *l = container_of(head, struct leaf, rcu);
366bc3c8c1eSStephen Hemminger 	kmem_cache_free(trie_leaf_kmem, l);
3672373ce1cSRobert Olsson }
3682373ce1cSRobert Olsson 
369387a5487SStephen Hemminger static inline void free_leaf(struct leaf *l)
370387a5487SStephen Hemminger {
371387a5487SStephen Hemminger 	call_rcu_bh(&l->rcu, __leaf_free_rcu);
372387a5487SStephen Hemminger }
373387a5487SStephen Hemminger 
3742373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf)
3752373ce1cSRobert Olsson {
376bceb0f45SLai Jiangshan 	kfree_rcu(leaf, rcu);
3772373ce1cSRobert Olsson }
3782373ce1cSRobert Olsson 
3798d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size)
3802373ce1cSRobert Olsson {
3812373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3828d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
38315be75cdSStephen Hemminger 	else
3847a1c8e5aSEric Dumazet 		return vzalloc(size);
38515be75cdSStephen Hemminger }
3862373ce1cSRobert Olsson 
38715be75cdSStephen Hemminger static void __tnode_vfree(struct work_struct *arg)
38815be75cdSStephen Hemminger {
38915be75cdSStephen Hemminger 	struct tnode *tn = container_of(arg, struct tnode, work);
39015be75cdSStephen Hemminger 	vfree(tn);
3912373ce1cSRobert Olsson }
3922373ce1cSRobert Olsson 
3932373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head)
3942373ce1cSRobert Olsson {
3952373ce1cSRobert Olsson 	struct tnode *tn = container_of(head, struct tnode, rcu);
3968d965444SEric Dumazet 	size_t size = sizeof(struct tnode) +
397b299e4f0SDavid S. Miller 		      (sizeof(struct rt_trie_node *) << tn->bits);
3982373ce1cSRobert Olsson 
3992373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
4002373ce1cSRobert Olsson 		kfree(tn);
40115be75cdSStephen Hemminger 	else {
40215be75cdSStephen Hemminger 		INIT_WORK(&tn->work, __tnode_vfree);
40315be75cdSStephen Hemminger 		schedule_work(&tn->work);
40415be75cdSStephen Hemminger 	}
4052373ce1cSRobert Olsson }
4062373ce1cSRobert Olsson 
4072373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn)
4082373ce1cSRobert Olsson {
409387a5487SStephen Hemminger 	if (IS_LEAF(tn))
410387a5487SStephen Hemminger 		free_leaf((struct leaf *) tn);
411387a5487SStephen Hemminger 	else
4122373ce1cSRobert Olsson 		call_rcu(&tn->rcu, __tnode_free_rcu);
4132373ce1cSRobert Olsson }
4142373ce1cSRobert Olsson 
415e0f7cb8cSJarek Poplawski static void tnode_free_safe(struct tnode *tn)
416e0f7cb8cSJarek Poplawski {
417e0f7cb8cSJarek Poplawski 	BUG_ON(IS_LEAF(tn));
418e0f7cb8cSJarek Poplawski 	tn->tnode_free = tnode_free_head;
419e0f7cb8cSJarek Poplawski 	tnode_free_head = tn;
420c3059477SJarek Poplawski 	tnode_free_size += sizeof(struct tnode) +
421b299e4f0SDavid S. Miller 			   (sizeof(struct rt_trie_node *) << tn->bits);
422e0f7cb8cSJarek Poplawski }
423e0f7cb8cSJarek Poplawski 
424e0f7cb8cSJarek Poplawski static void tnode_free_flush(void)
425e0f7cb8cSJarek Poplawski {
426e0f7cb8cSJarek Poplawski 	struct tnode *tn;
427e0f7cb8cSJarek Poplawski 
428e0f7cb8cSJarek Poplawski 	while ((tn = tnode_free_head)) {
429e0f7cb8cSJarek Poplawski 		tnode_free_head = tn->tnode_free;
430e0f7cb8cSJarek Poplawski 		tn->tnode_free = NULL;
431e0f7cb8cSJarek Poplawski 		tnode_free(tn);
432e0f7cb8cSJarek Poplawski 	}
433c3059477SJarek Poplawski 
434c3059477SJarek Poplawski 	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
435c3059477SJarek Poplawski 		tnode_free_size = 0;
436c3059477SJarek Poplawski 		synchronize_rcu();
437c3059477SJarek Poplawski 	}
438e0f7cb8cSJarek Poplawski }
439e0f7cb8cSJarek Poplawski 
44019baf839SRobert Olsson static struct leaf *leaf_new(void)
44119baf839SRobert Olsson {
442bc3c8c1eSStephen Hemminger 	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
44319baf839SRobert Olsson 	if (l) {
4442373ce1cSRobert Olsson 		l->parent = T_LEAF;
44519baf839SRobert Olsson 		INIT_HLIST_HEAD(&l->list);
44619baf839SRobert Olsson 	}
44719baf839SRobert Olsson 	return l;
44819baf839SRobert Olsson }
44919baf839SRobert Olsson 
45019baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen)
45119baf839SRobert Olsson {
45219baf839SRobert Olsson 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
4532373ce1cSRobert Olsson 	if (li) {
45419baf839SRobert Olsson 		li->plen = plen;
4555c74501fSEric Dumazet 		li->mask_plen = ntohl(inet_make_mask(plen));
45619baf839SRobert Olsson 		INIT_LIST_HEAD(&li->falh);
4572373ce1cSRobert Olsson 	}
45819baf839SRobert Olsson 	return li;
45919baf839SRobert Olsson }
46019baf839SRobert Olsson 
46119baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits)
46219baf839SRobert Olsson {
463b299e4f0SDavid S. Miller 	size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
464f0e36f8cSPatrick McHardy 	struct tnode *tn = tnode_alloc(sz);
46519baf839SRobert Olsson 
46619baf839SRobert Olsson 	if (tn) {
4672373ce1cSRobert Olsson 		tn->parent = T_TNODE;
46819baf839SRobert Olsson 		tn->pos = pos;
46919baf839SRobert Olsson 		tn->bits = bits;
47019baf839SRobert Olsson 		tn->key = key;
47119baf839SRobert Olsson 		tn->full_children = 0;
47219baf839SRobert Olsson 		tn->empty_children = 1<<bits;
47319baf839SRobert Olsson 	}
474c877efb2SStephen Hemminger 
475a034ee3cSEric Dumazet 	pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
476b299e4f0SDavid S. Miller 		 sizeof(struct rt_trie_node) << bits);
47719baf839SRobert Olsson 	return tn;
47819baf839SRobert Olsson }
47919baf839SRobert Olsson 
48019baf839SRobert Olsson /*
48119baf839SRobert Olsson  * Check whether a tnode 'n' is "full", i.e. it is an internal node
48219baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
48319baf839SRobert Olsson  */
48419baf839SRobert Olsson 
485b299e4f0SDavid S. Miller static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
48619baf839SRobert Olsson {
48719baf839SRobert Olsson 	if (n == NULL || IS_LEAF(n))
48819baf839SRobert Olsson 		return 0;
48919baf839SRobert Olsson 
49019baf839SRobert Olsson 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
49119baf839SRobert Olsson }
49219baf839SRobert Olsson 
493a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i,
494b299e4f0SDavid S. Miller 			     struct rt_trie_node *n)
49519baf839SRobert Olsson {
49619baf839SRobert Olsson 	tnode_put_child_reorg(tn, i, n, -1);
49719baf839SRobert Olsson }
49819baf839SRobert Olsson 
49919baf839SRobert Olsson  /*
50019baf839SRobert Olsson   * Add a child at position i overwriting the old value.
50119baf839SRobert Olsson   * Update the value of full_children and empty_children.
50219baf839SRobert Olsson   */
50319baf839SRobert Olsson 
504b299e4f0SDavid S. Miller static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
505a07f5f50SStephen Hemminger 				  int wasfull)
50619baf839SRobert Olsson {
5070a5c0475SEric Dumazet 	struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
50819baf839SRobert Olsson 	int isfull;
50919baf839SRobert Olsson 
5100c7770c7SStephen Hemminger 	BUG_ON(i >= 1<<tn->bits);
5110c7770c7SStephen Hemminger 
51219baf839SRobert Olsson 	/* update emptyChildren */
51319baf839SRobert Olsson 	if (n == NULL && chi != NULL)
51419baf839SRobert Olsson 		tn->empty_children++;
51519baf839SRobert Olsson 	else if (n != NULL && chi == NULL)
51619baf839SRobert Olsson 		tn->empty_children--;
51719baf839SRobert Olsson 
51819baf839SRobert Olsson 	/* update fullChildren */
51919baf839SRobert Olsson 	if (wasfull == -1)
52019baf839SRobert Olsson 		wasfull = tnode_full(tn, chi);
52119baf839SRobert Olsson 
52219baf839SRobert Olsson 	isfull = tnode_full(tn, n);
52319baf839SRobert Olsson 	if (wasfull && !isfull)
52419baf839SRobert Olsson 		tn->full_children--;
52519baf839SRobert Olsson 	else if (!wasfull && isfull)
52619baf839SRobert Olsson 		tn->full_children++;
52791b9a277SOlof Johansson 
52819baf839SRobert Olsson 	if (n)
52906801916SStephen Hemminger 		node_set_parent(n, tn);
53019baf839SRobert Olsson 
5312373ce1cSRobert Olsson 	rcu_assign_pointer(tn->child[i], n);
53219baf839SRobert Olsson }
53319baf839SRobert Olsson 
53480b71b80SJens Låås #define MAX_WORK 10
535b299e4f0SDavid S. Miller static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
53619baf839SRobert Olsson {
53719baf839SRobert Olsson 	int i;
5382f80b3c8SRobert Olsson 	struct tnode *old_tn;
539e6308be8SRobert Olsson 	int inflate_threshold_use;
540e6308be8SRobert Olsson 	int halve_threshold_use;
54180b71b80SJens Låås 	int max_work;
54219baf839SRobert Olsson 
54319baf839SRobert Olsson 	if (!tn)
54419baf839SRobert Olsson 		return NULL;
54519baf839SRobert Olsson 
5460c7770c7SStephen Hemminger 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
54719baf839SRobert Olsson 		 tn, inflate_threshold, halve_threshold);
54819baf839SRobert Olsson 
54919baf839SRobert Olsson 	/* No children */
55019baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn)) {
551e0f7cb8cSJarek Poplawski 		tnode_free_safe(tn);
55219baf839SRobert Olsson 		return NULL;
55319baf839SRobert Olsson 	}
55419baf839SRobert Olsson 	/* One child */
55519baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
55680b71b80SJens Låås 		goto one_child;
55719baf839SRobert Olsson 	/*
55819baf839SRobert Olsson 	 * Double as long as the resulting node has a number of
55919baf839SRobert Olsson 	 * nonempty nodes that are above the threshold.
56019baf839SRobert Olsson 	 */
56119baf839SRobert Olsson 
56219baf839SRobert Olsson 	/*
56319baf839SRobert Olsson 	 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
56419baf839SRobert Olsson 	 * the Helsinki University of Technology and Matti Tikkanen of Nokia
56519baf839SRobert Olsson 	 * Telecommunications, page 6:
56619baf839SRobert Olsson 	 * "A node is doubled if the ratio of non-empty children to all
56719baf839SRobert Olsson 	 * children in the *doubled* node is at least 'high'."
56819baf839SRobert Olsson 	 *
56919baf839SRobert Olsson 	 * 'high' in this instance is the variable 'inflate_threshold'. It
57019baf839SRobert Olsson 	 * is expressed as a percentage, so we multiply it with
57119baf839SRobert Olsson 	 * tnode_child_length() and instead of multiplying by 2 (since the
57219baf839SRobert Olsson 	 * child array will be doubled by inflate()) and multiplying
57319baf839SRobert Olsson 	 * the left-hand side by 100 (to handle the percentage thing) we
57419baf839SRobert Olsson 	 * multiply the left-hand side by 50.
57519baf839SRobert Olsson 	 *
57619baf839SRobert Olsson 	 * The left-hand side may look a bit weird: tnode_child_length(tn)
57719baf839SRobert Olsson 	 * - tn->empty_children is of course the number of non-null children
57819baf839SRobert Olsson 	 * in the current node. tn->full_children is the number of "full"
57919baf839SRobert Olsson 	 * children, that is non-null tnodes with a skip value of 0.
58019baf839SRobert Olsson 	 * All of those will be doubled in the resulting inflated tnode, so
58119baf839SRobert Olsson 	 * we just count them one extra time here.
58219baf839SRobert Olsson 	 *
58319baf839SRobert Olsson 	 * A clearer way to write this would be:
58419baf839SRobert Olsson 	 *
58519baf839SRobert Olsson 	 * to_be_doubled = tn->full_children;
58619baf839SRobert Olsson 	 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
58719baf839SRobert Olsson 	 *     tn->full_children;
58819baf839SRobert Olsson 	 *
58919baf839SRobert Olsson 	 * new_child_length = tnode_child_length(tn) * 2;
59019baf839SRobert Olsson 	 *
59119baf839SRobert Olsson 	 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
59219baf839SRobert Olsson 	 *      new_child_length;
59319baf839SRobert Olsson 	 * if (new_fill_factor >= inflate_threshold)
59419baf839SRobert Olsson 	 *
59519baf839SRobert Olsson 	 * ...and so on, tho it would mess up the while () loop.
59619baf839SRobert Olsson 	 *
59719baf839SRobert Olsson 	 * anyway,
59819baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
59919baf839SRobert Olsson 	 *      inflate_threshold
60019baf839SRobert Olsson 	 *
60119baf839SRobert Olsson 	 * avoid a division:
60219baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
60319baf839SRobert Olsson 	 *      inflate_threshold * new_child_length
60419baf839SRobert Olsson 	 *
60519baf839SRobert Olsson 	 * expand not_to_be_doubled and to_be_doubled, and shorten:
60619baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
60719baf839SRobert Olsson 	 *    tn->full_children) >= inflate_threshold * new_child_length
60819baf839SRobert Olsson 	 *
60919baf839SRobert Olsson 	 * expand new_child_length:
61019baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
61119baf839SRobert Olsson 	 *    tn->full_children) >=
61219baf839SRobert Olsson 	 *      inflate_threshold * tnode_child_length(tn) * 2
61319baf839SRobert Olsson 	 *
61419baf839SRobert Olsson 	 * shorten again:
61519baf839SRobert Olsson 	 * 50 * (tn->full_children + tnode_child_length(tn) -
61619baf839SRobert Olsson 	 *    tn->empty_children) >= inflate_threshold *
61719baf839SRobert Olsson 	 *    tnode_child_length(tn)
61819baf839SRobert Olsson 	 *
61919baf839SRobert Olsson 	 */
62019baf839SRobert Olsson 
62119baf839SRobert Olsson 	check_tnode(tn);
62219baf839SRobert Olsson 
623e6308be8SRobert Olsson 	/* Keep root node larger  */
624e6308be8SRobert Olsson 
625b299e4f0SDavid S. Miller 	if (!node_parent((struct rt_trie_node *)tn)) {
62680b71b80SJens Låås 		inflate_threshold_use = inflate_threshold_root;
62780b71b80SJens Låås 		halve_threshold_use = halve_threshold_root;
628a034ee3cSEric Dumazet 	} else {
629e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold;
63080b71b80SJens Låås 		halve_threshold_use = halve_threshold;
63180b71b80SJens Låås 	}
632e6308be8SRobert Olsson 
63380b71b80SJens Låås 	max_work = MAX_WORK;
63480b71b80SJens Låås 	while ((tn->full_children > 0 &&  max_work-- &&
635a07f5f50SStephen Hemminger 		50 * (tn->full_children + tnode_child_length(tn)
636a07f5f50SStephen Hemminger 		      - tn->empty_children)
637a07f5f50SStephen Hemminger 		>= inflate_threshold_use * tnode_child_length(tn))) {
63819baf839SRobert Olsson 
6392f80b3c8SRobert Olsson 		old_tn = tn;
6402f80b3c8SRobert Olsson 		tn = inflate(t, tn);
641a07f5f50SStephen Hemminger 
6422f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6432f80b3c8SRobert Olsson 			tn = old_tn;
6442f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6452f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6462f36895aSRobert Olsson #endif
6472f36895aSRobert Olsson 			break;
6482f36895aSRobert Olsson 		}
64919baf839SRobert Olsson 	}
65019baf839SRobert Olsson 
65119baf839SRobert Olsson 	check_tnode(tn);
65219baf839SRobert Olsson 
65380b71b80SJens Låås 	/* Return if at least one inflate is run */
65480b71b80SJens Låås 	if (max_work != MAX_WORK)
655b299e4f0SDavid S. Miller 		return (struct rt_trie_node *) tn;
65680b71b80SJens Låås 
65719baf839SRobert Olsson 	/*
65819baf839SRobert Olsson 	 * Halve as long as the number of empty children in this
65919baf839SRobert Olsson 	 * node is above threshold.
66019baf839SRobert Olsson 	 */
6612f36895aSRobert Olsson 
66280b71b80SJens Låås 	max_work = MAX_WORK;
66380b71b80SJens Låås 	while (tn->bits > 1 &&  max_work-- &&
66419baf839SRobert Olsson 	       100 * (tnode_child_length(tn) - tn->empty_children) <
665e6308be8SRobert Olsson 	       halve_threshold_use * tnode_child_length(tn)) {
66619baf839SRobert Olsson 
6672f80b3c8SRobert Olsson 		old_tn = tn;
6682f80b3c8SRobert Olsson 		tn = halve(t, tn);
6692f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6702f80b3c8SRobert Olsson 			tn = old_tn;
6712f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6722f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6732f36895aSRobert Olsson #endif
6742f36895aSRobert Olsson 			break;
6752f36895aSRobert Olsson 		}
6762f36895aSRobert Olsson 	}
6772f36895aSRobert Olsson 
67819baf839SRobert Olsson 
67919baf839SRobert Olsson 	/* Only one child remains */
68080b71b80SJens Låås 	if (tn->empty_children == tnode_child_length(tn) - 1) {
68180b71b80SJens Låås one_child:
68219baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
683b299e4f0SDavid S. Miller 			struct rt_trie_node *n;
68419baf839SRobert Olsson 
6850a5c0475SEric Dumazet 			n = rtnl_dereference(tn->child[i]);
6862373ce1cSRobert Olsson 			if (!n)
68791b9a277SOlof Johansson 				continue;
68891b9a277SOlof Johansson 
68991b9a277SOlof Johansson 			/* compress one level */
69091b9a277SOlof Johansson 
69106801916SStephen Hemminger 			node_set_parent(n, NULL);
692e0f7cb8cSJarek Poplawski 			tnode_free_safe(tn);
69319baf839SRobert Olsson 			return n;
69419baf839SRobert Olsson 		}
69580b71b80SJens Låås 	}
696b299e4f0SDavid S. Miller 	return (struct rt_trie_node *) tn;
69719baf839SRobert Olsson }
69819baf839SRobert Olsson 
6990a5c0475SEric Dumazet 
7000a5c0475SEric Dumazet static void tnode_clean_free(struct tnode *tn)
7010a5c0475SEric Dumazet {
7020a5c0475SEric Dumazet 	int i;
7030a5c0475SEric Dumazet 	struct tnode *tofree;
7040a5c0475SEric Dumazet 
7050a5c0475SEric Dumazet 	for (i = 0; i < tnode_child_length(tn); i++) {
7060a5c0475SEric Dumazet 		tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
7070a5c0475SEric Dumazet 		if (tofree)
7080a5c0475SEric Dumazet 			tnode_free(tofree);
7090a5c0475SEric Dumazet 	}
7100a5c0475SEric Dumazet 	tnode_free(tn);
7110a5c0475SEric Dumazet }
7120a5c0475SEric Dumazet 
7132f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn)
71419baf839SRobert Olsson {
71519baf839SRobert Olsson 	struct tnode *oldtnode = tn;
71619baf839SRobert Olsson 	int olen = tnode_child_length(tn);
71719baf839SRobert Olsson 	int i;
71819baf839SRobert Olsson 
7190c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
72019baf839SRobert Olsson 
72119baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
72219baf839SRobert Olsson 
7232f80b3c8SRobert Olsson 	if (!tn)
7242f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
7252f36895aSRobert Olsson 
7262f36895aSRobert Olsson 	/*
7272f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
7282f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
7292f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and  inflate
7302f36895aSRobert Olsson 	 * of tnode is ignored.
7312f36895aSRobert Olsson 	 */
7322f36895aSRobert Olsson 
7332f36895aSRobert Olsson 	for (i = 0; i < olen; i++) {
734a07f5f50SStephen Hemminger 		struct tnode *inode;
7352f36895aSRobert Olsson 
736a07f5f50SStephen Hemminger 		inode = (struct tnode *) tnode_get_child(oldtnode, i);
7372f36895aSRobert Olsson 		if (inode &&
7382f36895aSRobert Olsson 		    IS_TNODE(inode) &&
7392f36895aSRobert Olsson 		    inode->pos == oldtnode->pos + oldtnode->bits &&
7402f36895aSRobert Olsson 		    inode->bits > 1) {
7412f36895aSRobert Olsson 			struct tnode *left, *right;
742ab66b4a7SStephen Hemminger 			t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
7432f36895aSRobert Olsson 
7442f36895aSRobert Olsson 			left = tnode_new(inode->key&(~m), inode->pos + 1,
7452f36895aSRobert Olsson 					 inode->bits - 1);
7462f80b3c8SRobert Olsson 			if (!left)
7472f80b3c8SRobert Olsson 				goto nomem;
7482f36895aSRobert Olsson 
7492f36895aSRobert Olsson 			right = tnode_new(inode->key|m, inode->pos + 1,
7502f36895aSRobert Olsson 					  inode->bits - 1);
7512f36895aSRobert Olsson 
7522f36895aSRobert Olsson 			if (!right) {
7532f80b3c8SRobert Olsson 				tnode_free(left);
7542f80b3c8SRobert Olsson 				goto nomem;
7552f36895aSRobert Olsson 			}
7562f36895aSRobert Olsson 
757b299e4f0SDavid S. Miller 			put_child(t, tn, 2*i, (struct rt_trie_node *) left);
758b299e4f0SDavid S. Miller 			put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
7592f36895aSRobert Olsson 		}
7602f36895aSRobert Olsson 	}
7612f36895aSRobert Olsson 
76219baf839SRobert Olsson 	for (i = 0; i < olen; i++) {
763c95aaf9aSStephen Hemminger 		struct tnode *inode;
764b299e4f0SDavid S. Miller 		struct rt_trie_node *node = tnode_get_child(oldtnode, i);
76591b9a277SOlof Johansson 		struct tnode *left, *right;
76691b9a277SOlof Johansson 		int size, j;
76719baf839SRobert Olsson 
76819baf839SRobert Olsson 		/* An empty child */
76919baf839SRobert Olsson 		if (node == NULL)
77019baf839SRobert Olsson 			continue;
77119baf839SRobert Olsson 
77219baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
77319baf839SRobert Olsson 
77419baf839SRobert Olsson 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
77519baf839SRobert Olsson 		   tn->pos + tn->bits - 1) {
776a07f5f50SStephen Hemminger 			if (tkey_extract_bits(node->key,
777a07f5f50SStephen Hemminger 					      oldtnode->pos + oldtnode->bits,
77819baf839SRobert Olsson 					      1) == 0)
77919baf839SRobert Olsson 				put_child(t, tn, 2*i, node);
78019baf839SRobert Olsson 			else
78119baf839SRobert Olsson 				put_child(t, tn, 2*i+1, node);
78219baf839SRobert Olsson 			continue;
78319baf839SRobert Olsson 		}
78419baf839SRobert Olsson 
78519baf839SRobert Olsson 		/* An internal node with two children */
78619baf839SRobert Olsson 		inode = (struct tnode *) node;
78719baf839SRobert Olsson 
78819baf839SRobert Olsson 		if (inode->bits == 1) {
7890a5c0475SEric Dumazet 			put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
7900a5c0475SEric Dumazet 			put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
79119baf839SRobert Olsson 
792e0f7cb8cSJarek Poplawski 			tnode_free_safe(inode);
79391b9a277SOlof Johansson 			continue;
79419baf839SRobert Olsson 		}
79519baf839SRobert Olsson 
79619baf839SRobert Olsson 		/* An internal node with more than two children */
79719baf839SRobert Olsson 
79819baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
79919baf839SRobert Olsson 		 * ones, 'left' and 'right', each with half of the
80019baf839SRobert Olsson 		 * original children. The two new nodes will have
80119baf839SRobert Olsson 		 * a position one bit further down the key and this
80219baf839SRobert Olsson 		 * means that the "significant" part of their keys
80319baf839SRobert Olsson 		 * (see the discussion near the top of this file)
80419baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
80519baf839SRobert Olsson 		 * left's key and "1" in right's key. Since we are
80619baf839SRobert Olsson 		 * moving the key position by one step, the bit that
80719baf839SRobert Olsson 		 * we are moving away from - the bit at position
80819baf839SRobert Olsson 		 * (inode->pos) - is the one that will differ between
80919baf839SRobert Olsson 		 * left and right. So... we synthesize that bit in the
81019baf839SRobert Olsson 		 * two  new keys.
81119baf839SRobert Olsson 		 * The mask 'm' below will be a single "one" bit at
81219baf839SRobert Olsson 		 * the position (inode->pos)
81319baf839SRobert Olsson 		 */
81419baf839SRobert Olsson 
81519baf839SRobert Olsson 		/* Use the old key, but set the new significant
81619baf839SRobert Olsson 		 *   bit to zero.
81719baf839SRobert Olsson 		 */
8182f36895aSRobert Olsson 
8192f36895aSRobert Olsson 		left = (struct tnode *) tnode_get_child(tn, 2*i);
8202f36895aSRobert Olsson 		put_child(t, tn, 2*i, NULL);
82119baf839SRobert Olsson 
82291b9a277SOlof Johansson 		BUG_ON(!left);
82319baf839SRobert Olsson 
8242f36895aSRobert Olsson 		right = (struct tnode *) tnode_get_child(tn, 2*i+1);
8252f36895aSRobert Olsson 		put_child(t, tn, 2*i+1, NULL);
82619baf839SRobert Olsson 
82791b9a277SOlof Johansson 		BUG_ON(!right);
82819baf839SRobert Olsson 
82919baf839SRobert Olsson 		size = tnode_child_length(left);
83019baf839SRobert Olsson 		for (j = 0; j < size; j++) {
8310a5c0475SEric Dumazet 			put_child(t, left, j, rtnl_dereference(inode->child[j]));
8320a5c0475SEric Dumazet 			put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
83319baf839SRobert Olsson 		}
83419baf839SRobert Olsson 		put_child(t, tn, 2*i, resize(t, left));
83519baf839SRobert Olsson 		put_child(t, tn, 2*i+1, resize(t, right));
83619baf839SRobert Olsson 
837e0f7cb8cSJarek Poplawski 		tnode_free_safe(inode);
83819baf839SRobert Olsson 	}
839e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
84019baf839SRobert Olsson 	return tn;
8412f80b3c8SRobert Olsson nomem:
8420a5c0475SEric Dumazet 	tnode_clean_free(tn);
8432f80b3c8SRobert Olsson 	return ERR_PTR(-ENOMEM);
8442f80b3c8SRobert Olsson }
84519baf839SRobert Olsson 
8462f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn)
84719baf839SRobert Olsson {
84819baf839SRobert Olsson 	struct tnode *oldtnode = tn;
849b299e4f0SDavid S. Miller 	struct rt_trie_node *left, *right;
85019baf839SRobert Olsson 	int i;
85119baf839SRobert Olsson 	int olen = tnode_child_length(tn);
85219baf839SRobert Olsson 
8530c7770c7SStephen Hemminger 	pr_debug("In halve\n");
85419baf839SRobert Olsson 
85519baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
85619baf839SRobert Olsson 
8572f80b3c8SRobert Olsson 	if (!tn)
8582f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8592f36895aSRobert Olsson 
8602f36895aSRobert Olsson 	/*
8612f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
8622f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
8632f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and halve
8642f36895aSRobert Olsson 	 * of tnode is ignored.
8652f36895aSRobert Olsson 	 */
8662f36895aSRobert Olsson 
8672f36895aSRobert Olsson 	for (i = 0; i < olen; i += 2) {
8682f36895aSRobert Olsson 		left = tnode_get_child(oldtnode, i);
8692f36895aSRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
8702f36895aSRobert Olsson 
8712f36895aSRobert Olsson 		/* Two nonempty children */
8722f36895aSRobert Olsson 		if (left && right) {
8732f80b3c8SRobert Olsson 			struct tnode *newn;
8742f36895aSRobert Olsson 
8752f80b3c8SRobert Olsson 			newn = tnode_new(left->key, tn->pos + tn->bits, 1);
8762f80b3c8SRobert Olsson 
8772f80b3c8SRobert Olsson 			if (!newn)
8782f80b3c8SRobert Olsson 				goto nomem;
8792f80b3c8SRobert Olsson 
880b299e4f0SDavid S. Miller 			put_child(t, tn, i/2, (struct rt_trie_node *)newn);
8812f36895aSRobert Olsson 		}
8822f36895aSRobert Olsson 
8832f36895aSRobert Olsson 	}
88419baf839SRobert Olsson 
88519baf839SRobert Olsson 	for (i = 0; i < olen; i += 2) {
88691b9a277SOlof Johansson 		struct tnode *newBinNode;
88791b9a277SOlof Johansson 
88819baf839SRobert Olsson 		left = tnode_get_child(oldtnode, i);
88919baf839SRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
89019baf839SRobert Olsson 
89119baf839SRobert Olsson 		/* At least one of the children is empty */
89219baf839SRobert Olsson 		if (left == NULL) {
89319baf839SRobert Olsson 			if (right == NULL)    /* Both are empty */
89419baf839SRobert Olsson 				continue;
89519baf839SRobert Olsson 			put_child(t, tn, i/2, right);
89691b9a277SOlof Johansson 			continue;
89791b9a277SOlof Johansson 		}
89891b9a277SOlof Johansson 
89991b9a277SOlof Johansson 		if (right == NULL) {
90019baf839SRobert Olsson 			put_child(t, tn, i/2, left);
90191b9a277SOlof Johansson 			continue;
90291b9a277SOlof Johansson 		}
90319baf839SRobert Olsson 
90419baf839SRobert Olsson 		/* Two nonempty children */
90591b9a277SOlof Johansson 		newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
9062f36895aSRobert Olsson 		put_child(t, tn, i/2, NULL);
90719baf839SRobert Olsson 		put_child(t, newBinNode, 0, left);
90819baf839SRobert Olsson 		put_child(t, newBinNode, 1, right);
90919baf839SRobert Olsson 		put_child(t, tn, i/2, resize(t, newBinNode));
91019baf839SRobert Olsson 	}
911e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
91219baf839SRobert Olsson 	return tn;
9132f80b3c8SRobert Olsson nomem:
9140a5c0475SEric Dumazet 	tnode_clean_free(tn);
9152f80b3c8SRobert Olsson 	return ERR_PTR(-ENOMEM);
9162f80b3c8SRobert Olsson }
91719baf839SRobert Olsson 
918772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines
9192373ce1cSRobert Olsson  via get_fa_head and dump */
9202373ce1cSRobert Olsson 
921772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
92219baf839SRobert Olsson {
923772cb712SRobert Olsson 	struct hlist_head *head = &l->list;
92419baf839SRobert Olsson 	struct hlist_node *node;
92519baf839SRobert Olsson 	struct leaf_info *li;
92619baf839SRobert Olsson 
9272373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, head, hlist)
92819baf839SRobert Olsson 		if (li->plen == plen)
92919baf839SRobert Olsson 			return li;
93091b9a277SOlof Johansson 
93119baf839SRobert Olsson 	return NULL;
93219baf839SRobert Olsson }
93319baf839SRobert Olsson 
93419baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen)
93519baf839SRobert Olsson {
936772cb712SRobert Olsson 	struct leaf_info *li = find_leaf_info(l, plen);
93719baf839SRobert Olsson 
93891b9a277SOlof Johansson 	if (!li)
93991b9a277SOlof Johansson 		return NULL;
94019baf839SRobert Olsson 
94191b9a277SOlof Johansson 	return &li->falh;
94219baf839SRobert Olsson }
94319baf839SRobert Olsson 
94419baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
94519baf839SRobert Olsson {
94619baf839SRobert Olsson 	struct leaf_info *li = NULL, *last = NULL;
94791b9a277SOlof Johansson 	struct hlist_node *node;
94819baf839SRobert Olsson 
94991b9a277SOlof Johansson 	if (hlist_empty(head)) {
9502373ce1cSRobert Olsson 		hlist_add_head_rcu(&new->hlist, head);
95191b9a277SOlof Johansson 	} else {
95291b9a277SOlof Johansson 		hlist_for_each_entry(li, node, head, hlist) {
95319baf839SRobert Olsson 			if (new->plen > li->plen)
95419baf839SRobert Olsson 				break;
95519baf839SRobert Olsson 
95619baf839SRobert Olsson 			last = li;
95719baf839SRobert Olsson 		}
95819baf839SRobert Olsson 		if (last)
9592373ce1cSRobert Olsson 			hlist_add_after_rcu(&last->hlist, &new->hlist);
96019baf839SRobert Olsson 		else
9612373ce1cSRobert Olsson 			hlist_add_before_rcu(&new->hlist, &li->hlist);
96219baf839SRobert Olsson 	}
96319baf839SRobert Olsson }
96419baf839SRobert Olsson 
9652373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
9662373ce1cSRobert Olsson 
96719baf839SRobert Olsson static struct leaf *
96819baf839SRobert Olsson fib_find_node(struct trie *t, u32 key)
96919baf839SRobert Olsson {
97019baf839SRobert Olsson 	int pos;
97119baf839SRobert Olsson 	struct tnode *tn;
972b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
97319baf839SRobert Olsson 
97419baf839SRobert Olsson 	pos = 0;
975a034ee3cSEric Dumazet 	n = rcu_dereference_rtnl(t->trie);
97619baf839SRobert Olsson 
97719baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
97819baf839SRobert Olsson 		tn = (struct tnode *) n;
97919baf839SRobert Olsson 
98019baf839SRobert Olsson 		check_tnode(tn);
98119baf839SRobert Olsson 
98219baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
98319baf839SRobert Olsson 			pos = tn->pos + tn->bits;
984a07f5f50SStephen Hemminger 			n = tnode_get_child_rcu(tn,
985a07f5f50SStephen Hemminger 						tkey_extract_bits(key,
986a07f5f50SStephen Hemminger 								  tn->pos,
987a07f5f50SStephen Hemminger 								  tn->bits));
98891b9a277SOlof Johansson 		} else
98919baf839SRobert Olsson 			break;
99019baf839SRobert Olsson 	}
99119baf839SRobert Olsson 	/* Case we have found a leaf. Compare prefixes */
99219baf839SRobert Olsson 
99391b9a277SOlof Johansson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
99491b9a277SOlof Johansson 		return (struct leaf *)n;
99591b9a277SOlof Johansson 
99619baf839SRobert Olsson 	return NULL;
99719baf839SRobert Olsson }
99819baf839SRobert Olsson 
9997b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn)
100019baf839SRobert Olsson {
100119baf839SRobert Olsson 	int wasfull;
10023ed18d76SRobert Olsson 	t_key cindex, key;
100306801916SStephen Hemminger 	struct tnode *tp;
100419baf839SRobert Olsson 
10053ed18d76SRobert Olsson 	key = tn->key;
10063ed18d76SRobert Olsson 
1007b299e4f0SDavid S. Miller 	while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
100819baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
100919baf839SRobert Olsson 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
101019baf839SRobert Olsson 		tn = (struct tnode *) resize(t, (struct tnode *)tn);
1011a07f5f50SStephen Hemminger 
1012a07f5f50SStephen Hemminger 		tnode_put_child_reorg((struct tnode *)tp, cindex,
1013b299e4f0SDavid S. Miller 				      (struct rt_trie_node *)tn, wasfull);
101419baf839SRobert Olsson 
1015b299e4f0SDavid S. Miller 		tp = node_parent((struct rt_trie_node *) tn);
1016008440e3SJarek Poplawski 		if (!tp)
1017b299e4f0SDavid S. Miller 			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1018008440e3SJarek Poplawski 
1019e0f7cb8cSJarek Poplawski 		tnode_free_flush();
102006801916SStephen Hemminger 		if (!tp)
102119baf839SRobert Olsson 			break;
102206801916SStephen Hemminger 		tn = tp;
102319baf839SRobert Olsson 	}
102406801916SStephen Hemminger 
102519baf839SRobert Olsson 	/* Handle last (top) tnode */
10267b85576dSJarek Poplawski 	if (IS_TNODE(tn))
102719baf839SRobert Olsson 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
102819baf839SRobert Olsson 
1029b299e4f0SDavid S. Miller 	rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
10307b85576dSJarek Poplawski 	tnode_free_flush();
103119baf839SRobert Olsson }
103219baf839SRobert Olsson 
10332373ce1cSRobert Olsson /* only used from updater-side */
10342373ce1cSRobert Olsson 
1035fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
103619baf839SRobert Olsson {
103719baf839SRobert Olsson 	int pos, newpos;
103819baf839SRobert Olsson 	struct tnode *tp = NULL, *tn = NULL;
1039b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
104019baf839SRobert Olsson 	struct leaf *l;
104119baf839SRobert Olsson 	int missbit;
104219baf839SRobert Olsson 	struct list_head *fa_head = NULL;
104319baf839SRobert Olsson 	struct leaf_info *li;
104419baf839SRobert Olsson 	t_key cindex;
104519baf839SRobert Olsson 
104619baf839SRobert Olsson 	pos = 0;
10470a5c0475SEric Dumazet 	n = rtnl_dereference(t->trie);
104819baf839SRobert Olsson 
104919baf839SRobert Olsson 	/* If we point to NULL, stop. Either the tree is empty and we should
105019baf839SRobert Olsson 	 * just put a new leaf in if, or we have reached an empty child slot,
105119baf839SRobert Olsson 	 * and we should just put our new leaf in that.
105219baf839SRobert Olsson 	 * If we point to a T_TNODE, check if it matches our key. Note that
105319baf839SRobert Olsson 	 * a T_TNODE might be skipping any number of bits - its 'pos' need
105419baf839SRobert Olsson 	 * not be the parent's 'pos'+'bits'!
105519baf839SRobert Olsson 	 *
105619baf839SRobert Olsson 	 * If it does match the current key, get pos/bits from it, extract
105719baf839SRobert Olsson 	 * the index from our key, push the T_TNODE and walk the tree.
105819baf839SRobert Olsson 	 *
105919baf839SRobert Olsson 	 * If it doesn't, we have to replace it with a new T_TNODE.
106019baf839SRobert Olsson 	 *
106119baf839SRobert Olsson 	 * If we point to a T_LEAF, it might or might not have the same key
106219baf839SRobert Olsson 	 * as we do. If it does, just change the value, update the T_LEAF's
106319baf839SRobert Olsson 	 * value, and return it.
106419baf839SRobert Olsson 	 * If it doesn't, we need to replace it with a T_TNODE.
106519baf839SRobert Olsson 	 */
106619baf839SRobert Olsson 
106719baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
106819baf839SRobert Olsson 		tn = (struct tnode *) n;
106919baf839SRobert Olsson 
107019baf839SRobert Olsson 		check_tnode(tn);
107119baf839SRobert Olsson 
107219baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
107319baf839SRobert Olsson 			tp = tn;
107419baf839SRobert Olsson 			pos = tn->pos + tn->bits;
1075a07f5f50SStephen Hemminger 			n = tnode_get_child(tn,
1076a07f5f50SStephen Hemminger 					    tkey_extract_bits(key,
1077a07f5f50SStephen Hemminger 							      tn->pos,
1078a07f5f50SStephen Hemminger 							      tn->bits));
107919baf839SRobert Olsson 
108006801916SStephen Hemminger 			BUG_ON(n && node_parent(n) != tn);
108191b9a277SOlof Johansson 		} else
108219baf839SRobert Olsson 			break;
108319baf839SRobert Olsson 	}
108419baf839SRobert Olsson 
108519baf839SRobert Olsson 	/*
108619baf839SRobert Olsson 	 * n  ----> NULL, LEAF or TNODE
108719baf839SRobert Olsson 	 *
108819baf839SRobert Olsson 	 * tp is n's (parent) ----> NULL or TNODE
108919baf839SRobert Olsson 	 */
109019baf839SRobert Olsson 
109191b9a277SOlof Johansson 	BUG_ON(tp && IS_LEAF(tp));
109219baf839SRobert Olsson 
109319baf839SRobert Olsson 	/* Case 1: n is a leaf. Compare prefixes */
109419baf839SRobert Olsson 
109519baf839SRobert Olsson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1096c95aaf9aSStephen Hemminger 		l = (struct leaf *) n;
109719baf839SRobert Olsson 		li = leaf_info_new(plen);
109819baf839SRobert Olsson 
1099fea86ad8SStephen Hemminger 		if (!li)
1100fea86ad8SStephen Hemminger 			return NULL;
110119baf839SRobert Olsson 
110219baf839SRobert Olsson 		fa_head = &li->falh;
110319baf839SRobert Olsson 		insert_leaf_info(&l->list, li);
110419baf839SRobert Olsson 		goto done;
110519baf839SRobert Olsson 	}
110619baf839SRobert Olsson 	l = leaf_new();
110719baf839SRobert Olsson 
1108fea86ad8SStephen Hemminger 	if (!l)
1109fea86ad8SStephen Hemminger 		return NULL;
111019baf839SRobert Olsson 
111119baf839SRobert Olsson 	l->key = key;
111219baf839SRobert Olsson 	li = leaf_info_new(plen);
111319baf839SRobert Olsson 
1114f835e471SRobert Olsson 	if (!li) {
1115387a5487SStephen Hemminger 		free_leaf(l);
1116fea86ad8SStephen Hemminger 		return NULL;
1117f835e471SRobert Olsson 	}
111819baf839SRobert Olsson 
111919baf839SRobert Olsson 	fa_head = &li->falh;
112019baf839SRobert Olsson 	insert_leaf_info(&l->list, li);
112119baf839SRobert Olsson 
112219baf839SRobert Olsson 	if (t->trie && n == NULL) {
112391b9a277SOlof Johansson 		/* Case 2: n is NULL, and will just insert a new leaf */
112419baf839SRobert Olsson 
1125b299e4f0SDavid S. Miller 		node_set_parent((struct rt_trie_node *)l, tp);
112619baf839SRobert Olsson 
112719baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1128b299e4f0SDavid S. Miller 		put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
112991b9a277SOlof Johansson 	} else {
113019baf839SRobert Olsson 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
113119baf839SRobert Olsson 		/*
113219baf839SRobert Olsson 		 *  Add a new tnode here
113319baf839SRobert Olsson 		 *  first tnode need some special handling
113419baf839SRobert Olsson 		 */
113519baf839SRobert Olsson 
113619baf839SRobert Olsson 		if (tp)
113719baf839SRobert Olsson 			pos = tp->pos+tp->bits;
113819baf839SRobert Olsson 		else
113919baf839SRobert Olsson 			pos = 0;
114091b9a277SOlof Johansson 
114119baf839SRobert Olsson 		if (n) {
114219baf839SRobert Olsson 			newpos = tkey_mismatch(key, pos, n->key);
114319baf839SRobert Olsson 			tn = tnode_new(n->key, newpos, 1);
114491b9a277SOlof Johansson 		} else {
114519baf839SRobert Olsson 			newpos = 0;
114619baf839SRobert Olsson 			tn = tnode_new(key, newpos, 1); /* First tnode */
114719baf839SRobert Olsson 		}
1148f835e471SRobert Olsson 
1149f835e471SRobert Olsson 		if (!tn) {
1150f835e471SRobert Olsson 			free_leaf_info(li);
1151387a5487SStephen Hemminger 			free_leaf(l);
1152fea86ad8SStephen Hemminger 			return NULL;
1153f835e471SRobert Olsson 		}
115419baf839SRobert Olsson 
1155b299e4f0SDavid S. Miller 		node_set_parent((struct rt_trie_node *)tn, tp);
115619baf839SRobert Olsson 
115719baf839SRobert Olsson 		missbit = tkey_extract_bits(key, newpos, 1);
1158b299e4f0SDavid S. Miller 		put_child(t, tn, missbit, (struct rt_trie_node *)l);
115919baf839SRobert Olsson 		put_child(t, tn, 1-missbit, n);
116019baf839SRobert Olsson 
116119baf839SRobert Olsson 		if (tp) {
116219baf839SRobert Olsson 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163a07f5f50SStephen Hemminger 			put_child(t, (struct tnode *)tp, cindex,
1164b299e4f0SDavid S. Miller 				  (struct rt_trie_node *)tn);
116591b9a277SOlof Johansson 		} else {
1166b299e4f0SDavid S. Miller 			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
116719baf839SRobert Olsson 			tp = tn;
116819baf839SRobert Olsson 		}
116919baf839SRobert Olsson 	}
117091b9a277SOlof Johansson 
117191b9a277SOlof Johansson 	if (tp && tp->pos + tp->bits > 32)
1172a07f5f50SStephen Hemminger 		pr_warning("fib_trie"
1173a07f5f50SStephen Hemminger 			   " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
117419baf839SRobert Olsson 			   tp, tp->pos, tp->bits, key, plen);
117591b9a277SOlof Johansson 
117619baf839SRobert Olsson 	/* Rebalance the trie */
11772373ce1cSRobert Olsson 
11787b85576dSJarek Poplawski 	trie_rebalance(t, tp);
1179f835e471SRobert Olsson done:
118019baf839SRobert Olsson 	return fa_head;
118119baf839SRobert Olsson }
118219baf839SRobert Olsson 
1183d562f1f8SRobert Olsson /*
1184d562f1f8SRobert Olsson  * Caller must hold RTNL.
1185d562f1f8SRobert Olsson  */
118616c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
118719baf839SRobert Olsson {
118819baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
118919baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
119019baf839SRobert Olsson 	struct list_head *fa_head = NULL;
119119baf839SRobert Olsson 	struct fib_info *fi;
11924e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
11934e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
119419baf839SRobert Olsson 	u32 key, mask;
119519baf839SRobert Olsson 	int err;
119619baf839SRobert Olsson 	struct leaf *l;
119719baf839SRobert Olsson 
119819baf839SRobert Olsson 	if (plen > 32)
119919baf839SRobert Olsson 		return -EINVAL;
120019baf839SRobert Olsson 
12014e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
120219baf839SRobert Olsson 
12032dfe55b4SPatrick McHardy 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
120419baf839SRobert Olsson 
120519baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
120619baf839SRobert Olsson 
120719baf839SRobert Olsson 	if (key & ~mask)
120819baf839SRobert Olsson 		return -EINVAL;
120919baf839SRobert Olsson 
121019baf839SRobert Olsson 	key = key & mask;
121119baf839SRobert Olsson 
12124e902c57SThomas Graf 	fi = fib_create_info(cfg);
12134e902c57SThomas Graf 	if (IS_ERR(fi)) {
12144e902c57SThomas Graf 		err = PTR_ERR(fi);
121519baf839SRobert Olsson 		goto err;
12164e902c57SThomas Graf 	}
121719baf839SRobert Olsson 
121819baf839SRobert Olsson 	l = fib_find_node(t, key);
121919baf839SRobert Olsson 	fa = NULL;
122019baf839SRobert Olsson 
122119baf839SRobert Olsson 	if (l) {
122219baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
122319baf839SRobert Olsson 		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
122419baf839SRobert Olsson 	}
122519baf839SRobert Olsson 
122619baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
122719baf839SRobert Olsson 	 * with the same keys [prefix,tos,priority], if such key already
122819baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
122919baf839SRobert Olsson 	 *
123019baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
123119baf839SRobert Olsson 	 * insert to the head of f.
123219baf839SRobert Olsson 	 *
123319baf839SRobert Olsson 	 * If f is NULL, no fib node matched the destination key
123419baf839SRobert Olsson 	 * and we need to allocate a new one of those as well.
123519baf839SRobert Olsson 	 */
123619baf839SRobert Olsson 
1237936f6f8eSJulian Anastasov 	if (fa && fa->fa_tos == tos &&
1238936f6f8eSJulian Anastasov 	    fa->fa_info->fib_priority == fi->fib_priority) {
1239936f6f8eSJulian Anastasov 		struct fib_alias *fa_first, *fa_match;
124019baf839SRobert Olsson 
124119baf839SRobert Olsson 		err = -EEXIST;
12424e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
124319baf839SRobert Olsson 			goto out;
124419baf839SRobert Olsson 
1245936f6f8eSJulian Anastasov 		/* We have 2 goals:
1246936f6f8eSJulian Anastasov 		 * 1. Find exact match for type, scope, fib_info to avoid
1247936f6f8eSJulian Anastasov 		 * duplicate routes
1248936f6f8eSJulian Anastasov 		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1249936f6f8eSJulian Anastasov 		 */
1250936f6f8eSJulian Anastasov 		fa_match = NULL;
1251936f6f8eSJulian Anastasov 		fa_first = fa;
1252936f6f8eSJulian Anastasov 		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1253936f6f8eSJulian Anastasov 		list_for_each_entry_continue(fa, fa_head, fa_list) {
1254936f6f8eSJulian Anastasov 			if (fa->fa_tos != tos)
1255936f6f8eSJulian Anastasov 				break;
1256936f6f8eSJulian Anastasov 			if (fa->fa_info->fib_priority != fi->fib_priority)
1257936f6f8eSJulian Anastasov 				break;
1258936f6f8eSJulian Anastasov 			if (fa->fa_type == cfg->fc_type &&
1259936f6f8eSJulian Anastasov 			    fa->fa_info == fi) {
1260936f6f8eSJulian Anastasov 				fa_match = fa;
1261936f6f8eSJulian Anastasov 				break;
1262936f6f8eSJulian Anastasov 			}
1263936f6f8eSJulian Anastasov 		}
1264936f6f8eSJulian Anastasov 
12654e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
126619baf839SRobert Olsson 			struct fib_info *fi_drop;
126719baf839SRobert Olsson 			u8 state;
126819baf839SRobert Olsson 
1269936f6f8eSJulian Anastasov 			fa = fa_first;
1270936f6f8eSJulian Anastasov 			if (fa_match) {
1271936f6f8eSJulian Anastasov 				if (fa == fa_match)
1272936f6f8eSJulian Anastasov 					err = 0;
12736725033fSJoonwoo Park 				goto out;
1274936f6f8eSJulian Anastasov 			}
12752373ce1cSRobert Olsson 			err = -ENOBUFS;
1276e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
12772373ce1cSRobert Olsson 			if (new_fa == NULL)
12782373ce1cSRobert Olsson 				goto out;
127919baf839SRobert Olsson 
128019baf839SRobert Olsson 			fi_drop = fa->fa_info;
12812373ce1cSRobert Olsson 			new_fa->fa_tos = fa->fa_tos;
12822373ce1cSRobert Olsson 			new_fa->fa_info = fi;
12834e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
128419baf839SRobert Olsson 			state = fa->fa_state;
1285936f6f8eSJulian Anastasov 			new_fa->fa_state = state & ~FA_S_ACCESSED;
128619baf839SRobert Olsson 
12872373ce1cSRobert Olsson 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12882373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
128919baf839SRobert Olsson 
129019baf839SRobert Olsson 			fib_release_info(fi_drop);
129119baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
129276e6ebfbSDenis V. Lunev 				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
1293b8f55831SMilan Kocian 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1294b8f55831SMilan Kocian 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
129519baf839SRobert Olsson 
129619baf839SRobert Olsson 			goto succeeded;
129719baf839SRobert Olsson 		}
129819baf839SRobert Olsson 		/* Error if we find a perfect match which
129919baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
130019baf839SRobert Olsson 		 * information.
130119baf839SRobert Olsson 		 */
1302936f6f8eSJulian Anastasov 		if (fa_match)
130319baf839SRobert Olsson 			goto out;
1304a07f5f50SStephen Hemminger 
13054e902c57SThomas Graf 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
1306936f6f8eSJulian Anastasov 			fa = fa_first;
130719baf839SRobert Olsson 	}
130819baf839SRobert Olsson 	err = -ENOENT;
13094e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
131019baf839SRobert Olsson 		goto out;
131119baf839SRobert Olsson 
131219baf839SRobert Olsson 	err = -ENOBUFS;
1313e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
131419baf839SRobert Olsson 	if (new_fa == NULL)
131519baf839SRobert Olsson 		goto out;
131619baf839SRobert Olsson 
131719baf839SRobert Olsson 	new_fa->fa_info = fi;
131819baf839SRobert Olsson 	new_fa->fa_tos = tos;
13194e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
132019baf839SRobert Olsson 	new_fa->fa_state = 0;
132119baf839SRobert Olsson 	/*
132219baf839SRobert Olsson 	 * Insert new entry to the list.
132319baf839SRobert Olsson 	 */
132419baf839SRobert Olsson 
1325f835e471SRobert Olsson 	if (!fa_head) {
1326fea86ad8SStephen Hemminger 		fa_head = fib_insert_node(t, key, plen);
1327fea86ad8SStephen Hemminger 		if (unlikely(!fa_head)) {
1328fea86ad8SStephen Hemminger 			err = -ENOMEM;
1329f835e471SRobert Olsson 			goto out_free_new_fa;
1330f835e471SRobert Olsson 		}
1331fea86ad8SStephen Hemminger 	}
133219baf839SRobert Olsson 
133321d8c49eSDavid S. Miller 	if (!plen)
133421d8c49eSDavid S. Miller 		tb->tb_num_default++;
133521d8c49eSDavid S. Miller 
13362373ce1cSRobert Olsson 	list_add_tail_rcu(&new_fa->fa_list,
13372373ce1cSRobert Olsson 			  (fa ? &fa->fa_list : fa_head));
133819baf839SRobert Olsson 
133976e6ebfbSDenis V. Lunev 	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
13404e902c57SThomas Graf 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1341b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
134219baf839SRobert Olsson succeeded:
134319baf839SRobert Olsson 	return 0;
1344f835e471SRobert Olsson 
1345f835e471SRobert Olsson out_free_new_fa:
1346f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
134719baf839SRobert Olsson out:
134819baf839SRobert Olsson 	fib_release_info(fi);
134991b9a277SOlof Johansson err:
135019baf839SRobert Olsson 	return err;
135119baf839SRobert Olsson }
135219baf839SRobert Olsson 
1353772cb712SRobert Olsson /* should be called with rcu_read_lock */
13545b470441SDavid S. Miller static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
135522bd5b9bSDavid S. Miller 		      t_key key,  const struct flowi4 *flp,
1356ebc0ffaeSEric Dumazet 		      struct fib_result *res, int fib_flags)
135719baf839SRobert Olsson {
135819baf839SRobert Olsson 	struct leaf_info *li;
135919baf839SRobert Olsson 	struct hlist_head *hhead = &l->list;
136019baf839SRobert Olsson 	struct hlist_node *node;
136119baf839SRobert Olsson 
13622373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
13633be0686bSDavid S. Miller 		struct fib_alias *fa;
1364a07f5f50SStephen Hemminger 
13655c74501fSEric Dumazet 		if (l->key != (key & li->mask_plen))
136619baf839SRobert Olsson 			continue;
136719baf839SRobert Olsson 
13683be0686bSDavid S. Miller 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
13693be0686bSDavid S. Miller 			struct fib_info *fi = fa->fa_info;
13703be0686bSDavid S. Miller 			int nhsel, err;
1371a07f5f50SStephen Hemminger 
137222bd5b9bSDavid S. Miller 			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
13733be0686bSDavid S. Miller 				continue;
137437e826c5SDavid S. Miller 			if (fa->fa_info->fib_scope < flp->flowi4_scope)
13753be0686bSDavid S. Miller 				continue;
13763be0686bSDavid S. Miller 			fib_alias_accessed(fa);
13773be0686bSDavid S. Miller 			err = fib_props[fa->fa_type].error;
13783be0686bSDavid S. Miller 			if (err) {
137919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
13801fbc7843SJulian Anastasov 				t->stats.semantic_match_passed++;
138119baf839SRobert Olsson #endif
13821fbc7843SJulian Anastasov 				return err;
13833be0686bSDavid S. Miller 			}
13843be0686bSDavid S. Miller 			if (fi->fib_flags & RTNH_F_DEAD)
13853be0686bSDavid S. Miller 				continue;
13863be0686bSDavid S. Miller 			for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
13873be0686bSDavid S. Miller 				const struct fib_nh *nh = &fi->fib_nh[nhsel];
13883be0686bSDavid S. Miller 
13893be0686bSDavid S. Miller 				if (nh->nh_flags & RTNH_F_DEAD)
13903be0686bSDavid S. Miller 					continue;
139122bd5b9bSDavid S. Miller 				if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
13923be0686bSDavid S. Miller 					continue;
13933be0686bSDavid S. Miller 
13943be0686bSDavid S. Miller #ifdef CONFIG_IP_FIB_TRIE_STATS
13953be0686bSDavid S. Miller 				t->stats.semantic_match_passed++;
13963be0686bSDavid S. Miller #endif
13975c74501fSEric Dumazet 				res->prefixlen = li->plen;
13983be0686bSDavid S. Miller 				res->nh_sel = nhsel;
13993be0686bSDavid S. Miller 				res->type = fa->fa_type;
140037e826c5SDavid S. Miller 				res->scope = fa->fa_info->fib_scope;
14013be0686bSDavid S. Miller 				res->fi = fi;
14023be0686bSDavid S. Miller 				res->table = tb;
14033be0686bSDavid S. Miller 				res->fa_head = &li->falh;
14043be0686bSDavid S. Miller 				if (!(fib_flags & FIB_LOOKUP_NOREF))
14055c74501fSEric Dumazet 					atomic_inc(&fi->fib_clntref);
14063be0686bSDavid S. Miller 				return 0;
14073be0686bSDavid S. Miller 			}
14083be0686bSDavid S. Miller 		}
14093be0686bSDavid S. Miller 
14103be0686bSDavid S. Miller #ifdef CONFIG_IP_FIB_TRIE_STATS
14113be0686bSDavid S. Miller 		t->stats.semantic_match_miss++;
14123be0686bSDavid S. Miller #endif
141319baf839SRobert Olsson 	}
141419baf839SRobert Olsson 
14152e655571SBen Hutchings 	return 1;
1416a07f5f50SStephen Hemminger }
1417a07f5f50SStephen Hemminger 
141822bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1419ebc0ffaeSEric Dumazet 		     struct fib_result *res, int fib_flags)
142019baf839SRobert Olsson {
142119baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
14222e655571SBen Hutchings 	int ret;
1423b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
142419baf839SRobert Olsson 	struct tnode *pn;
14253b004569SDavid S. Miller 	unsigned int pos, bits;
142622bd5b9bSDavid S. Miller 	t_key key = ntohl(flp->daddr);
14273b004569SDavid S. Miller 	unsigned int chopped_off;
142819baf839SRobert Olsson 	t_key cindex = 0;
14293b004569SDavid S. Miller 	unsigned int current_prefix_length = KEYLENGTH;
143091b9a277SOlof Johansson 	struct tnode *cn;
1431874ffa8fSEric Dumazet 	t_key pref_mismatch;
143291b9a277SOlof Johansson 
14332373ce1cSRobert Olsson 	rcu_read_lock();
143419baf839SRobert Olsson 
14352373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
143619baf839SRobert Olsson 	if (!n)
143719baf839SRobert Olsson 		goto failed;
143819baf839SRobert Olsson 
143919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
144019baf839SRobert Olsson 	t->stats.gets++;
144119baf839SRobert Olsson #endif
144219baf839SRobert Olsson 
144319baf839SRobert Olsson 	/* Just a leaf? */
144419baf839SRobert Olsson 	if (IS_LEAF(n)) {
14455b470441SDavid S. Miller 		ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1446a07f5f50SStephen Hemminger 		goto found;
144719baf839SRobert Olsson 	}
1448a07f5f50SStephen Hemminger 
144919baf839SRobert Olsson 	pn = (struct tnode *) n;
145019baf839SRobert Olsson 	chopped_off = 0;
145119baf839SRobert Olsson 
145219baf839SRobert Olsson 	while (pn) {
145319baf839SRobert Olsson 		pos = pn->pos;
145419baf839SRobert Olsson 		bits = pn->bits;
145519baf839SRobert Olsson 
145619baf839SRobert Olsson 		if (!chopped_off)
1457ab66b4a7SStephen Hemminger 			cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1458ab66b4a7SStephen Hemminger 						   pos, bits);
145919baf839SRobert Olsson 
1460b902e573SJarek Poplawski 		n = tnode_get_child_rcu(pn, cindex);
146119baf839SRobert Olsson 
146219baf839SRobert Olsson 		if (n == NULL) {
146319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
146419baf839SRobert Olsson 			t->stats.null_node_hit++;
146519baf839SRobert Olsson #endif
146619baf839SRobert Olsson 			goto backtrace;
146719baf839SRobert Olsson 		}
146819baf839SRobert Olsson 
146991b9a277SOlof Johansson 		if (IS_LEAF(n)) {
14705b470441SDavid S. Miller 			ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
14712e655571SBen Hutchings 			if (ret > 0)
147291b9a277SOlof Johansson 				goto backtrace;
1473a07f5f50SStephen Hemminger 			goto found;
147491b9a277SOlof Johansson 		}
147591b9a277SOlof Johansson 
147691b9a277SOlof Johansson 		cn = (struct tnode *)n;
147719baf839SRobert Olsson 
147819baf839SRobert Olsson 		/*
147919baf839SRobert Olsson 		 * It's a tnode, and we can do some extra checks here if we
148019baf839SRobert Olsson 		 * like, to avoid descending into a dead-end branch.
148119baf839SRobert Olsson 		 * This tnode is in the parent's child array at index
148219baf839SRobert Olsson 		 * key[p_pos..p_pos+p_bits] but potentially with some bits
148319baf839SRobert Olsson 		 * chopped off, so in reality the index may be just a
148419baf839SRobert Olsson 		 * subprefix, padded with zero at the end.
148519baf839SRobert Olsson 		 * We can also take a look at any skipped bits in this
148619baf839SRobert Olsson 		 * tnode - everything up to p_pos is supposed to be ok,
148719baf839SRobert Olsson 		 * and the non-chopped bits of the index (se previous
148819baf839SRobert Olsson 		 * paragraph) are also guaranteed ok, but the rest is
148919baf839SRobert Olsson 		 * considered unknown.
149019baf839SRobert Olsson 		 *
149119baf839SRobert Olsson 		 * The skipped bits are key[pos+bits..cn->pos].
149219baf839SRobert Olsson 		 */
149319baf839SRobert Olsson 
149419baf839SRobert Olsson 		/* If current_prefix_length < pos+bits, we are already doing
149519baf839SRobert Olsson 		 * actual prefix  matching, which means everything from
149619baf839SRobert Olsson 		 * pos+(bits-chopped_off) onward must be zero along some
149719baf839SRobert Olsson 		 * branch of this subtree - otherwise there is *no* valid
149819baf839SRobert Olsson 		 * prefix present. Here we can only check the skipped
149919baf839SRobert Olsson 		 * bits. Remember, since we have already indexed into the
150019baf839SRobert Olsson 		 * parent's child array, we know that the bits we chopped of
150119baf839SRobert Olsson 		 * *are* zero.
150219baf839SRobert Olsson 		 */
150319baf839SRobert Olsson 
1504a07f5f50SStephen Hemminger 		/* NOTA BENE: Checking only skipped bits
1505a07f5f50SStephen Hemminger 		   for the new node here */
150619baf839SRobert Olsson 
150719baf839SRobert Olsson 		if (current_prefix_length < pos+bits) {
150819baf839SRobert Olsson 			if (tkey_extract_bits(cn->key, current_prefix_length,
1509a07f5f50SStephen Hemminger 						cn->pos - current_prefix_length)
1510a07f5f50SStephen Hemminger 			    || !(cn->child[0]))
151119baf839SRobert Olsson 				goto backtrace;
151219baf839SRobert Olsson 		}
151319baf839SRobert Olsson 
151419baf839SRobert Olsson 		/*
151519baf839SRobert Olsson 		 * If chopped_off=0, the index is fully validated and we
151619baf839SRobert Olsson 		 * only need to look at the skipped bits for this, the new,
151719baf839SRobert Olsson 		 * tnode. What we actually want to do is to find out if
151819baf839SRobert Olsson 		 * these skipped bits match our key perfectly, or if we will
151919baf839SRobert Olsson 		 * have to count on finding a matching prefix further down,
152019baf839SRobert Olsson 		 * because if we do, we would like to have some way of
152119baf839SRobert Olsson 		 * verifying the existence of such a prefix at this point.
152219baf839SRobert Olsson 		 */
152319baf839SRobert Olsson 
152419baf839SRobert Olsson 		/* The only thing we can do at this point is to verify that
152519baf839SRobert Olsson 		 * any such matching prefix can indeed be a prefix to our
152619baf839SRobert Olsson 		 * key, and if the bits in the node we are inspecting that
152719baf839SRobert Olsson 		 * do not match our key are not ZERO, this cannot be true.
152819baf839SRobert Olsson 		 * Thus, find out where there is a mismatch (before cn->pos)
152919baf839SRobert Olsson 		 * and verify that all the mismatching bits are zero in the
153019baf839SRobert Olsson 		 * new tnode's key.
153119baf839SRobert Olsson 		 */
153219baf839SRobert Olsson 
1533a07f5f50SStephen Hemminger 		/*
1534a07f5f50SStephen Hemminger 		 * Note: We aren't very concerned about the piece of
1535a07f5f50SStephen Hemminger 		 * the key that precede pn->pos+pn->bits, since these
1536a07f5f50SStephen Hemminger 		 * have already been checked. The bits after cn->pos
1537a07f5f50SStephen Hemminger 		 * aren't checked since these are by definition
1538a07f5f50SStephen Hemminger 		 * "unknown" at this point. Thus, what we want to see
1539a07f5f50SStephen Hemminger 		 * is if we are about to enter the "prefix matching"
1540a07f5f50SStephen Hemminger 		 * state, and in that case verify that the skipped
1541a07f5f50SStephen Hemminger 		 * bits that will prevail throughout this subtree are
1542a07f5f50SStephen Hemminger 		 * zero, as they have to be if we are to find a
1543a07f5f50SStephen Hemminger 		 * matching prefix.
154419baf839SRobert Olsson 		 */
154519baf839SRobert Olsson 
1546874ffa8fSEric Dumazet 		pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
154719baf839SRobert Olsson 
1548a07f5f50SStephen Hemminger 		/*
1549a07f5f50SStephen Hemminger 		 * In short: If skipped bits in this node do not match
1550a07f5f50SStephen Hemminger 		 * the search key, enter the "prefix matching"
1551a07f5f50SStephen Hemminger 		 * state.directly.
155219baf839SRobert Olsson 		 */
155319baf839SRobert Olsson 		if (pref_mismatch) {
1554874ffa8fSEric Dumazet 			int mp = KEYLENGTH - fls(pref_mismatch);
155519baf839SRobert Olsson 
1556874ffa8fSEric Dumazet 			if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
155719baf839SRobert Olsson 				goto backtrace;
155819baf839SRobert Olsson 
155919baf839SRobert Olsson 			if (current_prefix_length >= cn->pos)
156019baf839SRobert Olsson 				current_prefix_length = mp;
156119baf839SRobert Olsson 		}
1562a07f5f50SStephen Hemminger 
156319baf839SRobert Olsson 		pn = (struct tnode *)n; /* Descend */
156419baf839SRobert Olsson 		chopped_off = 0;
156519baf839SRobert Olsson 		continue;
156691b9a277SOlof Johansson 
156719baf839SRobert Olsson backtrace:
156819baf839SRobert Olsson 		chopped_off++;
156919baf839SRobert Olsson 
157019baf839SRobert Olsson 		/* As zero don't change the child key (cindex) */
1571a07f5f50SStephen Hemminger 		while ((chopped_off <= pn->bits)
1572a07f5f50SStephen Hemminger 		       && !(cindex & (1<<(chopped_off-1))))
157319baf839SRobert Olsson 			chopped_off++;
157419baf839SRobert Olsson 
157519baf839SRobert Olsson 		/* Decrease current_... with bits chopped off */
157619baf839SRobert Olsson 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1577a07f5f50SStephen Hemminger 			current_prefix_length = pn->pos + pn->bits
1578a07f5f50SStephen Hemminger 				- chopped_off;
157919baf839SRobert Olsson 
158019baf839SRobert Olsson 		/*
158119baf839SRobert Olsson 		 * Either we do the actual chop off according or if we have
158219baf839SRobert Olsson 		 * chopped off all bits in this tnode walk up to our parent.
158319baf839SRobert Olsson 		 */
158419baf839SRobert Olsson 
158591b9a277SOlof Johansson 		if (chopped_off <= pn->bits) {
158619baf839SRobert Olsson 			cindex &= ~(1 << (chopped_off-1));
158791b9a277SOlof Johansson 		} else {
1588b299e4f0SDavid S. Miller 			struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
158906801916SStephen Hemminger 			if (!parent)
159019baf839SRobert Olsson 				goto failed;
159119baf839SRobert Olsson 
159219baf839SRobert Olsson 			/* Get Child's index */
159306801916SStephen Hemminger 			cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
159406801916SStephen Hemminger 			pn = parent;
159519baf839SRobert Olsson 			chopped_off = 0;
159619baf839SRobert Olsson 
159719baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
159819baf839SRobert Olsson 			t->stats.backtrack++;
159919baf839SRobert Olsson #endif
160019baf839SRobert Olsson 			goto backtrace;
160119baf839SRobert Olsson 		}
160219baf839SRobert Olsson 	}
160319baf839SRobert Olsson failed:
160419baf839SRobert Olsson 	ret = 1;
160519baf839SRobert Olsson found:
16062373ce1cSRobert Olsson 	rcu_read_unlock();
160719baf839SRobert Olsson 	return ret;
160819baf839SRobert Olsson }
160919baf839SRobert Olsson 
161019baf839SRobert Olsson /*
16119195bef7SStephen Hemminger  * Remove the leaf and return parent.
161219baf839SRobert Olsson  */
16139195bef7SStephen Hemminger static void trie_leaf_remove(struct trie *t, struct leaf *l)
16149195bef7SStephen Hemminger {
1615b299e4f0SDavid S. Miller 	struct tnode *tp = node_parent((struct rt_trie_node *) l);
16169195bef7SStephen Hemminger 
16179195bef7SStephen Hemminger 	pr_debug("entering trie_leaf_remove(%p)\n", l);
161819baf839SRobert Olsson 
161919baf839SRobert Olsson 	if (tp) {
16209195bef7SStephen Hemminger 		t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
162119baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, NULL);
16227b85576dSJarek Poplawski 		trie_rebalance(t, tp);
162391b9a277SOlof Johansson 	} else
16242373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, NULL);
162519baf839SRobert Olsson 
1626387a5487SStephen Hemminger 	free_leaf(l);
162719baf839SRobert Olsson }
162819baf839SRobert Olsson 
1629d562f1f8SRobert Olsson /*
1630d562f1f8SRobert Olsson  * Caller must hold RTNL.
1631d562f1f8SRobert Olsson  */
163216c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
163319baf839SRobert Olsson {
163419baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
163519baf839SRobert Olsson 	u32 key, mask;
16364e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
16374e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
163819baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
163919baf839SRobert Olsson 	struct list_head *fa_head;
164019baf839SRobert Olsson 	struct leaf *l;
164191b9a277SOlof Johansson 	struct leaf_info *li;
164291b9a277SOlof Johansson 
164319baf839SRobert Olsson 	if (plen > 32)
164419baf839SRobert Olsson 		return -EINVAL;
164519baf839SRobert Olsson 
16464e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
164719baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
164819baf839SRobert Olsson 
164919baf839SRobert Olsson 	if (key & ~mask)
165019baf839SRobert Olsson 		return -EINVAL;
165119baf839SRobert Olsson 
165219baf839SRobert Olsson 	key = key & mask;
165319baf839SRobert Olsson 	l = fib_find_node(t, key);
165419baf839SRobert Olsson 
165519baf839SRobert Olsson 	if (!l)
165619baf839SRobert Olsson 		return -ESRCH;
165719baf839SRobert Olsson 
165819baf839SRobert Olsson 	fa_head = get_fa_head(l, plen);
165919baf839SRobert Olsson 	fa = fib_find_alias(fa_head, tos, 0);
166019baf839SRobert Olsson 
166119baf839SRobert Olsson 	if (!fa)
166219baf839SRobert Olsson 		return -ESRCH;
166319baf839SRobert Olsson 
16640c7770c7SStephen Hemminger 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
166519baf839SRobert Olsson 
166619baf839SRobert Olsson 	fa_to_delete = NULL;
1667936f6f8eSJulian Anastasov 	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1668936f6f8eSJulian Anastasov 	list_for_each_entry_continue(fa, fa_head, fa_list) {
166919baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
167019baf839SRobert Olsson 
167119baf839SRobert Olsson 		if (fa->fa_tos != tos)
167219baf839SRobert Olsson 			break;
167319baf839SRobert Olsson 
16744e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
16754e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
167637e826c5SDavid S. Miller 		     fa->fa_info->fib_scope == cfg->fc_scope) &&
167774cb3c10SJulian Anastasov 		    (!cfg->fc_prefsrc ||
167874cb3c10SJulian Anastasov 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
16794e902c57SThomas Graf 		    (!cfg->fc_protocol ||
16804e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
16814e902c57SThomas Graf 		    fib_nh_match(cfg, fi) == 0) {
168219baf839SRobert Olsson 			fa_to_delete = fa;
168319baf839SRobert Olsson 			break;
168419baf839SRobert Olsson 		}
168519baf839SRobert Olsson 	}
168619baf839SRobert Olsson 
168791b9a277SOlof Johansson 	if (!fa_to_delete)
168891b9a277SOlof Johansson 		return -ESRCH;
168919baf839SRobert Olsson 
169019baf839SRobert Olsson 	fa = fa_to_delete;
16914e902c57SThomas Graf 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1692b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
169319baf839SRobert Olsson 
169419baf839SRobert Olsson 	l = fib_find_node(t, key);
1695772cb712SRobert Olsson 	li = find_leaf_info(l, plen);
169619baf839SRobert Olsson 
16972373ce1cSRobert Olsson 	list_del_rcu(&fa->fa_list);
169819baf839SRobert Olsson 
169921d8c49eSDavid S. Miller 	if (!plen)
170021d8c49eSDavid S. Miller 		tb->tb_num_default--;
170121d8c49eSDavid S. Miller 
170219baf839SRobert Olsson 	if (list_empty(fa_head)) {
17032373ce1cSRobert Olsson 		hlist_del_rcu(&li->hlist);
170419baf839SRobert Olsson 		free_leaf_info(li);
17052373ce1cSRobert Olsson 	}
170619baf839SRobert Olsson 
170719baf839SRobert Olsson 	if (hlist_empty(&l->list))
17089195bef7SStephen Hemminger 		trie_leaf_remove(t, l);
170919baf839SRobert Olsson 
171019baf839SRobert Olsson 	if (fa->fa_state & FA_S_ACCESSED)
171176e6ebfbSDenis V. Lunev 		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
171219baf839SRobert Olsson 
17132373ce1cSRobert Olsson 	fib_release_info(fa->fa_info);
17142373ce1cSRobert Olsson 	alias_free_mem_rcu(fa);
171519baf839SRobert Olsson 	return 0;
171619baf839SRobert Olsson }
171719baf839SRobert Olsson 
1718ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head)
171919baf839SRobert Olsson {
172019baf839SRobert Olsson 	struct fib_alias *fa, *fa_node;
172119baf839SRobert Olsson 	int found = 0;
172219baf839SRobert Olsson 
172319baf839SRobert Olsson 	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
172419baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
172519baf839SRobert Olsson 
172619baf839SRobert Olsson 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
17272373ce1cSRobert Olsson 			list_del_rcu(&fa->fa_list);
17282373ce1cSRobert Olsson 			fib_release_info(fa->fa_info);
17292373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
173019baf839SRobert Olsson 			found++;
173119baf839SRobert Olsson 		}
173219baf839SRobert Olsson 	}
173319baf839SRobert Olsson 	return found;
173419baf839SRobert Olsson }
173519baf839SRobert Olsson 
1736ef3660ceSStephen Hemminger static int trie_flush_leaf(struct leaf *l)
173719baf839SRobert Olsson {
173819baf839SRobert Olsson 	int found = 0;
173919baf839SRobert Olsson 	struct hlist_head *lih = &l->list;
174019baf839SRobert Olsson 	struct hlist_node *node, *tmp;
174119baf839SRobert Olsson 	struct leaf_info *li = NULL;
174219baf839SRobert Olsson 
174319baf839SRobert Olsson 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1744ef3660ceSStephen Hemminger 		found += trie_flush_list(&li->falh);
174519baf839SRobert Olsson 
174619baf839SRobert Olsson 		if (list_empty(&li->falh)) {
17472373ce1cSRobert Olsson 			hlist_del_rcu(&li->hlist);
174819baf839SRobert Olsson 			free_leaf_info(li);
174919baf839SRobert Olsson 		}
175019baf839SRobert Olsson 	}
175119baf839SRobert Olsson 	return found;
175219baf839SRobert Olsson }
175319baf839SRobert Olsson 
175482cfbb00SStephen Hemminger /*
175582cfbb00SStephen Hemminger  * Scan for the next right leaf starting at node p->child[idx]
175682cfbb00SStephen Hemminger  * Since we have back pointer, no recursion necessary.
175782cfbb00SStephen Hemminger  */
1758b299e4f0SDavid S. Miller static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
175919baf839SRobert Olsson {
176082cfbb00SStephen Hemminger 	do {
176182cfbb00SStephen Hemminger 		t_key idx;
176219baf839SRobert Olsson 
176319baf839SRobert Olsson 		if (c)
176482cfbb00SStephen Hemminger 			idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
176519baf839SRobert Olsson 		else
176682cfbb00SStephen Hemminger 			idx = 0;
176719baf839SRobert Olsson 
176882cfbb00SStephen Hemminger 		while (idx < 1u << p->bits) {
176982cfbb00SStephen Hemminger 			c = tnode_get_child_rcu(p, idx++);
17702373ce1cSRobert Olsson 			if (!c)
177191b9a277SOlof Johansson 				continue;
177219baf839SRobert Olsson 
177382cfbb00SStephen Hemminger 			if (IS_LEAF(c)) {
17740a5c0475SEric Dumazet 				prefetch(rcu_dereference_rtnl(p->child[idx]));
17752373ce1cSRobert Olsson 				return (struct leaf *) c;
177619baf839SRobert Olsson 			}
177782cfbb00SStephen Hemminger 
177882cfbb00SStephen Hemminger 			/* Rescan start scanning in new node */
177982cfbb00SStephen Hemminger 			p = (struct tnode *) c;
178082cfbb00SStephen Hemminger 			idx = 0;
178119baf839SRobert Olsson 		}
178282cfbb00SStephen Hemminger 
178382cfbb00SStephen Hemminger 		/* Node empty, walk back up to parent */
1784b299e4f0SDavid S. Miller 		c = (struct rt_trie_node *) p;
178582cfbb00SStephen Hemminger 	} while ((p = node_parent_rcu(c)) != NULL);
178682cfbb00SStephen Hemminger 
178782cfbb00SStephen Hemminger 	return NULL; /* Root of trie */
178882cfbb00SStephen Hemminger }
178982cfbb00SStephen Hemminger 
179082cfbb00SStephen Hemminger static struct leaf *trie_firstleaf(struct trie *t)
179182cfbb00SStephen Hemminger {
1792a034ee3cSEric Dumazet 	struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
179382cfbb00SStephen Hemminger 
179482cfbb00SStephen Hemminger 	if (!n)
179582cfbb00SStephen Hemminger 		return NULL;
179682cfbb00SStephen Hemminger 
179782cfbb00SStephen Hemminger 	if (IS_LEAF(n))          /* trie is just a leaf */
179882cfbb00SStephen Hemminger 		return (struct leaf *) n;
179982cfbb00SStephen Hemminger 
180082cfbb00SStephen Hemminger 	return leaf_walk_rcu(n, NULL);
180182cfbb00SStephen Hemminger }
180282cfbb00SStephen Hemminger 
180382cfbb00SStephen Hemminger static struct leaf *trie_nextleaf(struct leaf *l)
180482cfbb00SStephen Hemminger {
1805b299e4f0SDavid S. Miller 	struct rt_trie_node *c = (struct rt_trie_node *) l;
1806b902e573SJarek Poplawski 	struct tnode *p = node_parent_rcu(c);
180782cfbb00SStephen Hemminger 
180882cfbb00SStephen Hemminger 	if (!p)
180982cfbb00SStephen Hemminger 		return NULL;	/* trie with just one leaf */
181082cfbb00SStephen Hemminger 
181182cfbb00SStephen Hemminger 	return leaf_walk_rcu(p, c);
181219baf839SRobert Olsson }
181319baf839SRobert Olsson 
181471d67e66SStephen Hemminger static struct leaf *trie_leafindex(struct trie *t, int index)
181571d67e66SStephen Hemminger {
181671d67e66SStephen Hemminger 	struct leaf *l = trie_firstleaf(t);
181771d67e66SStephen Hemminger 
1818ec28cf73SStephen Hemminger 	while (l && index-- > 0)
181971d67e66SStephen Hemminger 		l = trie_nextleaf(l);
1820ec28cf73SStephen Hemminger 
182171d67e66SStephen Hemminger 	return l;
182271d67e66SStephen Hemminger }
182371d67e66SStephen Hemminger 
182471d67e66SStephen Hemminger 
1825d562f1f8SRobert Olsson /*
1826d562f1f8SRobert Olsson  * Caller must hold RTNL.
1827d562f1f8SRobert Olsson  */
182816c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb)
182919baf839SRobert Olsson {
183019baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
18319195bef7SStephen Hemminger 	struct leaf *l, *ll = NULL;
183282cfbb00SStephen Hemminger 	int found = 0;
183319baf839SRobert Olsson 
183482cfbb00SStephen Hemminger 	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1835ef3660ceSStephen Hemminger 		found += trie_flush_leaf(l);
183619baf839SRobert Olsson 
183719baf839SRobert Olsson 		if (ll && hlist_empty(&ll->list))
18389195bef7SStephen Hemminger 			trie_leaf_remove(t, ll);
183919baf839SRobert Olsson 		ll = l;
184019baf839SRobert Olsson 	}
184119baf839SRobert Olsson 
184219baf839SRobert Olsson 	if (ll && hlist_empty(&ll->list))
18439195bef7SStephen Hemminger 		trie_leaf_remove(t, ll);
184419baf839SRobert Olsson 
18450c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
184619baf839SRobert Olsson 	return found;
184719baf839SRobert Olsson }
184819baf839SRobert Olsson 
18494aa2c466SPavel Emelyanov void fib_free_table(struct fib_table *tb)
18504aa2c466SPavel Emelyanov {
18514aa2c466SPavel Emelyanov 	kfree(tb);
18524aa2c466SPavel Emelyanov }
18534aa2c466SPavel Emelyanov 
1854a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1855a07f5f50SStephen Hemminger 			   struct fib_table *tb,
185619baf839SRobert Olsson 			   struct sk_buff *skb, struct netlink_callback *cb)
185719baf839SRobert Olsson {
185819baf839SRobert Olsson 	int i, s_i;
185919baf839SRobert Olsson 	struct fib_alias *fa;
186032ab5f80SAl Viro 	__be32 xkey = htonl(key);
186119baf839SRobert Olsson 
186271d67e66SStephen Hemminger 	s_i = cb->args[5];
186319baf839SRobert Olsson 	i = 0;
186419baf839SRobert Olsson 
18652373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
18662373ce1cSRobert Olsson 
18672373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fah, fa_list) {
186819baf839SRobert Olsson 		if (i < s_i) {
186919baf839SRobert Olsson 			i++;
187019baf839SRobert Olsson 			continue;
187119baf839SRobert Olsson 		}
187219baf839SRobert Olsson 
187319baf839SRobert Olsson 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
187419baf839SRobert Olsson 				  cb->nlh->nlmsg_seq,
187519baf839SRobert Olsson 				  RTM_NEWROUTE,
187619baf839SRobert Olsson 				  tb->tb_id,
187719baf839SRobert Olsson 				  fa->fa_type,
1878be403ea1SThomas Graf 				  xkey,
187919baf839SRobert Olsson 				  plen,
188019baf839SRobert Olsson 				  fa->fa_tos,
188164347f78SStephen Hemminger 				  fa->fa_info, NLM_F_MULTI) < 0) {
188271d67e66SStephen Hemminger 			cb->args[5] = i;
188319baf839SRobert Olsson 			return -1;
188419baf839SRobert Olsson 		}
188519baf839SRobert Olsson 		i++;
188619baf839SRobert Olsson 	}
188771d67e66SStephen Hemminger 	cb->args[5] = i;
188819baf839SRobert Olsson 	return skb->len;
188919baf839SRobert Olsson }
189019baf839SRobert Olsson 
1891a88ee229SStephen Hemminger static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1892a07f5f50SStephen Hemminger 			struct sk_buff *skb, struct netlink_callback *cb)
189319baf839SRobert Olsson {
1894a88ee229SStephen Hemminger 	struct leaf_info *li;
1895a88ee229SStephen Hemminger 	struct hlist_node *node;
1896a88ee229SStephen Hemminger 	int i, s_i;
189791b9a277SOlof Johansson 
189871d67e66SStephen Hemminger 	s_i = cb->args[4];
1899a88ee229SStephen Hemminger 	i = 0;
190019baf839SRobert Olsson 
1901a88ee229SStephen Hemminger 	/* rcu_read_lock is hold by caller */
1902a88ee229SStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1903a88ee229SStephen Hemminger 		if (i < s_i) {
1904a88ee229SStephen Hemminger 			i++;
190519baf839SRobert Olsson 			continue;
1906a88ee229SStephen Hemminger 		}
190719baf839SRobert Olsson 
1908a88ee229SStephen Hemminger 		if (i > s_i)
190971d67e66SStephen Hemminger 			cb->args[5] = 0;
191019baf839SRobert Olsson 
1911a88ee229SStephen Hemminger 		if (list_empty(&li->falh))
191219baf839SRobert Olsson 			continue;
191319baf839SRobert Olsson 
1914a88ee229SStephen Hemminger 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
191571d67e66SStephen Hemminger 			cb->args[4] = i;
191619baf839SRobert Olsson 			return -1;
191719baf839SRobert Olsson 		}
1918a88ee229SStephen Hemminger 		i++;
191919baf839SRobert Olsson 	}
1920a88ee229SStephen Hemminger 
192171d67e66SStephen Hemminger 	cb->args[4] = i;
192219baf839SRobert Olsson 	return skb->len;
192319baf839SRobert Olsson }
192419baf839SRobert Olsson 
192516c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1926a07f5f50SStephen Hemminger 		   struct netlink_callback *cb)
192719baf839SRobert Olsson {
1928a88ee229SStephen Hemminger 	struct leaf *l;
192919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
1930d5ce8a0eSStephen Hemminger 	t_key key = cb->args[2];
193171d67e66SStephen Hemminger 	int count = cb->args[3];
193219baf839SRobert Olsson 
19332373ce1cSRobert Olsson 	rcu_read_lock();
1934d5ce8a0eSStephen Hemminger 	/* Dump starting at last key.
1935d5ce8a0eSStephen Hemminger 	 * Note: 0.0.0.0/0 (ie default) is first key.
1936d5ce8a0eSStephen Hemminger 	 */
193771d67e66SStephen Hemminger 	if (count == 0)
1938d5ce8a0eSStephen Hemminger 		l = trie_firstleaf(t);
1939d5ce8a0eSStephen Hemminger 	else {
194071d67e66SStephen Hemminger 		/* Normally, continue from last key, but if that is missing
194171d67e66SStephen Hemminger 		 * fallback to using slow rescan
1942d5ce8a0eSStephen Hemminger 		 */
194371d67e66SStephen Hemminger 		l = fib_find_node(t, key);
194471d67e66SStephen Hemminger 		if (!l)
194571d67e66SStephen Hemminger 			l = trie_leafindex(t, count);
194619baf839SRobert Olsson 	}
1947a88ee229SStephen Hemminger 
1948d5ce8a0eSStephen Hemminger 	while (l) {
1949d5ce8a0eSStephen Hemminger 		cb->args[2] = l->key;
1950a88ee229SStephen Hemminger 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
195171d67e66SStephen Hemminger 			cb->args[3] = count;
19522373ce1cSRobert Olsson 			rcu_read_unlock();
195319baf839SRobert Olsson 			return -1;
195419baf839SRobert Olsson 		}
1955d5ce8a0eSStephen Hemminger 
195671d67e66SStephen Hemminger 		++count;
1957d5ce8a0eSStephen Hemminger 		l = trie_nextleaf(l);
195871d67e66SStephen Hemminger 		memset(&cb->args[4], 0,
195971d67e66SStephen Hemminger 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
1960a88ee229SStephen Hemminger 	}
196171d67e66SStephen Hemminger 	cb->args[3] = count;
1962a88ee229SStephen Hemminger 	rcu_read_unlock();
1963a88ee229SStephen Hemminger 
1964a88ee229SStephen Hemminger 	return skb->len;
1965a88ee229SStephen Hemminger }
196619baf839SRobert Olsson 
19675348ba85SDavid S. Miller void __init fib_trie_init(void)
19687f9b8052SStephen Hemminger {
1969a07f5f50SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1970a07f5f50SStephen Hemminger 					  sizeof(struct fib_alias),
1971bc3c8c1eSStephen Hemminger 					  0, SLAB_PANIC, NULL);
1972bc3c8c1eSStephen Hemminger 
1973bc3c8c1eSStephen Hemminger 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1974bc3c8c1eSStephen Hemminger 					   max(sizeof(struct leaf),
1975bc3c8c1eSStephen Hemminger 					       sizeof(struct leaf_info)),
1976bc3c8c1eSStephen Hemminger 					   0, SLAB_PANIC, NULL);
19777f9b8052SStephen Hemminger }
197819baf839SRobert Olsson 
19797f9b8052SStephen Hemminger 
19805348ba85SDavid S. Miller struct fib_table *fib_trie_table(u32 id)
198119baf839SRobert Olsson {
198219baf839SRobert Olsson 	struct fib_table *tb;
198319baf839SRobert Olsson 	struct trie *t;
198419baf839SRobert Olsson 
198519baf839SRobert Olsson 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
198619baf839SRobert Olsson 		     GFP_KERNEL);
198719baf839SRobert Olsson 	if (tb == NULL)
198819baf839SRobert Olsson 		return NULL;
198919baf839SRobert Olsson 
199019baf839SRobert Olsson 	tb->tb_id = id;
1991971b893eSDenis V. Lunev 	tb->tb_default = -1;
199221d8c49eSDavid S. Miller 	tb->tb_num_default = 0;
199319baf839SRobert Olsson 
199419baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
1995c28a1cf4SStephen Hemminger 	memset(t, 0, sizeof(*t));
199619baf839SRobert Olsson 
199719baf839SRobert Olsson 	return tb;
199819baf839SRobert Olsson }
199919baf839SRobert Olsson 
2000cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
2001cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
2002cb7b593cSStephen Hemminger struct fib_trie_iter {
20031c340b2fSDenis V. Lunev 	struct seq_net_private p;
20043d3b2d25SStephen Hemminger 	struct fib_table *tb;
2005cb7b593cSStephen Hemminger 	struct tnode *tnode;
2006a034ee3cSEric Dumazet 	unsigned int index;
2007a034ee3cSEric Dumazet 	unsigned int depth;
2008cb7b593cSStephen Hemminger };
200919baf839SRobert Olsson 
2010b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
201119baf839SRobert Olsson {
2012cb7b593cSStephen Hemminger 	struct tnode *tn = iter->tnode;
2013a034ee3cSEric Dumazet 	unsigned int cindex = iter->index;
2014cb7b593cSStephen Hemminger 	struct tnode *p;
201519baf839SRobert Olsson 
20166640e697SEric W. Biederman 	/* A single entry routing table */
20176640e697SEric W. Biederman 	if (!tn)
20186640e697SEric W. Biederman 		return NULL;
20196640e697SEric W. Biederman 
2020cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2021cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
2022cb7b593cSStephen Hemminger rescan:
2023cb7b593cSStephen Hemminger 	while (cindex < (1<<tn->bits)) {
2024b299e4f0SDavid S. Miller 		struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
202519baf839SRobert Olsson 
2026cb7b593cSStephen Hemminger 		if (n) {
202719baf839SRobert Olsson 			if (IS_LEAF(n)) {
2028cb7b593cSStephen Hemminger 				iter->tnode = tn;
2029cb7b593cSStephen Hemminger 				iter->index = cindex + 1;
203091b9a277SOlof Johansson 			} else {
2031cb7b593cSStephen Hemminger 				/* push down one level */
2032cb7b593cSStephen Hemminger 				iter->tnode = (struct tnode *) n;
2033cb7b593cSStephen Hemminger 				iter->index = 0;
2034cb7b593cSStephen Hemminger 				++iter->depth;
203519baf839SRobert Olsson 			}
2036cb7b593cSStephen Hemminger 			return n;
203719baf839SRobert Olsson 		}
203819baf839SRobert Olsson 
2039cb7b593cSStephen Hemminger 		++cindex;
2040cb7b593cSStephen Hemminger 	}
2041cb7b593cSStephen Hemminger 
2042cb7b593cSStephen Hemminger 	/* Current node exhausted, pop back up */
2043b299e4f0SDavid S. Miller 	p = node_parent_rcu((struct rt_trie_node *)tn);
2044cb7b593cSStephen Hemminger 	if (p) {
2045cb7b593cSStephen Hemminger 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2046cb7b593cSStephen Hemminger 		tn = p;
2047cb7b593cSStephen Hemminger 		--iter->depth;
2048cb7b593cSStephen Hemminger 		goto rescan;
2049cb7b593cSStephen Hemminger 	}
2050cb7b593cSStephen Hemminger 
2051cb7b593cSStephen Hemminger 	/* got root? */
2052cb7b593cSStephen Hemminger 	return NULL;
2053cb7b593cSStephen Hemminger }
2054cb7b593cSStephen Hemminger 
2055b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
2056cb7b593cSStephen Hemminger 				       struct trie *t)
2057cb7b593cSStephen Hemminger {
2058b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
20595ddf0eb2SRobert Olsson 
20605ddf0eb2SRobert Olsson 	if (!t)
20615ddf0eb2SRobert Olsson 		return NULL;
20625ddf0eb2SRobert Olsson 
20635ddf0eb2SRobert Olsson 	n = rcu_dereference(t->trie);
20643d3b2d25SStephen Hemminger 	if (!n)
20655ddf0eb2SRobert Olsson 		return NULL;
2066cb7b593cSStephen Hemminger 
20676640e697SEric W. Biederman 	if (IS_TNODE(n)) {
2068cb7b593cSStephen Hemminger 		iter->tnode = (struct tnode *) n;
2069cb7b593cSStephen Hemminger 		iter->index = 0;
20701d25cd6cSRobert Olsson 		iter->depth = 1;
20716640e697SEric W. Biederman 	} else {
20726640e697SEric W. Biederman 		iter->tnode = NULL;
20736640e697SEric W. Biederman 		iter->index = 0;
20746640e697SEric W. Biederman 		iter->depth = 0;
20756640e697SEric W. Biederman 	}
20763d3b2d25SStephen Hemminger 
2077cb7b593cSStephen Hemminger 	return n;
2078cb7b593cSStephen Hemminger }
2079cb7b593cSStephen Hemminger 
2080cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
208119baf839SRobert Olsson {
2082b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
2083cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2084cb7b593cSStephen Hemminger 
2085cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
208619baf839SRobert Olsson 
20872373ce1cSRobert Olsson 	rcu_read_lock();
20883d3b2d25SStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2089cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
209093672292SStephen Hemminger 			struct leaf *l = (struct leaf *)n;
209193672292SStephen Hemminger 			struct leaf_info *li;
209293672292SStephen Hemminger 			struct hlist_node *tmp;
209393672292SStephen Hemminger 
209419baf839SRobert Olsson 			s->leaves++;
2095cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2096cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2097cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
209893672292SStephen Hemminger 
209993672292SStephen Hemminger 			hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
210093672292SStephen Hemminger 				++s->prefixes;
210191b9a277SOlof Johansson 		} else {
2102cb7b593cSStephen Hemminger 			const struct tnode *tn = (const struct tnode *) n;
2103cb7b593cSStephen Hemminger 			int i;
210419baf839SRobert Olsson 
210519baf839SRobert Olsson 			s->tnodes++;
210606ef921dSRobert Olsson 			if (tn->bits < MAX_STAT_DEPTH)
210719baf839SRobert Olsson 				s->nodesizes[tn->bits]++;
210806ef921dSRobert Olsson 
2109cb7b593cSStephen Hemminger 			for (i = 0; i < (1<<tn->bits); i++)
2110cb7b593cSStephen Hemminger 				if (!tn->child[i])
211119baf839SRobert Olsson 					s->nullpointers++;
211219baf839SRobert Olsson 		}
211319baf839SRobert Olsson 	}
21142373ce1cSRobert Olsson 	rcu_read_unlock();
211519baf839SRobert Olsson }
211619baf839SRobert Olsson 
211719baf839SRobert Olsson /*
211819baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
211919baf839SRobert Olsson  */
2120cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
212119baf839SRobert Olsson {
2122a034ee3cSEric Dumazet 	unsigned int i, max, pointers, bytes, avdepth;
212319baf839SRobert Olsson 
212419baf839SRobert Olsson 	if (stat->leaves)
212519baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
212619baf839SRobert Olsson 	else
212719baf839SRobert Olsson 		avdepth = 0;
212819baf839SRobert Olsson 
2129a07f5f50SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n",
2130a07f5f50SStephen Hemminger 		   avdepth / 100, avdepth % 100);
2131cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2132cb7b593cSStephen Hemminger 
2133cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
2134cb7b593cSStephen Hemminger 	bytes = sizeof(struct leaf) * stat->leaves;
213593672292SStephen Hemminger 
213693672292SStephen Hemminger 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
213793672292SStephen Hemminger 	bytes += sizeof(struct leaf_info) * stat->prefixes;
213893672292SStephen Hemminger 
2139187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
214019baf839SRobert Olsson 	bytes += sizeof(struct tnode) * stat->tnodes;
214119baf839SRobert Olsson 
214206ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
214306ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
214419baf839SRobert Olsson 		max--;
214519baf839SRobert Olsson 
2146cb7b593cSStephen Hemminger 	pointers = 0;
214719baf839SRobert Olsson 	for (i = 1; i <= max; i++)
214819baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2149187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
215019baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
215119baf839SRobert Olsson 		}
2152cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2153187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2154cb7b593cSStephen Hemminger 
2155b299e4f0SDavid S. Miller 	bytes += sizeof(struct rt_trie_node *) * pointers;
2156187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2157187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
215866a2f7fdSStephen Hemminger }
215919baf839SRobert Olsson 
216019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
216166a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
216266a2f7fdSStephen Hemminger 			    const struct trie_use_stats *stats)
216366a2f7fdSStephen Hemminger {
216466a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
216566a2f7fdSStephen Hemminger 	seq_printf(seq, "gets = %u\n", stats->gets);
216666a2f7fdSStephen Hemminger 	seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2167a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match passed = %u\n",
2168a07f5f50SStephen Hemminger 		   stats->semantic_match_passed);
2169a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match miss = %u\n",
2170a07f5f50SStephen Hemminger 		   stats->semantic_match_miss);
217166a2f7fdSStephen Hemminger 	seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2172a07f5f50SStephen Hemminger 	seq_printf(seq, "skipped node resize = %u\n\n",
2173a07f5f50SStephen Hemminger 		   stats->resize_node_skipped);
217419baf839SRobert Olsson }
217566a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
217666a2f7fdSStephen Hemminger 
21773d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2178d717a9a6SStephen Hemminger {
21793d3b2d25SStephen Hemminger 	if (tb->tb_id == RT_TABLE_LOCAL)
21803d3b2d25SStephen Hemminger 		seq_puts(seq, "Local:\n");
21813d3b2d25SStephen Hemminger 	else if (tb->tb_id == RT_TABLE_MAIN)
21823d3b2d25SStephen Hemminger 		seq_puts(seq, "Main:\n");
21833d3b2d25SStephen Hemminger 	else
21843d3b2d25SStephen Hemminger 		seq_printf(seq, "Id %d:\n", tb->tb_id);
2185d717a9a6SStephen Hemminger }
218619baf839SRobert Olsson 
21873d3b2d25SStephen Hemminger 
218819baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
218919baf839SRobert Olsson {
21901c340b2fSDenis V. Lunev 	struct net *net = (struct net *)seq->private;
21913d3b2d25SStephen Hemminger 	unsigned int h;
2192877a9bffSEric W. Biederman 
2193d717a9a6SStephen Hemminger 	seq_printf(seq,
2194a07f5f50SStephen Hemminger 		   "Basic info: size of leaf:"
2195a07f5f50SStephen Hemminger 		   " %Zd bytes, size of tnode: %Zd bytes.\n",
219619baf839SRobert Olsson 		   sizeof(struct leaf), sizeof(struct tnode));
219719baf839SRobert Olsson 
21983d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
21993d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22003d3b2d25SStephen Hemminger 		struct hlist_node *node;
22013d3b2d25SStephen Hemminger 		struct fib_table *tb;
2202cb7b593cSStephen Hemminger 
22033d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
22043d3b2d25SStephen Hemminger 			struct trie *t = (struct trie *) tb->tb_data;
22053d3b2d25SStephen Hemminger 			struct trie_stat stat;
22063d3b2d25SStephen Hemminger 
22073d3b2d25SStephen Hemminger 			if (!t)
22083d3b2d25SStephen Hemminger 				continue;
22093d3b2d25SStephen Hemminger 
22103d3b2d25SStephen Hemminger 			fib_table_print(seq, tb);
22113d3b2d25SStephen Hemminger 
22123d3b2d25SStephen Hemminger 			trie_collect_stats(t, &stat);
22133d3b2d25SStephen Hemminger 			trie_show_stats(seq, &stat);
22143d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
22153d3b2d25SStephen Hemminger 			trie_show_usage(seq, &t->stats);
22163d3b2d25SStephen Hemminger #endif
22173d3b2d25SStephen Hemminger 		}
22183d3b2d25SStephen Hemminger 	}
2219cb7b593cSStephen Hemminger 
222019baf839SRobert Olsson 	return 0;
222119baf839SRobert Olsson }
222219baf839SRobert Olsson 
222319baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file)
222419baf839SRobert Olsson {
2225de05c557SPavel Emelyanov 	return single_open_net(inode, file, fib_triestat_seq_show);
22261c340b2fSDenis V. Lunev }
22271c340b2fSDenis V. Lunev 
22289a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = {
222919baf839SRobert Olsson 	.owner	= THIS_MODULE,
223019baf839SRobert Olsson 	.open	= fib_triestat_seq_open,
223119baf839SRobert Olsson 	.read	= seq_read,
223219baf839SRobert Olsson 	.llseek	= seq_lseek,
2233b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
223419baf839SRobert Olsson };
223519baf839SRobert Olsson 
2236b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
223719baf839SRobert Olsson {
22381218854aSYOSHIFUJI Hideaki 	struct fib_trie_iter *iter = seq->private;
22391218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
2240cb7b593cSStephen Hemminger 	loff_t idx = 0;
22413d3b2d25SStephen Hemminger 	unsigned int h;
22423d3b2d25SStephen Hemminger 
22433d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
22443d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22453d3b2d25SStephen Hemminger 		struct hlist_node *node;
22463d3b2d25SStephen Hemminger 		struct fib_table *tb;
22473d3b2d25SStephen Hemminger 
22483d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2249b299e4f0SDavid S. Miller 			struct rt_trie_node *n;
2250cb7b593cSStephen Hemminger 
22513d3b2d25SStephen Hemminger 			for (n = fib_trie_get_first(iter,
22523d3b2d25SStephen Hemminger 						    (struct trie *) tb->tb_data);
22533d3b2d25SStephen Hemminger 			     n; n = fib_trie_get_next(iter))
22543d3b2d25SStephen Hemminger 				if (pos == idx++) {
22553d3b2d25SStephen Hemminger 					iter->tb = tb;
2256cb7b593cSStephen Hemminger 					return n;
225719baf839SRobert Olsson 				}
22583d3b2d25SStephen Hemminger 		}
22593d3b2d25SStephen Hemminger 	}
226019baf839SRobert Olsson 
226119baf839SRobert Olsson 	return NULL;
226219baf839SRobert Olsson }
226319baf839SRobert Olsson 
226419baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2265c95aaf9aSStephen Hemminger 	__acquires(RCU)
226619baf839SRobert Olsson {
2267cb7b593cSStephen Hemminger 	rcu_read_lock();
22681218854aSYOSHIFUJI Hideaki 	return fib_trie_get_idx(seq, *pos);
226919baf839SRobert Olsson }
227019baf839SRobert Olsson 
227119baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
227219baf839SRobert Olsson {
2273cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
22741218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
22753d3b2d25SStephen Hemminger 	struct fib_table *tb = iter->tb;
22763d3b2d25SStephen Hemminger 	struct hlist_node *tb_node;
22773d3b2d25SStephen Hemminger 	unsigned int h;
2278b299e4f0SDavid S. Miller 	struct rt_trie_node *n;
2279cb7b593cSStephen Hemminger 
228019baf839SRobert Olsson 	++*pos;
22813d3b2d25SStephen Hemminger 	/* next node in same table */
22823d3b2d25SStephen Hemminger 	n = fib_trie_get_next(iter);
22833d3b2d25SStephen Hemminger 	if (n)
22843d3b2d25SStephen Hemminger 		return n;
228591b9a277SOlof Johansson 
22863d3b2d25SStephen Hemminger 	/* walk rest of this hash chain */
22873d3b2d25SStephen Hemminger 	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
22880a5c0475SEric Dumazet 	while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
22893d3b2d25SStephen Hemminger 		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
22903d3b2d25SStephen Hemminger 		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
22913d3b2d25SStephen Hemminger 		if (n)
22923d3b2d25SStephen Hemminger 			goto found;
22933d3b2d25SStephen Hemminger 	}
2294cb7b593cSStephen Hemminger 
22953d3b2d25SStephen Hemminger 	/* new hash chain */
22963d3b2d25SStephen Hemminger 	while (++h < FIB_TABLE_HASHSZ) {
22973d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22983d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
22993d3b2d25SStephen Hemminger 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
23003d3b2d25SStephen Hemminger 			if (n)
23013d3b2d25SStephen Hemminger 				goto found;
23023d3b2d25SStephen Hemminger 		}
23033d3b2d25SStephen Hemminger 	}
2304cb7b593cSStephen Hemminger 	return NULL;
23053d3b2d25SStephen Hemminger 
23063d3b2d25SStephen Hemminger found:
23073d3b2d25SStephen Hemminger 	iter->tb = tb;
23083d3b2d25SStephen Hemminger 	return n;
230919baf839SRobert Olsson }
231019baf839SRobert Olsson 
231119baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2312c95aaf9aSStephen Hemminger 	__releases(RCU)
231319baf839SRobert Olsson {
2314cb7b593cSStephen Hemminger 	rcu_read_unlock();
231519baf839SRobert Olsson }
231619baf839SRobert Olsson 
2317cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2318cb7b593cSStephen Hemminger {
2319a034ee3cSEric Dumazet 	while (n-- > 0)
2320a034ee3cSEric Dumazet 		seq_puts(seq, "   ");
2321cb7b593cSStephen Hemminger }
232219baf839SRobert Olsson 
232328d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2324cb7b593cSStephen Hemminger {
2325cb7b593cSStephen Hemminger 	switch (s) {
2326cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2327cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2328cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2329cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2330cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2331cb7b593cSStephen Hemminger 	default:
233228d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2333cb7b593cSStephen Hemminger 		return buf;
2334cb7b593cSStephen Hemminger 	}
2335cb7b593cSStephen Hemminger }
2336cb7b593cSStephen Hemminger 
233736cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = {
2338cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2339cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2340cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2341cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2342cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2343cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2344cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2345cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2346cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2347cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2348cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2349cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2350cb7b593cSStephen Hemminger };
2351cb7b593cSStephen Hemminger 
2352a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2353cb7b593cSStephen Hemminger {
2354cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2355cb7b593cSStephen Hemminger 		return rtn_type_names[t];
235628d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2357cb7b593cSStephen Hemminger 	return buf;
2358cb7b593cSStephen Hemminger }
2359cb7b593cSStephen Hemminger 
2360cb7b593cSStephen Hemminger /* Pretty print the trie */
236119baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
236219baf839SRobert Olsson {
2363cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
2364b299e4f0SDavid S. Miller 	struct rt_trie_node *n = v;
236519baf839SRobert Olsson 
23663d3b2d25SStephen Hemminger 	if (!node_parent_rcu(n))
23673d3b2d25SStephen Hemminger 		fib_table_print(seq, iter->tb);
2368095b8501SRobert Olsson 
2369095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2370095b8501SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
2371ab66b4a7SStephen Hemminger 		__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
2372095b8501SRobert Olsson 
23731d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
2374673d57e7SHarvey Harrison 		seq_printf(seq, "  +-- %pI4/%d %d %d %d\n",
2375673d57e7SHarvey Harrison 			   &prf, tn->pos, tn->bits, tn->full_children,
23761d25cd6cSRobert Olsson 			   tn->empty_children);
23771d25cd6cSRobert Olsson 
2378cb7b593cSStephen Hemminger 	} else {
2379cb7b593cSStephen Hemminger 		struct leaf *l = (struct leaf *) n;
23801328042eSStephen Hemminger 		struct leaf_info *li;
23811328042eSStephen Hemminger 		struct hlist_node *node;
238232ab5f80SAl Viro 		__be32 val = htonl(l->key);
2383cb7b593cSStephen Hemminger 
2384cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2385673d57e7SHarvey Harrison 		seq_printf(seq, "  |-- %pI4\n", &val);
238628d36e37SEric Dumazet 
23871328042eSStephen Hemminger 		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2388cb7b593cSStephen Hemminger 			struct fib_alias *fa;
238928d36e37SEric Dumazet 
2390cb7b593cSStephen Hemminger 			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
239128d36e37SEric Dumazet 				char buf1[32], buf2[32];
239228d36e37SEric Dumazet 
2393cb7b593cSStephen Hemminger 				seq_indent(seq, iter->depth+1);
23941328042eSStephen Hemminger 				seq_printf(seq, "  /%d %s %s", li->plen,
239528d36e37SEric Dumazet 					   rtn_scope(buf1, sizeof(buf1),
239637e826c5SDavid S. Miller 						     fa->fa_info->fib_scope),
239728d36e37SEric Dumazet 					   rtn_type(buf2, sizeof(buf2),
239828d36e37SEric Dumazet 						    fa->fa_type));
2399cb7b593cSStephen Hemminger 				if (fa->fa_tos)
2400b9c4d82aSDenis V. Lunev 					seq_printf(seq, " tos=%d", fa->fa_tos);
2401cb7b593cSStephen Hemminger 				seq_putc(seq, '\n');
2402cb7b593cSStephen Hemminger 			}
2403cb7b593cSStephen Hemminger 		}
2404cb7b593cSStephen Hemminger 	}
240519baf839SRobert Olsson 
240619baf839SRobert Olsson 	return 0;
240719baf839SRobert Olsson }
240819baf839SRobert Olsson 
2409f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
241019baf839SRobert Olsson 	.start  = fib_trie_seq_start,
241119baf839SRobert Olsson 	.next   = fib_trie_seq_next,
241219baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
241319baf839SRobert Olsson 	.show   = fib_trie_seq_show,
241419baf839SRobert Olsson };
241519baf839SRobert Olsson 
241619baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file)
241719baf839SRobert Olsson {
24181c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_trie_seq_ops,
2419cf7732e4SPavel Emelyanov 			    sizeof(struct fib_trie_iter));
242019baf839SRobert Olsson }
242119baf839SRobert Olsson 
24229a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = {
242319baf839SRobert Olsson 	.owner  = THIS_MODULE,
242419baf839SRobert Olsson 	.open   = fib_trie_seq_open,
242519baf839SRobert Olsson 	.read   = seq_read,
242619baf839SRobert Olsson 	.llseek = seq_lseek,
24271c340b2fSDenis V. Lunev 	.release = seq_release_net,
242819baf839SRobert Olsson };
242919baf839SRobert Olsson 
24308315f5d8SStephen Hemminger struct fib_route_iter {
24318315f5d8SStephen Hemminger 	struct seq_net_private p;
24328315f5d8SStephen Hemminger 	struct trie *main_trie;
24338315f5d8SStephen Hemminger 	loff_t	pos;
24348315f5d8SStephen Hemminger 	t_key	key;
24358315f5d8SStephen Hemminger };
24368315f5d8SStephen Hemminger 
24378315f5d8SStephen Hemminger static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
24388315f5d8SStephen Hemminger {
24398315f5d8SStephen Hemminger 	struct leaf *l = NULL;
24408315f5d8SStephen Hemminger 	struct trie *t = iter->main_trie;
24418315f5d8SStephen Hemminger 
24428315f5d8SStephen Hemminger 	/* use cache location of last found key */
24438315f5d8SStephen Hemminger 	if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
24448315f5d8SStephen Hemminger 		pos -= iter->pos;
24458315f5d8SStephen Hemminger 	else {
24468315f5d8SStephen Hemminger 		iter->pos = 0;
24478315f5d8SStephen Hemminger 		l = trie_firstleaf(t);
24488315f5d8SStephen Hemminger 	}
24498315f5d8SStephen Hemminger 
24508315f5d8SStephen Hemminger 	while (l && pos-- > 0) {
24518315f5d8SStephen Hemminger 		iter->pos++;
24528315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
24538315f5d8SStephen Hemminger 	}
24548315f5d8SStephen Hemminger 
24558315f5d8SStephen Hemminger 	if (l)
24568315f5d8SStephen Hemminger 		iter->key = pos;	/* remember it */
24578315f5d8SStephen Hemminger 	else
24588315f5d8SStephen Hemminger 		iter->pos = 0;		/* forget it */
24598315f5d8SStephen Hemminger 
24608315f5d8SStephen Hemminger 	return l;
24618315f5d8SStephen Hemminger }
24628315f5d8SStephen Hemminger 
24638315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
24648315f5d8SStephen Hemminger 	__acquires(RCU)
24658315f5d8SStephen Hemminger {
24668315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
24678315f5d8SStephen Hemminger 	struct fib_table *tb;
24688315f5d8SStephen Hemminger 
24698315f5d8SStephen Hemminger 	rcu_read_lock();
24701218854aSYOSHIFUJI Hideaki 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
24718315f5d8SStephen Hemminger 	if (!tb)
24728315f5d8SStephen Hemminger 		return NULL;
24738315f5d8SStephen Hemminger 
24748315f5d8SStephen Hemminger 	iter->main_trie = (struct trie *) tb->tb_data;
24758315f5d8SStephen Hemminger 	if (*pos == 0)
24768315f5d8SStephen Hemminger 		return SEQ_START_TOKEN;
24778315f5d8SStephen Hemminger 	else
24788315f5d8SStephen Hemminger 		return fib_route_get_idx(iter, *pos - 1);
24798315f5d8SStephen Hemminger }
24808315f5d8SStephen Hemminger 
24818315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
24828315f5d8SStephen Hemminger {
24838315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
24848315f5d8SStephen Hemminger 	struct leaf *l = v;
24858315f5d8SStephen Hemminger 
24868315f5d8SStephen Hemminger 	++*pos;
24878315f5d8SStephen Hemminger 	if (v == SEQ_START_TOKEN) {
24888315f5d8SStephen Hemminger 		iter->pos = 0;
24898315f5d8SStephen Hemminger 		l = trie_firstleaf(iter->main_trie);
24908315f5d8SStephen Hemminger 	} else {
24918315f5d8SStephen Hemminger 		iter->pos++;
24928315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
24938315f5d8SStephen Hemminger 	}
24948315f5d8SStephen Hemminger 
24958315f5d8SStephen Hemminger 	if (l)
24968315f5d8SStephen Hemminger 		iter->key = l->key;
24978315f5d8SStephen Hemminger 	else
24988315f5d8SStephen Hemminger 		iter->pos = 0;
24998315f5d8SStephen Hemminger 	return l;
25008315f5d8SStephen Hemminger }
25018315f5d8SStephen Hemminger 
25028315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v)
25038315f5d8SStephen Hemminger 	__releases(RCU)
25048315f5d8SStephen Hemminger {
25058315f5d8SStephen Hemminger 	rcu_read_unlock();
25068315f5d8SStephen Hemminger }
25078315f5d8SStephen Hemminger 
2508a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2509cb7b593cSStephen Hemminger {
2510a034ee3cSEric Dumazet 	unsigned int flags = 0;
2511cb7b593cSStephen Hemminger 
2512a034ee3cSEric Dumazet 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2513a034ee3cSEric Dumazet 		flags = RTF_REJECT;
2514cb7b593cSStephen Hemminger 	if (fi && fi->fib_nh->nh_gw)
2515cb7b593cSStephen Hemminger 		flags |= RTF_GATEWAY;
251632ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2517cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2518cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2519cb7b593cSStephen Hemminger 	return flags;
2520cb7b593cSStephen Hemminger }
2521cb7b593cSStephen Hemminger 
2522cb7b593cSStephen Hemminger /*
2523cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2524cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2525cb7b593cSStephen Hemminger  *	and needs to be same as fib_hash output to avoid breaking
2526cb7b593cSStephen Hemminger  *	legacy utilities
2527cb7b593cSStephen Hemminger  */
2528cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2529cb7b593cSStephen Hemminger {
2530cb7b593cSStephen Hemminger 	struct leaf *l = v;
25311328042eSStephen Hemminger 	struct leaf_info *li;
25321328042eSStephen Hemminger 	struct hlist_node *node;
2533cb7b593cSStephen Hemminger 
2534cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2535cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2536cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2537cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2538cb7b593cSStephen Hemminger 		return 0;
2539cb7b593cSStephen Hemminger 	}
2540cb7b593cSStephen Hemminger 
25411328042eSStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2542cb7b593cSStephen Hemminger 		struct fib_alias *fa;
254332ab5f80SAl Viro 		__be32 mask, prefix;
2544cb7b593cSStephen Hemminger 
2545cb7b593cSStephen Hemminger 		mask = inet_make_mask(li->plen);
2546cb7b593cSStephen Hemminger 		prefix = htonl(l->key);
2547cb7b593cSStephen Hemminger 
2548cb7b593cSStephen Hemminger 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
25491371e37dSHerbert Xu 			const struct fib_info *fi = fa->fa_info;
2550a034ee3cSEric Dumazet 			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
25515e659e4cSPavel Emelyanov 			int len;
2552cb7b593cSStephen Hemminger 
2553cb7b593cSStephen Hemminger 			if (fa->fa_type == RTN_BROADCAST
2554cb7b593cSStephen Hemminger 			    || fa->fa_type == RTN_MULTICAST)
2555cb7b593cSStephen Hemminger 				continue;
2556cb7b593cSStephen Hemminger 
2557cb7b593cSStephen Hemminger 			if (fi)
25585e659e4cSPavel Emelyanov 				seq_printf(seq,
25595e659e4cSPavel Emelyanov 					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
25605e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2561cb7b593cSStephen Hemminger 					 fi->fib_dev ? fi->fib_dev->name : "*",
2562cb7b593cSStephen Hemminger 					 prefix,
2563cb7b593cSStephen Hemminger 					 fi->fib_nh->nh_gw, flags, 0, 0,
2564cb7b593cSStephen Hemminger 					 fi->fib_priority,
2565cb7b593cSStephen Hemminger 					 mask,
2566a07f5f50SStephen Hemminger 					 (fi->fib_advmss ?
2567a07f5f50SStephen Hemminger 					  fi->fib_advmss + 40 : 0),
2568cb7b593cSStephen Hemminger 					 fi->fib_window,
25695e659e4cSPavel Emelyanov 					 fi->fib_rtt >> 3, &len);
2570cb7b593cSStephen Hemminger 			else
25715e659e4cSPavel Emelyanov 				seq_printf(seq,
25725e659e4cSPavel Emelyanov 					 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
25735e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2574cb7b593cSStephen Hemminger 					 prefix, 0, flags, 0, 0, 0,
25755e659e4cSPavel Emelyanov 					 mask, 0, 0, 0, &len);
2576cb7b593cSStephen Hemminger 
25775e659e4cSPavel Emelyanov 			seq_printf(seq, "%*s\n", 127 - len, "");
2578cb7b593cSStephen Hemminger 		}
2579cb7b593cSStephen Hemminger 	}
2580cb7b593cSStephen Hemminger 
2581cb7b593cSStephen Hemminger 	return 0;
2582cb7b593cSStephen Hemminger }
2583cb7b593cSStephen Hemminger 
2584f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
25858315f5d8SStephen Hemminger 	.start  = fib_route_seq_start,
25868315f5d8SStephen Hemminger 	.next   = fib_route_seq_next,
25878315f5d8SStephen Hemminger 	.stop   = fib_route_seq_stop,
2588cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
2589cb7b593cSStephen Hemminger };
2590cb7b593cSStephen Hemminger 
2591cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file)
2592cb7b593cSStephen Hemminger {
25931c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_route_seq_ops,
25948315f5d8SStephen Hemminger 			    sizeof(struct fib_route_iter));
2595cb7b593cSStephen Hemminger }
2596cb7b593cSStephen Hemminger 
25979a32144eSArjan van de Ven static const struct file_operations fib_route_fops = {
2598cb7b593cSStephen Hemminger 	.owner  = THIS_MODULE,
2599cb7b593cSStephen Hemminger 	.open   = fib_route_seq_open,
2600cb7b593cSStephen Hemminger 	.read   = seq_read,
2601cb7b593cSStephen Hemminger 	.llseek = seq_lseek,
26021c340b2fSDenis V. Lunev 	.release = seq_release_net,
2603cb7b593cSStephen Hemminger };
2604cb7b593cSStephen Hemminger 
260561a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
260619baf839SRobert Olsson {
260761a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2608cb7b593cSStephen Hemminger 		goto out1;
2609cb7b593cSStephen Hemminger 
261061a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
261161a02653SDenis V. Lunev 				  &fib_triestat_fops))
2612cb7b593cSStephen Hemminger 		goto out2;
2613cb7b593cSStephen Hemminger 
261461a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2615cb7b593cSStephen Hemminger 		goto out3;
2616cb7b593cSStephen Hemminger 
261719baf839SRobert Olsson 	return 0;
2618cb7b593cSStephen Hemminger 
2619cb7b593cSStephen Hemminger out3:
262061a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
2621cb7b593cSStephen Hemminger out2:
262261a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
2623cb7b593cSStephen Hemminger out1:
2624cb7b593cSStephen Hemminger 	return -ENOMEM;
262519baf839SRobert Olsson }
262619baf839SRobert Olsson 
262761a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
262819baf839SRobert Olsson {
262961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
263061a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
263161a02653SDenis V. Lunev 	proc_net_remove(net, "route");
263219baf839SRobert Olsson }
263319baf839SRobert Olsson 
263419baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
2635