xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 80b71b80)
119baf839SRobert Olsson /*
219baf839SRobert Olsson  *   This program is free software; you can redistribute it and/or
319baf839SRobert Olsson  *   modify it under the terms of the GNU General Public License
419baf839SRobert Olsson  *   as published by the Free Software Foundation; either version
519baf839SRobert Olsson  *   2 of the License, or (at your option) any later version.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
819baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
1119baf839SRobert Olsson  *     Agricultural Sciences.
1219baf839SRobert Olsson  *
1319baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1419baf839SRobert Olsson  *
1519baf839SRobert Olsson  * This work is based on the LPC-trie which is originally descibed in:
1619baf839SRobert Olsson  *
1719baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1819baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
1919baf839SRobert Olsson  * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  *
2219baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2419baf839SRobert Olsson  *
2519baf839SRobert Olsson  *
2619baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2719baf839SRobert Olsson  *
2819baf839SRobert Olsson  *
2919baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3019baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
3119baf839SRobert Olsson  *		interface as the means of communication with the user level.
3219baf839SRobert Olsson  *
3319baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
3419baf839SRobert Olsson  *
3519baf839SRobert Olsson  *
3619baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3719baf839SRobert Olsson  *
3819baf839SRobert Olsson  *		This program is free software; you can redistribute it and/or
3919baf839SRobert Olsson  *		modify it under the terms of the GNU General Public License
4019baf839SRobert Olsson  *		as published by the Free Software Foundation; either version
4119baf839SRobert Olsson  *		2 of the License, or (at your option) any later version.
42fd966255SRobert Olsson  *
43fd966255SRobert Olsson  * Substantial contributions to this work comes from:
44fd966255SRobert Olsson  *
45fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
46fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
47fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
48fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
4919baf839SRobert Olsson  */
5019baf839SRobert Olsson 
5180b71b80SJens Låås #define VERSION "0.409"
5219baf839SRobert Olsson 
5319baf839SRobert Olsson #include <asm/uaccess.h>
5419baf839SRobert Olsson #include <asm/system.h>
551977f032SJiri Slaby #include <linux/bitops.h>
5619baf839SRobert Olsson #include <linux/types.h>
5719baf839SRobert Olsson #include <linux/kernel.h>
5819baf839SRobert Olsson #include <linux/mm.h>
5919baf839SRobert Olsson #include <linux/string.h>
6019baf839SRobert Olsson #include <linux/socket.h>
6119baf839SRobert Olsson #include <linux/sockios.h>
6219baf839SRobert Olsson #include <linux/errno.h>
6319baf839SRobert Olsson #include <linux/in.h>
6419baf839SRobert Olsson #include <linux/inet.h>
65cd8787abSStephen Hemminger #include <linux/inetdevice.h>
6619baf839SRobert Olsson #include <linux/netdevice.h>
6719baf839SRobert Olsson #include <linux/if_arp.h>
6819baf839SRobert Olsson #include <linux/proc_fs.h>
692373ce1cSRobert Olsson #include <linux/rcupdate.h>
7019baf839SRobert Olsson #include <linux/skbuff.h>
7119baf839SRobert Olsson #include <linux/netlink.h>
7219baf839SRobert Olsson #include <linux/init.h>
7319baf839SRobert Olsson #include <linux/list.h>
74457c4cbcSEric W. Biederman #include <net/net_namespace.h>
7519baf839SRobert Olsson #include <net/ip.h>
7619baf839SRobert Olsson #include <net/protocol.h>
7719baf839SRobert Olsson #include <net/route.h>
7819baf839SRobert Olsson #include <net/tcp.h>
7919baf839SRobert Olsson #include <net/sock.h>
8019baf839SRobert Olsson #include <net/ip_fib.h>
8119baf839SRobert Olsson #include "fib_lookup.h"
8219baf839SRobert Olsson 
8306ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
8419baf839SRobert Olsson 
8519baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key))
8619baf839SRobert Olsson 
8719baf839SRobert Olsson typedef unsigned int t_key;
8819baf839SRobert Olsson 
8919baf839SRobert Olsson #define T_TNODE 0
9019baf839SRobert Olsson #define T_LEAF  1
9119baf839SRobert Olsson #define NODE_TYPE_MASK	0x1UL
922373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
932373ce1cSRobert Olsson 
9491b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF))
9591b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF)
9619baf839SRobert Olsson 
9719baf839SRobert Olsson struct node {
9891b9a277SOlof Johansson 	unsigned long parent;
998d965444SEric Dumazet 	t_key key;
10019baf839SRobert Olsson };
10119baf839SRobert Olsson 
10219baf839SRobert Olsson struct leaf {
10391b9a277SOlof Johansson 	unsigned long parent;
1048d965444SEric Dumazet 	t_key key;
10519baf839SRobert Olsson 	struct hlist_head list;
1062373ce1cSRobert Olsson 	struct rcu_head rcu;
10719baf839SRobert Olsson };
10819baf839SRobert Olsson 
10919baf839SRobert Olsson struct leaf_info {
11019baf839SRobert Olsson 	struct hlist_node hlist;
1112373ce1cSRobert Olsson 	struct rcu_head rcu;
11219baf839SRobert Olsson 	int plen;
11319baf839SRobert Olsson 	struct list_head falh;
11419baf839SRobert Olsson };
11519baf839SRobert Olsson 
11619baf839SRobert Olsson struct tnode {
11791b9a277SOlof Johansson 	unsigned long parent;
1188d965444SEric Dumazet 	t_key key;
119112d8cfcSEric Dumazet 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
120112d8cfcSEric Dumazet 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
1218d965444SEric Dumazet 	unsigned int full_children;	/* KEYLENGTH bits needed */
1228d965444SEric Dumazet 	unsigned int empty_children;	/* KEYLENGTH bits needed */
12315be75cdSStephen Hemminger 	union {
1242373ce1cSRobert Olsson 		struct rcu_head rcu;
12515be75cdSStephen Hemminger 		struct work_struct work;
126e0f7cb8cSJarek Poplawski 		struct tnode *tnode_free;
12715be75cdSStephen Hemminger 	};
12819baf839SRobert Olsson 	struct node *child[0];
12919baf839SRobert Olsson };
13019baf839SRobert Olsson 
13119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
13219baf839SRobert Olsson struct trie_use_stats {
13319baf839SRobert Olsson 	unsigned int gets;
13419baf839SRobert Olsson 	unsigned int backtrack;
13519baf839SRobert Olsson 	unsigned int semantic_match_passed;
13619baf839SRobert Olsson 	unsigned int semantic_match_miss;
13719baf839SRobert Olsson 	unsigned int null_node_hit;
1382f36895aSRobert Olsson 	unsigned int resize_node_skipped;
13919baf839SRobert Olsson };
14019baf839SRobert Olsson #endif
14119baf839SRobert Olsson 
14219baf839SRobert Olsson struct trie_stat {
14319baf839SRobert Olsson 	unsigned int totdepth;
14419baf839SRobert Olsson 	unsigned int maxdepth;
14519baf839SRobert Olsson 	unsigned int tnodes;
14619baf839SRobert Olsson 	unsigned int leaves;
14719baf839SRobert Olsson 	unsigned int nullpointers;
14893672292SStephen Hemminger 	unsigned int prefixes;
14906ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
15019baf839SRobert Olsson };
15119baf839SRobert Olsson 
15219baf839SRobert Olsson struct trie {
15319baf839SRobert Olsson 	struct node *trie;
15419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15519baf839SRobert Olsson 	struct trie_use_stats stats;
15619baf839SRobert Olsson #endif
15719baf839SRobert Olsson };
15819baf839SRobert Olsson 
15919baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
160a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161a07f5f50SStephen Hemminger 				  int wasfull);
16219baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn);
1632f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn);
1642f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn);
165e0f7cb8cSJarek Poplawski /* tnodes to free after resize(); protected by RTNL */
166e0f7cb8cSJarek Poplawski static struct tnode *tnode_free_head;
167c3059477SJarek Poplawski static size_t tnode_free_size;
168c3059477SJarek Poplawski 
169c3059477SJarek Poplawski /*
170c3059477SJarek Poplawski  * synchronize_rcu after call_rcu for that many pages; it should be especially
171c3059477SJarek Poplawski  * useful before resizing the root node with PREEMPT_NONE configs; the value was
172c3059477SJarek Poplawski  * obtained experimentally, aiming to avoid visible slowdown.
173c3059477SJarek Poplawski  */
174c3059477SJarek Poplawski static const int sync_pages = 128;
17519baf839SRobert Olsson 
176e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly;
177bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly;
17819baf839SRobert Olsson 
17906801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node)
18006801916SStephen Hemminger {
181b59cfbf7SEric Dumazet 	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
182b59cfbf7SEric Dumazet }
18306801916SStephen Hemminger 
184b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node)
185b59cfbf7SEric Dumazet {
186b59cfbf7SEric Dumazet 	struct tnode *ret = node_parent(node);
187b59cfbf7SEric Dumazet 
18806801916SStephen Hemminger 	return rcu_dereference(ret);
18906801916SStephen Hemminger }
19006801916SStephen Hemminger 
1916440cc9eSStephen Hemminger /* Same as rcu_assign_pointer
1926440cc9eSStephen Hemminger  * but that macro() assumes that value is a pointer.
1936440cc9eSStephen Hemminger  */
19406801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr)
19506801916SStephen Hemminger {
1966440cc9eSStephen Hemminger 	smp_wmb();
1976440cc9eSStephen Hemminger 	node->parent = (unsigned long)ptr | NODE_TYPE(node);
19806801916SStephen Hemminger }
1992373ce1cSRobert Olsson 
200b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
20119baf839SRobert Olsson {
202b59cfbf7SEric Dumazet 	BUG_ON(i >= 1U << tn->bits);
20319baf839SRobert Olsson 
204b59cfbf7SEric Dumazet 	return tn->child[i];
205b59cfbf7SEric Dumazet }
206b59cfbf7SEric Dumazet 
207b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
208b59cfbf7SEric Dumazet {
209b59cfbf7SEric Dumazet 	struct node *ret = tnode_get_child(tn, i);
210b59cfbf7SEric Dumazet 
211b59cfbf7SEric Dumazet 	return rcu_dereference(ret);
21219baf839SRobert Olsson }
21319baf839SRobert Olsson 
214bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn)
21519baf839SRobert Olsson {
21619baf839SRobert Olsson 	return 1 << tn->bits;
21719baf839SRobert Olsson }
21819baf839SRobert Olsson 
219ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l)
220ab66b4a7SStephen Hemminger {
221ab66b4a7SStephen Hemminger 	return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
222ab66b4a7SStephen Hemminger }
223ab66b4a7SStephen Hemminger 
22419baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
22519baf839SRobert Olsson {
22619baf839SRobert Olsson 	if (offset < KEYLENGTH)
22719baf839SRobert Olsson 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
22819baf839SRobert Olsson 	else
22919baf839SRobert Olsson 		return 0;
23019baf839SRobert Olsson }
23119baf839SRobert Olsson 
23219baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b)
23319baf839SRobert Olsson {
23419baf839SRobert Olsson 	return a == b;
23519baf839SRobert Olsson }
23619baf839SRobert Olsson 
23719baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
23819baf839SRobert Olsson {
23919baf839SRobert Olsson 	if (bits == 0 || offset >= KEYLENGTH)
24019baf839SRobert Olsson 		return 1;
24119baf839SRobert Olsson 	bits = bits > KEYLENGTH ? KEYLENGTH : bits;
24219baf839SRobert Olsson 	return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
24319baf839SRobert Olsson }
24419baf839SRobert Olsson 
24519baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b)
24619baf839SRobert Olsson {
24719baf839SRobert Olsson 	t_key diff = a ^ b;
24819baf839SRobert Olsson 	int i = offset;
24919baf839SRobert Olsson 
25019baf839SRobert Olsson 	if (!diff)
25119baf839SRobert Olsson 		return 0;
25219baf839SRobert Olsson 	while ((diff << i) >> (KEYLENGTH-1) == 0)
25319baf839SRobert Olsson 		i++;
25419baf839SRobert Olsson 	return i;
25519baf839SRobert Olsson }
25619baf839SRobert Olsson 
25719baf839SRobert Olsson /*
25819baf839SRobert Olsson   To understand this stuff, an understanding of keys and all their bits is
25919baf839SRobert Olsson   necessary. Every node in the trie has a key associated with it, but not
26019baf839SRobert Olsson   all of the bits in that key are significant.
26119baf839SRobert Olsson 
26219baf839SRobert Olsson   Consider a node 'n' and its parent 'tp'.
26319baf839SRobert Olsson 
26419baf839SRobert Olsson   If n is a leaf, every bit in its key is significant. Its presence is
265772cb712SRobert Olsson   necessitated by path compression, since during a tree traversal (when
26619baf839SRobert Olsson   searching for a leaf - unless we are doing an insertion) we will completely
26719baf839SRobert Olsson   ignore all skipped bits we encounter. Thus we need to verify, at the end of
26819baf839SRobert Olsson   a potentially successful search, that we have indeed been walking the
26919baf839SRobert Olsson   correct key path.
27019baf839SRobert Olsson 
27119baf839SRobert Olsson   Note that we can never "miss" the correct key in the tree if present by
27219baf839SRobert Olsson   following the wrong path. Path compression ensures that segments of the key
27319baf839SRobert Olsson   that are the same for all keys with a given prefix are skipped, but the
27419baf839SRobert Olsson   skipped part *is* identical for each node in the subtrie below the skipped
27519baf839SRobert Olsson   bit! trie_insert() in this implementation takes care of that - note the
27619baf839SRobert Olsson   call to tkey_sub_equals() in trie_insert().
27719baf839SRobert Olsson 
27819baf839SRobert Olsson   if n is an internal node - a 'tnode' here, the various parts of its key
27919baf839SRobert Olsson   have many different meanings.
28019baf839SRobert Olsson 
28119baf839SRobert Olsson   Example:
28219baf839SRobert Olsson   _________________________________________________________________
28319baf839SRobert Olsson   | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
28419baf839SRobert Olsson   -----------------------------------------------------------------
28519baf839SRobert Olsson     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
28619baf839SRobert Olsson 
28719baf839SRobert Olsson   _________________________________________________________________
28819baf839SRobert Olsson   | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
28919baf839SRobert Olsson   -----------------------------------------------------------------
29019baf839SRobert Olsson    16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
29119baf839SRobert Olsson 
29219baf839SRobert Olsson   tp->pos = 7
29319baf839SRobert Olsson   tp->bits = 3
29419baf839SRobert Olsson   n->pos = 15
29519baf839SRobert Olsson   n->bits = 4
29619baf839SRobert Olsson 
29719baf839SRobert Olsson   First, let's just ignore the bits that come before the parent tp, that is
29819baf839SRobert Olsson   the bits from 0 to (tp->pos-1). They are *known* but at this point we do
29919baf839SRobert Olsson   not use them for anything.
30019baf839SRobert Olsson 
30119baf839SRobert Olsson   The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
30219baf839SRobert Olsson   index into the parent's child array. That is, they will be used to find
30319baf839SRobert Olsson   'n' among tp's children.
30419baf839SRobert Olsson 
30519baf839SRobert Olsson   The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
30619baf839SRobert Olsson   for the node n.
30719baf839SRobert Olsson 
30819baf839SRobert Olsson   All the bits we have seen so far are significant to the node n. The rest
30919baf839SRobert Olsson   of the bits are really not needed or indeed known in n->key.
31019baf839SRobert Olsson 
31119baf839SRobert Olsson   The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
31219baf839SRobert Olsson   n's child array, and will of course be different for each child.
31319baf839SRobert Olsson 
314c877efb2SStephen Hemminger 
31519baf839SRobert Olsson   The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
31619baf839SRobert Olsson   at this point.
31719baf839SRobert Olsson 
31819baf839SRobert Olsson */
31919baf839SRobert Olsson 
3200c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn)
32119baf839SRobert Olsson {
3220c7770c7SStephen Hemminger 	WARN_ON(tn && tn->pos+tn->bits > 32);
32319baf839SRobert Olsson }
32419baf839SRobert Olsson 
325f5026fabSDenis V. Lunev static const int halve_threshold = 25;
326f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
327345aa031SJarek Poplawski static const int halve_threshold_root = 15;
32880b71b80SJens Låås static const int inflate_threshold_root = 30;
3292373ce1cSRobert Olsson 
3302373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
3312373ce1cSRobert Olsson {
3322373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
3332373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
3342373ce1cSRobert Olsson }
3352373ce1cSRobert Olsson 
3362373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3372373ce1cSRobert Olsson {
3382373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3392373ce1cSRobert Olsson }
3402373ce1cSRobert Olsson 
3412373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head)
3422373ce1cSRobert Olsson {
343bc3c8c1eSStephen Hemminger 	struct leaf *l = container_of(head, struct leaf, rcu);
344bc3c8c1eSStephen Hemminger 	kmem_cache_free(trie_leaf_kmem, l);
3452373ce1cSRobert Olsson }
3462373ce1cSRobert Olsson 
347387a5487SStephen Hemminger static inline void free_leaf(struct leaf *l)
348387a5487SStephen Hemminger {
349387a5487SStephen Hemminger 	call_rcu_bh(&l->rcu, __leaf_free_rcu);
350387a5487SStephen Hemminger }
351387a5487SStephen Hemminger 
3522373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head)
3532373ce1cSRobert Olsson {
3542373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf_info, rcu));
3552373ce1cSRobert Olsson }
3562373ce1cSRobert Olsson 
3572373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf)
3582373ce1cSRobert Olsson {
3592373ce1cSRobert Olsson 	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
3602373ce1cSRobert Olsson }
3612373ce1cSRobert Olsson 
3628d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size)
3632373ce1cSRobert Olsson {
3642373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3658d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
36615be75cdSStephen Hemminger 	else
36715be75cdSStephen Hemminger 		return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
36815be75cdSStephen Hemminger }
3692373ce1cSRobert Olsson 
37015be75cdSStephen Hemminger static void __tnode_vfree(struct work_struct *arg)
37115be75cdSStephen Hemminger {
37215be75cdSStephen Hemminger 	struct tnode *tn = container_of(arg, struct tnode, work);
37315be75cdSStephen Hemminger 	vfree(tn);
3742373ce1cSRobert Olsson }
3752373ce1cSRobert Olsson 
3762373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head)
3772373ce1cSRobert Olsson {
3782373ce1cSRobert Olsson 	struct tnode *tn = container_of(head, struct tnode, rcu);
3798d965444SEric Dumazet 	size_t size = sizeof(struct tnode) +
3808d965444SEric Dumazet 		      (sizeof(struct node *) << tn->bits);
3812373ce1cSRobert Olsson 
3822373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3832373ce1cSRobert Olsson 		kfree(tn);
38415be75cdSStephen Hemminger 	else {
38515be75cdSStephen Hemminger 		INIT_WORK(&tn->work, __tnode_vfree);
38615be75cdSStephen Hemminger 		schedule_work(&tn->work);
38715be75cdSStephen Hemminger 	}
3882373ce1cSRobert Olsson }
3892373ce1cSRobert Olsson 
3902373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn)
3912373ce1cSRobert Olsson {
392387a5487SStephen Hemminger 	if (IS_LEAF(tn))
393387a5487SStephen Hemminger 		free_leaf((struct leaf *) tn);
394387a5487SStephen Hemminger 	else
3952373ce1cSRobert Olsson 		call_rcu(&tn->rcu, __tnode_free_rcu);
3962373ce1cSRobert Olsson }
3972373ce1cSRobert Olsson 
398e0f7cb8cSJarek Poplawski static void tnode_free_safe(struct tnode *tn)
399e0f7cb8cSJarek Poplawski {
400e0f7cb8cSJarek Poplawski 	BUG_ON(IS_LEAF(tn));
401e0f7cb8cSJarek Poplawski 	tn->tnode_free = tnode_free_head;
402e0f7cb8cSJarek Poplawski 	tnode_free_head = tn;
403c3059477SJarek Poplawski 	tnode_free_size += sizeof(struct tnode) +
404c3059477SJarek Poplawski 			   (sizeof(struct node *) << tn->bits);
405e0f7cb8cSJarek Poplawski }
406e0f7cb8cSJarek Poplawski 
407e0f7cb8cSJarek Poplawski static void tnode_free_flush(void)
408e0f7cb8cSJarek Poplawski {
409e0f7cb8cSJarek Poplawski 	struct tnode *tn;
410e0f7cb8cSJarek Poplawski 
411e0f7cb8cSJarek Poplawski 	while ((tn = tnode_free_head)) {
412e0f7cb8cSJarek Poplawski 		tnode_free_head = tn->tnode_free;
413e0f7cb8cSJarek Poplawski 		tn->tnode_free = NULL;
414e0f7cb8cSJarek Poplawski 		tnode_free(tn);
415e0f7cb8cSJarek Poplawski 	}
416c3059477SJarek Poplawski 
417c3059477SJarek Poplawski 	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
418c3059477SJarek Poplawski 		tnode_free_size = 0;
419c3059477SJarek Poplawski 		synchronize_rcu();
420c3059477SJarek Poplawski 	}
421e0f7cb8cSJarek Poplawski }
422e0f7cb8cSJarek Poplawski 
42319baf839SRobert Olsson static struct leaf *leaf_new(void)
42419baf839SRobert Olsson {
425bc3c8c1eSStephen Hemminger 	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
42619baf839SRobert Olsson 	if (l) {
4272373ce1cSRobert Olsson 		l->parent = T_LEAF;
42819baf839SRobert Olsson 		INIT_HLIST_HEAD(&l->list);
42919baf839SRobert Olsson 	}
43019baf839SRobert Olsson 	return l;
43119baf839SRobert Olsson }
43219baf839SRobert Olsson 
43319baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen)
43419baf839SRobert Olsson {
43519baf839SRobert Olsson 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
4362373ce1cSRobert Olsson 	if (li) {
43719baf839SRobert Olsson 		li->plen = plen;
43819baf839SRobert Olsson 		INIT_LIST_HEAD(&li->falh);
4392373ce1cSRobert Olsson 	}
44019baf839SRobert Olsson 	return li;
44119baf839SRobert Olsson }
44219baf839SRobert Olsson 
44319baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits)
44419baf839SRobert Olsson {
4458d965444SEric Dumazet 	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
446f0e36f8cSPatrick McHardy 	struct tnode *tn = tnode_alloc(sz);
44719baf839SRobert Olsson 
44819baf839SRobert Olsson 	if (tn) {
4492373ce1cSRobert Olsson 		tn->parent = T_TNODE;
45019baf839SRobert Olsson 		tn->pos = pos;
45119baf839SRobert Olsson 		tn->bits = bits;
45219baf839SRobert Olsson 		tn->key = key;
45319baf839SRobert Olsson 		tn->full_children = 0;
45419baf839SRobert Olsson 		tn->empty_children = 1<<bits;
45519baf839SRobert Olsson 	}
456c877efb2SStephen Hemminger 
4578d965444SEric Dumazet 	pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
4588d965444SEric Dumazet 		 (unsigned long) (sizeof(struct node) << bits));
45919baf839SRobert Olsson 	return tn;
46019baf839SRobert Olsson }
46119baf839SRobert Olsson 
46219baf839SRobert Olsson /*
46319baf839SRobert Olsson  * Check whether a tnode 'n' is "full", i.e. it is an internal node
46419baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
46519baf839SRobert Olsson  */
46619baf839SRobert Olsson 
467bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n)
46819baf839SRobert Olsson {
46919baf839SRobert Olsson 	if (n == NULL || IS_LEAF(n))
47019baf839SRobert Olsson 		return 0;
47119baf839SRobert Olsson 
47219baf839SRobert Olsson 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
47319baf839SRobert Olsson }
47419baf839SRobert Olsson 
475a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i,
476a07f5f50SStephen Hemminger 			     struct node *n)
47719baf839SRobert Olsson {
47819baf839SRobert Olsson 	tnode_put_child_reorg(tn, i, n, -1);
47919baf839SRobert Olsson }
48019baf839SRobert Olsson 
48119baf839SRobert Olsson  /*
48219baf839SRobert Olsson   * Add a child at position i overwriting the old value.
48319baf839SRobert Olsson   * Update the value of full_children and empty_children.
48419baf839SRobert Olsson   */
48519baf839SRobert Olsson 
486a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
487a07f5f50SStephen Hemminger 				  int wasfull)
48819baf839SRobert Olsson {
4892373ce1cSRobert Olsson 	struct node *chi = tn->child[i];
49019baf839SRobert Olsson 	int isfull;
49119baf839SRobert Olsson 
4920c7770c7SStephen Hemminger 	BUG_ON(i >= 1<<tn->bits);
4930c7770c7SStephen Hemminger 
49419baf839SRobert Olsson 	/* update emptyChildren */
49519baf839SRobert Olsson 	if (n == NULL && chi != NULL)
49619baf839SRobert Olsson 		tn->empty_children++;
49719baf839SRobert Olsson 	else if (n != NULL && chi == NULL)
49819baf839SRobert Olsson 		tn->empty_children--;
49919baf839SRobert Olsson 
50019baf839SRobert Olsson 	/* update fullChildren */
50119baf839SRobert Olsson 	if (wasfull == -1)
50219baf839SRobert Olsson 		wasfull = tnode_full(tn, chi);
50319baf839SRobert Olsson 
50419baf839SRobert Olsson 	isfull = tnode_full(tn, n);
50519baf839SRobert Olsson 	if (wasfull && !isfull)
50619baf839SRobert Olsson 		tn->full_children--;
50719baf839SRobert Olsson 	else if (!wasfull && isfull)
50819baf839SRobert Olsson 		tn->full_children++;
50991b9a277SOlof Johansson 
51019baf839SRobert Olsson 	if (n)
51106801916SStephen Hemminger 		node_set_parent(n, tn);
51219baf839SRobert Olsson 
5132373ce1cSRobert Olsson 	rcu_assign_pointer(tn->child[i], n);
51419baf839SRobert Olsson }
51519baf839SRobert Olsson 
51680b71b80SJens Låås #define MAX_WORK 10
51719baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn)
51819baf839SRobert Olsson {
51919baf839SRobert Olsson 	int i;
5202f80b3c8SRobert Olsson 	struct tnode *old_tn;
521e6308be8SRobert Olsson 	int inflate_threshold_use;
522e6308be8SRobert Olsson 	int halve_threshold_use;
52380b71b80SJens Låås 	int max_work;
52419baf839SRobert Olsson 
52519baf839SRobert Olsson 	if (!tn)
52619baf839SRobert Olsson 		return NULL;
52719baf839SRobert Olsson 
5280c7770c7SStephen Hemminger 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
52919baf839SRobert Olsson 		 tn, inflate_threshold, halve_threshold);
53019baf839SRobert Olsson 
53119baf839SRobert Olsson 	/* No children */
53219baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn)) {
533e0f7cb8cSJarek Poplawski 		tnode_free_safe(tn);
53419baf839SRobert Olsson 		return NULL;
53519baf839SRobert Olsson 	}
53619baf839SRobert Olsson 	/* One child */
53719baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
53880b71b80SJens Låås 		goto one_child;
53919baf839SRobert Olsson 	/*
54019baf839SRobert Olsson 	 * Double as long as the resulting node has a number of
54119baf839SRobert Olsson 	 * nonempty nodes that are above the threshold.
54219baf839SRobert Olsson 	 */
54319baf839SRobert Olsson 
54419baf839SRobert Olsson 	/*
54519baf839SRobert Olsson 	 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
54619baf839SRobert Olsson 	 * the Helsinki University of Technology and Matti Tikkanen of Nokia
54719baf839SRobert Olsson 	 * Telecommunications, page 6:
54819baf839SRobert Olsson 	 * "A node is doubled if the ratio of non-empty children to all
54919baf839SRobert Olsson 	 * children in the *doubled* node is at least 'high'."
55019baf839SRobert Olsson 	 *
55119baf839SRobert Olsson 	 * 'high' in this instance is the variable 'inflate_threshold'. It
55219baf839SRobert Olsson 	 * is expressed as a percentage, so we multiply it with
55319baf839SRobert Olsson 	 * tnode_child_length() and instead of multiplying by 2 (since the
55419baf839SRobert Olsson 	 * child array will be doubled by inflate()) and multiplying
55519baf839SRobert Olsson 	 * the left-hand side by 100 (to handle the percentage thing) we
55619baf839SRobert Olsson 	 * multiply the left-hand side by 50.
55719baf839SRobert Olsson 	 *
55819baf839SRobert Olsson 	 * The left-hand side may look a bit weird: tnode_child_length(tn)
55919baf839SRobert Olsson 	 * - tn->empty_children is of course the number of non-null children
56019baf839SRobert Olsson 	 * in the current node. tn->full_children is the number of "full"
56119baf839SRobert Olsson 	 * children, that is non-null tnodes with a skip value of 0.
56219baf839SRobert Olsson 	 * All of those will be doubled in the resulting inflated tnode, so
56319baf839SRobert Olsson 	 * we just count them one extra time here.
56419baf839SRobert Olsson 	 *
56519baf839SRobert Olsson 	 * A clearer way to write this would be:
56619baf839SRobert Olsson 	 *
56719baf839SRobert Olsson 	 * to_be_doubled = tn->full_children;
56819baf839SRobert Olsson 	 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
56919baf839SRobert Olsson 	 *     tn->full_children;
57019baf839SRobert Olsson 	 *
57119baf839SRobert Olsson 	 * new_child_length = tnode_child_length(tn) * 2;
57219baf839SRobert Olsson 	 *
57319baf839SRobert Olsson 	 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
57419baf839SRobert Olsson 	 *      new_child_length;
57519baf839SRobert Olsson 	 * if (new_fill_factor >= inflate_threshold)
57619baf839SRobert Olsson 	 *
57719baf839SRobert Olsson 	 * ...and so on, tho it would mess up the while () loop.
57819baf839SRobert Olsson 	 *
57919baf839SRobert Olsson 	 * anyway,
58019baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
58119baf839SRobert Olsson 	 *      inflate_threshold
58219baf839SRobert Olsson 	 *
58319baf839SRobert Olsson 	 * avoid a division:
58419baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
58519baf839SRobert Olsson 	 *      inflate_threshold * new_child_length
58619baf839SRobert Olsson 	 *
58719baf839SRobert Olsson 	 * expand not_to_be_doubled and to_be_doubled, and shorten:
58819baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
58919baf839SRobert Olsson 	 *    tn->full_children) >= inflate_threshold * new_child_length
59019baf839SRobert Olsson 	 *
59119baf839SRobert Olsson 	 * expand new_child_length:
59219baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
59319baf839SRobert Olsson 	 *    tn->full_children) >=
59419baf839SRobert Olsson 	 *      inflate_threshold * tnode_child_length(tn) * 2
59519baf839SRobert Olsson 	 *
59619baf839SRobert Olsson 	 * shorten again:
59719baf839SRobert Olsson 	 * 50 * (tn->full_children + tnode_child_length(tn) -
59819baf839SRobert Olsson 	 *    tn->empty_children) >= inflate_threshold *
59919baf839SRobert Olsson 	 *    tnode_child_length(tn)
60019baf839SRobert Olsson 	 *
60119baf839SRobert Olsson 	 */
60219baf839SRobert Olsson 
60319baf839SRobert Olsson 	check_tnode(tn);
60419baf839SRobert Olsson 
605e6308be8SRobert Olsson 	/* Keep root node larger  */
606e6308be8SRobert Olsson 
60780b71b80SJens Låås 	if (!node_parent((struct node*) tn)) {
60880b71b80SJens Låås 		inflate_threshold_use = inflate_threshold_root;
60980b71b80SJens Låås 		halve_threshold_use = halve_threshold_root;
61080b71b80SJens Låås 	}
61180b71b80SJens Låås 	else {
612e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold;
61380b71b80SJens Låås 		halve_threshold_use = halve_threshold;
61480b71b80SJens Låås 	}
615e6308be8SRobert Olsson 
61680b71b80SJens Låås 	max_work = MAX_WORK;
61780b71b80SJens Låås 	while ((tn->full_children > 0 &&  max_work-- &&
618a07f5f50SStephen Hemminger 		50 * (tn->full_children + tnode_child_length(tn)
619a07f5f50SStephen Hemminger 		      - tn->empty_children)
620a07f5f50SStephen Hemminger 		>= inflate_threshold_use * tnode_child_length(tn))) {
62119baf839SRobert Olsson 
6222f80b3c8SRobert Olsson 		old_tn = tn;
6232f80b3c8SRobert Olsson 		tn = inflate(t, tn);
624a07f5f50SStephen Hemminger 
6252f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6262f80b3c8SRobert Olsson 			tn = old_tn;
6272f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6282f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6292f36895aSRobert Olsson #endif
6302f36895aSRobert Olsson 			break;
6312f36895aSRobert Olsson 		}
63219baf839SRobert Olsson 	}
63319baf839SRobert Olsson 
63419baf839SRobert Olsson 	check_tnode(tn);
63519baf839SRobert Olsson 
63680b71b80SJens Låås 	/* Return if at least one inflate is run */
63780b71b80SJens Låås 	if( max_work != MAX_WORK)
63880b71b80SJens Låås 		return (struct node *) tn;
63980b71b80SJens Låås 
64019baf839SRobert Olsson 	/*
64119baf839SRobert Olsson 	 * Halve as long as the number of empty children in this
64219baf839SRobert Olsson 	 * node is above threshold.
64319baf839SRobert Olsson 	 */
6442f36895aSRobert Olsson 
64580b71b80SJens Låås 	max_work = MAX_WORK;
64680b71b80SJens Låås 	while (tn->bits > 1 &&  max_work-- &&
64719baf839SRobert Olsson 	       100 * (tnode_child_length(tn) - tn->empty_children) <
648e6308be8SRobert Olsson 	       halve_threshold_use * tnode_child_length(tn)) {
64919baf839SRobert Olsson 
6502f80b3c8SRobert Olsson 		old_tn = tn;
6512f80b3c8SRobert Olsson 		tn = halve(t, tn);
6522f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6532f80b3c8SRobert Olsson 			tn = old_tn;
6542f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6552f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6562f36895aSRobert Olsson #endif
6572f36895aSRobert Olsson 			break;
6582f36895aSRobert Olsson 		}
6592f36895aSRobert Olsson 	}
6602f36895aSRobert Olsson 
66119baf839SRobert Olsson 
66219baf839SRobert Olsson 	/* Only one child remains */
66380b71b80SJens Låås 	if (tn->empty_children == tnode_child_length(tn) - 1) {
66480b71b80SJens Låås one_child:
66519baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
66691b9a277SOlof Johansson 			struct node *n;
66719baf839SRobert Olsson 
66891b9a277SOlof Johansson 			n = tn->child[i];
6692373ce1cSRobert Olsson 			if (!n)
67091b9a277SOlof Johansson 				continue;
67191b9a277SOlof Johansson 
67291b9a277SOlof Johansson 			/* compress one level */
67391b9a277SOlof Johansson 
67406801916SStephen Hemminger 			node_set_parent(n, NULL);
675e0f7cb8cSJarek Poplawski 			tnode_free_safe(tn);
67619baf839SRobert Olsson 			return n;
67719baf839SRobert Olsson 		}
67880b71b80SJens Låås 	}
67919baf839SRobert Olsson 	return (struct node *) tn;
68019baf839SRobert Olsson }
68119baf839SRobert Olsson 
6822f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn)
68319baf839SRobert Olsson {
68419baf839SRobert Olsson 	struct tnode *oldtnode = tn;
68519baf839SRobert Olsson 	int olen = tnode_child_length(tn);
68619baf839SRobert Olsson 	int i;
68719baf839SRobert Olsson 
6880c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
68919baf839SRobert Olsson 
69019baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
69119baf839SRobert Olsson 
6922f80b3c8SRobert Olsson 	if (!tn)
6932f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
6942f36895aSRobert Olsson 
6952f36895aSRobert Olsson 	/*
6962f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
6972f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
6982f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and  inflate
6992f36895aSRobert Olsson 	 * of tnode is ignored.
7002f36895aSRobert Olsson 	 */
7012f36895aSRobert Olsson 
7022f36895aSRobert Olsson 	for (i = 0; i < olen; i++) {
703a07f5f50SStephen Hemminger 		struct tnode *inode;
7042f36895aSRobert Olsson 
705a07f5f50SStephen Hemminger 		inode = (struct tnode *) tnode_get_child(oldtnode, i);
7062f36895aSRobert Olsson 		if (inode &&
7072f36895aSRobert Olsson 		    IS_TNODE(inode) &&
7082f36895aSRobert Olsson 		    inode->pos == oldtnode->pos + oldtnode->bits &&
7092f36895aSRobert Olsson 		    inode->bits > 1) {
7102f36895aSRobert Olsson 			struct tnode *left, *right;
711ab66b4a7SStephen Hemminger 			t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
7122f36895aSRobert Olsson 
7132f36895aSRobert Olsson 			left = tnode_new(inode->key&(~m), inode->pos + 1,
7142f36895aSRobert Olsson 					 inode->bits - 1);
7152f80b3c8SRobert Olsson 			if (!left)
7162f80b3c8SRobert Olsson 				goto nomem;
7172f36895aSRobert Olsson 
7182f36895aSRobert Olsson 			right = tnode_new(inode->key|m, inode->pos + 1,
7192f36895aSRobert Olsson 					  inode->bits - 1);
7202f36895aSRobert Olsson 
7212f36895aSRobert Olsson 			if (!right) {
7222f80b3c8SRobert Olsson 				tnode_free(left);
7232f80b3c8SRobert Olsson 				goto nomem;
7242f36895aSRobert Olsson 			}
7252f36895aSRobert Olsson 
7262f36895aSRobert Olsson 			put_child(t, tn, 2*i, (struct node *) left);
7272f36895aSRobert Olsson 			put_child(t, tn, 2*i+1, (struct node *) right);
7282f36895aSRobert Olsson 		}
7292f36895aSRobert Olsson 	}
7302f36895aSRobert Olsson 
73119baf839SRobert Olsson 	for (i = 0; i < olen; i++) {
732c95aaf9aSStephen Hemminger 		struct tnode *inode;
73319baf839SRobert Olsson 		struct node *node = tnode_get_child(oldtnode, i);
73491b9a277SOlof Johansson 		struct tnode *left, *right;
73591b9a277SOlof Johansson 		int size, j;
73619baf839SRobert Olsson 
73719baf839SRobert Olsson 		/* An empty child */
73819baf839SRobert Olsson 		if (node == NULL)
73919baf839SRobert Olsson 			continue;
74019baf839SRobert Olsson 
74119baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
74219baf839SRobert Olsson 
74319baf839SRobert Olsson 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
74419baf839SRobert Olsson 		   tn->pos + tn->bits - 1) {
745a07f5f50SStephen Hemminger 			if (tkey_extract_bits(node->key,
746a07f5f50SStephen Hemminger 					      oldtnode->pos + oldtnode->bits,
74719baf839SRobert Olsson 					      1) == 0)
74819baf839SRobert Olsson 				put_child(t, tn, 2*i, node);
74919baf839SRobert Olsson 			else
75019baf839SRobert Olsson 				put_child(t, tn, 2*i+1, node);
75119baf839SRobert Olsson 			continue;
75219baf839SRobert Olsson 		}
75319baf839SRobert Olsson 
75419baf839SRobert Olsson 		/* An internal node with two children */
75519baf839SRobert Olsson 		inode = (struct tnode *) node;
75619baf839SRobert Olsson 
75719baf839SRobert Olsson 		if (inode->bits == 1) {
75819baf839SRobert Olsson 			put_child(t, tn, 2*i, inode->child[0]);
75919baf839SRobert Olsson 			put_child(t, tn, 2*i+1, inode->child[1]);
76019baf839SRobert Olsson 
761e0f7cb8cSJarek Poplawski 			tnode_free_safe(inode);
76291b9a277SOlof Johansson 			continue;
76319baf839SRobert Olsson 		}
76419baf839SRobert Olsson 
76519baf839SRobert Olsson 		/* An internal node with more than two children */
76619baf839SRobert Olsson 
76719baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
76819baf839SRobert Olsson 		 * ones, 'left' and 'right', each with half of the
76919baf839SRobert Olsson 		 * original children. The two new nodes will have
77019baf839SRobert Olsson 		 * a position one bit further down the key and this
77119baf839SRobert Olsson 		 * means that the "significant" part of their keys
77219baf839SRobert Olsson 		 * (see the discussion near the top of this file)
77319baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
77419baf839SRobert Olsson 		 * left's key and "1" in right's key. Since we are
77519baf839SRobert Olsson 		 * moving the key position by one step, the bit that
77619baf839SRobert Olsson 		 * we are moving away from - the bit at position
77719baf839SRobert Olsson 		 * (inode->pos) - is the one that will differ between
77819baf839SRobert Olsson 		 * left and right. So... we synthesize that bit in the
77919baf839SRobert Olsson 		 * two  new keys.
78019baf839SRobert Olsson 		 * The mask 'm' below will be a single "one" bit at
78119baf839SRobert Olsson 		 * the position (inode->pos)
78219baf839SRobert Olsson 		 */
78319baf839SRobert Olsson 
78419baf839SRobert Olsson 		/* Use the old key, but set the new significant
78519baf839SRobert Olsson 		 *   bit to zero.
78619baf839SRobert Olsson 		 */
7872f36895aSRobert Olsson 
7882f36895aSRobert Olsson 		left = (struct tnode *) tnode_get_child(tn, 2*i);
7892f36895aSRobert Olsson 		put_child(t, tn, 2*i, NULL);
79019baf839SRobert Olsson 
79191b9a277SOlof Johansson 		BUG_ON(!left);
79219baf839SRobert Olsson 
7932f36895aSRobert Olsson 		right = (struct tnode *) tnode_get_child(tn, 2*i+1);
7942f36895aSRobert Olsson 		put_child(t, tn, 2*i+1, NULL);
79519baf839SRobert Olsson 
79691b9a277SOlof Johansson 		BUG_ON(!right);
79719baf839SRobert Olsson 
79819baf839SRobert Olsson 		size = tnode_child_length(left);
79919baf839SRobert Olsson 		for (j = 0; j < size; j++) {
80019baf839SRobert Olsson 			put_child(t, left, j, inode->child[j]);
80119baf839SRobert Olsson 			put_child(t, right, j, inode->child[j + size]);
80219baf839SRobert Olsson 		}
80319baf839SRobert Olsson 		put_child(t, tn, 2*i, resize(t, left));
80419baf839SRobert Olsson 		put_child(t, tn, 2*i+1, resize(t, right));
80519baf839SRobert Olsson 
806e0f7cb8cSJarek Poplawski 		tnode_free_safe(inode);
80719baf839SRobert Olsson 	}
808e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
80919baf839SRobert Olsson 	return tn;
8102f80b3c8SRobert Olsson nomem:
8112f80b3c8SRobert Olsson 	{
8122f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8132f80b3c8SRobert Olsson 		int j;
8142f80b3c8SRobert Olsson 
8152f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8162f80b3c8SRobert Olsson 			if (tn->child[j])
8172f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
8182f80b3c8SRobert Olsson 
8192f80b3c8SRobert Olsson 		tnode_free(tn);
8202f80b3c8SRobert Olsson 
8212f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8222f80b3c8SRobert Olsson 	}
82319baf839SRobert Olsson }
82419baf839SRobert Olsson 
8252f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn)
82619baf839SRobert Olsson {
82719baf839SRobert Olsson 	struct tnode *oldtnode = tn;
82819baf839SRobert Olsson 	struct node *left, *right;
82919baf839SRobert Olsson 	int i;
83019baf839SRobert Olsson 	int olen = tnode_child_length(tn);
83119baf839SRobert Olsson 
8320c7770c7SStephen Hemminger 	pr_debug("In halve\n");
83319baf839SRobert Olsson 
83419baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
83519baf839SRobert Olsson 
8362f80b3c8SRobert Olsson 	if (!tn)
8372f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8382f36895aSRobert Olsson 
8392f36895aSRobert Olsson 	/*
8402f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
8412f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
8422f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and halve
8432f36895aSRobert Olsson 	 * of tnode is ignored.
8442f36895aSRobert Olsson 	 */
8452f36895aSRobert Olsson 
8462f36895aSRobert Olsson 	for (i = 0; i < olen; i += 2) {
8472f36895aSRobert Olsson 		left = tnode_get_child(oldtnode, i);
8482f36895aSRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
8492f36895aSRobert Olsson 
8502f36895aSRobert Olsson 		/* Two nonempty children */
8512f36895aSRobert Olsson 		if (left && right) {
8522f80b3c8SRobert Olsson 			struct tnode *newn;
8532f36895aSRobert Olsson 
8542f80b3c8SRobert Olsson 			newn = tnode_new(left->key, tn->pos + tn->bits, 1);
8552f80b3c8SRobert Olsson 
8562f80b3c8SRobert Olsson 			if (!newn)
8572f80b3c8SRobert Olsson 				goto nomem;
8582f80b3c8SRobert Olsson 
8592f80b3c8SRobert Olsson 			put_child(t, tn, i/2, (struct node *)newn);
8602f36895aSRobert Olsson 		}
8612f36895aSRobert Olsson 
8622f36895aSRobert Olsson 	}
86319baf839SRobert Olsson 
86419baf839SRobert Olsson 	for (i = 0; i < olen; i += 2) {
86591b9a277SOlof Johansson 		struct tnode *newBinNode;
86691b9a277SOlof Johansson 
86719baf839SRobert Olsson 		left = tnode_get_child(oldtnode, i);
86819baf839SRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
86919baf839SRobert Olsson 
87019baf839SRobert Olsson 		/* At least one of the children is empty */
87119baf839SRobert Olsson 		if (left == NULL) {
87219baf839SRobert Olsson 			if (right == NULL)    /* Both are empty */
87319baf839SRobert Olsson 				continue;
87419baf839SRobert Olsson 			put_child(t, tn, i/2, right);
87591b9a277SOlof Johansson 			continue;
87691b9a277SOlof Johansson 		}
87791b9a277SOlof Johansson 
87891b9a277SOlof Johansson 		if (right == NULL) {
87919baf839SRobert Olsson 			put_child(t, tn, i/2, left);
88091b9a277SOlof Johansson 			continue;
88191b9a277SOlof Johansson 		}
88219baf839SRobert Olsson 
88319baf839SRobert Olsson 		/* Two nonempty children */
88491b9a277SOlof Johansson 		newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
8852f36895aSRobert Olsson 		put_child(t, tn, i/2, NULL);
88619baf839SRobert Olsson 		put_child(t, newBinNode, 0, left);
88719baf839SRobert Olsson 		put_child(t, newBinNode, 1, right);
88819baf839SRobert Olsson 		put_child(t, tn, i/2, resize(t, newBinNode));
88919baf839SRobert Olsson 	}
890e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
89119baf839SRobert Olsson 	return tn;
8922f80b3c8SRobert Olsson nomem:
8932f80b3c8SRobert Olsson 	{
8942f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8952f80b3c8SRobert Olsson 		int j;
8962f80b3c8SRobert Olsson 
8972f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8982f80b3c8SRobert Olsson 			if (tn->child[j])
8992f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
9002f80b3c8SRobert Olsson 
9012f80b3c8SRobert Olsson 		tnode_free(tn);
9022f80b3c8SRobert Olsson 
9032f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
9042f80b3c8SRobert Olsson 	}
90519baf839SRobert Olsson }
90619baf839SRobert Olsson 
907772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines
9082373ce1cSRobert Olsson  via get_fa_head and dump */
9092373ce1cSRobert Olsson 
910772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
91119baf839SRobert Olsson {
912772cb712SRobert Olsson 	struct hlist_head *head = &l->list;
91319baf839SRobert Olsson 	struct hlist_node *node;
91419baf839SRobert Olsson 	struct leaf_info *li;
91519baf839SRobert Olsson 
9162373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, head, hlist)
91719baf839SRobert Olsson 		if (li->plen == plen)
91819baf839SRobert Olsson 			return li;
91991b9a277SOlof Johansson 
92019baf839SRobert Olsson 	return NULL;
92119baf839SRobert Olsson }
92219baf839SRobert Olsson 
92319baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen)
92419baf839SRobert Olsson {
925772cb712SRobert Olsson 	struct leaf_info *li = find_leaf_info(l, plen);
92619baf839SRobert Olsson 
92791b9a277SOlof Johansson 	if (!li)
92891b9a277SOlof Johansson 		return NULL;
92919baf839SRobert Olsson 
93091b9a277SOlof Johansson 	return &li->falh;
93119baf839SRobert Olsson }
93219baf839SRobert Olsson 
93319baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
93419baf839SRobert Olsson {
93519baf839SRobert Olsson 	struct leaf_info *li = NULL, *last = NULL;
93691b9a277SOlof Johansson 	struct hlist_node *node;
93719baf839SRobert Olsson 
93891b9a277SOlof Johansson 	if (hlist_empty(head)) {
9392373ce1cSRobert Olsson 		hlist_add_head_rcu(&new->hlist, head);
94091b9a277SOlof Johansson 	} else {
94191b9a277SOlof Johansson 		hlist_for_each_entry(li, node, head, hlist) {
94219baf839SRobert Olsson 			if (new->plen > li->plen)
94319baf839SRobert Olsson 				break;
94419baf839SRobert Olsson 
94519baf839SRobert Olsson 			last = li;
94619baf839SRobert Olsson 		}
94719baf839SRobert Olsson 		if (last)
9482373ce1cSRobert Olsson 			hlist_add_after_rcu(&last->hlist, &new->hlist);
94919baf839SRobert Olsson 		else
9502373ce1cSRobert Olsson 			hlist_add_before_rcu(&new->hlist, &li->hlist);
95119baf839SRobert Olsson 	}
95219baf839SRobert Olsson }
95319baf839SRobert Olsson 
9542373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
9552373ce1cSRobert Olsson 
95619baf839SRobert Olsson static struct leaf *
95719baf839SRobert Olsson fib_find_node(struct trie *t, u32 key)
95819baf839SRobert Olsson {
95919baf839SRobert Olsson 	int pos;
96019baf839SRobert Olsson 	struct tnode *tn;
96119baf839SRobert Olsson 	struct node *n;
96219baf839SRobert Olsson 
96319baf839SRobert Olsson 	pos = 0;
9642373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
96519baf839SRobert Olsson 
96619baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
96719baf839SRobert Olsson 		tn = (struct tnode *) n;
96819baf839SRobert Olsson 
96919baf839SRobert Olsson 		check_tnode(tn);
97019baf839SRobert Olsson 
97119baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
97219baf839SRobert Olsson 			pos = tn->pos + tn->bits;
973a07f5f50SStephen Hemminger 			n = tnode_get_child_rcu(tn,
974a07f5f50SStephen Hemminger 						tkey_extract_bits(key,
975a07f5f50SStephen Hemminger 								  tn->pos,
976a07f5f50SStephen Hemminger 								  tn->bits));
97791b9a277SOlof Johansson 		} else
97819baf839SRobert Olsson 			break;
97919baf839SRobert Olsson 	}
98019baf839SRobert Olsson 	/* Case we have found a leaf. Compare prefixes */
98119baf839SRobert Olsson 
98291b9a277SOlof Johansson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
98391b9a277SOlof Johansson 		return (struct leaf *)n;
98491b9a277SOlof Johansson 
98519baf839SRobert Olsson 	return NULL;
98619baf839SRobert Olsson }
98719baf839SRobert Olsson 
9887b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn)
98919baf839SRobert Olsson {
99019baf839SRobert Olsson 	int wasfull;
9913ed18d76SRobert Olsson 	t_key cindex, key;
99206801916SStephen Hemminger 	struct tnode *tp;
99319baf839SRobert Olsson 
9943ed18d76SRobert Olsson 	key = tn->key;
9953ed18d76SRobert Olsson 
99606801916SStephen Hemminger 	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
99719baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
99819baf839SRobert Olsson 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
99919baf839SRobert Olsson 		tn = (struct tnode *) resize(t, (struct tnode *)tn);
1000a07f5f50SStephen Hemminger 
1001a07f5f50SStephen Hemminger 		tnode_put_child_reorg((struct tnode *)tp, cindex,
1002a07f5f50SStephen Hemminger 				      (struct node *)tn, wasfull);
100319baf839SRobert Olsson 
100406801916SStephen Hemminger 		tp = node_parent((struct node *) tn);
1005008440e3SJarek Poplawski 		if (!tp)
1006008440e3SJarek Poplawski 			rcu_assign_pointer(t->trie, (struct node *)tn);
1007008440e3SJarek Poplawski 
1008e0f7cb8cSJarek Poplawski 		tnode_free_flush();
100906801916SStephen Hemminger 		if (!tp)
101019baf839SRobert Olsson 			break;
101106801916SStephen Hemminger 		tn = tp;
101219baf839SRobert Olsson 	}
101306801916SStephen Hemminger 
101419baf839SRobert Olsson 	/* Handle last (top) tnode */
10157b85576dSJarek Poplawski 	if (IS_TNODE(tn))
101619baf839SRobert Olsson 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
101719baf839SRobert Olsson 
10187b85576dSJarek Poplawski 	rcu_assign_pointer(t->trie, (struct node *)tn);
10197b85576dSJarek Poplawski 	tnode_free_flush();
10207b85576dSJarek Poplawski 
10217b85576dSJarek Poplawski 	return;
102219baf839SRobert Olsson }
102319baf839SRobert Olsson 
10242373ce1cSRobert Olsson /* only used from updater-side */
10252373ce1cSRobert Olsson 
1026fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
102719baf839SRobert Olsson {
102819baf839SRobert Olsson 	int pos, newpos;
102919baf839SRobert Olsson 	struct tnode *tp = NULL, *tn = NULL;
103019baf839SRobert Olsson 	struct node *n;
103119baf839SRobert Olsson 	struct leaf *l;
103219baf839SRobert Olsson 	int missbit;
103319baf839SRobert Olsson 	struct list_head *fa_head = NULL;
103419baf839SRobert Olsson 	struct leaf_info *li;
103519baf839SRobert Olsson 	t_key cindex;
103619baf839SRobert Olsson 
103719baf839SRobert Olsson 	pos = 0;
103819baf839SRobert Olsson 	n = t->trie;
103919baf839SRobert Olsson 
104019baf839SRobert Olsson 	/* If we point to NULL, stop. Either the tree is empty and we should
104119baf839SRobert Olsson 	 * just put a new leaf in if, or we have reached an empty child slot,
104219baf839SRobert Olsson 	 * and we should just put our new leaf in that.
104319baf839SRobert Olsson 	 * If we point to a T_TNODE, check if it matches our key. Note that
104419baf839SRobert Olsson 	 * a T_TNODE might be skipping any number of bits - its 'pos' need
104519baf839SRobert Olsson 	 * not be the parent's 'pos'+'bits'!
104619baf839SRobert Olsson 	 *
104719baf839SRobert Olsson 	 * If it does match the current key, get pos/bits from it, extract
104819baf839SRobert Olsson 	 * the index from our key, push the T_TNODE and walk the tree.
104919baf839SRobert Olsson 	 *
105019baf839SRobert Olsson 	 * If it doesn't, we have to replace it with a new T_TNODE.
105119baf839SRobert Olsson 	 *
105219baf839SRobert Olsson 	 * If we point to a T_LEAF, it might or might not have the same key
105319baf839SRobert Olsson 	 * as we do. If it does, just change the value, update the T_LEAF's
105419baf839SRobert Olsson 	 * value, and return it.
105519baf839SRobert Olsson 	 * If it doesn't, we need to replace it with a T_TNODE.
105619baf839SRobert Olsson 	 */
105719baf839SRobert Olsson 
105819baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
105919baf839SRobert Olsson 		tn = (struct tnode *) n;
106019baf839SRobert Olsson 
106119baf839SRobert Olsson 		check_tnode(tn);
106219baf839SRobert Olsson 
106319baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
106419baf839SRobert Olsson 			tp = tn;
106519baf839SRobert Olsson 			pos = tn->pos + tn->bits;
1066a07f5f50SStephen Hemminger 			n = tnode_get_child(tn,
1067a07f5f50SStephen Hemminger 					    tkey_extract_bits(key,
1068a07f5f50SStephen Hemminger 							      tn->pos,
1069a07f5f50SStephen Hemminger 							      tn->bits));
107019baf839SRobert Olsson 
107106801916SStephen Hemminger 			BUG_ON(n && node_parent(n) != tn);
107291b9a277SOlof Johansson 		} else
107319baf839SRobert Olsson 			break;
107419baf839SRobert Olsson 	}
107519baf839SRobert Olsson 
107619baf839SRobert Olsson 	/*
107719baf839SRobert Olsson 	 * n  ----> NULL, LEAF or TNODE
107819baf839SRobert Olsson 	 *
107919baf839SRobert Olsson 	 * tp is n's (parent) ----> NULL or TNODE
108019baf839SRobert Olsson 	 */
108119baf839SRobert Olsson 
108291b9a277SOlof Johansson 	BUG_ON(tp && IS_LEAF(tp));
108319baf839SRobert Olsson 
108419baf839SRobert Olsson 	/* Case 1: n is a leaf. Compare prefixes */
108519baf839SRobert Olsson 
108619baf839SRobert Olsson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1087c95aaf9aSStephen Hemminger 		l = (struct leaf *) n;
108819baf839SRobert Olsson 		li = leaf_info_new(plen);
108919baf839SRobert Olsson 
1090fea86ad8SStephen Hemminger 		if (!li)
1091fea86ad8SStephen Hemminger 			return NULL;
109219baf839SRobert Olsson 
109319baf839SRobert Olsson 		fa_head = &li->falh;
109419baf839SRobert Olsson 		insert_leaf_info(&l->list, li);
109519baf839SRobert Olsson 		goto done;
109619baf839SRobert Olsson 	}
109719baf839SRobert Olsson 	l = leaf_new();
109819baf839SRobert Olsson 
1099fea86ad8SStephen Hemminger 	if (!l)
1100fea86ad8SStephen Hemminger 		return NULL;
110119baf839SRobert Olsson 
110219baf839SRobert Olsson 	l->key = key;
110319baf839SRobert Olsson 	li = leaf_info_new(plen);
110419baf839SRobert Olsson 
1105f835e471SRobert Olsson 	if (!li) {
1106387a5487SStephen Hemminger 		free_leaf(l);
1107fea86ad8SStephen Hemminger 		return NULL;
1108f835e471SRobert Olsson 	}
110919baf839SRobert Olsson 
111019baf839SRobert Olsson 	fa_head = &li->falh;
111119baf839SRobert Olsson 	insert_leaf_info(&l->list, li);
111219baf839SRobert Olsson 
111319baf839SRobert Olsson 	if (t->trie && n == NULL) {
111491b9a277SOlof Johansson 		/* Case 2: n is NULL, and will just insert a new leaf */
111519baf839SRobert Olsson 
111606801916SStephen Hemminger 		node_set_parent((struct node *)l, tp);
111719baf839SRobert Olsson 
111819baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
111919baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
112091b9a277SOlof Johansson 	} else {
112119baf839SRobert Olsson 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
112219baf839SRobert Olsson 		/*
112319baf839SRobert Olsson 		 *  Add a new tnode here
112419baf839SRobert Olsson 		 *  first tnode need some special handling
112519baf839SRobert Olsson 		 */
112619baf839SRobert Olsson 
112719baf839SRobert Olsson 		if (tp)
112819baf839SRobert Olsson 			pos = tp->pos+tp->bits;
112919baf839SRobert Olsson 		else
113019baf839SRobert Olsson 			pos = 0;
113191b9a277SOlof Johansson 
113219baf839SRobert Olsson 		if (n) {
113319baf839SRobert Olsson 			newpos = tkey_mismatch(key, pos, n->key);
113419baf839SRobert Olsson 			tn = tnode_new(n->key, newpos, 1);
113591b9a277SOlof Johansson 		} else {
113619baf839SRobert Olsson 			newpos = 0;
113719baf839SRobert Olsson 			tn = tnode_new(key, newpos, 1); /* First tnode */
113819baf839SRobert Olsson 		}
1139f835e471SRobert Olsson 
1140f835e471SRobert Olsson 		if (!tn) {
1141f835e471SRobert Olsson 			free_leaf_info(li);
1142387a5487SStephen Hemminger 			free_leaf(l);
1143fea86ad8SStephen Hemminger 			return NULL;
1144f835e471SRobert Olsson 		}
114519baf839SRobert Olsson 
114606801916SStephen Hemminger 		node_set_parent((struct node *)tn, tp);
114719baf839SRobert Olsson 
114819baf839SRobert Olsson 		missbit = tkey_extract_bits(key, newpos, 1);
114919baf839SRobert Olsson 		put_child(t, tn, missbit, (struct node *)l);
115019baf839SRobert Olsson 		put_child(t, tn, 1-missbit, n);
115119baf839SRobert Olsson 
115219baf839SRobert Olsson 		if (tp) {
115319baf839SRobert Olsson 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1154a07f5f50SStephen Hemminger 			put_child(t, (struct tnode *)tp, cindex,
1155a07f5f50SStephen Hemminger 				  (struct node *)tn);
115691b9a277SOlof Johansson 		} else {
1157a07f5f50SStephen Hemminger 			rcu_assign_pointer(t->trie, (struct node *)tn);
115819baf839SRobert Olsson 			tp = tn;
115919baf839SRobert Olsson 		}
116019baf839SRobert Olsson 	}
116191b9a277SOlof Johansson 
116291b9a277SOlof Johansson 	if (tp && tp->pos + tp->bits > 32)
1163a07f5f50SStephen Hemminger 		pr_warning("fib_trie"
1164a07f5f50SStephen Hemminger 			   " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
116519baf839SRobert Olsson 			   tp, tp->pos, tp->bits, key, plen);
116691b9a277SOlof Johansson 
116719baf839SRobert Olsson 	/* Rebalance the trie */
11682373ce1cSRobert Olsson 
11697b85576dSJarek Poplawski 	trie_rebalance(t, tp);
1170f835e471SRobert Olsson done:
117119baf839SRobert Olsson 	return fa_head;
117219baf839SRobert Olsson }
117319baf839SRobert Olsson 
1174d562f1f8SRobert Olsson /*
1175d562f1f8SRobert Olsson  * Caller must hold RTNL.
1176d562f1f8SRobert Olsson  */
11774e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
117819baf839SRobert Olsson {
117919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
118019baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
118119baf839SRobert Olsson 	struct list_head *fa_head = NULL;
118219baf839SRobert Olsson 	struct fib_info *fi;
11834e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
11844e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
118519baf839SRobert Olsson 	u32 key, mask;
118619baf839SRobert Olsson 	int err;
118719baf839SRobert Olsson 	struct leaf *l;
118819baf839SRobert Olsson 
118919baf839SRobert Olsson 	if (plen > 32)
119019baf839SRobert Olsson 		return -EINVAL;
119119baf839SRobert Olsson 
11924e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
119319baf839SRobert Olsson 
11942dfe55b4SPatrick McHardy 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
119519baf839SRobert Olsson 
119619baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
119719baf839SRobert Olsson 
119819baf839SRobert Olsson 	if (key & ~mask)
119919baf839SRobert Olsson 		return -EINVAL;
120019baf839SRobert Olsson 
120119baf839SRobert Olsson 	key = key & mask;
120219baf839SRobert Olsson 
12034e902c57SThomas Graf 	fi = fib_create_info(cfg);
12044e902c57SThomas Graf 	if (IS_ERR(fi)) {
12054e902c57SThomas Graf 		err = PTR_ERR(fi);
120619baf839SRobert Olsson 		goto err;
12074e902c57SThomas Graf 	}
120819baf839SRobert Olsson 
120919baf839SRobert Olsson 	l = fib_find_node(t, key);
121019baf839SRobert Olsson 	fa = NULL;
121119baf839SRobert Olsson 
121219baf839SRobert Olsson 	if (l) {
121319baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
121419baf839SRobert Olsson 		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
121519baf839SRobert Olsson 	}
121619baf839SRobert Olsson 
121719baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
121819baf839SRobert Olsson 	 * with the same keys [prefix,tos,priority], if such key already
121919baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
122019baf839SRobert Olsson 	 *
122119baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
122219baf839SRobert Olsson 	 * insert to the head of f.
122319baf839SRobert Olsson 	 *
122419baf839SRobert Olsson 	 * If f is NULL, no fib node matched the destination key
122519baf839SRobert Olsson 	 * and we need to allocate a new one of those as well.
122619baf839SRobert Olsson 	 */
122719baf839SRobert Olsson 
1228936f6f8eSJulian Anastasov 	if (fa && fa->fa_tos == tos &&
1229936f6f8eSJulian Anastasov 	    fa->fa_info->fib_priority == fi->fib_priority) {
1230936f6f8eSJulian Anastasov 		struct fib_alias *fa_first, *fa_match;
123119baf839SRobert Olsson 
123219baf839SRobert Olsson 		err = -EEXIST;
12334e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
123419baf839SRobert Olsson 			goto out;
123519baf839SRobert Olsson 
1236936f6f8eSJulian Anastasov 		/* We have 2 goals:
1237936f6f8eSJulian Anastasov 		 * 1. Find exact match for type, scope, fib_info to avoid
1238936f6f8eSJulian Anastasov 		 * duplicate routes
1239936f6f8eSJulian Anastasov 		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1240936f6f8eSJulian Anastasov 		 */
1241936f6f8eSJulian Anastasov 		fa_match = NULL;
1242936f6f8eSJulian Anastasov 		fa_first = fa;
1243936f6f8eSJulian Anastasov 		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1244936f6f8eSJulian Anastasov 		list_for_each_entry_continue(fa, fa_head, fa_list) {
1245936f6f8eSJulian Anastasov 			if (fa->fa_tos != tos)
1246936f6f8eSJulian Anastasov 				break;
1247936f6f8eSJulian Anastasov 			if (fa->fa_info->fib_priority != fi->fib_priority)
1248936f6f8eSJulian Anastasov 				break;
1249936f6f8eSJulian Anastasov 			if (fa->fa_type == cfg->fc_type &&
1250936f6f8eSJulian Anastasov 			    fa->fa_scope == cfg->fc_scope &&
1251936f6f8eSJulian Anastasov 			    fa->fa_info == fi) {
1252936f6f8eSJulian Anastasov 				fa_match = fa;
1253936f6f8eSJulian Anastasov 				break;
1254936f6f8eSJulian Anastasov 			}
1255936f6f8eSJulian Anastasov 		}
1256936f6f8eSJulian Anastasov 
12574e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
125819baf839SRobert Olsson 			struct fib_info *fi_drop;
125919baf839SRobert Olsson 			u8 state;
126019baf839SRobert Olsson 
1261936f6f8eSJulian Anastasov 			fa = fa_first;
1262936f6f8eSJulian Anastasov 			if (fa_match) {
1263936f6f8eSJulian Anastasov 				if (fa == fa_match)
1264936f6f8eSJulian Anastasov 					err = 0;
12656725033fSJoonwoo Park 				goto out;
1266936f6f8eSJulian Anastasov 			}
12672373ce1cSRobert Olsson 			err = -ENOBUFS;
1268e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
12692373ce1cSRobert Olsson 			if (new_fa == NULL)
12702373ce1cSRobert Olsson 				goto out;
127119baf839SRobert Olsson 
127219baf839SRobert Olsson 			fi_drop = fa->fa_info;
12732373ce1cSRobert Olsson 			new_fa->fa_tos = fa->fa_tos;
12742373ce1cSRobert Olsson 			new_fa->fa_info = fi;
12754e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
12764e902c57SThomas Graf 			new_fa->fa_scope = cfg->fc_scope;
127719baf839SRobert Olsson 			state = fa->fa_state;
1278936f6f8eSJulian Anastasov 			new_fa->fa_state = state & ~FA_S_ACCESSED;
127919baf839SRobert Olsson 
12802373ce1cSRobert Olsson 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12812373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
128219baf839SRobert Olsson 
128319baf839SRobert Olsson 			fib_release_info(fi_drop);
128419baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
128576e6ebfbSDenis V. Lunev 				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
1286b8f55831SMilan Kocian 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1287b8f55831SMilan Kocian 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
128819baf839SRobert Olsson 
128919baf839SRobert Olsson 			goto succeeded;
129019baf839SRobert Olsson 		}
129119baf839SRobert Olsson 		/* Error if we find a perfect match which
129219baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
129319baf839SRobert Olsson 		 * information.
129419baf839SRobert Olsson 		 */
1295936f6f8eSJulian Anastasov 		if (fa_match)
129619baf839SRobert Olsson 			goto out;
1297a07f5f50SStephen Hemminger 
12984e902c57SThomas Graf 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
1299936f6f8eSJulian Anastasov 			fa = fa_first;
130019baf839SRobert Olsson 	}
130119baf839SRobert Olsson 	err = -ENOENT;
13024e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
130319baf839SRobert Olsson 		goto out;
130419baf839SRobert Olsson 
130519baf839SRobert Olsson 	err = -ENOBUFS;
1306e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
130719baf839SRobert Olsson 	if (new_fa == NULL)
130819baf839SRobert Olsson 		goto out;
130919baf839SRobert Olsson 
131019baf839SRobert Olsson 	new_fa->fa_info = fi;
131119baf839SRobert Olsson 	new_fa->fa_tos = tos;
13124e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
13134e902c57SThomas Graf 	new_fa->fa_scope = cfg->fc_scope;
131419baf839SRobert Olsson 	new_fa->fa_state = 0;
131519baf839SRobert Olsson 	/*
131619baf839SRobert Olsson 	 * Insert new entry to the list.
131719baf839SRobert Olsson 	 */
131819baf839SRobert Olsson 
1319f835e471SRobert Olsson 	if (!fa_head) {
1320fea86ad8SStephen Hemminger 		fa_head = fib_insert_node(t, key, plen);
1321fea86ad8SStephen Hemminger 		if (unlikely(!fa_head)) {
1322fea86ad8SStephen Hemminger 			err = -ENOMEM;
1323f835e471SRobert Olsson 			goto out_free_new_fa;
1324f835e471SRobert Olsson 		}
1325fea86ad8SStephen Hemminger 	}
132619baf839SRobert Olsson 
13272373ce1cSRobert Olsson 	list_add_tail_rcu(&new_fa->fa_list,
13282373ce1cSRobert Olsson 			  (fa ? &fa->fa_list : fa_head));
132919baf839SRobert Olsson 
133076e6ebfbSDenis V. Lunev 	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
13314e902c57SThomas Graf 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1332b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
133319baf839SRobert Olsson succeeded:
133419baf839SRobert Olsson 	return 0;
1335f835e471SRobert Olsson 
1336f835e471SRobert Olsson out_free_new_fa:
1337f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
133819baf839SRobert Olsson out:
133919baf839SRobert Olsson 	fib_release_info(fi);
134091b9a277SOlof Johansson err:
134119baf839SRobert Olsson 	return err;
134219baf839SRobert Olsson }
134319baf839SRobert Olsson 
1344772cb712SRobert Olsson /* should be called with rcu_read_lock */
1345a07f5f50SStephen Hemminger static int check_leaf(struct trie *t, struct leaf *l,
1346a07f5f50SStephen Hemminger 		      t_key key,  const struct flowi *flp,
134706c74270SPatrick McHardy 		      struct fib_result *res)
134819baf839SRobert Olsson {
134919baf839SRobert Olsson 	struct leaf_info *li;
135019baf839SRobert Olsson 	struct hlist_head *hhead = &l->list;
135119baf839SRobert Olsson 	struct hlist_node *node;
135219baf839SRobert Olsson 
13532373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1354a07f5f50SStephen Hemminger 		int err;
1355a07f5f50SStephen Hemminger 		int plen = li->plen;
1356a07f5f50SStephen Hemminger 		__be32 mask = inet_make_mask(plen);
1357a07f5f50SStephen Hemminger 
1358888454c5SAl Viro 		if (l->key != (key & ntohl(mask)))
135919baf839SRobert Olsson 			continue;
136019baf839SRobert Olsson 
1361e204a345SRami Rosen 		err = fib_semantic_match(&li->falh, flp, res, plen);
1362a07f5f50SStephen Hemminger 
136319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
1364a07f5f50SStephen Hemminger 		if (err <= 0)
136519baf839SRobert Olsson 			t->stats.semantic_match_passed++;
1366a07f5f50SStephen Hemminger 		else
136719baf839SRobert Olsson 			t->stats.semantic_match_miss++;
136819baf839SRobert Olsson #endif
1369a07f5f50SStephen Hemminger 		if (err <= 0)
13702e655571SBen Hutchings 			return err;
137119baf839SRobert Olsson 	}
137219baf839SRobert Olsson 
13732e655571SBen Hutchings 	return 1;
1374a07f5f50SStephen Hemminger }
1375a07f5f50SStephen Hemminger 
1376a07f5f50SStephen Hemminger static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1377a07f5f50SStephen Hemminger 			  struct fib_result *res)
137819baf839SRobert Olsson {
137919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
13802e655571SBen Hutchings 	int ret;
138119baf839SRobert Olsson 	struct node *n;
138219baf839SRobert Olsson 	struct tnode *pn;
138319baf839SRobert Olsson 	int pos, bits;
138419baf839SRobert Olsson 	t_key key = ntohl(flp->fl4_dst);
138519baf839SRobert Olsson 	int chopped_off;
138619baf839SRobert Olsson 	t_key cindex = 0;
138719baf839SRobert Olsson 	int current_prefix_length = KEYLENGTH;
138891b9a277SOlof Johansson 	struct tnode *cn;
138991b9a277SOlof Johansson 	t_key node_prefix, key_prefix, pref_mismatch;
139091b9a277SOlof Johansson 	int mp;
139191b9a277SOlof Johansson 
13922373ce1cSRobert Olsson 	rcu_read_lock();
139319baf839SRobert Olsson 
13942373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
139519baf839SRobert Olsson 	if (!n)
139619baf839SRobert Olsson 		goto failed;
139719baf839SRobert Olsson 
139819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
139919baf839SRobert Olsson 	t->stats.gets++;
140019baf839SRobert Olsson #endif
140119baf839SRobert Olsson 
140219baf839SRobert Olsson 	/* Just a leaf? */
140319baf839SRobert Olsson 	if (IS_LEAF(n)) {
14042e655571SBen Hutchings 		ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1405a07f5f50SStephen Hemminger 		goto found;
140619baf839SRobert Olsson 	}
1407a07f5f50SStephen Hemminger 
140819baf839SRobert Olsson 	pn = (struct tnode *) n;
140919baf839SRobert Olsson 	chopped_off = 0;
141019baf839SRobert Olsson 
141119baf839SRobert Olsson 	while (pn) {
141219baf839SRobert Olsson 		pos = pn->pos;
141319baf839SRobert Olsson 		bits = pn->bits;
141419baf839SRobert Olsson 
141519baf839SRobert Olsson 		if (!chopped_off)
1416ab66b4a7SStephen Hemminger 			cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1417ab66b4a7SStephen Hemminger 						   pos, bits);
141819baf839SRobert Olsson 
1419b902e573SJarek Poplawski 		n = tnode_get_child_rcu(pn, cindex);
142019baf839SRobert Olsson 
142119baf839SRobert Olsson 		if (n == NULL) {
142219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
142319baf839SRobert Olsson 			t->stats.null_node_hit++;
142419baf839SRobert Olsson #endif
142519baf839SRobert Olsson 			goto backtrace;
142619baf839SRobert Olsson 		}
142719baf839SRobert Olsson 
142891b9a277SOlof Johansson 		if (IS_LEAF(n)) {
14292e655571SBen Hutchings 			ret = check_leaf(t, (struct leaf *)n, key, flp, res);
14302e655571SBen Hutchings 			if (ret > 0)
143191b9a277SOlof Johansson 				goto backtrace;
1432a07f5f50SStephen Hemminger 			goto found;
143391b9a277SOlof Johansson 		}
143491b9a277SOlof Johansson 
143591b9a277SOlof Johansson 		cn = (struct tnode *)n;
143619baf839SRobert Olsson 
143719baf839SRobert Olsson 		/*
143819baf839SRobert Olsson 		 * It's a tnode, and we can do some extra checks here if we
143919baf839SRobert Olsson 		 * like, to avoid descending into a dead-end branch.
144019baf839SRobert Olsson 		 * This tnode is in the parent's child array at index
144119baf839SRobert Olsson 		 * key[p_pos..p_pos+p_bits] but potentially with some bits
144219baf839SRobert Olsson 		 * chopped off, so in reality the index may be just a
144319baf839SRobert Olsson 		 * subprefix, padded with zero at the end.
144419baf839SRobert Olsson 		 * We can also take a look at any skipped bits in this
144519baf839SRobert Olsson 		 * tnode - everything up to p_pos is supposed to be ok,
144619baf839SRobert Olsson 		 * and the non-chopped bits of the index (se previous
144719baf839SRobert Olsson 		 * paragraph) are also guaranteed ok, but the rest is
144819baf839SRobert Olsson 		 * considered unknown.
144919baf839SRobert Olsson 		 *
145019baf839SRobert Olsson 		 * The skipped bits are key[pos+bits..cn->pos].
145119baf839SRobert Olsson 		 */
145219baf839SRobert Olsson 
145319baf839SRobert Olsson 		/* If current_prefix_length < pos+bits, we are already doing
145419baf839SRobert Olsson 		 * actual prefix  matching, which means everything from
145519baf839SRobert Olsson 		 * pos+(bits-chopped_off) onward must be zero along some
145619baf839SRobert Olsson 		 * branch of this subtree - otherwise there is *no* valid
145719baf839SRobert Olsson 		 * prefix present. Here we can only check the skipped
145819baf839SRobert Olsson 		 * bits. Remember, since we have already indexed into the
145919baf839SRobert Olsson 		 * parent's child array, we know that the bits we chopped of
146019baf839SRobert Olsson 		 * *are* zero.
146119baf839SRobert Olsson 		 */
146219baf839SRobert Olsson 
1463a07f5f50SStephen Hemminger 		/* NOTA BENE: Checking only skipped bits
1464a07f5f50SStephen Hemminger 		   for the new node here */
146519baf839SRobert Olsson 
146619baf839SRobert Olsson 		if (current_prefix_length < pos+bits) {
146719baf839SRobert Olsson 			if (tkey_extract_bits(cn->key, current_prefix_length,
1468a07f5f50SStephen Hemminger 						cn->pos - current_prefix_length)
1469a07f5f50SStephen Hemminger 			    || !(cn->child[0]))
147019baf839SRobert Olsson 				goto backtrace;
147119baf839SRobert Olsson 		}
147219baf839SRobert Olsson 
147319baf839SRobert Olsson 		/*
147419baf839SRobert Olsson 		 * If chopped_off=0, the index is fully validated and we
147519baf839SRobert Olsson 		 * only need to look at the skipped bits for this, the new,
147619baf839SRobert Olsson 		 * tnode. What we actually want to do is to find out if
147719baf839SRobert Olsson 		 * these skipped bits match our key perfectly, or if we will
147819baf839SRobert Olsson 		 * have to count on finding a matching prefix further down,
147919baf839SRobert Olsson 		 * because if we do, we would like to have some way of
148019baf839SRobert Olsson 		 * verifying the existence of such a prefix at this point.
148119baf839SRobert Olsson 		 */
148219baf839SRobert Olsson 
148319baf839SRobert Olsson 		/* The only thing we can do at this point is to verify that
148419baf839SRobert Olsson 		 * any such matching prefix can indeed be a prefix to our
148519baf839SRobert Olsson 		 * key, and if the bits in the node we are inspecting that
148619baf839SRobert Olsson 		 * do not match our key are not ZERO, this cannot be true.
148719baf839SRobert Olsson 		 * Thus, find out where there is a mismatch (before cn->pos)
148819baf839SRobert Olsson 		 * and verify that all the mismatching bits are zero in the
148919baf839SRobert Olsson 		 * new tnode's key.
149019baf839SRobert Olsson 		 */
149119baf839SRobert Olsson 
1492a07f5f50SStephen Hemminger 		/*
1493a07f5f50SStephen Hemminger 		 * Note: We aren't very concerned about the piece of
1494a07f5f50SStephen Hemminger 		 * the key that precede pn->pos+pn->bits, since these
1495a07f5f50SStephen Hemminger 		 * have already been checked. The bits after cn->pos
1496a07f5f50SStephen Hemminger 		 * aren't checked since these are by definition
1497a07f5f50SStephen Hemminger 		 * "unknown" at this point. Thus, what we want to see
1498a07f5f50SStephen Hemminger 		 * is if we are about to enter the "prefix matching"
1499a07f5f50SStephen Hemminger 		 * state, and in that case verify that the skipped
1500a07f5f50SStephen Hemminger 		 * bits that will prevail throughout this subtree are
1501a07f5f50SStephen Hemminger 		 * zero, as they have to be if we are to find a
1502a07f5f50SStephen Hemminger 		 * matching prefix.
150319baf839SRobert Olsson 		 */
150419baf839SRobert Olsson 
1505ab66b4a7SStephen Hemminger 		node_prefix = mask_pfx(cn->key, cn->pos);
1506ab66b4a7SStephen Hemminger 		key_prefix = mask_pfx(key, cn->pos);
150719baf839SRobert Olsson 		pref_mismatch = key_prefix^node_prefix;
150819baf839SRobert Olsson 		mp = 0;
150919baf839SRobert Olsson 
1510a07f5f50SStephen Hemminger 		/*
1511a07f5f50SStephen Hemminger 		 * In short: If skipped bits in this node do not match
1512a07f5f50SStephen Hemminger 		 * the search key, enter the "prefix matching"
1513a07f5f50SStephen Hemminger 		 * state.directly.
151419baf839SRobert Olsson 		 */
151519baf839SRobert Olsson 		if (pref_mismatch) {
151619baf839SRobert Olsson 			while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
151719baf839SRobert Olsson 				mp++;
151819baf839SRobert Olsson 				pref_mismatch = pref_mismatch << 1;
151919baf839SRobert Olsson 			}
152019baf839SRobert Olsson 			key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
152119baf839SRobert Olsson 
152219baf839SRobert Olsson 			if (key_prefix != 0)
152319baf839SRobert Olsson 				goto backtrace;
152419baf839SRobert Olsson 
152519baf839SRobert Olsson 			if (current_prefix_length >= cn->pos)
152619baf839SRobert Olsson 				current_prefix_length = mp;
152719baf839SRobert Olsson 		}
1528a07f5f50SStephen Hemminger 
152919baf839SRobert Olsson 		pn = (struct tnode *)n; /* Descend */
153019baf839SRobert Olsson 		chopped_off = 0;
153119baf839SRobert Olsson 		continue;
153291b9a277SOlof Johansson 
153319baf839SRobert Olsson backtrace:
153419baf839SRobert Olsson 		chopped_off++;
153519baf839SRobert Olsson 
153619baf839SRobert Olsson 		/* As zero don't change the child key (cindex) */
1537a07f5f50SStephen Hemminger 		while ((chopped_off <= pn->bits)
1538a07f5f50SStephen Hemminger 		       && !(cindex & (1<<(chopped_off-1))))
153919baf839SRobert Olsson 			chopped_off++;
154019baf839SRobert Olsson 
154119baf839SRobert Olsson 		/* Decrease current_... with bits chopped off */
154219baf839SRobert Olsson 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1543a07f5f50SStephen Hemminger 			current_prefix_length = pn->pos + pn->bits
1544a07f5f50SStephen Hemminger 				- chopped_off;
154519baf839SRobert Olsson 
154619baf839SRobert Olsson 		/*
154719baf839SRobert Olsson 		 * Either we do the actual chop off according or if we have
154819baf839SRobert Olsson 		 * chopped off all bits in this tnode walk up to our parent.
154919baf839SRobert Olsson 		 */
155019baf839SRobert Olsson 
155191b9a277SOlof Johansson 		if (chopped_off <= pn->bits) {
155219baf839SRobert Olsson 			cindex &= ~(1 << (chopped_off-1));
155391b9a277SOlof Johansson 		} else {
1554b902e573SJarek Poplawski 			struct tnode *parent = node_parent_rcu((struct node *) pn);
155506801916SStephen Hemminger 			if (!parent)
155619baf839SRobert Olsson 				goto failed;
155719baf839SRobert Olsson 
155819baf839SRobert Olsson 			/* Get Child's index */
155906801916SStephen Hemminger 			cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
156006801916SStephen Hemminger 			pn = parent;
156119baf839SRobert Olsson 			chopped_off = 0;
156219baf839SRobert Olsson 
156319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
156419baf839SRobert Olsson 			t->stats.backtrack++;
156519baf839SRobert Olsson #endif
156619baf839SRobert Olsson 			goto backtrace;
156719baf839SRobert Olsson 		}
156819baf839SRobert Olsson 	}
156919baf839SRobert Olsson failed:
157019baf839SRobert Olsson 	ret = 1;
157119baf839SRobert Olsson found:
15722373ce1cSRobert Olsson 	rcu_read_unlock();
157319baf839SRobert Olsson 	return ret;
157419baf839SRobert Olsson }
157519baf839SRobert Olsson 
157619baf839SRobert Olsson /*
15779195bef7SStephen Hemminger  * Remove the leaf and return parent.
157819baf839SRobert Olsson  */
15799195bef7SStephen Hemminger static void trie_leaf_remove(struct trie *t, struct leaf *l)
15809195bef7SStephen Hemminger {
15819195bef7SStephen Hemminger 	struct tnode *tp = node_parent((struct node *) l);
15829195bef7SStephen Hemminger 
15839195bef7SStephen Hemminger 	pr_debug("entering trie_leaf_remove(%p)\n", l);
158419baf839SRobert Olsson 
158519baf839SRobert Olsson 	if (tp) {
15869195bef7SStephen Hemminger 		t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
158719baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, NULL);
15887b85576dSJarek Poplawski 		trie_rebalance(t, tp);
158991b9a277SOlof Johansson 	} else
15902373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, NULL);
159119baf839SRobert Olsson 
1592387a5487SStephen Hemminger 	free_leaf(l);
159319baf839SRobert Olsson }
159419baf839SRobert Olsson 
1595d562f1f8SRobert Olsson /*
1596d562f1f8SRobert Olsson  * Caller must hold RTNL.
1597d562f1f8SRobert Olsson  */
15984e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
159919baf839SRobert Olsson {
160019baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
160119baf839SRobert Olsson 	u32 key, mask;
16024e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
16034e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
160419baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
160519baf839SRobert Olsson 	struct list_head *fa_head;
160619baf839SRobert Olsson 	struct leaf *l;
160791b9a277SOlof Johansson 	struct leaf_info *li;
160891b9a277SOlof Johansson 
160919baf839SRobert Olsson 	if (plen > 32)
161019baf839SRobert Olsson 		return -EINVAL;
161119baf839SRobert Olsson 
16124e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
161319baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
161419baf839SRobert Olsson 
161519baf839SRobert Olsson 	if (key & ~mask)
161619baf839SRobert Olsson 		return -EINVAL;
161719baf839SRobert Olsson 
161819baf839SRobert Olsson 	key = key & mask;
161919baf839SRobert Olsson 	l = fib_find_node(t, key);
162019baf839SRobert Olsson 
162119baf839SRobert Olsson 	if (!l)
162219baf839SRobert Olsson 		return -ESRCH;
162319baf839SRobert Olsson 
162419baf839SRobert Olsson 	fa_head = get_fa_head(l, plen);
162519baf839SRobert Olsson 	fa = fib_find_alias(fa_head, tos, 0);
162619baf839SRobert Olsson 
162719baf839SRobert Olsson 	if (!fa)
162819baf839SRobert Olsson 		return -ESRCH;
162919baf839SRobert Olsson 
16300c7770c7SStephen Hemminger 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
163119baf839SRobert Olsson 
163219baf839SRobert Olsson 	fa_to_delete = NULL;
1633936f6f8eSJulian Anastasov 	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1634936f6f8eSJulian Anastasov 	list_for_each_entry_continue(fa, fa_head, fa_list) {
163519baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
163619baf839SRobert Olsson 
163719baf839SRobert Olsson 		if (fa->fa_tos != tos)
163819baf839SRobert Olsson 			break;
163919baf839SRobert Olsson 
16404e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
16414e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
16424e902c57SThomas Graf 		     fa->fa_scope == cfg->fc_scope) &&
16434e902c57SThomas Graf 		    (!cfg->fc_protocol ||
16444e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
16454e902c57SThomas Graf 		    fib_nh_match(cfg, fi) == 0) {
164619baf839SRobert Olsson 			fa_to_delete = fa;
164719baf839SRobert Olsson 			break;
164819baf839SRobert Olsson 		}
164919baf839SRobert Olsson 	}
165019baf839SRobert Olsson 
165191b9a277SOlof Johansson 	if (!fa_to_delete)
165291b9a277SOlof Johansson 		return -ESRCH;
165319baf839SRobert Olsson 
165419baf839SRobert Olsson 	fa = fa_to_delete;
16554e902c57SThomas Graf 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1656b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
165719baf839SRobert Olsson 
165819baf839SRobert Olsson 	l = fib_find_node(t, key);
1659772cb712SRobert Olsson 	li = find_leaf_info(l, plen);
166019baf839SRobert Olsson 
16612373ce1cSRobert Olsson 	list_del_rcu(&fa->fa_list);
166219baf839SRobert Olsson 
166319baf839SRobert Olsson 	if (list_empty(fa_head)) {
16642373ce1cSRobert Olsson 		hlist_del_rcu(&li->hlist);
166519baf839SRobert Olsson 		free_leaf_info(li);
16662373ce1cSRobert Olsson 	}
166719baf839SRobert Olsson 
166819baf839SRobert Olsson 	if (hlist_empty(&l->list))
16699195bef7SStephen Hemminger 		trie_leaf_remove(t, l);
167019baf839SRobert Olsson 
167119baf839SRobert Olsson 	if (fa->fa_state & FA_S_ACCESSED)
167276e6ebfbSDenis V. Lunev 		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
167319baf839SRobert Olsson 
16742373ce1cSRobert Olsson 	fib_release_info(fa->fa_info);
16752373ce1cSRobert Olsson 	alias_free_mem_rcu(fa);
167619baf839SRobert Olsson 	return 0;
167719baf839SRobert Olsson }
167819baf839SRobert Olsson 
1679ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head)
168019baf839SRobert Olsson {
168119baf839SRobert Olsson 	struct fib_alias *fa, *fa_node;
168219baf839SRobert Olsson 	int found = 0;
168319baf839SRobert Olsson 
168419baf839SRobert Olsson 	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
168519baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
168619baf839SRobert Olsson 
168719baf839SRobert Olsson 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
16882373ce1cSRobert Olsson 			list_del_rcu(&fa->fa_list);
16892373ce1cSRobert Olsson 			fib_release_info(fa->fa_info);
16902373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
169119baf839SRobert Olsson 			found++;
169219baf839SRobert Olsson 		}
169319baf839SRobert Olsson 	}
169419baf839SRobert Olsson 	return found;
169519baf839SRobert Olsson }
169619baf839SRobert Olsson 
1697ef3660ceSStephen Hemminger static int trie_flush_leaf(struct leaf *l)
169819baf839SRobert Olsson {
169919baf839SRobert Olsson 	int found = 0;
170019baf839SRobert Olsson 	struct hlist_head *lih = &l->list;
170119baf839SRobert Olsson 	struct hlist_node *node, *tmp;
170219baf839SRobert Olsson 	struct leaf_info *li = NULL;
170319baf839SRobert Olsson 
170419baf839SRobert Olsson 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1705ef3660ceSStephen Hemminger 		found += trie_flush_list(&li->falh);
170619baf839SRobert Olsson 
170719baf839SRobert Olsson 		if (list_empty(&li->falh)) {
17082373ce1cSRobert Olsson 			hlist_del_rcu(&li->hlist);
170919baf839SRobert Olsson 			free_leaf_info(li);
171019baf839SRobert Olsson 		}
171119baf839SRobert Olsson 	}
171219baf839SRobert Olsson 	return found;
171319baf839SRobert Olsson }
171419baf839SRobert Olsson 
171582cfbb00SStephen Hemminger /*
171682cfbb00SStephen Hemminger  * Scan for the next right leaf starting at node p->child[idx]
171782cfbb00SStephen Hemminger  * Since we have back pointer, no recursion necessary.
171882cfbb00SStephen Hemminger  */
171982cfbb00SStephen Hemminger static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
172019baf839SRobert Olsson {
172182cfbb00SStephen Hemminger 	do {
172282cfbb00SStephen Hemminger 		t_key idx;
172319baf839SRobert Olsson 
172419baf839SRobert Olsson 		if (c)
172582cfbb00SStephen Hemminger 			idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
172619baf839SRobert Olsson 		else
172782cfbb00SStephen Hemminger 			idx = 0;
172819baf839SRobert Olsson 
172982cfbb00SStephen Hemminger 		while (idx < 1u << p->bits) {
173082cfbb00SStephen Hemminger 			c = tnode_get_child_rcu(p, idx++);
17312373ce1cSRobert Olsson 			if (!c)
173291b9a277SOlof Johansson 				continue;
173319baf839SRobert Olsson 
173482cfbb00SStephen Hemminger 			if (IS_LEAF(c)) {
173582cfbb00SStephen Hemminger 				prefetch(p->child[idx]);
17362373ce1cSRobert Olsson 				return (struct leaf *) c;
173719baf839SRobert Olsson 			}
173882cfbb00SStephen Hemminger 
173982cfbb00SStephen Hemminger 			/* Rescan start scanning in new node */
174082cfbb00SStephen Hemminger 			p = (struct tnode *) c;
174182cfbb00SStephen Hemminger 			idx = 0;
174219baf839SRobert Olsson 		}
174382cfbb00SStephen Hemminger 
174482cfbb00SStephen Hemminger 		/* Node empty, walk back up to parent */
174582cfbb00SStephen Hemminger 		c = (struct node *) p;
174682cfbb00SStephen Hemminger 	} while ( (p = node_parent_rcu(c)) != NULL);
174782cfbb00SStephen Hemminger 
174882cfbb00SStephen Hemminger 	return NULL; /* Root of trie */
174982cfbb00SStephen Hemminger }
175082cfbb00SStephen Hemminger 
175182cfbb00SStephen Hemminger static struct leaf *trie_firstleaf(struct trie *t)
175282cfbb00SStephen Hemminger {
175382cfbb00SStephen Hemminger 	struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
175482cfbb00SStephen Hemminger 
175582cfbb00SStephen Hemminger 	if (!n)
175682cfbb00SStephen Hemminger 		return NULL;
175782cfbb00SStephen Hemminger 
175882cfbb00SStephen Hemminger 	if (IS_LEAF(n))          /* trie is just a leaf */
175982cfbb00SStephen Hemminger 		return (struct leaf *) n;
176082cfbb00SStephen Hemminger 
176182cfbb00SStephen Hemminger 	return leaf_walk_rcu(n, NULL);
176282cfbb00SStephen Hemminger }
176382cfbb00SStephen Hemminger 
176482cfbb00SStephen Hemminger static struct leaf *trie_nextleaf(struct leaf *l)
176582cfbb00SStephen Hemminger {
176682cfbb00SStephen Hemminger 	struct node *c = (struct node *) l;
1767b902e573SJarek Poplawski 	struct tnode *p = node_parent_rcu(c);
176882cfbb00SStephen Hemminger 
176982cfbb00SStephen Hemminger 	if (!p)
177082cfbb00SStephen Hemminger 		return NULL;	/* trie with just one leaf */
177182cfbb00SStephen Hemminger 
177282cfbb00SStephen Hemminger 	return leaf_walk_rcu(p, c);
177319baf839SRobert Olsson }
177419baf839SRobert Olsson 
177571d67e66SStephen Hemminger static struct leaf *trie_leafindex(struct trie *t, int index)
177671d67e66SStephen Hemminger {
177771d67e66SStephen Hemminger 	struct leaf *l = trie_firstleaf(t);
177871d67e66SStephen Hemminger 
1779ec28cf73SStephen Hemminger 	while (l && index-- > 0)
178071d67e66SStephen Hemminger 		l = trie_nextleaf(l);
1781ec28cf73SStephen Hemminger 
178271d67e66SStephen Hemminger 	return l;
178371d67e66SStephen Hemminger }
178471d67e66SStephen Hemminger 
178571d67e66SStephen Hemminger 
1786d562f1f8SRobert Olsson /*
1787d562f1f8SRobert Olsson  * Caller must hold RTNL.
1788d562f1f8SRobert Olsson  */
178919baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb)
179019baf839SRobert Olsson {
179119baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
17929195bef7SStephen Hemminger 	struct leaf *l, *ll = NULL;
179382cfbb00SStephen Hemminger 	int found = 0;
179419baf839SRobert Olsson 
179582cfbb00SStephen Hemminger 	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1796ef3660ceSStephen Hemminger 		found += trie_flush_leaf(l);
179719baf839SRobert Olsson 
179819baf839SRobert Olsson 		if (ll && hlist_empty(&ll->list))
17999195bef7SStephen Hemminger 			trie_leaf_remove(t, ll);
180019baf839SRobert Olsson 		ll = l;
180119baf839SRobert Olsson 	}
180219baf839SRobert Olsson 
180319baf839SRobert Olsson 	if (ll && hlist_empty(&ll->list))
18049195bef7SStephen Hemminger 		trie_leaf_remove(t, ll);
180519baf839SRobert Olsson 
18060c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
180719baf839SRobert Olsson 	return found;
180819baf839SRobert Olsson }
180919baf839SRobert Olsson 
1810a07f5f50SStephen Hemminger static void fn_trie_select_default(struct fib_table *tb,
1811a07f5f50SStephen Hemminger 				   const struct flowi *flp,
1812a07f5f50SStephen Hemminger 				   struct fib_result *res)
181319baf839SRobert Olsson {
181419baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
181519baf839SRobert Olsson 	int order, last_idx;
181619baf839SRobert Olsson 	struct fib_info *fi = NULL;
181719baf839SRobert Olsson 	struct fib_info *last_resort;
181819baf839SRobert Olsson 	struct fib_alias *fa = NULL;
181919baf839SRobert Olsson 	struct list_head *fa_head;
182019baf839SRobert Olsson 	struct leaf *l;
182119baf839SRobert Olsson 
182219baf839SRobert Olsson 	last_idx = -1;
182319baf839SRobert Olsson 	last_resort = NULL;
182419baf839SRobert Olsson 	order = -1;
182519baf839SRobert Olsson 
18262373ce1cSRobert Olsson 	rcu_read_lock();
182719baf839SRobert Olsson 
182819baf839SRobert Olsson 	l = fib_find_node(t, 0);
182919baf839SRobert Olsson 	if (!l)
183019baf839SRobert Olsson 		goto out;
183119baf839SRobert Olsson 
183219baf839SRobert Olsson 	fa_head = get_fa_head(l, 0);
183319baf839SRobert Olsson 	if (!fa_head)
183419baf839SRobert Olsson 		goto out;
183519baf839SRobert Olsson 
183619baf839SRobert Olsson 	if (list_empty(fa_head))
183719baf839SRobert Olsson 		goto out;
183819baf839SRobert Olsson 
18392373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
184019baf839SRobert Olsson 		struct fib_info *next_fi = fa->fa_info;
184119baf839SRobert Olsson 
184219baf839SRobert Olsson 		if (fa->fa_scope != res->scope ||
184319baf839SRobert Olsson 		    fa->fa_type != RTN_UNICAST)
184419baf839SRobert Olsson 			continue;
184519baf839SRobert Olsson 
184619baf839SRobert Olsson 		if (next_fi->fib_priority > res->fi->fib_priority)
184719baf839SRobert Olsson 			break;
184819baf839SRobert Olsson 		if (!next_fi->fib_nh[0].nh_gw ||
184919baf839SRobert Olsson 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
185019baf839SRobert Olsson 			continue;
185119baf839SRobert Olsson 		fa->fa_state |= FA_S_ACCESSED;
185219baf839SRobert Olsson 
185319baf839SRobert Olsson 		if (fi == NULL) {
185419baf839SRobert Olsson 			if (next_fi != res->fi)
185519baf839SRobert Olsson 				break;
185619baf839SRobert Olsson 		} else if (!fib_detect_death(fi, order, &last_resort,
1857971b893eSDenis V. Lunev 					     &last_idx, tb->tb_default)) {
1858a2bbe682SDenis V. Lunev 			fib_result_assign(res, fi);
1859971b893eSDenis V. Lunev 			tb->tb_default = order;
186019baf839SRobert Olsson 			goto out;
186119baf839SRobert Olsson 		}
186219baf839SRobert Olsson 		fi = next_fi;
186319baf839SRobert Olsson 		order++;
186419baf839SRobert Olsson 	}
186519baf839SRobert Olsson 	if (order <= 0 || fi == NULL) {
1866971b893eSDenis V. Lunev 		tb->tb_default = -1;
186719baf839SRobert Olsson 		goto out;
186819baf839SRobert Olsson 	}
186919baf839SRobert Olsson 
1870971b893eSDenis V. Lunev 	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1871971b893eSDenis V. Lunev 				tb->tb_default)) {
1872a2bbe682SDenis V. Lunev 		fib_result_assign(res, fi);
1873971b893eSDenis V. Lunev 		tb->tb_default = order;
187419baf839SRobert Olsson 		goto out;
187519baf839SRobert Olsson 	}
1876a2bbe682SDenis V. Lunev 	if (last_idx >= 0)
1877a2bbe682SDenis V. Lunev 		fib_result_assign(res, last_resort);
1878971b893eSDenis V. Lunev 	tb->tb_default = last_idx;
1879971b893eSDenis V. Lunev out:
18802373ce1cSRobert Olsson 	rcu_read_unlock();
188119baf839SRobert Olsson }
188219baf839SRobert Olsson 
1883a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1884a07f5f50SStephen Hemminger 			   struct fib_table *tb,
188519baf839SRobert Olsson 			   struct sk_buff *skb, struct netlink_callback *cb)
188619baf839SRobert Olsson {
188719baf839SRobert Olsson 	int i, s_i;
188819baf839SRobert Olsson 	struct fib_alias *fa;
188932ab5f80SAl Viro 	__be32 xkey = htonl(key);
189019baf839SRobert Olsson 
189171d67e66SStephen Hemminger 	s_i = cb->args[5];
189219baf839SRobert Olsson 	i = 0;
189319baf839SRobert Olsson 
18942373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
18952373ce1cSRobert Olsson 
18962373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fah, fa_list) {
189719baf839SRobert Olsson 		if (i < s_i) {
189819baf839SRobert Olsson 			i++;
189919baf839SRobert Olsson 			continue;
190019baf839SRobert Olsson 		}
190119baf839SRobert Olsson 
190219baf839SRobert Olsson 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
190319baf839SRobert Olsson 				  cb->nlh->nlmsg_seq,
190419baf839SRobert Olsson 				  RTM_NEWROUTE,
190519baf839SRobert Olsson 				  tb->tb_id,
190619baf839SRobert Olsson 				  fa->fa_type,
190719baf839SRobert Olsson 				  fa->fa_scope,
1908be403ea1SThomas Graf 				  xkey,
190919baf839SRobert Olsson 				  plen,
191019baf839SRobert Olsson 				  fa->fa_tos,
191164347f78SStephen Hemminger 				  fa->fa_info, NLM_F_MULTI) < 0) {
191271d67e66SStephen Hemminger 			cb->args[5] = i;
191319baf839SRobert Olsson 			return -1;
191419baf839SRobert Olsson 		}
191519baf839SRobert Olsson 		i++;
191619baf839SRobert Olsson 	}
191771d67e66SStephen Hemminger 	cb->args[5] = i;
191819baf839SRobert Olsson 	return skb->len;
191919baf839SRobert Olsson }
192019baf839SRobert Olsson 
1921a88ee229SStephen Hemminger static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1922a07f5f50SStephen Hemminger 			struct sk_buff *skb, struct netlink_callback *cb)
192319baf839SRobert Olsson {
1924a88ee229SStephen Hemminger 	struct leaf_info *li;
1925a88ee229SStephen Hemminger 	struct hlist_node *node;
1926a88ee229SStephen Hemminger 	int i, s_i;
192791b9a277SOlof Johansson 
192871d67e66SStephen Hemminger 	s_i = cb->args[4];
1929a88ee229SStephen Hemminger 	i = 0;
193019baf839SRobert Olsson 
1931a88ee229SStephen Hemminger 	/* rcu_read_lock is hold by caller */
1932a88ee229SStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1933a88ee229SStephen Hemminger 		if (i < s_i) {
1934a88ee229SStephen Hemminger 			i++;
193519baf839SRobert Olsson 			continue;
1936a88ee229SStephen Hemminger 		}
193719baf839SRobert Olsson 
1938a88ee229SStephen Hemminger 		if (i > s_i)
193971d67e66SStephen Hemminger 			cb->args[5] = 0;
194019baf839SRobert Olsson 
1941a88ee229SStephen Hemminger 		if (list_empty(&li->falh))
194219baf839SRobert Olsson 			continue;
194319baf839SRobert Olsson 
1944a88ee229SStephen Hemminger 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
194571d67e66SStephen Hemminger 			cb->args[4] = i;
194619baf839SRobert Olsson 			return -1;
194719baf839SRobert Olsson 		}
1948a88ee229SStephen Hemminger 		i++;
194919baf839SRobert Olsson 	}
1950a88ee229SStephen Hemminger 
195171d67e66SStephen Hemminger 	cb->args[4] = i;
195219baf839SRobert Olsson 	return skb->len;
195319baf839SRobert Olsson }
195419baf839SRobert Olsson 
1955a07f5f50SStephen Hemminger static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1956a07f5f50SStephen Hemminger 			struct netlink_callback *cb)
195719baf839SRobert Olsson {
1958a88ee229SStephen Hemminger 	struct leaf *l;
195919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
1960d5ce8a0eSStephen Hemminger 	t_key key = cb->args[2];
196171d67e66SStephen Hemminger 	int count = cb->args[3];
196219baf839SRobert Olsson 
19632373ce1cSRobert Olsson 	rcu_read_lock();
1964d5ce8a0eSStephen Hemminger 	/* Dump starting at last key.
1965d5ce8a0eSStephen Hemminger 	 * Note: 0.0.0.0/0 (ie default) is first key.
1966d5ce8a0eSStephen Hemminger 	 */
196771d67e66SStephen Hemminger 	if (count == 0)
1968d5ce8a0eSStephen Hemminger 		l = trie_firstleaf(t);
1969d5ce8a0eSStephen Hemminger 	else {
197071d67e66SStephen Hemminger 		/* Normally, continue from last key, but if that is missing
197171d67e66SStephen Hemminger 		 * fallback to using slow rescan
1972d5ce8a0eSStephen Hemminger 		 */
197371d67e66SStephen Hemminger 		l = fib_find_node(t, key);
197471d67e66SStephen Hemminger 		if (!l)
197571d67e66SStephen Hemminger 			l = trie_leafindex(t, count);
197619baf839SRobert Olsson 	}
1977a88ee229SStephen Hemminger 
1978d5ce8a0eSStephen Hemminger 	while (l) {
1979d5ce8a0eSStephen Hemminger 		cb->args[2] = l->key;
1980a88ee229SStephen Hemminger 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
198171d67e66SStephen Hemminger 			cb->args[3] = count;
19822373ce1cSRobert Olsson 			rcu_read_unlock();
198319baf839SRobert Olsson 			return -1;
198419baf839SRobert Olsson 		}
1985d5ce8a0eSStephen Hemminger 
198671d67e66SStephen Hemminger 		++count;
1987d5ce8a0eSStephen Hemminger 		l = trie_nextleaf(l);
198871d67e66SStephen Hemminger 		memset(&cb->args[4], 0,
198971d67e66SStephen Hemminger 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
1990a88ee229SStephen Hemminger 	}
199171d67e66SStephen Hemminger 	cb->args[3] = count;
1992a88ee229SStephen Hemminger 	rcu_read_unlock();
1993a88ee229SStephen Hemminger 
1994a88ee229SStephen Hemminger 	return skb->len;
1995a88ee229SStephen Hemminger }
199619baf839SRobert Olsson 
19977f9b8052SStephen Hemminger void __init fib_hash_init(void)
19987f9b8052SStephen Hemminger {
1999a07f5f50SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2000a07f5f50SStephen Hemminger 					  sizeof(struct fib_alias),
2001bc3c8c1eSStephen Hemminger 					  0, SLAB_PANIC, NULL);
2002bc3c8c1eSStephen Hemminger 
2003bc3c8c1eSStephen Hemminger 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2004bc3c8c1eSStephen Hemminger 					   max(sizeof(struct leaf),
2005bc3c8c1eSStephen Hemminger 					       sizeof(struct leaf_info)),
2006bc3c8c1eSStephen Hemminger 					   0, SLAB_PANIC, NULL);
20077f9b8052SStephen Hemminger }
200819baf839SRobert Olsson 
20097f9b8052SStephen Hemminger 
20107f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */
20117f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id)
201219baf839SRobert Olsson {
201319baf839SRobert Olsson 	struct fib_table *tb;
201419baf839SRobert Olsson 	struct trie *t;
201519baf839SRobert Olsson 
201619baf839SRobert Olsson 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
201719baf839SRobert Olsson 		     GFP_KERNEL);
201819baf839SRobert Olsson 	if (tb == NULL)
201919baf839SRobert Olsson 		return NULL;
202019baf839SRobert Olsson 
202119baf839SRobert Olsson 	tb->tb_id = id;
2022971b893eSDenis V. Lunev 	tb->tb_default = -1;
202319baf839SRobert Olsson 	tb->tb_lookup = fn_trie_lookup;
202419baf839SRobert Olsson 	tb->tb_insert = fn_trie_insert;
202519baf839SRobert Olsson 	tb->tb_delete = fn_trie_delete;
202619baf839SRobert Olsson 	tb->tb_flush = fn_trie_flush;
202719baf839SRobert Olsson 	tb->tb_select_default = fn_trie_select_default;
202819baf839SRobert Olsson 	tb->tb_dump = fn_trie_dump;
202919baf839SRobert Olsson 
203019baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
2031c28a1cf4SStephen Hemminger 	memset(t, 0, sizeof(*t));
203219baf839SRobert Olsson 
203319baf839SRobert Olsson 	if (id == RT_TABLE_LOCAL)
2034a07f5f50SStephen Hemminger 		pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
203519baf839SRobert Olsson 
203619baf839SRobert Olsson 	return tb;
203719baf839SRobert Olsson }
203819baf839SRobert Olsson 
2039cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
2040cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
2041cb7b593cSStephen Hemminger struct fib_trie_iter {
20421c340b2fSDenis V. Lunev 	struct seq_net_private p;
20433d3b2d25SStephen Hemminger 	struct fib_table *tb;
2044cb7b593cSStephen Hemminger 	struct tnode *tnode;
2045cb7b593cSStephen Hemminger 	unsigned index;
2046cb7b593cSStephen Hemminger 	unsigned depth;
2047cb7b593cSStephen Hemminger };
204819baf839SRobert Olsson 
2049cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
205019baf839SRobert Olsson {
2051cb7b593cSStephen Hemminger 	struct tnode *tn = iter->tnode;
2052cb7b593cSStephen Hemminger 	unsigned cindex = iter->index;
2053cb7b593cSStephen Hemminger 	struct tnode *p;
205419baf839SRobert Olsson 
20556640e697SEric W. Biederman 	/* A single entry routing table */
20566640e697SEric W. Biederman 	if (!tn)
20576640e697SEric W. Biederman 		return NULL;
20586640e697SEric W. Biederman 
2059cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2060cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
2061cb7b593cSStephen Hemminger rescan:
2062cb7b593cSStephen Hemminger 	while (cindex < (1<<tn->bits)) {
2063b59cfbf7SEric Dumazet 		struct node *n = tnode_get_child_rcu(tn, cindex);
206419baf839SRobert Olsson 
2065cb7b593cSStephen Hemminger 		if (n) {
206619baf839SRobert Olsson 			if (IS_LEAF(n)) {
2067cb7b593cSStephen Hemminger 				iter->tnode = tn;
2068cb7b593cSStephen Hemminger 				iter->index = cindex + 1;
206991b9a277SOlof Johansson 			} else {
2070cb7b593cSStephen Hemminger 				/* push down one level */
2071cb7b593cSStephen Hemminger 				iter->tnode = (struct tnode *) n;
2072cb7b593cSStephen Hemminger 				iter->index = 0;
2073cb7b593cSStephen Hemminger 				++iter->depth;
207419baf839SRobert Olsson 			}
2075cb7b593cSStephen Hemminger 			return n;
207619baf839SRobert Olsson 		}
207719baf839SRobert Olsson 
2078cb7b593cSStephen Hemminger 		++cindex;
2079cb7b593cSStephen Hemminger 	}
2080cb7b593cSStephen Hemminger 
2081cb7b593cSStephen Hemminger 	/* Current node exhausted, pop back up */
2082b59cfbf7SEric Dumazet 	p = node_parent_rcu((struct node *)tn);
2083cb7b593cSStephen Hemminger 	if (p) {
2084cb7b593cSStephen Hemminger 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2085cb7b593cSStephen Hemminger 		tn = p;
2086cb7b593cSStephen Hemminger 		--iter->depth;
2087cb7b593cSStephen Hemminger 		goto rescan;
2088cb7b593cSStephen Hemminger 	}
2089cb7b593cSStephen Hemminger 
2090cb7b593cSStephen Hemminger 	/* got root? */
2091cb7b593cSStephen Hemminger 	return NULL;
2092cb7b593cSStephen Hemminger }
2093cb7b593cSStephen Hemminger 
2094cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2095cb7b593cSStephen Hemminger 				       struct trie *t)
2096cb7b593cSStephen Hemminger {
20975ddf0eb2SRobert Olsson 	struct node *n;
20985ddf0eb2SRobert Olsson 
20995ddf0eb2SRobert Olsson 	if (!t)
21005ddf0eb2SRobert Olsson 		return NULL;
21015ddf0eb2SRobert Olsson 
21025ddf0eb2SRobert Olsson 	n = rcu_dereference(t->trie);
21033d3b2d25SStephen Hemminger 	if (!n)
21045ddf0eb2SRobert Olsson 		return NULL;
2105cb7b593cSStephen Hemminger 
21066640e697SEric W. Biederman 	if (IS_TNODE(n)) {
2107cb7b593cSStephen Hemminger 		iter->tnode = (struct tnode *) n;
2108cb7b593cSStephen Hemminger 		iter->index = 0;
21091d25cd6cSRobert Olsson 		iter->depth = 1;
21106640e697SEric W. Biederman 	} else {
21116640e697SEric W. Biederman 		iter->tnode = NULL;
21126640e697SEric W. Biederman 		iter->index = 0;
21136640e697SEric W. Biederman 		iter->depth = 0;
21146640e697SEric W. Biederman 	}
21153d3b2d25SStephen Hemminger 
2116cb7b593cSStephen Hemminger 	return n;
2117cb7b593cSStephen Hemminger }
2118cb7b593cSStephen Hemminger 
2119cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
212019baf839SRobert Olsson {
21212373ce1cSRobert Olsson 	struct node *n;
2122cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2123cb7b593cSStephen Hemminger 
2124cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
212519baf839SRobert Olsson 
21262373ce1cSRobert Olsson 	rcu_read_lock();
21273d3b2d25SStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2128cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
212993672292SStephen Hemminger 			struct leaf *l = (struct leaf *)n;
213093672292SStephen Hemminger 			struct leaf_info *li;
213193672292SStephen Hemminger 			struct hlist_node *tmp;
213293672292SStephen Hemminger 
213319baf839SRobert Olsson 			s->leaves++;
2134cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2135cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2136cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
213793672292SStephen Hemminger 
213893672292SStephen Hemminger 			hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
213993672292SStephen Hemminger 				++s->prefixes;
214091b9a277SOlof Johansson 		} else {
2141cb7b593cSStephen Hemminger 			const struct tnode *tn = (const struct tnode *) n;
2142cb7b593cSStephen Hemminger 			int i;
214319baf839SRobert Olsson 
214419baf839SRobert Olsson 			s->tnodes++;
214506ef921dSRobert Olsson 			if (tn->bits < MAX_STAT_DEPTH)
214619baf839SRobert Olsson 				s->nodesizes[tn->bits]++;
214706ef921dSRobert Olsson 
2148cb7b593cSStephen Hemminger 			for (i = 0; i < (1<<tn->bits); i++)
2149cb7b593cSStephen Hemminger 				if (!tn->child[i])
215019baf839SRobert Olsson 					s->nullpointers++;
215119baf839SRobert Olsson 		}
215219baf839SRobert Olsson 	}
21532373ce1cSRobert Olsson 	rcu_read_unlock();
215419baf839SRobert Olsson }
215519baf839SRobert Olsson 
215619baf839SRobert Olsson /*
215719baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
215819baf839SRobert Olsson  */
2159cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
216019baf839SRobert Olsson {
2161cb7b593cSStephen Hemminger 	unsigned i, max, pointers, bytes, avdepth;
216219baf839SRobert Olsson 
216319baf839SRobert Olsson 	if (stat->leaves)
216419baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
216519baf839SRobert Olsson 	else
216619baf839SRobert Olsson 		avdepth = 0;
216719baf839SRobert Olsson 
2168a07f5f50SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n",
2169a07f5f50SStephen Hemminger 		   avdepth / 100, avdepth % 100);
2170cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2171cb7b593cSStephen Hemminger 
2172cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
2173cb7b593cSStephen Hemminger 	bytes = sizeof(struct leaf) * stat->leaves;
217493672292SStephen Hemminger 
217593672292SStephen Hemminger 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
217693672292SStephen Hemminger 	bytes += sizeof(struct leaf_info) * stat->prefixes;
217793672292SStephen Hemminger 
2178187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
217919baf839SRobert Olsson 	bytes += sizeof(struct tnode) * stat->tnodes;
218019baf839SRobert Olsson 
218106ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
218206ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
218319baf839SRobert Olsson 		max--;
218419baf839SRobert Olsson 
2185cb7b593cSStephen Hemminger 	pointers = 0;
218619baf839SRobert Olsson 	for (i = 1; i <= max; i++)
218719baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2188187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
218919baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
219019baf839SRobert Olsson 		}
2191cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2192187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2193cb7b593cSStephen Hemminger 
219419baf839SRobert Olsson 	bytes += sizeof(struct node *) * pointers;
2195187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2196187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
219766a2f7fdSStephen Hemminger }
219819baf839SRobert Olsson 
219919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
220066a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
220166a2f7fdSStephen Hemminger 			    const struct trie_use_stats *stats)
220266a2f7fdSStephen Hemminger {
220366a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
220466a2f7fdSStephen Hemminger 	seq_printf(seq, "gets = %u\n", stats->gets);
220566a2f7fdSStephen Hemminger 	seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2206a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match passed = %u\n",
2207a07f5f50SStephen Hemminger 		   stats->semantic_match_passed);
2208a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match miss = %u\n",
2209a07f5f50SStephen Hemminger 		   stats->semantic_match_miss);
221066a2f7fdSStephen Hemminger 	seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2211a07f5f50SStephen Hemminger 	seq_printf(seq, "skipped node resize = %u\n\n",
2212a07f5f50SStephen Hemminger 		   stats->resize_node_skipped);
221319baf839SRobert Olsson }
221466a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
221566a2f7fdSStephen Hemminger 
22163d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2217d717a9a6SStephen Hemminger {
22183d3b2d25SStephen Hemminger 	if (tb->tb_id == RT_TABLE_LOCAL)
22193d3b2d25SStephen Hemminger 		seq_puts(seq, "Local:\n");
22203d3b2d25SStephen Hemminger 	else if (tb->tb_id == RT_TABLE_MAIN)
22213d3b2d25SStephen Hemminger 		seq_puts(seq, "Main:\n");
22223d3b2d25SStephen Hemminger 	else
22233d3b2d25SStephen Hemminger 		seq_printf(seq, "Id %d:\n", tb->tb_id);
2224d717a9a6SStephen Hemminger }
222519baf839SRobert Olsson 
22263d3b2d25SStephen Hemminger 
222719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
222819baf839SRobert Olsson {
22291c340b2fSDenis V. Lunev 	struct net *net = (struct net *)seq->private;
22303d3b2d25SStephen Hemminger 	unsigned int h;
2231877a9bffSEric W. Biederman 
2232d717a9a6SStephen Hemminger 	seq_printf(seq,
2233a07f5f50SStephen Hemminger 		   "Basic info: size of leaf:"
2234a07f5f50SStephen Hemminger 		   " %Zd bytes, size of tnode: %Zd bytes.\n",
223519baf839SRobert Olsson 		   sizeof(struct leaf), sizeof(struct tnode));
223619baf839SRobert Olsson 
22373d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
22383d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22393d3b2d25SStephen Hemminger 		struct hlist_node *node;
22403d3b2d25SStephen Hemminger 		struct fib_table *tb;
2241cb7b593cSStephen Hemminger 
22423d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
22433d3b2d25SStephen Hemminger 			struct trie *t = (struct trie *) tb->tb_data;
22443d3b2d25SStephen Hemminger 			struct trie_stat stat;
22453d3b2d25SStephen Hemminger 
22463d3b2d25SStephen Hemminger 			if (!t)
22473d3b2d25SStephen Hemminger 				continue;
22483d3b2d25SStephen Hemminger 
22493d3b2d25SStephen Hemminger 			fib_table_print(seq, tb);
22503d3b2d25SStephen Hemminger 
22513d3b2d25SStephen Hemminger 			trie_collect_stats(t, &stat);
22523d3b2d25SStephen Hemminger 			trie_show_stats(seq, &stat);
22533d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
22543d3b2d25SStephen Hemminger 			trie_show_usage(seq, &t->stats);
22553d3b2d25SStephen Hemminger #endif
22563d3b2d25SStephen Hemminger 		}
22573d3b2d25SStephen Hemminger 	}
2258cb7b593cSStephen Hemminger 
225919baf839SRobert Olsson 	return 0;
226019baf839SRobert Olsson }
226119baf839SRobert Olsson 
226219baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file)
226319baf839SRobert Olsson {
2264de05c557SPavel Emelyanov 	return single_open_net(inode, file, fib_triestat_seq_show);
22651c340b2fSDenis V. Lunev }
22661c340b2fSDenis V. Lunev 
22679a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = {
226819baf839SRobert Olsson 	.owner	= THIS_MODULE,
226919baf839SRobert Olsson 	.open	= fib_triestat_seq_open,
227019baf839SRobert Olsson 	.read	= seq_read,
227119baf839SRobert Olsson 	.llseek	= seq_lseek,
2272b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
227319baf839SRobert Olsson };
227419baf839SRobert Olsson 
22751218854aSYOSHIFUJI Hideaki static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
227619baf839SRobert Olsson {
22771218854aSYOSHIFUJI Hideaki 	struct fib_trie_iter *iter = seq->private;
22781218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
2279cb7b593cSStephen Hemminger 	loff_t idx = 0;
22803d3b2d25SStephen Hemminger 	unsigned int h;
22813d3b2d25SStephen Hemminger 
22823d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
22833d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22843d3b2d25SStephen Hemminger 		struct hlist_node *node;
22853d3b2d25SStephen Hemminger 		struct fib_table *tb;
22863d3b2d25SStephen Hemminger 
22873d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2288cb7b593cSStephen Hemminger 			struct node *n;
2289cb7b593cSStephen Hemminger 
22903d3b2d25SStephen Hemminger 			for (n = fib_trie_get_first(iter,
22913d3b2d25SStephen Hemminger 						    (struct trie *) tb->tb_data);
22923d3b2d25SStephen Hemminger 			     n; n = fib_trie_get_next(iter))
22933d3b2d25SStephen Hemminger 				if (pos == idx++) {
22943d3b2d25SStephen Hemminger 					iter->tb = tb;
2295cb7b593cSStephen Hemminger 					return n;
229619baf839SRobert Olsson 				}
22973d3b2d25SStephen Hemminger 		}
22983d3b2d25SStephen Hemminger 	}
229919baf839SRobert Olsson 
230019baf839SRobert Olsson 	return NULL;
230119baf839SRobert Olsson }
230219baf839SRobert Olsson 
230319baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2304c95aaf9aSStephen Hemminger 	__acquires(RCU)
230519baf839SRobert Olsson {
2306cb7b593cSStephen Hemminger 	rcu_read_lock();
23071218854aSYOSHIFUJI Hideaki 	return fib_trie_get_idx(seq, *pos);
230819baf839SRobert Olsson }
230919baf839SRobert Olsson 
231019baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
231119baf839SRobert Olsson {
2312cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
23131218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
23143d3b2d25SStephen Hemminger 	struct fib_table *tb = iter->tb;
23153d3b2d25SStephen Hemminger 	struct hlist_node *tb_node;
23163d3b2d25SStephen Hemminger 	unsigned int h;
23173d3b2d25SStephen Hemminger 	struct node *n;
2318cb7b593cSStephen Hemminger 
231919baf839SRobert Olsson 	++*pos;
23203d3b2d25SStephen Hemminger 	/* next node in same table */
23213d3b2d25SStephen Hemminger 	n = fib_trie_get_next(iter);
23223d3b2d25SStephen Hemminger 	if (n)
23233d3b2d25SStephen Hemminger 		return n;
232491b9a277SOlof Johansson 
23253d3b2d25SStephen Hemminger 	/* walk rest of this hash chain */
23263d3b2d25SStephen Hemminger 	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
23273d3b2d25SStephen Hemminger 	while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
23283d3b2d25SStephen Hemminger 		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
23293d3b2d25SStephen Hemminger 		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
23303d3b2d25SStephen Hemminger 		if (n)
23313d3b2d25SStephen Hemminger 			goto found;
23323d3b2d25SStephen Hemminger 	}
2333cb7b593cSStephen Hemminger 
23343d3b2d25SStephen Hemminger 	/* new hash chain */
23353d3b2d25SStephen Hemminger 	while (++h < FIB_TABLE_HASHSZ) {
23363d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
23373d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
23383d3b2d25SStephen Hemminger 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
23393d3b2d25SStephen Hemminger 			if (n)
23403d3b2d25SStephen Hemminger 				goto found;
23413d3b2d25SStephen Hemminger 		}
23423d3b2d25SStephen Hemminger 	}
2343cb7b593cSStephen Hemminger 	return NULL;
23443d3b2d25SStephen Hemminger 
23453d3b2d25SStephen Hemminger found:
23463d3b2d25SStephen Hemminger 	iter->tb = tb;
23473d3b2d25SStephen Hemminger 	return n;
234819baf839SRobert Olsson }
234919baf839SRobert Olsson 
235019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2351c95aaf9aSStephen Hemminger 	__releases(RCU)
235219baf839SRobert Olsson {
2353cb7b593cSStephen Hemminger 	rcu_read_unlock();
235419baf839SRobert Olsson }
235519baf839SRobert Olsson 
2356cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2357cb7b593cSStephen Hemminger {
2358cb7b593cSStephen Hemminger 	while (n-- > 0) seq_puts(seq, "   ");
2359cb7b593cSStephen Hemminger }
236019baf839SRobert Olsson 
236128d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2362cb7b593cSStephen Hemminger {
2363cb7b593cSStephen Hemminger 	switch (s) {
2364cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2365cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2366cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2367cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2368cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2369cb7b593cSStephen Hemminger 	default:
237028d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2371cb7b593cSStephen Hemminger 		return buf;
2372cb7b593cSStephen Hemminger 	}
2373cb7b593cSStephen Hemminger }
2374cb7b593cSStephen Hemminger 
237536cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = {
2376cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2377cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2378cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2379cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2380cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2381cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2382cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2383cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2384cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2385cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2386cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2387cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2388cb7b593cSStephen Hemminger };
2389cb7b593cSStephen Hemminger 
239028d36e37SEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned t)
2391cb7b593cSStephen Hemminger {
2392cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2393cb7b593cSStephen Hemminger 		return rtn_type_names[t];
239428d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2395cb7b593cSStephen Hemminger 	return buf;
2396cb7b593cSStephen Hemminger }
2397cb7b593cSStephen Hemminger 
2398cb7b593cSStephen Hemminger /* Pretty print the trie */
239919baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
240019baf839SRobert Olsson {
2401cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
2402cb7b593cSStephen Hemminger 	struct node *n = v;
240319baf839SRobert Olsson 
24043d3b2d25SStephen Hemminger 	if (!node_parent_rcu(n))
24053d3b2d25SStephen Hemminger 		fib_table_print(seq, iter->tb);
2406095b8501SRobert Olsson 
2407095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2408095b8501SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
2409ab66b4a7SStephen Hemminger 		__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
2410095b8501SRobert Olsson 
24111d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
2412673d57e7SHarvey Harrison 		seq_printf(seq, "  +-- %pI4/%d %d %d %d\n",
2413673d57e7SHarvey Harrison 			   &prf, tn->pos, tn->bits, tn->full_children,
24141d25cd6cSRobert Olsson 			   tn->empty_children);
24151d25cd6cSRobert Olsson 
2416cb7b593cSStephen Hemminger 	} else {
2417cb7b593cSStephen Hemminger 		struct leaf *l = (struct leaf *) n;
24181328042eSStephen Hemminger 		struct leaf_info *li;
24191328042eSStephen Hemminger 		struct hlist_node *node;
242032ab5f80SAl Viro 		__be32 val = htonl(l->key);
2421cb7b593cSStephen Hemminger 
2422cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2423673d57e7SHarvey Harrison 		seq_printf(seq, "  |-- %pI4\n", &val);
242428d36e37SEric Dumazet 
24251328042eSStephen Hemminger 		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2426cb7b593cSStephen Hemminger 			struct fib_alias *fa;
242728d36e37SEric Dumazet 
2428cb7b593cSStephen Hemminger 			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
242928d36e37SEric Dumazet 				char buf1[32], buf2[32];
243028d36e37SEric Dumazet 
2431cb7b593cSStephen Hemminger 				seq_indent(seq, iter->depth+1);
24321328042eSStephen Hemminger 				seq_printf(seq, "  /%d %s %s", li->plen,
243328d36e37SEric Dumazet 					   rtn_scope(buf1, sizeof(buf1),
243428d36e37SEric Dumazet 						     fa->fa_scope),
243528d36e37SEric Dumazet 					   rtn_type(buf2, sizeof(buf2),
243628d36e37SEric Dumazet 						    fa->fa_type));
2437cb7b593cSStephen Hemminger 				if (fa->fa_tos)
2438b9c4d82aSDenis V. Lunev 					seq_printf(seq, " tos=%d", fa->fa_tos);
2439cb7b593cSStephen Hemminger 				seq_putc(seq, '\n');
2440cb7b593cSStephen Hemminger 			}
2441cb7b593cSStephen Hemminger 		}
2442cb7b593cSStephen Hemminger 	}
244319baf839SRobert Olsson 
244419baf839SRobert Olsson 	return 0;
244519baf839SRobert Olsson }
244619baf839SRobert Olsson 
2447f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
244819baf839SRobert Olsson 	.start  = fib_trie_seq_start,
244919baf839SRobert Olsson 	.next   = fib_trie_seq_next,
245019baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
245119baf839SRobert Olsson 	.show   = fib_trie_seq_show,
245219baf839SRobert Olsson };
245319baf839SRobert Olsson 
245419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file)
245519baf839SRobert Olsson {
24561c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_trie_seq_ops,
2457cf7732e4SPavel Emelyanov 			    sizeof(struct fib_trie_iter));
245819baf839SRobert Olsson }
245919baf839SRobert Olsson 
24609a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = {
246119baf839SRobert Olsson 	.owner  = THIS_MODULE,
246219baf839SRobert Olsson 	.open   = fib_trie_seq_open,
246319baf839SRobert Olsson 	.read   = seq_read,
246419baf839SRobert Olsson 	.llseek = seq_lseek,
24651c340b2fSDenis V. Lunev 	.release = seq_release_net,
246619baf839SRobert Olsson };
246719baf839SRobert Olsson 
24688315f5d8SStephen Hemminger struct fib_route_iter {
24698315f5d8SStephen Hemminger 	struct seq_net_private p;
24708315f5d8SStephen Hemminger 	struct trie *main_trie;
24718315f5d8SStephen Hemminger 	loff_t	pos;
24728315f5d8SStephen Hemminger 	t_key	key;
24738315f5d8SStephen Hemminger };
24748315f5d8SStephen Hemminger 
24758315f5d8SStephen Hemminger static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
24768315f5d8SStephen Hemminger {
24778315f5d8SStephen Hemminger 	struct leaf *l = NULL;
24788315f5d8SStephen Hemminger 	struct trie *t = iter->main_trie;
24798315f5d8SStephen Hemminger 
24808315f5d8SStephen Hemminger 	/* use cache location of last found key */
24818315f5d8SStephen Hemminger 	if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
24828315f5d8SStephen Hemminger 		pos -= iter->pos;
24838315f5d8SStephen Hemminger 	else {
24848315f5d8SStephen Hemminger 		iter->pos = 0;
24858315f5d8SStephen Hemminger 		l = trie_firstleaf(t);
24868315f5d8SStephen Hemminger 	}
24878315f5d8SStephen Hemminger 
24888315f5d8SStephen Hemminger 	while (l && pos-- > 0) {
24898315f5d8SStephen Hemminger 		iter->pos++;
24908315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
24918315f5d8SStephen Hemminger 	}
24928315f5d8SStephen Hemminger 
24938315f5d8SStephen Hemminger 	if (l)
24948315f5d8SStephen Hemminger 		iter->key = pos;	/* remember it */
24958315f5d8SStephen Hemminger 	else
24968315f5d8SStephen Hemminger 		iter->pos = 0;		/* forget it */
24978315f5d8SStephen Hemminger 
24988315f5d8SStephen Hemminger 	return l;
24998315f5d8SStephen Hemminger }
25008315f5d8SStephen Hemminger 
25018315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
25028315f5d8SStephen Hemminger 	__acquires(RCU)
25038315f5d8SStephen Hemminger {
25048315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
25058315f5d8SStephen Hemminger 	struct fib_table *tb;
25068315f5d8SStephen Hemminger 
25078315f5d8SStephen Hemminger 	rcu_read_lock();
25081218854aSYOSHIFUJI Hideaki 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
25098315f5d8SStephen Hemminger 	if (!tb)
25108315f5d8SStephen Hemminger 		return NULL;
25118315f5d8SStephen Hemminger 
25128315f5d8SStephen Hemminger 	iter->main_trie = (struct trie *) tb->tb_data;
25138315f5d8SStephen Hemminger 	if (*pos == 0)
25148315f5d8SStephen Hemminger 		return SEQ_START_TOKEN;
25158315f5d8SStephen Hemminger 	else
25168315f5d8SStephen Hemminger 		return fib_route_get_idx(iter, *pos - 1);
25178315f5d8SStephen Hemminger }
25188315f5d8SStephen Hemminger 
25198315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
25208315f5d8SStephen Hemminger {
25218315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
25228315f5d8SStephen Hemminger 	struct leaf *l = v;
25238315f5d8SStephen Hemminger 
25248315f5d8SStephen Hemminger 	++*pos;
25258315f5d8SStephen Hemminger 	if (v == SEQ_START_TOKEN) {
25268315f5d8SStephen Hemminger 		iter->pos = 0;
25278315f5d8SStephen Hemminger 		l = trie_firstleaf(iter->main_trie);
25288315f5d8SStephen Hemminger 	} else {
25298315f5d8SStephen Hemminger 		iter->pos++;
25308315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
25318315f5d8SStephen Hemminger 	}
25328315f5d8SStephen Hemminger 
25338315f5d8SStephen Hemminger 	if (l)
25348315f5d8SStephen Hemminger 		iter->key = l->key;
25358315f5d8SStephen Hemminger 	else
25368315f5d8SStephen Hemminger 		iter->pos = 0;
25378315f5d8SStephen Hemminger 	return l;
25388315f5d8SStephen Hemminger }
25398315f5d8SStephen Hemminger 
25408315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v)
25418315f5d8SStephen Hemminger 	__releases(RCU)
25428315f5d8SStephen Hemminger {
25438315f5d8SStephen Hemminger 	rcu_read_unlock();
25448315f5d8SStephen Hemminger }
25458315f5d8SStephen Hemminger 
254632ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2547cb7b593cSStephen Hemminger {
2548cb7b593cSStephen Hemminger 	static unsigned type2flags[RTN_MAX + 1] = {
2549cb7b593cSStephen Hemminger 		[7] = RTF_REJECT, [8] = RTF_REJECT,
2550cb7b593cSStephen Hemminger 	};
2551cb7b593cSStephen Hemminger 	unsigned flags = type2flags[type];
2552cb7b593cSStephen Hemminger 
2553cb7b593cSStephen Hemminger 	if (fi && fi->fib_nh->nh_gw)
2554cb7b593cSStephen Hemminger 		flags |= RTF_GATEWAY;
255532ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2556cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2557cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2558cb7b593cSStephen Hemminger 	return flags;
2559cb7b593cSStephen Hemminger }
2560cb7b593cSStephen Hemminger 
2561cb7b593cSStephen Hemminger /*
2562cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2563cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2564cb7b593cSStephen Hemminger  * 	and needs to be same as fib_hash output to avoid breaking
2565cb7b593cSStephen Hemminger  *	legacy utilities
2566cb7b593cSStephen Hemminger  */
2567cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2568cb7b593cSStephen Hemminger {
2569cb7b593cSStephen Hemminger 	struct leaf *l = v;
25701328042eSStephen Hemminger 	struct leaf_info *li;
25711328042eSStephen Hemminger 	struct hlist_node *node;
2572cb7b593cSStephen Hemminger 
2573cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2574cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2575cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2576cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2577cb7b593cSStephen Hemminger 		return 0;
2578cb7b593cSStephen Hemminger 	}
2579cb7b593cSStephen Hemminger 
25801328042eSStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2581cb7b593cSStephen Hemminger 		struct fib_alias *fa;
258232ab5f80SAl Viro 		__be32 mask, prefix;
2583cb7b593cSStephen Hemminger 
2584cb7b593cSStephen Hemminger 		mask = inet_make_mask(li->plen);
2585cb7b593cSStephen Hemminger 		prefix = htonl(l->key);
2586cb7b593cSStephen Hemminger 
2587cb7b593cSStephen Hemminger 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
25881371e37dSHerbert Xu 			const struct fib_info *fi = fa->fa_info;
2589cb7b593cSStephen Hemminger 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
25905e659e4cSPavel Emelyanov 			int len;
2591cb7b593cSStephen Hemminger 
2592cb7b593cSStephen Hemminger 			if (fa->fa_type == RTN_BROADCAST
2593cb7b593cSStephen Hemminger 			    || fa->fa_type == RTN_MULTICAST)
2594cb7b593cSStephen Hemminger 				continue;
2595cb7b593cSStephen Hemminger 
2596cb7b593cSStephen Hemminger 			if (fi)
25975e659e4cSPavel Emelyanov 				seq_printf(seq,
25985e659e4cSPavel Emelyanov 					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
25995e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2600cb7b593cSStephen Hemminger 					 fi->fib_dev ? fi->fib_dev->name : "*",
2601cb7b593cSStephen Hemminger 					 prefix,
2602cb7b593cSStephen Hemminger 					 fi->fib_nh->nh_gw, flags, 0, 0,
2603cb7b593cSStephen Hemminger 					 fi->fib_priority,
2604cb7b593cSStephen Hemminger 					 mask,
2605a07f5f50SStephen Hemminger 					 (fi->fib_advmss ?
2606a07f5f50SStephen Hemminger 					  fi->fib_advmss + 40 : 0),
2607cb7b593cSStephen Hemminger 					 fi->fib_window,
26085e659e4cSPavel Emelyanov 					 fi->fib_rtt >> 3, &len);
2609cb7b593cSStephen Hemminger 			else
26105e659e4cSPavel Emelyanov 				seq_printf(seq,
26115e659e4cSPavel Emelyanov 					 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
26125e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2613cb7b593cSStephen Hemminger 					 prefix, 0, flags, 0, 0, 0,
26145e659e4cSPavel Emelyanov 					 mask, 0, 0, 0, &len);
2615cb7b593cSStephen Hemminger 
26165e659e4cSPavel Emelyanov 			seq_printf(seq, "%*s\n", 127 - len, "");
2617cb7b593cSStephen Hemminger 		}
2618cb7b593cSStephen Hemminger 	}
2619cb7b593cSStephen Hemminger 
2620cb7b593cSStephen Hemminger 	return 0;
2621cb7b593cSStephen Hemminger }
2622cb7b593cSStephen Hemminger 
2623f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
26248315f5d8SStephen Hemminger 	.start  = fib_route_seq_start,
26258315f5d8SStephen Hemminger 	.next   = fib_route_seq_next,
26268315f5d8SStephen Hemminger 	.stop   = fib_route_seq_stop,
2627cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
2628cb7b593cSStephen Hemminger };
2629cb7b593cSStephen Hemminger 
2630cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file)
2631cb7b593cSStephen Hemminger {
26321c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_route_seq_ops,
26338315f5d8SStephen Hemminger 			    sizeof(struct fib_route_iter));
2634cb7b593cSStephen Hemminger }
2635cb7b593cSStephen Hemminger 
26369a32144eSArjan van de Ven static const struct file_operations fib_route_fops = {
2637cb7b593cSStephen Hemminger 	.owner  = THIS_MODULE,
2638cb7b593cSStephen Hemminger 	.open   = fib_route_seq_open,
2639cb7b593cSStephen Hemminger 	.read   = seq_read,
2640cb7b593cSStephen Hemminger 	.llseek = seq_lseek,
26411c340b2fSDenis V. Lunev 	.release = seq_release_net,
2642cb7b593cSStephen Hemminger };
2643cb7b593cSStephen Hemminger 
264461a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
264519baf839SRobert Olsson {
264661a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2647cb7b593cSStephen Hemminger 		goto out1;
2648cb7b593cSStephen Hemminger 
264961a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
265061a02653SDenis V. Lunev 				  &fib_triestat_fops))
2651cb7b593cSStephen Hemminger 		goto out2;
2652cb7b593cSStephen Hemminger 
265361a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2654cb7b593cSStephen Hemminger 		goto out3;
2655cb7b593cSStephen Hemminger 
265619baf839SRobert Olsson 	return 0;
2657cb7b593cSStephen Hemminger 
2658cb7b593cSStephen Hemminger out3:
265961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
2660cb7b593cSStephen Hemminger out2:
266161a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
2662cb7b593cSStephen Hemminger out1:
2663cb7b593cSStephen Hemminger 	return -ENOMEM;
266419baf839SRobert Olsson }
266519baf839SRobert Olsson 
266661a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
266719baf839SRobert Olsson {
266861a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
266961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
267061a02653SDenis V. Lunev 	proc_net_remove(net, "route");
267119baf839SRobert Olsson }
267219baf839SRobert Olsson 
267319baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
2674