xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 5b470441)
119baf839SRobert Olsson /*
219baf839SRobert Olsson  *   This program is free software; you can redistribute it and/or
319baf839SRobert Olsson  *   modify it under the terms of the GNU General Public License
419baf839SRobert Olsson  *   as published by the Free Software Foundation; either version
519baf839SRobert Olsson  *   2 of the License, or (at your option) any later version.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
819baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
1119baf839SRobert Olsson  *     Agricultural Sciences.
1219baf839SRobert Olsson  *
1319baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1419baf839SRobert Olsson  *
1519baf839SRobert Olsson  * This work is based on the LPC-trie which is originally descibed in:
1619baf839SRobert Olsson  *
1719baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1819baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19631dd1a8SJustin P. Mattock  * http://www.csc.kth.se/~snilsson/software/dyntrie2/
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  *
2219baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2419baf839SRobert Olsson  *
2519baf839SRobert Olsson  *
2619baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2719baf839SRobert Olsson  *
2819baf839SRobert Olsson  *
2919baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3019baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
3119baf839SRobert Olsson  *		interface as the means of communication with the user level.
3219baf839SRobert Olsson  *
3319baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
3419baf839SRobert Olsson  *
3519baf839SRobert Olsson  *
3619baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3719baf839SRobert Olsson  *
3819baf839SRobert Olsson  *		This program is free software; you can redistribute it and/or
3919baf839SRobert Olsson  *		modify it under the terms of the GNU General Public License
4019baf839SRobert Olsson  *		as published by the Free Software Foundation; either version
4119baf839SRobert Olsson  *		2 of the License, or (at your option) any later version.
42fd966255SRobert Olsson  *
43fd966255SRobert Olsson  * Substantial contributions to this work comes from:
44fd966255SRobert Olsson  *
45fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
46fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
47fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
48fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
4919baf839SRobert Olsson  */
5019baf839SRobert Olsson 
5180b71b80SJens Låås #define VERSION "0.409"
5219baf839SRobert Olsson 
5319baf839SRobert Olsson #include <asm/uaccess.h>
5419baf839SRobert Olsson #include <asm/system.h>
551977f032SJiri Slaby #include <linux/bitops.h>
5619baf839SRobert Olsson #include <linux/types.h>
5719baf839SRobert Olsson #include <linux/kernel.h>
5819baf839SRobert Olsson #include <linux/mm.h>
5919baf839SRobert Olsson #include <linux/string.h>
6019baf839SRobert Olsson #include <linux/socket.h>
6119baf839SRobert Olsson #include <linux/sockios.h>
6219baf839SRobert Olsson #include <linux/errno.h>
6319baf839SRobert Olsson #include <linux/in.h>
6419baf839SRobert Olsson #include <linux/inet.h>
65cd8787abSStephen Hemminger #include <linux/inetdevice.h>
6619baf839SRobert Olsson #include <linux/netdevice.h>
6719baf839SRobert Olsson #include <linux/if_arp.h>
6819baf839SRobert Olsson #include <linux/proc_fs.h>
692373ce1cSRobert Olsson #include <linux/rcupdate.h>
7019baf839SRobert Olsson #include <linux/skbuff.h>
7119baf839SRobert Olsson #include <linux/netlink.h>
7219baf839SRobert Olsson #include <linux/init.h>
7319baf839SRobert Olsson #include <linux/list.h>
745a0e3ad6STejun Heo #include <linux/slab.h>
75457c4cbcSEric W. Biederman #include <net/net_namespace.h>
7619baf839SRobert Olsson #include <net/ip.h>
7719baf839SRobert Olsson #include <net/protocol.h>
7819baf839SRobert Olsson #include <net/route.h>
7919baf839SRobert Olsson #include <net/tcp.h>
8019baf839SRobert Olsson #include <net/sock.h>
8119baf839SRobert Olsson #include <net/ip_fib.h>
8219baf839SRobert Olsson #include "fib_lookup.h"
8319baf839SRobert Olsson 
8406ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
8519baf839SRobert Olsson 
8619baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key))
8719baf839SRobert Olsson 
8819baf839SRobert Olsson typedef unsigned int t_key;
8919baf839SRobert Olsson 
9019baf839SRobert Olsson #define T_TNODE 0
9119baf839SRobert Olsson #define T_LEAF  1
9219baf839SRobert Olsson #define NODE_TYPE_MASK	0x1UL
932373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
942373ce1cSRobert Olsson 
9591b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF))
9691b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF)
9719baf839SRobert Olsson 
9819baf839SRobert Olsson struct node {
9991b9a277SOlof Johansson 	unsigned long parent;
1008d965444SEric Dumazet 	t_key key;
10119baf839SRobert Olsson };
10219baf839SRobert Olsson 
10319baf839SRobert Olsson struct leaf {
10491b9a277SOlof Johansson 	unsigned long parent;
1058d965444SEric Dumazet 	t_key key;
10619baf839SRobert Olsson 	struct hlist_head list;
1072373ce1cSRobert Olsson 	struct rcu_head rcu;
10819baf839SRobert Olsson };
10919baf839SRobert Olsson 
11019baf839SRobert Olsson struct leaf_info {
11119baf839SRobert Olsson 	struct hlist_node hlist;
1122373ce1cSRobert Olsson 	struct rcu_head rcu;
11319baf839SRobert Olsson 	int plen;
11419baf839SRobert Olsson 	struct list_head falh;
11519baf839SRobert Olsson };
11619baf839SRobert Olsson 
11719baf839SRobert Olsson struct tnode {
11891b9a277SOlof Johansson 	unsigned long parent;
1198d965444SEric Dumazet 	t_key key;
120112d8cfcSEric Dumazet 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
121112d8cfcSEric Dumazet 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
1228d965444SEric Dumazet 	unsigned int full_children;	/* KEYLENGTH bits needed */
1238d965444SEric Dumazet 	unsigned int empty_children;	/* KEYLENGTH bits needed */
12415be75cdSStephen Hemminger 	union {
1252373ce1cSRobert Olsson 		struct rcu_head rcu;
12615be75cdSStephen Hemminger 		struct work_struct work;
127e0f7cb8cSJarek Poplawski 		struct tnode *tnode_free;
12815be75cdSStephen Hemminger 	};
12919baf839SRobert Olsson 	struct node *child[0];
13019baf839SRobert Olsson };
13119baf839SRobert Olsson 
13219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
13319baf839SRobert Olsson struct trie_use_stats {
13419baf839SRobert Olsson 	unsigned int gets;
13519baf839SRobert Olsson 	unsigned int backtrack;
13619baf839SRobert Olsson 	unsigned int semantic_match_passed;
13719baf839SRobert Olsson 	unsigned int semantic_match_miss;
13819baf839SRobert Olsson 	unsigned int null_node_hit;
1392f36895aSRobert Olsson 	unsigned int resize_node_skipped;
14019baf839SRobert Olsson };
14119baf839SRobert Olsson #endif
14219baf839SRobert Olsson 
14319baf839SRobert Olsson struct trie_stat {
14419baf839SRobert Olsson 	unsigned int totdepth;
14519baf839SRobert Olsson 	unsigned int maxdepth;
14619baf839SRobert Olsson 	unsigned int tnodes;
14719baf839SRobert Olsson 	unsigned int leaves;
14819baf839SRobert Olsson 	unsigned int nullpointers;
14993672292SStephen Hemminger 	unsigned int prefixes;
15006ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
15119baf839SRobert Olsson };
15219baf839SRobert Olsson 
15319baf839SRobert Olsson struct trie {
15419baf839SRobert Olsson 	struct node *trie;
15519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15619baf839SRobert Olsson 	struct trie_use_stats stats;
15719baf839SRobert Olsson #endif
15819baf839SRobert Olsson };
15919baf839SRobert Olsson 
16019baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
161a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
162a07f5f50SStephen Hemminger 				  int wasfull);
16319baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn);
1642f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn);
1652f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn);
166e0f7cb8cSJarek Poplawski /* tnodes to free after resize(); protected by RTNL */
167e0f7cb8cSJarek Poplawski static struct tnode *tnode_free_head;
168c3059477SJarek Poplawski static size_t tnode_free_size;
169c3059477SJarek Poplawski 
170c3059477SJarek Poplawski /*
171c3059477SJarek Poplawski  * synchronize_rcu after call_rcu for that many pages; it should be especially
172c3059477SJarek Poplawski  * useful before resizing the root node with PREEMPT_NONE configs; the value was
173c3059477SJarek Poplawski  * obtained experimentally, aiming to avoid visible slowdown.
174c3059477SJarek Poplawski  */
175c3059477SJarek Poplawski static const int sync_pages = 128;
17619baf839SRobert Olsson 
177e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly;
178bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly;
17919baf839SRobert Olsson 
18006801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node)
18106801916SStephen Hemminger {
182b59cfbf7SEric Dumazet 	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
183b59cfbf7SEric Dumazet }
18406801916SStephen Hemminger 
185b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node)
186b59cfbf7SEric Dumazet {
187b59cfbf7SEric Dumazet 	struct tnode *ret = node_parent(node);
188b59cfbf7SEric Dumazet 
189a034ee3cSEric Dumazet 	return rcu_dereference_rtnl(ret);
19006801916SStephen Hemminger }
19106801916SStephen Hemminger 
1926440cc9eSStephen Hemminger /* Same as rcu_assign_pointer
1936440cc9eSStephen Hemminger  * but that macro() assumes that value is a pointer.
1946440cc9eSStephen Hemminger  */
19506801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr)
19606801916SStephen Hemminger {
1976440cc9eSStephen Hemminger 	smp_wmb();
1986440cc9eSStephen Hemminger 	node->parent = (unsigned long)ptr | NODE_TYPE(node);
19906801916SStephen Hemminger }
2002373ce1cSRobert Olsson 
201b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
20219baf839SRobert Olsson {
203b59cfbf7SEric Dumazet 	BUG_ON(i >= 1U << tn->bits);
20419baf839SRobert Olsson 
205b59cfbf7SEric Dumazet 	return tn->child[i];
206b59cfbf7SEric Dumazet }
207b59cfbf7SEric Dumazet 
208b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
209b59cfbf7SEric Dumazet {
210b59cfbf7SEric Dumazet 	struct node *ret = tnode_get_child(tn, i);
211b59cfbf7SEric Dumazet 
212a034ee3cSEric Dumazet 	return rcu_dereference_rtnl(ret);
21319baf839SRobert Olsson }
21419baf839SRobert Olsson 
215bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn)
21619baf839SRobert Olsson {
21719baf839SRobert Olsson 	return 1 << tn->bits;
21819baf839SRobert Olsson }
21919baf839SRobert Olsson 
220ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l)
221ab66b4a7SStephen Hemminger {
222ab66b4a7SStephen Hemminger 	return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
223ab66b4a7SStephen Hemminger }
224ab66b4a7SStephen Hemminger 
22519baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
22619baf839SRobert Olsson {
22719baf839SRobert Olsson 	if (offset < KEYLENGTH)
22819baf839SRobert Olsson 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
22919baf839SRobert Olsson 	else
23019baf839SRobert Olsson 		return 0;
23119baf839SRobert Olsson }
23219baf839SRobert Olsson 
23319baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b)
23419baf839SRobert Olsson {
23519baf839SRobert Olsson 	return a == b;
23619baf839SRobert Olsson }
23719baf839SRobert Olsson 
23819baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
23919baf839SRobert Olsson {
24019baf839SRobert Olsson 	if (bits == 0 || offset >= KEYLENGTH)
24119baf839SRobert Olsson 		return 1;
24219baf839SRobert Olsson 	bits = bits > KEYLENGTH ? KEYLENGTH : bits;
24319baf839SRobert Olsson 	return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
24419baf839SRobert Olsson }
24519baf839SRobert Olsson 
24619baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b)
24719baf839SRobert Olsson {
24819baf839SRobert Olsson 	t_key diff = a ^ b;
24919baf839SRobert Olsson 	int i = offset;
25019baf839SRobert Olsson 
25119baf839SRobert Olsson 	if (!diff)
25219baf839SRobert Olsson 		return 0;
25319baf839SRobert Olsson 	while ((diff << i) >> (KEYLENGTH-1) == 0)
25419baf839SRobert Olsson 		i++;
25519baf839SRobert Olsson 	return i;
25619baf839SRobert Olsson }
25719baf839SRobert Olsson 
25819baf839SRobert Olsson /*
25919baf839SRobert Olsson   To understand this stuff, an understanding of keys and all their bits is
26019baf839SRobert Olsson   necessary. Every node in the trie has a key associated with it, but not
26119baf839SRobert Olsson   all of the bits in that key are significant.
26219baf839SRobert Olsson 
26319baf839SRobert Olsson   Consider a node 'n' and its parent 'tp'.
26419baf839SRobert Olsson 
26519baf839SRobert Olsson   If n is a leaf, every bit in its key is significant. Its presence is
266772cb712SRobert Olsson   necessitated by path compression, since during a tree traversal (when
26719baf839SRobert Olsson   searching for a leaf - unless we are doing an insertion) we will completely
26819baf839SRobert Olsson   ignore all skipped bits we encounter. Thus we need to verify, at the end of
26919baf839SRobert Olsson   a potentially successful search, that we have indeed been walking the
27019baf839SRobert Olsson   correct key path.
27119baf839SRobert Olsson 
27219baf839SRobert Olsson   Note that we can never "miss" the correct key in the tree if present by
27319baf839SRobert Olsson   following the wrong path. Path compression ensures that segments of the key
27419baf839SRobert Olsson   that are the same for all keys with a given prefix are skipped, but the
27519baf839SRobert Olsson   skipped part *is* identical for each node in the subtrie below the skipped
27619baf839SRobert Olsson   bit! trie_insert() in this implementation takes care of that - note the
27719baf839SRobert Olsson   call to tkey_sub_equals() in trie_insert().
27819baf839SRobert Olsson 
27919baf839SRobert Olsson   if n is an internal node - a 'tnode' here, the various parts of its key
28019baf839SRobert Olsson   have many different meanings.
28119baf839SRobert Olsson 
28219baf839SRobert Olsson   Example:
28319baf839SRobert Olsson   _________________________________________________________________
28419baf839SRobert Olsson   | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
28519baf839SRobert Olsson   -----------------------------------------------------------------
28619baf839SRobert Olsson     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
28719baf839SRobert Olsson 
28819baf839SRobert Olsson   _________________________________________________________________
28919baf839SRobert Olsson   | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
29019baf839SRobert Olsson   -----------------------------------------------------------------
29119baf839SRobert Olsson    16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
29219baf839SRobert Olsson 
29319baf839SRobert Olsson   tp->pos = 7
29419baf839SRobert Olsson   tp->bits = 3
29519baf839SRobert Olsson   n->pos = 15
29619baf839SRobert Olsson   n->bits = 4
29719baf839SRobert Olsson 
29819baf839SRobert Olsson   First, let's just ignore the bits that come before the parent tp, that is
29919baf839SRobert Olsson   the bits from 0 to (tp->pos-1). They are *known* but at this point we do
30019baf839SRobert Olsson   not use them for anything.
30119baf839SRobert Olsson 
30219baf839SRobert Olsson   The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
30319baf839SRobert Olsson   index into the parent's child array. That is, they will be used to find
30419baf839SRobert Olsson   'n' among tp's children.
30519baf839SRobert Olsson 
30619baf839SRobert Olsson   The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
30719baf839SRobert Olsson   for the node n.
30819baf839SRobert Olsson 
30919baf839SRobert Olsson   All the bits we have seen so far are significant to the node n. The rest
31019baf839SRobert Olsson   of the bits are really not needed or indeed known in n->key.
31119baf839SRobert Olsson 
31219baf839SRobert Olsson   The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
31319baf839SRobert Olsson   n's child array, and will of course be different for each child.
31419baf839SRobert Olsson 
315c877efb2SStephen Hemminger 
31619baf839SRobert Olsson   The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
31719baf839SRobert Olsson   at this point.
31819baf839SRobert Olsson 
31919baf839SRobert Olsson */
32019baf839SRobert Olsson 
3210c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn)
32219baf839SRobert Olsson {
3230c7770c7SStephen Hemminger 	WARN_ON(tn && tn->pos+tn->bits > 32);
32419baf839SRobert Olsson }
32519baf839SRobert Olsson 
326f5026fabSDenis V. Lunev static const int halve_threshold = 25;
327f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
328345aa031SJarek Poplawski static const int halve_threshold_root = 15;
32980b71b80SJens Låås static const int inflate_threshold_root = 30;
3302373ce1cSRobert Olsson 
3312373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
3322373ce1cSRobert Olsson {
3332373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
3342373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
3352373ce1cSRobert Olsson }
3362373ce1cSRobert Olsson 
3372373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3382373ce1cSRobert Olsson {
3392373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3402373ce1cSRobert Olsson }
3412373ce1cSRobert Olsson 
3422373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head)
3432373ce1cSRobert Olsson {
344bc3c8c1eSStephen Hemminger 	struct leaf *l = container_of(head, struct leaf, rcu);
345bc3c8c1eSStephen Hemminger 	kmem_cache_free(trie_leaf_kmem, l);
3462373ce1cSRobert Olsson }
3472373ce1cSRobert Olsson 
348387a5487SStephen Hemminger static inline void free_leaf(struct leaf *l)
349387a5487SStephen Hemminger {
350387a5487SStephen Hemminger 	call_rcu_bh(&l->rcu, __leaf_free_rcu);
351387a5487SStephen Hemminger }
352387a5487SStephen Hemminger 
3532373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head)
3542373ce1cSRobert Olsson {
3552373ce1cSRobert Olsson 	kfree(container_of(head, struct leaf_info, rcu));
3562373ce1cSRobert Olsson }
3572373ce1cSRobert Olsson 
3582373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf)
3592373ce1cSRobert Olsson {
3602373ce1cSRobert Olsson 	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
3612373ce1cSRobert Olsson }
3622373ce1cSRobert Olsson 
3638d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size)
3642373ce1cSRobert Olsson {
3652373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3668d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
36715be75cdSStephen Hemminger 	else
3687a1c8e5aSEric Dumazet 		return vzalloc(size);
36915be75cdSStephen Hemminger }
3702373ce1cSRobert Olsson 
37115be75cdSStephen Hemminger static void __tnode_vfree(struct work_struct *arg)
37215be75cdSStephen Hemminger {
37315be75cdSStephen Hemminger 	struct tnode *tn = container_of(arg, struct tnode, work);
37415be75cdSStephen Hemminger 	vfree(tn);
3752373ce1cSRobert Olsson }
3762373ce1cSRobert Olsson 
3772373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head)
3782373ce1cSRobert Olsson {
3792373ce1cSRobert Olsson 	struct tnode *tn = container_of(head, struct tnode, rcu);
3808d965444SEric Dumazet 	size_t size = sizeof(struct tnode) +
3818d965444SEric Dumazet 		      (sizeof(struct node *) << tn->bits);
3822373ce1cSRobert Olsson 
3832373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3842373ce1cSRobert Olsson 		kfree(tn);
38515be75cdSStephen Hemminger 	else {
38615be75cdSStephen Hemminger 		INIT_WORK(&tn->work, __tnode_vfree);
38715be75cdSStephen Hemminger 		schedule_work(&tn->work);
38815be75cdSStephen Hemminger 	}
3892373ce1cSRobert Olsson }
3902373ce1cSRobert Olsson 
3912373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn)
3922373ce1cSRobert Olsson {
393387a5487SStephen Hemminger 	if (IS_LEAF(tn))
394387a5487SStephen Hemminger 		free_leaf((struct leaf *) tn);
395387a5487SStephen Hemminger 	else
3962373ce1cSRobert Olsson 		call_rcu(&tn->rcu, __tnode_free_rcu);
3972373ce1cSRobert Olsson }
3982373ce1cSRobert Olsson 
399e0f7cb8cSJarek Poplawski static void tnode_free_safe(struct tnode *tn)
400e0f7cb8cSJarek Poplawski {
401e0f7cb8cSJarek Poplawski 	BUG_ON(IS_LEAF(tn));
402e0f7cb8cSJarek Poplawski 	tn->tnode_free = tnode_free_head;
403e0f7cb8cSJarek Poplawski 	tnode_free_head = tn;
404c3059477SJarek Poplawski 	tnode_free_size += sizeof(struct tnode) +
405c3059477SJarek Poplawski 			   (sizeof(struct node *) << tn->bits);
406e0f7cb8cSJarek Poplawski }
407e0f7cb8cSJarek Poplawski 
408e0f7cb8cSJarek Poplawski static void tnode_free_flush(void)
409e0f7cb8cSJarek Poplawski {
410e0f7cb8cSJarek Poplawski 	struct tnode *tn;
411e0f7cb8cSJarek Poplawski 
412e0f7cb8cSJarek Poplawski 	while ((tn = tnode_free_head)) {
413e0f7cb8cSJarek Poplawski 		tnode_free_head = tn->tnode_free;
414e0f7cb8cSJarek Poplawski 		tn->tnode_free = NULL;
415e0f7cb8cSJarek Poplawski 		tnode_free(tn);
416e0f7cb8cSJarek Poplawski 	}
417c3059477SJarek Poplawski 
418c3059477SJarek Poplawski 	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
419c3059477SJarek Poplawski 		tnode_free_size = 0;
420c3059477SJarek Poplawski 		synchronize_rcu();
421c3059477SJarek Poplawski 	}
422e0f7cb8cSJarek Poplawski }
423e0f7cb8cSJarek Poplawski 
42419baf839SRobert Olsson static struct leaf *leaf_new(void)
42519baf839SRobert Olsson {
426bc3c8c1eSStephen Hemminger 	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
42719baf839SRobert Olsson 	if (l) {
4282373ce1cSRobert Olsson 		l->parent = T_LEAF;
42919baf839SRobert Olsson 		INIT_HLIST_HEAD(&l->list);
43019baf839SRobert Olsson 	}
43119baf839SRobert Olsson 	return l;
43219baf839SRobert Olsson }
43319baf839SRobert Olsson 
43419baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen)
43519baf839SRobert Olsson {
43619baf839SRobert Olsson 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
4372373ce1cSRobert Olsson 	if (li) {
43819baf839SRobert Olsson 		li->plen = plen;
43919baf839SRobert Olsson 		INIT_LIST_HEAD(&li->falh);
4402373ce1cSRobert Olsson 	}
44119baf839SRobert Olsson 	return li;
44219baf839SRobert Olsson }
44319baf839SRobert Olsson 
44419baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits)
44519baf839SRobert Olsson {
4468d965444SEric Dumazet 	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
447f0e36f8cSPatrick McHardy 	struct tnode *tn = tnode_alloc(sz);
44819baf839SRobert Olsson 
44919baf839SRobert Olsson 	if (tn) {
4502373ce1cSRobert Olsson 		tn->parent = T_TNODE;
45119baf839SRobert Olsson 		tn->pos = pos;
45219baf839SRobert Olsson 		tn->bits = bits;
45319baf839SRobert Olsson 		tn->key = key;
45419baf839SRobert Olsson 		tn->full_children = 0;
45519baf839SRobert Olsson 		tn->empty_children = 1<<bits;
45619baf839SRobert Olsson 	}
457c877efb2SStephen Hemminger 
458a034ee3cSEric Dumazet 	pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
459a034ee3cSEric Dumazet 		 sizeof(struct node) << bits);
46019baf839SRobert Olsson 	return tn;
46119baf839SRobert Olsson }
46219baf839SRobert Olsson 
46319baf839SRobert Olsson /*
46419baf839SRobert Olsson  * Check whether a tnode 'n' is "full", i.e. it is an internal node
46519baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
46619baf839SRobert Olsson  */
46719baf839SRobert Olsson 
468bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n)
46919baf839SRobert Olsson {
47019baf839SRobert Olsson 	if (n == NULL || IS_LEAF(n))
47119baf839SRobert Olsson 		return 0;
47219baf839SRobert Olsson 
47319baf839SRobert Olsson 	return ((struct tnode *) n)->pos == tn->pos + tn->bits;
47419baf839SRobert Olsson }
47519baf839SRobert Olsson 
476a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i,
477a07f5f50SStephen Hemminger 			     struct node *n)
47819baf839SRobert Olsson {
47919baf839SRobert Olsson 	tnode_put_child_reorg(tn, i, n, -1);
48019baf839SRobert Olsson }
48119baf839SRobert Olsson 
48219baf839SRobert Olsson  /*
48319baf839SRobert Olsson   * Add a child at position i overwriting the old value.
48419baf839SRobert Olsson   * Update the value of full_children and empty_children.
48519baf839SRobert Olsson   */
48619baf839SRobert Olsson 
487a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
488a07f5f50SStephen Hemminger 				  int wasfull)
48919baf839SRobert Olsson {
4902373ce1cSRobert Olsson 	struct node *chi = tn->child[i];
49119baf839SRobert Olsson 	int isfull;
49219baf839SRobert Olsson 
4930c7770c7SStephen Hemminger 	BUG_ON(i >= 1<<tn->bits);
4940c7770c7SStephen Hemminger 
49519baf839SRobert Olsson 	/* update emptyChildren */
49619baf839SRobert Olsson 	if (n == NULL && chi != NULL)
49719baf839SRobert Olsson 		tn->empty_children++;
49819baf839SRobert Olsson 	else if (n != NULL && chi == NULL)
49919baf839SRobert Olsson 		tn->empty_children--;
50019baf839SRobert Olsson 
50119baf839SRobert Olsson 	/* update fullChildren */
50219baf839SRobert Olsson 	if (wasfull == -1)
50319baf839SRobert Olsson 		wasfull = tnode_full(tn, chi);
50419baf839SRobert Olsson 
50519baf839SRobert Olsson 	isfull = tnode_full(tn, n);
50619baf839SRobert Olsson 	if (wasfull && !isfull)
50719baf839SRobert Olsson 		tn->full_children--;
50819baf839SRobert Olsson 	else if (!wasfull && isfull)
50919baf839SRobert Olsson 		tn->full_children++;
51091b9a277SOlof Johansson 
51119baf839SRobert Olsson 	if (n)
51206801916SStephen Hemminger 		node_set_parent(n, tn);
51319baf839SRobert Olsson 
5142373ce1cSRobert Olsson 	rcu_assign_pointer(tn->child[i], n);
51519baf839SRobert Olsson }
51619baf839SRobert Olsson 
51780b71b80SJens Låås #define MAX_WORK 10
51819baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn)
51919baf839SRobert Olsson {
52019baf839SRobert Olsson 	int i;
5212f80b3c8SRobert Olsson 	struct tnode *old_tn;
522e6308be8SRobert Olsson 	int inflate_threshold_use;
523e6308be8SRobert Olsson 	int halve_threshold_use;
52480b71b80SJens Låås 	int max_work;
52519baf839SRobert Olsson 
52619baf839SRobert Olsson 	if (!tn)
52719baf839SRobert Olsson 		return NULL;
52819baf839SRobert Olsson 
5290c7770c7SStephen Hemminger 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
53019baf839SRobert Olsson 		 tn, inflate_threshold, halve_threshold);
53119baf839SRobert Olsson 
53219baf839SRobert Olsson 	/* No children */
53319baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn)) {
534e0f7cb8cSJarek Poplawski 		tnode_free_safe(tn);
53519baf839SRobert Olsson 		return NULL;
53619baf839SRobert Olsson 	}
53719baf839SRobert Olsson 	/* One child */
53819baf839SRobert Olsson 	if (tn->empty_children == tnode_child_length(tn) - 1)
53980b71b80SJens Låås 		goto one_child;
54019baf839SRobert Olsson 	/*
54119baf839SRobert Olsson 	 * Double as long as the resulting node has a number of
54219baf839SRobert Olsson 	 * nonempty nodes that are above the threshold.
54319baf839SRobert Olsson 	 */
54419baf839SRobert Olsson 
54519baf839SRobert Olsson 	/*
54619baf839SRobert Olsson 	 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
54719baf839SRobert Olsson 	 * the Helsinki University of Technology and Matti Tikkanen of Nokia
54819baf839SRobert Olsson 	 * Telecommunications, page 6:
54919baf839SRobert Olsson 	 * "A node is doubled if the ratio of non-empty children to all
55019baf839SRobert Olsson 	 * children in the *doubled* node is at least 'high'."
55119baf839SRobert Olsson 	 *
55219baf839SRobert Olsson 	 * 'high' in this instance is the variable 'inflate_threshold'. It
55319baf839SRobert Olsson 	 * is expressed as a percentage, so we multiply it with
55419baf839SRobert Olsson 	 * tnode_child_length() and instead of multiplying by 2 (since the
55519baf839SRobert Olsson 	 * child array will be doubled by inflate()) and multiplying
55619baf839SRobert Olsson 	 * the left-hand side by 100 (to handle the percentage thing) we
55719baf839SRobert Olsson 	 * multiply the left-hand side by 50.
55819baf839SRobert Olsson 	 *
55919baf839SRobert Olsson 	 * The left-hand side may look a bit weird: tnode_child_length(tn)
56019baf839SRobert Olsson 	 * - tn->empty_children is of course the number of non-null children
56119baf839SRobert Olsson 	 * in the current node. tn->full_children is the number of "full"
56219baf839SRobert Olsson 	 * children, that is non-null tnodes with a skip value of 0.
56319baf839SRobert Olsson 	 * All of those will be doubled in the resulting inflated tnode, so
56419baf839SRobert Olsson 	 * we just count them one extra time here.
56519baf839SRobert Olsson 	 *
56619baf839SRobert Olsson 	 * A clearer way to write this would be:
56719baf839SRobert Olsson 	 *
56819baf839SRobert Olsson 	 * to_be_doubled = tn->full_children;
56919baf839SRobert Olsson 	 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
57019baf839SRobert Olsson 	 *     tn->full_children;
57119baf839SRobert Olsson 	 *
57219baf839SRobert Olsson 	 * new_child_length = tnode_child_length(tn) * 2;
57319baf839SRobert Olsson 	 *
57419baf839SRobert Olsson 	 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
57519baf839SRobert Olsson 	 *      new_child_length;
57619baf839SRobert Olsson 	 * if (new_fill_factor >= inflate_threshold)
57719baf839SRobert Olsson 	 *
57819baf839SRobert Olsson 	 * ...and so on, tho it would mess up the while () loop.
57919baf839SRobert Olsson 	 *
58019baf839SRobert Olsson 	 * anyway,
58119baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
58219baf839SRobert Olsson 	 *      inflate_threshold
58319baf839SRobert Olsson 	 *
58419baf839SRobert Olsson 	 * avoid a division:
58519baf839SRobert Olsson 	 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
58619baf839SRobert Olsson 	 *      inflate_threshold * new_child_length
58719baf839SRobert Olsson 	 *
58819baf839SRobert Olsson 	 * expand not_to_be_doubled and to_be_doubled, and shorten:
58919baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
59019baf839SRobert Olsson 	 *    tn->full_children) >= inflate_threshold * new_child_length
59119baf839SRobert Olsson 	 *
59219baf839SRobert Olsson 	 * expand new_child_length:
59319baf839SRobert Olsson 	 * 100 * (tnode_child_length(tn) - tn->empty_children +
59419baf839SRobert Olsson 	 *    tn->full_children) >=
59519baf839SRobert Olsson 	 *      inflate_threshold * tnode_child_length(tn) * 2
59619baf839SRobert Olsson 	 *
59719baf839SRobert Olsson 	 * shorten again:
59819baf839SRobert Olsson 	 * 50 * (tn->full_children + tnode_child_length(tn) -
59919baf839SRobert Olsson 	 *    tn->empty_children) >= inflate_threshold *
60019baf839SRobert Olsson 	 *    tnode_child_length(tn)
60119baf839SRobert Olsson 	 *
60219baf839SRobert Olsson 	 */
60319baf839SRobert Olsson 
60419baf839SRobert Olsson 	check_tnode(tn);
60519baf839SRobert Olsson 
606e6308be8SRobert Olsson 	/* Keep root node larger  */
607e6308be8SRobert Olsson 
60880b71b80SJens Låås 	if (!node_parent((struct node *)tn)) {
60980b71b80SJens Låås 		inflate_threshold_use = inflate_threshold_root;
61080b71b80SJens Låås 		halve_threshold_use = halve_threshold_root;
611a034ee3cSEric Dumazet 	} else {
612e6308be8SRobert Olsson 		inflate_threshold_use = inflate_threshold;
61380b71b80SJens Låås 		halve_threshold_use = halve_threshold;
61480b71b80SJens Låås 	}
615e6308be8SRobert Olsson 
61680b71b80SJens Låås 	max_work = MAX_WORK;
61780b71b80SJens Låås 	while ((tn->full_children > 0 &&  max_work-- &&
618a07f5f50SStephen Hemminger 		50 * (tn->full_children + tnode_child_length(tn)
619a07f5f50SStephen Hemminger 		      - tn->empty_children)
620a07f5f50SStephen Hemminger 		>= inflate_threshold_use * tnode_child_length(tn))) {
62119baf839SRobert Olsson 
6222f80b3c8SRobert Olsson 		old_tn = tn;
6232f80b3c8SRobert Olsson 		tn = inflate(t, tn);
624a07f5f50SStephen Hemminger 
6252f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6262f80b3c8SRobert Olsson 			tn = old_tn;
6272f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6282f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6292f36895aSRobert Olsson #endif
6302f36895aSRobert Olsson 			break;
6312f36895aSRobert Olsson 		}
63219baf839SRobert Olsson 	}
63319baf839SRobert Olsson 
63419baf839SRobert Olsson 	check_tnode(tn);
63519baf839SRobert Olsson 
63680b71b80SJens Låås 	/* Return if at least one inflate is run */
63780b71b80SJens Låås 	if (max_work != MAX_WORK)
63880b71b80SJens Låås 		return (struct node *) tn;
63980b71b80SJens Låås 
64019baf839SRobert Olsson 	/*
64119baf839SRobert Olsson 	 * Halve as long as the number of empty children in this
64219baf839SRobert Olsson 	 * node is above threshold.
64319baf839SRobert Olsson 	 */
6442f36895aSRobert Olsson 
64580b71b80SJens Låås 	max_work = MAX_WORK;
64680b71b80SJens Låås 	while (tn->bits > 1 &&  max_work-- &&
64719baf839SRobert Olsson 	       100 * (tnode_child_length(tn) - tn->empty_children) <
648e6308be8SRobert Olsson 	       halve_threshold_use * tnode_child_length(tn)) {
64919baf839SRobert Olsson 
6502f80b3c8SRobert Olsson 		old_tn = tn;
6512f80b3c8SRobert Olsson 		tn = halve(t, tn);
6522f80b3c8SRobert Olsson 		if (IS_ERR(tn)) {
6532f80b3c8SRobert Olsson 			tn = old_tn;
6542f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
6552f36895aSRobert Olsson 			t->stats.resize_node_skipped++;
6562f36895aSRobert Olsson #endif
6572f36895aSRobert Olsson 			break;
6582f36895aSRobert Olsson 		}
6592f36895aSRobert Olsson 	}
6602f36895aSRobert Olsson 
66119baf839SRobert Olsson 
66219baf839SRobert Olsson 	/* Only one child remains */
66380b71b80SJens Låås 	if (tn->empty_children == tnode_child_length(tn) - 1) {
66480b71b80SJens Låås one_child:
66519baf839SRobert Olsson 		for (i = 0; i < tnode_child_length(tn); i++) {
66691b9a277SOlof Johansson 			struct node *n;
66719baf839SRobert Olsson 
66891b9a277SOlof Johansson 			n = tn->child[i];
6692373ce1cSRobert Olsson 			if (!n)
67091b9a277SOlof Johansson 				continue;
67191b9a277SOlof Johansson 
67291b9a277SOlof Johansson 			/* compress one level */
67391b9a277SOlof Johansson 
67406801916SStephen Hemminger 			node_set_parent(n, NULL);
675e0f7cb8cSJarek Poplawski 			tnode_free_safe(tn);
67619baf839SRobert Olsson 			return n;
67719baf839SRobert Olsson 		}
67880b71b80SJens Låås 	}
67919baf839SRobert Olsson 	return (struct node *) tn;
68019baf839SRobert Olsson }
68119baf839SRobert Olsson 
6822f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn)
68319baf839SRobert Olsson {
68419baf839SRobert Olsson 	struct tnode *oldtnode = tn;
68519baf839SRobert Olsson 	int olen = tnode_child_length(tn);
68619baf839SRobert Olsson 	int i;
68719baf839SRobert Olsson 
6880c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
68919baf839SRobert Olsson 
69019baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
69119baf839SRobert Olsson 
6922f80b3c8SRobert Olsson 	if (!tn)
6932f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
6942f36895aSRobert Olsson 
6952f36895aSRobert Olsson 	/*
6962f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
6972f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
6982f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and  inflate
6992f36895aSRobert Olsson 	 * of tnode is ignored.
7002f36895aSRobert Olsson 	 */
7012f36895aSRobert Olsson 
7022f36895aSRobert Olsson 	for (i = 0; i < olen; i++) {
703a07f5f50SStephen Hemminger 		struct tnode *inode;
7042f36895aSRobert Olsson 
705a07f5f50SStephen Hemminger 		inode = (struct tnode *) tnode_get_child(oldtnode, i);
7062f36895aSRobert Olsson 		if (inode &&
7072f36895aSRobert Olsson 		    IS_TNODE(inode) &&
7082f36895aSRobert Olsson 		    inode->pos == oldtnode->pos + oldtnode->bits &&
7092f36895aSRobert Olsson 		    inode->bits > 1) {
7102f36895aSRobert Olsson 			struct tnode *left, *right;
711ab66b4a7SStephen Hemminger 			t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
7122f36895aSRobert Olsson 
7132f36895aSRobert Olsson 			left = tnode_new(inode->key&(~m), inode->pos + 1,
7142f36895aSRobert Olsson 					 inode->bits - 1);
7152f80b3c8SRobert Olsson 			if (!left)
7162f80b3c8SRobert Olsson 				goto nomem;
7172f36895aSRobert Olsson 
7182f36895aSRobert Olsson 			right = tnode_new(inode->key|m, inode->pos + 1,
7192f36895aSRobert Olsson 					  inode->bits - 1);
7202f36895aSRobert Olsson 
7212f36895aSRobert Olsson 			if (!right) {
7222f80b3c8SRobert Olsson 				tnode_free(left);
7232f80b3c8SRobert Olsson 				goto nomem;
7242f36895aSRobert Olsson 			}
7252f36895aSRobert Olsson 
7262f36895aSRobert Olsson 			put_child(t, tn, 2*i, (struct node *) left);
7272f36895aSRobert Olsson 			put_child(t, tn, 2*i+1, (struct node *) right);
7282f36895aSRobert Olsson 		}
7292f36895aSRobert Olsson 	}
7302f36895aSRobert Olsson 
73119baf839SRobert Olsson 	for (i = 0; i < olen; i++) {
732c95aaf9aSStephen Hemminger 		struct tnode *inode;
73319baf839SRobert Olsson 		struct node *node = tnode_get_child(oldtnode, i);
73491b9a277SOlof Johansson 		struct tnode *left, *right;
73591b9a277SOlof Johansson 		int size, j;
73619baf839SRobert Olsson 
73719baf839SRobert Olsson 		/* An empty child */
73819baf839SRobert Olsson 		if (node == NULL)
73919baf839SRobert Olsson 			continue;
74019baf839SRobert Olsson 
74119baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
74219baf839SRobert Olsson 
74319baf839SRobert Olsson 		if (IS_LEAF(node) || ((struct tnode *) node)->pos >
74419baf839SRobert Olsson 		   tn->pos + tn->bits - 1) {
745a07f5f50SStephen Hemminger 			if (tkey_extract_bits(node->key,
746a07f5f50SStephen Hemminger 					      oldtnode->pos + oldtnode->bits,
74719baf839SRobert Olsson 					      1) == 0)
74819baf839SRobert Olsson 				put_child(t, tn, 2*i, node);
74919baf839SRobert Olsson 			else
75019baf839SRobert Olsson 				put_child(t, tn, 2*i+1, node);
75119baf839SRobert Olsson 			continue;
75219baf839SRobert Olsson 		}
75319baf839SRobert Olsson 
75419baf839SRobert Olsson 		/* An internal node with two children */
75519baf839SRobert Olsson 		inode = (struct tnode *) node;
75619baf839SRobert Olsson 
75719baf839SRobert Olsson 		if (inode->bits == 1) {
75819baf839SRobert Olsson 			put_child(t, tn, 2*i, inode->child[0]);
75919baf839SRobert Olsson 			put_child(t, tn, 2*i+1, inode->child[1]);
76019baf839SRobert Olsson 
761e0f7cb8cSJarek Poplawski 			tnode_free_safe(inode);
76291b9a277SOlof Johansson 			continue;
76319baf839SRobert Olsson 		}
76419baf839SRobert Olsson 
76519baf839SRobert Olsson 		/* An internal node with more than two children */
76619baf839SRobert Olsson 
76719baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
76819baf839SRobert Olsson 		 * ones, 'left' and 'right', each with half of the
76919baf839SRobert Olsson 		 * original children. The two new nodes will have
77019baf839SRobert Olsson 		 * a position one bit further down the key and this
77119baf839SRobert Olsson 		 * means that the "significant" part of their keys
77219baf839SRobert Olsson 		 * (see the discussion near the top of this file)
77319baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
77419baf839SRobert Olsson 		 * left's key and "1" in right's key. Since we are
77519baf839SRobert Olsson 		 * moving the key position by one step, the bit that
77619baf839SRobert Olsson 		 * we are moving away from - the bit at position
77719baf839SRobert Olsson 		 * (inode->pos) - is the one that will differ between
77819baf839SRobert Olsson 		 * left and right. So... we synthesize that bit in the
77919baf839SRobert Olsson 		 * two  new keys.
78019baf839SRobert Olsson 		 * The mask 'm' below will be a single "one" bit at
78119baf839SRobert Olsson 		 * the position (inode->pos)
78219baf839SRobert Olsson 		 */
78319baf839SRobert Olsson 
78419baf839SRobert Olsson 		/* Use the old key, but set the new significant
78519baf839SRobert Olsson 		 *   bit to zero.
78619baf839SRobert Olsson 		 */
7872f36895aSRobert Olsson 
7882f36895aSRobert Olsson 		left = (struct tnode *) tnode_get_child(tn, 2*i);
7892f36895aSRobert Olsson 		put_child(t, tn, 2*i, NULL);
79019baf839SRobert Olsson 
79191b9a277SOlof Johansson 		BUG_ON(!left);
79219baf839SRobert Olsson 
7932f36895aSRobert Olsson 		right = (struct tnode *) tnode_get_child(tn, 2*i+1);
7942f36895aSRobert Olsson 		put_child(t, tn, 2*i+1, NULL);
79519baf839SRobert Olsson 
79691b9a277SOlof Johansson 		BUG_ON(!right);
79719baf839SRobert Olsson 
79819baf839SRobert Olsson 		size = tnode_child_length(left);
79919baf839SRobert Olsson 		for (j = 0; j < size; j++) {
80019baf839SRobert Olsson 			put_child(t, left, j, inode->child[j]);
80119baf839SRobert Olsson 			put_child(t, right, j, inode->child[j + size]);
80219baf839SRobert Olsson 		}
80319baf839SRobert Olsson 		put_child(t, tn, 2*i, resize(t, left));
80419baf839SRobert Olsson 		put_child(t, tn, 2*i+1, resize(t, right));
80519baf839SRobert Olsson 
806e0f7cb8cSJarek Poplawski 		tnode_free_safe(inode);
80719baf839SRobert Olsson 	}
808e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
80919baf839SRobert Olsson 	return tn;
8102f80b3c8SRobert Olsson nomem:
8112f80b3c8SRobert Olsson 	{
8122f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8132f80b3c8SRobert Olsson 		int j;
8142f80b3c8SRobert Olsson 
8152f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8162f80b3c8SRobert Olsson 			if (tn->child[j])
8172f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
8182f80b3c8SRobert Olsson 
8192f80b3c8SRobert Olsson 		tnode_free(tn);
8202f80b3c8SRobert Olsson 
8212f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8222f80b3c8SRobert Olsson 	}
82319baf839SRobert Olsson }
82419baf839SRobert Olsson 
8252f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn)
82619baf839SRobert Olsson {
82719baf839SRobert Olsson 	struct tnode *oldtnode = tn;
82819baf839SRobert Olsson 	struct node *left, *right;
82919baf839SRobert Olsson 	int i;
83019baf839SRobert Olsson 	int olen = tnode_child_length(tn);
83119baf839SRobert Olsson 
8320c7770c7SStephen Hemminger 	pr_debug("In halve\n");
83319baf839SRobert Olsson 
83419baf839SRobert Olsson 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
83519baf839SRobert Olsson 
8362f80b3c8SRobert Olsson 	if (!tn)
8372f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
8382f36895aSRobert Olsson 
8392f36895aSRobert Olsson 	/*
8402f36895aSRobert Olsson 	 * Preallocate and store tnodes before the actual work so we
8412f36895aSRobert Olsson 	 * don't get into an inconsistent state if memory allocation
8422f36895aSRobert Olsson 	 * fails. In case of failure we return the oldnode and halve
8432f36895aSRobert Olsson 	 * of tnode is ignored.
8442f36895aSRobert Olsson 	 */
8452f36895aSRobert Olsson 
8462f36895aSRobert Olsson 	for (i = 0; i < olen; i += 2) {
8472f36895aSRobert Olsson 		left = tnode_get_child(oldtnode, i);
8482f36895aSRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
8492f36895aSRobert Olsson 
8502f36895aSRobert Olsson 		/* Two nonempty children */
8512f36895aSRobert Olsson 		if (left && right) {
8522f80b3c8SRobert Olsson 			struct tnode *newn;
8532f36895aSRobert Olsson 
8542f80b3c8SRobert Olsson 			newn = tnode_new(left->key, tn->pos + tn->bits, 1);
8552f80b3c8SRobert Olsson 
8562f80b3c8SRobert Olsson 			if (!newn)
8572f80b3c8SRobert Olsson 				goto nomem;
8582f80b3c8SRobert Olsson 
8592f80b3c8SRobert Olsson 			put_child(t, tn, i/2, (struct node *)newn);
8602f36895aSRobert Olsson 		}
8612f36895aSRobert Olsson 
8622f36895aSRobert Olsson 	}
86319baf839SRobert Olsson 
86419baf839SRobert Olsson 	for (i = 0; i < olen; i += 2) {
86591b9a277SOlof Johansson 		struct tnode *newBinNode;
86691b9a277SOlof Johansson 
86719baf839SRobert Olsson 		left = tnode_get_child(oldtnode, i);
86819baf839SRobert Olsson 		right = tnode_get_child(oldtnode, i+1);
86919baf839SRobert Olsson 
87019baf839SRobert Olsson 		/* At least one of the children is empty */
87119baf839SRobert Olsson 		if (left == NULL) {
87219baf839SRobert Olsson 			if (right == NULL)    /* Both are empty */
87319baf839SRobert Olsson 				continue;
87419baf839SRobert Olsson 			put_child(t, tn, i/2, right);
87591b9a277SOlof Johansson 			continue;
87691b9a277SOlof Johansson 		}
87791b9a277SOlof Johansson 
87891b9a277SOlof Johansson 		if (right == NULL) {
87919baf839SRobert Olsson 			put_child(t, tn, i/2, left);
88091b9a277SOlof Johansson 			continue;
88191b9a277SOlof Johansson 		}
88219baf839SRobert Olsson 
88319baf839SRobert Olsson 		/* Two nonempty children */
88491b9a277SOlof Johansson 		newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
8852f36895aSRobert Olsson 		put_child(t, tn, i/2, NULL);
88619baf839SRobert Olsson 		put_child(t, newBinNode, 0, left);
88719baf839SRobert Olsson 		put_child(t, newBinNode, 1, right);
88819baf839SRobert Olsson 		put_child(t, tn, i/2, resize(t, newBinNode));
88919baf839SRobert Olsson 	}
890e0f7cb8cSJarek Poplawski 	tnode_free_safe(oldtnode);
89119baf839SRobert Olsson 	return tn;
8922f80b3c8SRobert Olsson nomem:
8932f80b3c8SRobert Olsson 	{
8942f80b3c8SRobert Olsson 		int size = tnode_child_length(tn);
8952f80b3c8SRobert Olsson 		int j;
8962f80b3c8SRobert Olsson 
8972f80b3c8SRobert Olsson 		for (j = 0; j < size; j++)
8982f80b3c8SRobert Olsson 			if (tn->child[j])
8992f80b3c8SRobert Olsson 				tnode_free((struct tnode *)tn->child[j]);
9002f80b3c8SRobert Olsson 
9012f80b3c8SRobert Olsson 		tnode_free(tn);
9022f80b3c8SRobert Olsson 
9032f80b3c8SRobert Olsson 		return ERR_PTR(-ENOMEM);
9042f80b3c8SRobert Olsson 	}
90519baf839SRobert Olsson }
90619baf839SRobert Olsson 
907772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines
9082373ce1cSRobert Olsson  via get_fa_head and dump */
9092373ce1cSRobert Olsson 
910772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
91119baf839SRobert Olsson {
912772cb712SRobert Olsson 	struct hlist_head *head = &l->list;
91319baf839SRobert Olsson 	struct hlist_node *node;
91419baf839SRobert Olsson 	struct leaf_info *li;
91519baf839SRobert Olsson 
9162373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, head, hlist)
91719baf839SRobert Olsson 		if (li->plen == plen)
91819baf839SRobert Olsson 			return li;
91991b9a277SOlof Johansson 
92019baf839SRobert Olsson 	return NULL;
92119baf839SRobert Olsson }
92219baf839SRobert Olsson 
92319baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen)
92419baf839SRobert Olsson {
925772cb712SRobert Olsson 	struct leaf_info *li = find_leaf_info(l, plen);
92619baf839SRobert Olsson 
92791b9a277SOlof Johansson 	if (!li)
92891b9a277SOlof Johansson 		return NULL;
92919baf839SRobert Olsson 
93091b9a277SOlof Johansson 	return &li->falh;
93119baf839SRobert Olsson }
93219baf839SRobert Olsson 
93319baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
93419baf839SRobert Olsson {
93519baf839SRobert Olsson 	struct leaf_info *li = NULL, *last = NULL;
93691b9a277SOlof Johansson 	struct hlist_node *node;
93719baf839SRobert Olsson 
93891b9a277SOlof Johansson 	if (hlist_empty(head)) {
9392373ce1cSRobert Olsson 		hlist_add_head_rcu(&new->hlist, head);
94091b9a277SOlof Johansson 	} else {
94191b9a277SOlof Johansson 		hlist_for_each_entry(li, node, head, hlist) {
94219baf839SRobert Olsson 			if (new->plen > li->plen)
94319baf839SRobert Olsson 				break;
94419baf839SRobert Olsson 
94519baf839SRobert Olsson 			last = li;
94619baf839SRobert Olsson 		}
94719baf839SRobert Olsson 		if (last)
9482373ce1cSRobert Olsson 			hlist_add_after_rcu(&last->hlist, &new->hlist);
94919baf839SRobert Olsson 		else
9502373ce1cSRobert Olsson 			hlist_add_before_rcu(&new->hlist, &li->hlist);
95119baf839SRobert Olsson 	}
95219baf839SRobert Olsson }
95319baf839SRobert Olsson 
9542373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
9552373ce1cSRobert Olsson 
95619baf839SRobert Olsson static struct leaf *
95719baf839SRobert Olsson fib_find_node(struct trie *t, u32 key)
95819baf839SRobert Olsson {
95919baf839SRobert Olsson 	int pos;
96019baf839SRobert Olsson 	struct tnode *tn;
96119baf839SRobert Olsson 	struct node *n;
96219baf839SRobert Olsson 
96319baf839SRobert Olsson 	pos = 0;
964a034ee3cSEric Dumazet 	n = rcu_dereference_rtnl(t->trie);
96519baf839SRobert Olsson 
96619baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
96719baf839SRobert Olsson 		tn = (struct tnode *) n;
96819baf839SRobert Olsson 
96919baf839SRobert Olsson 		check_tnode(tn);
97019baf839SRobert Olsson 
97119baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
97219baf839SRobert Olsson 			pos = tn->pos + tn->bits;
973a07f5f50SStephen Hemminger 			n = tnode_get_child_rcu(tn,
974a07f5f50SStephen Hemminger 						tkey_extract_bits(key,
975a07f5f50SStephen Hemminger 								  tn->pos,
976a07f5f50SStephen Hemminger 								  tn->bits));
97791b9a277SOlof Johansson 		} else
97819baf839SRobert Olsson 			break;
97919baf839SRobert Olsson 	}
98019baf839SRobert Olsson 	/* Case we have found a leaf. Compare prefixes */
98119baf839SRobert Olsson 
98291b9a277SOlof Johansson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
98391b9a277SOlof Johansson 		return (struct leaf *)n;
98491b9a277SOlof Johansson 
98519baf839SRobert Olsson 	return NULL;
98619baf839SRobert Olsson }
98719baf839SRobert Olsson 
9887b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn)
98919baf839SRobert Olsson {
99019baf839SRobert Olsson 	int wasfull;
9913ed18d76SRobert Olsson 	t_key cindex, key;
99206801916SStephen Hemminger 	struct tnode *tp;
99319baf839SRobert Olsson 
9943ed18d76SRobert Olsson 	key = tn->key;
9953ed18d76SRobert Olsson 
99606801916SStephen Hemminger 	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
99719baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
99819baf839SRobert Olsson 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
99919baf839SRobert Olsson 		tn = (struct tnode *) resize(t, (struct tnode *)tn);
1000a07f5f50SStephen Hemminger 
1001a07f5f50SStephen Hemminger 		tnode_put_child_reorg((struct tnode *)tp, cindex,
1002a07f5f50SStephen Hemminger 				      (struct node *)tn, wasfull);
100319baf839SRobert Olsson 
100406801916SStephen Hemminger 		tp = node_parent((struct node *) tn);
1005008440e3SJarek Poplawski 		if (!tp)
1006008440e3SJarek Poplawski 			rcu_assign_pointer(t->trie, (struct node *)tn);
1007008440e3SJarek Poplawski 
1008e0f7cb8cSJarek Poplawski 		tnode_free_flush();
100906801916SStephen Hemminger 		if (!tp)
101019baf839SRobert Olsson 			break;
101106801916SStephen Hemminger 		tn = tp;
101219baf839SRobert Olsson 	}
101306801916SStephen Hemminger 
101419baf839SRobert Olsson 	/* Handle last (top) tnode */
10157b85576dSJarek Poplawski 	if (IS_TNODE(tn))
101619baf839SRobert Olsson 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
101719baf839SRobert Olsson 
10187b85576dSJarek Poplawski 	rcu_assign_pointer(t->trie, (struct node *)tn);
10197b85576dSJarek Poplawski 	tnode_free_flush();
102019baf839SRobert Olsson }
102119baf839SRobert Olsson 
10222373ce1cSRobert Olsson /* only used from updater-side */
10232373ce1cSRobert Olsson 
1024fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
102519baf839SRobert Olsson {
102619baf839SRobert Olsson 	int pos, newpos;
102719baf839SRobert Olsson 	struct tnode *tp = NULL, *tn = NULL;
102819baf839SRobert Olsson 	struct node *n;
102919baf839SRobert Olsson 	struct leaf *l;
103019baf839SRobert Olsson 	int missbit;
103119baf839SRobert Olsson 	struct list_head *fa_head = NULL;
103219baf839SRobert Olsson 	struct leaf_info *li;
103319baf839SRobert Olsson 	t_key cindex;
103419baf839SRobert Olsson 
103519baf839SRobert Olsson 	pos = 0;
103619baf839SRobert Olsson 	n = t->trie;
103719baf839SRobert Olsson 
103819baf839SRobert Olsson 	/* If we point to NULL, stop. Either the tree is empty and we should
103919baf839SRobert Olsson 	 * just put a new leaf in if, or we have reached an empty child slot,
104019baf839SRobert Olsson 	 * and we should just put our new leaf in that.
104119baf839SRobert Olsson 	 * If we point to a T_TNODE, check if it matches our key. Note that
104219baf839SRobert Olsson 	 * a T_TNODE might be skipping any number of bits - its 'pos' need
104319baf839SRobert Olsson 	 * not be the parent's 'pos'+'bits'!
104419baf839SRobert Olsson 	 *
104519baf839SRobert Olsson 	 * If it does match the current key, get pos/bits from it, extract
104619baf839SRobert Olsson 	 * the index from our key, push the T_TNODE and walk the tree.
104719baf839SRobert Olsson 	 *
104819baf839SRobert Olsson 	 * If it doesn't, we have to replace it with a new T_TNODE.
104919baf839SRobert Olsson 	 *
105019baf839SRobert Olsson 	 * If we point to a T_LEAF, it might or might not have the same key
105119baf839SRobert Olsson 	 * as we do. If it does, just change the value, update the T_LEAF's
105219baf839SRobert Olsson 	 * value, and return it.
105319baf839SRobert Olsson 	 * If it doesn't, we need to replace it with a T_TNODE.
105419baf839SRobert Olsson 	 */
105519baf839SRobert Olsson 
105619baf839SRobert Olsson 	while (n != NULL &&  NODE_TYPE(n) == T_TNODE) {
105719baf839SRobert Olsson 		tn = (struct tnode *) n;
105819baf839SRobert Olsson 
105919baf839SRobert Olsson 		check_tnode(tn);
106019baf839SRobert Olsson 
106119baf839SRobert Olsson 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
106219baf839SRobert Olsson 			tp = tn;
106319baf839SRobert Olsson 			pos = tn->pos + tn->bits;
1064a07f5f50SStephen Hemminger 			n = tnode_get_child(tn,
1065a07f5f50SStephen Hemminger 					    tkey_extract_bits(key,
1066a07f5f50SStephen Hemminger 							      tn->pos,
1067a07f5f50SStephen Hemminger 							      tn->bits));
106819baf839SRobert Olsson 
106906801916SStephen Hemminger 			BUG_ON(n && node_parent(n) != tn);
107091b9a277SOlof Johansson 		} else
107119baf839SRobert Olsson 			break;
107219baf839SRobert Olsson 	}
107319baf839SRobert Olsson 
107419baf839SRobert Olsson 	/*
107519baf839SRobert Olsson 	 * n  ----> NULL, LEAF or TNODE
107619baf839SRobert Olsson 	 *
107719baf839SRobert Olsson 	 * tp is n's (parent) ----> NULL or TNODE
107819baf839SRobert Olsson 	 */
107919baf839SRobert Olsson 
108091b9a277SOlof Johansson 	BUG_ON(tp && IS_LEAF(tp));
108119baf839SRobert Olsson 
108219baf839SRobert Olsson 	/* Case 1: n is a leaf. Compare prefixes */
108319baf839SRobert Olsson 
108419baf839SRobert Olsson 	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1085c95aaf9aSStephen Hemminger 		l = (struct leaf *) n;
108619baf839SRobert Olsson 		li = leaf_info_new(plen);
108719baf839SRobert Olsson 
1088fea86ad8SStephen Hemminger 		if (!li)
1089fea86ad8SStephen Hemminger 			return NULL;
109019baf839SRobert Olsson 
109119baf839SRobert Olsson 		fa_head = &li->falh;
109219baf839SRobert Olsson 		insert_leaf_info(&l->list, li);
109319baf839SRobert Olsson 		goto done;
109419baf839SRobert Olsson 	}
109519baf839SRobert Olsson 	l = leaf_new();
109619baf839SRobert Olsson 
1097fea86ad8SStephen Hemminger 	if (!l)
1098fea86ad8SStephen Hemminger 		return NULL;
109919baf839SRobert Olsson 
110019baf839SRobert Olsson 	l->key = key;
110119baf839SRobert Olsson 	li = leaf_info_new(plen);
110219baf839SRobert Olsson 
1103f835e471SRobert Olsson 	if (!li) {
1104387a5487SStephen Hemminger 		free_leaf(l);
1105fea86ad8SStephen Hemminger 		return NULL;
1106f835e471SRobert Olsson 	}
110719baf839SRobert Olsson 
110819baf839SRobert Olsson 	fa_head = &li->falh;
110919baf839SRobert Olsson 	insert_leaf_info(&l->list, li);
111019baf839SRobert Olsson 
111119baf839SRobert Olsson 	if (t->trie && n == NULL) {
111291b9a277SOlof Johansson 		/* Case 2: n is NULL, and will just insert a new leaf */
111319baf839SRobert Olsson 
111406801916SStephen Hemminger 		node_set_parent((struct node *)l, tp);
111519baf839SRobert Olsson 
111619baf839SRobert Olsson 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
111719baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
111891b9a277SOlof Johansson 	} else {
111919baf839SRobert Olsson 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
112019baf839SRobert Olsson 		/*
112119baf839SRobert Olsson 		 *  Add a new tnode here
112219baf839SRobert Olsson 		 *  first tnode need some special handling
112319baf839SRobert Olsson 		 */
112419baf839SRobert Olsson 
112519baf839SRobert Olsson 		if (tp)
112619baf839SRobert Olsson 			pos = tp->pos+tp->bits;
112719baf839SRobert Olsson 		else
112819baf839SRobert Olsson 			pos = 0;
112991b9a277SOlof Johansson 
113019baf839SRobert Olsson 		if (n) {
113119baf839SRobert Olsson 			newpos = tkey_mismatch(key, pos, n->key);
113219baf839SRobert Olsson 			tn = tnode_new(n->key, newpos, 1);
113391b9a277SOlof Johansson 		} else {
113419baf839SRobert Olsson 			newpos = 0;
113519baf839SRobert Olsson 			tn = tnode_new(key, newpos, 1); /* First tnode */
113619baf839SRobert Olsson 		}
1137f835e471SRobert Olsson 
1138f835e471SRobert Olsson 		if (!tn) {
1139f835e471SRobert Olsson 			free_leaf_info(li);
1140387a5487SStephen Hemminger 			free_leaf(l);
1141fea86ad8SStephen Hemminger 			return NULL;
1142f835e471SRobert Olsson 		}
114319baf839SRobert Olsson 
114406801916SStephen Hemminger 		node_set_parent((struct node *)tn, tp);
114519baf839SRobert Olsson 
114619baf839SRobert Olsson 		missbit = tkey_extract_bits(key, newpos, 1);
114719baf839SRobert Olsson 		put_child(t, tn, missbit, (struct node *)l);
114819baf839SRobert Olsson 		put_child(t, tn, 1-missbit, n);
114919baf839SRobert Olsson 
115019baf839SRobert Olsson 		if (tp) {
115119baf839SRobert Olsson 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1152a07f5f50SStephen Hemminger 			put_child(t, (struct tnode *)tp, cindex,
1153a07f5f50SStephen Hemminger 				  (struct node *)tn);
115491b9a277SOlof Johansson 		} else {
1155a07f5f50SStephen Hemminger 			rcu_assign_pointer(t->trie, (struct node *)tn);
115619baf839SRobert Olsson 			tp = tn;
115719baf839SRobert Olsson 		}
115819baf839SRobert Olsson 	}
115991b9a277SOlof Johansson 
116091b9a277SOlof Johansson 	if (tp && tp->pos + tp->bits > 32)
1161a07f5f50SStephen Hemminger 		pr_warning("fib_trie"
1162a07f5f50SStephen Hemminger 			   " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
116319baf839SRobert Olsson 			   tp, tp->pos, tp->bits, key, plen);
116491b9a277SOlof Johansson 
116519baf839SRobert Olsson 	/* Rebalance the trie */
11662373ce1cSRobert Olsson 
11677b85576dSJarek Poplawski 	trie_rebalance(t, tp);
1168f835e471SRobert Olsson done:
116919baf839SRobert Olsson 	return fa_head;
117019baf839SRobert Olsson }
117119baf839SRobert Olsson 
1172d562f1f8SRobert Olsson /*
1173d562f1f8SRobert Olsson  * Caller must hold RTNL.
1174d562f1f8SRobert Olsson  */
117516c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
117619baf839SRobert Olsson {
117719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
117819baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
117919baf839SRobert Olsson 	struct list_head *fa_head = NULL;
118019baf839SRobert Olsson 	struct fib_info *fi;
11814e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
11824e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
118319baf839SRobert Olsson 	u32 key, mask;
118419baf839SRobert Olsson 	int err;
118519baf839SRobert Olsson 	struct leaf *l;
118619baf839SRobert Olsson 
118719baf839SRobert Olsson 	if (plen > 32)
118819baf839SRobert Olsson 		return -EINVAL;
118919baf839SRobert Olsson 
11904e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
119119baf839SRobert Olsson 
11922dfe55b4SPatrick McHardy 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
119319baf839SRobert Olsson 
119419baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
119519baf839SRobert Olsson 
119619baf839SRobert Olsson 	if (key & ~mask)
119719baf839SRobert Olsson 		return -EINVAL;
119819baf839SRobert Olsson 
119919baf839SRobert Olsson 	key = key & mask;
120019baf839SRobert Olsson 
12014e902c57SThomas Graf 	fi = fib_create_info(cfg);
12024e902c57SThomas Graf 	if (IS_ERR(fi)) {
12034e902c57SThomas Graf 		err = PTR_ERR(fi);
120419baf839SRobert Olsson 		goto err;
12054e902c57SThomas Graf 	}
120619baf839SRobert Olsson 
120719baf839SRobert Olsson 	l = fib_find_node(t, key);
120819baf839SRobert Olsson 	fa = NULL;
120919baf839SRobert Olsson 
121019baf839SRobert Olsson 	if (l) {
121119baf839SRobert Olsson 		fa_head = get_fa_head(l, plen);
121219baf839SRobert Olsson 		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
121319baf839SRobert Olsson 	}
121419baf839SRobert Olsson 
121519baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
121619baf839SRobert Olsson 	 * with the same keys [prefix,tos,priority], if such key already
121719baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
121819baf839SRobert Olsson 	 *
121919baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
122019baf839SRobert Olsson 	 * insert to the head of f.
122119baf839SRobert Olsson 	 *
122219baf839SRobert Olsson 	 * If f is NULL, no fib node matched the destination key
122319baf839SRobert Olsson 	 * and we need to allocate a new one of those as well.
122419baf839SRobert Olsson 	 */
122519baf839SRobert Olsson 
1226936f6f8eSJulian Anastasov 	if (fa && fa->fa_tos == tos &&
1227936f6f8eSJulian Anastasov 	    fa->fa_info->fib_priority == fi->fib_priority) {
1228936f6f8eSJulian Anastasov 		struct fib_alias *fa_first, *fa_match;
122919baf839SRobert Olsson 
123019baf839SRobert Olsson 		err = -EEXIST;
12314e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
123219baf839SRobert Olsson 			goto out;
123319baf839SRobert Olsson 
1234936f6f8eSJulian Anastasov 		/* We have 2 goals:
1235936f6f8eSJulian Anastasov 		 * 1. Find exact match for type, scope, fib_info to avoid
1236936f6f8eSJulian Anastasov 		 * duplicate routes
1237936f6f8eSJulian Anastasov 		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1238936f6f8eSJulian Anastasov 		 */
1239936f6f8eSJulian Anastasov 		fa_match = NULL;
1240936f6f8eSJulian Anastasov 		fa_first = fa;
1241936f6f8eSJulian Anastasov 		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1242936f6f8eSJulian Anastasov 		list_for_each_entry_continue(fa, fa_head, fa_list) {
1243936f6f8eSJulian Anastasov 			if (fa->fa_tos != tos)
1244936f6f8eSJulian Anastasov 				break;
1245936f6f8eSJulian Anastasov 			if (fa->fa_info->fib_priority != fi->fib_priority)
1246936f6f8eSJulian Anastasov 				break;
1247936f6f8eSJulian Anastasov 			if (fa->fa_type == cfg->fc_type &&
1248936f6f8eSJulian Anastasov 			    fa->fa_scope == cfg->fc_scope &&
1249936f6f8eSJulian Anastasov 			    fa->fa_info == fi) {
1250936f6f8eSJulian Anastasov 				fa_match = fa;
1251936f6f8eSJulian Anastasov 				break;
1252936f6f8eSJulian Anastasov 			}
1253936f6f8eSJulian Anastasov 		}
1254936f6f8eSJulian Anastasov 
12554e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
125619baf839SRobert Olsson 			struct fib_info *fi_drop;
125719baf839SRobert Olsson 			u8 state;
125819baf839SRobert Olsson 
1259936f6f8eSJulian Anastasov 			fa = fa_first;
1260936f6f8eSJulian Anastasov 			if (fa_match) {
1261936f6f8eSJulian Anastasov 				if (fa == fa_match)
1262936f6f8eSJulian Anastasov 					err = 0;
12636725033fSJoonwoo Park 				goto out;
1264936f6f8eSJulian Anastasov 			}
12652373ce1cSRobert Olsson 			err = -ENOBUFS;
1266e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
12672373ce1cSRobert Olsson 			if (new_fa == NULL)
12682373ce1cSRobert Olsson 				goto out;
126919baf839SRobert Olsson 
127019baf839SRobert Olsson 			fi_drop = fa->fa_info;
12712373ce1cSRobert Olsson 			new_fa->fa_tos = fa->fa_tos;
12722373ce1cSRobert Olsson 			new_fa->fa_info = fi;
12734e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
12744e902c57SThomas Graf 			new_fa->fa_scope = cfg->fc_scope;
127519baf839SRobert Olsson 			state = fa->fa_state;
1276936f6f8eSJulian Anastasov 			new_fa->fa_state = state & ~FA_S_ACCESSED;
127719baf839SRobert Olsson 
12782373ce1cSRobert Olsson 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12792373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
128019baf839SRobert Olsson 
128119baf839SRobert Olsson 			fib_release_info(fi_drop);
128219baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
128376e6ebfbSDenis V. Lunev 				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
1284b8f55831SMilan Kocian 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1285b8f55831SMilan Kocian 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
128619baf839SRobert Olsson 
128719baf839SRobert Olsson 			goto succeeded;
128819baf839SRobert Olsson 		}
128919baf839SRobert Olsson 		/* Error if we find a perfect match which
129019baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
129119baf839SRobert Olsson 		 * information.
129219baf839SRobert Olsson 		 */
1293936f6f8eSJulian Anastasov 		if (fa_match)
129419baf839SRobert Olsson 			goto out;
1295a07f5f50SStephen Hemminger 
12964e902c57SThomas Graf 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
1297936f6f8eSJulian Anastasov 			fa = fa_first;
129819baf839SRobert Olsson 	}
129919baf839SRobert Olsson 	err = -ENOENT;
13004e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
130119baf839SRobert Olsson 		goto out;
130219baf839SRobert Olsson 
130319baf839SRobert Olsson 	err = -ENOBUFS;
1304e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
130519baf839SRobert Olsson 	if (new_fa == NULL)
130619baf839SRobert Olsson 		goto out;
130719baf839SRobert Olsson 
130819baf839SRobert Olsson 	new_fa->fa_info = fi;
130919baf839SRobert Olsson 	new_fa->fa_tos = tos;
13104e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
13114e902c57SThomas Graf 	new_fa->fa_scope = cfg->fc_scope;
131219baf839SRobert Olsson 	new_fa->fa_state = 0;
131319baf839SRobert Olsson 	/*
131419baf839SRobert Olsson 	 * Insert new entry to the list.
131519baf839SRobert Olsson 	 */
131619baf839SRobert Olsson 
1317f835e471SRobert Olsson 	if (!fa_head) {
1318fea86ad8SStephen Hemminger 		fa_head = fib_insert_node(t, key, plen);
1319fea86ad8SStephen Hemminger 		if (unlikely(!fa_head)) {
1320fea86ad8SStephen Hemminger 			err = -ENOMEM;
1321f835e471SRobert Olsson 			goto out_free_new_fa;
1322f835e471SRobert Olsson 		}
1323fea86ad8SStephen Hemminger 	}
132419baf839SRobert Olsson 
13252373ce1cSRobert Olsson 	list_add_tail_rcu(&new_fa->fa_list,
13262373ce1cSRobert Olsson 			  (fa ? &fa->fa_list : fa_head));
132719baf839SRobert Olsson 
132876e6ebfbSDenis V. Lunev 	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
13294e902c57SThomas Graf 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1330b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
133119baf839SRobert Olsson succeeded:
133219baf839SRobert Olsson 	return 0;
1333f835e471SRobert Olsson 
1334f835e471SRobert Olsson out_free_new_fa:
1335f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
133619baf839SRobert Olsson out:
133719baf839SRobert Olsson 	fib_release_info(fi);
133891b9a277SOlof Johansson err:
133919baf839SRobert Olsson 	return err;
134019baf839SRobert Olsson }
134119baf839SRobert Olsson 
1342772cb712SRobert Olsson /* should be called with rcu_read_lock */
13435b470441SDavid S. Miller static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1344a07f5f50SStephen Hemminger 		      t_key key,  const struct flowi *flp,
1345ebc0ffaeSEric Dumazet 		      struct fib_result *res, int fib_flags)
134619baf839SRobert Olsson {
134719baf839SRobert Olsson 	struct leaf_info *li;
134819baf839SRobert Olsson 	struct hlist_head *hhead = &l->list;
134919baf839SRobert Olsson 	struct hlist_node *node;
135019baf839SRobert Olsson 
13512373ce1cSRobert Olsson 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1352a07f5f50SStephen Hemminger 		int err;
1353a07f5f50SStephen Hemminger 		int plen = li->plen;
1354a07f5f50SStephen Hemminger 		__be32 mask = inet_make_mask(plen);
1355a07f5f50SStephen Hemminger 
1356888454c5SAl Viro 		if (l->key != (key & ntohl(mask)))
135719baf839SRobert Olsson 			continue;
135819baf839SRobert Olsson 
13595b470441SDavid S. Miller 		err = fib_semantic_match(tb, &li->falh, flp, res, plen, fib_flags);
1360a07f5f50SStephen Hemminger 
136119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
1362a07f5f50SStephen Hemminger 		if (err <= 0)
136319baf839SRobert Olsson 			t->stats.semantic_match_passed++;
1364a07f5f50SStephen Hemminger 		else
136519baf839SRobert Olsson 			t->stats.semantic_match_miss++;
136619baf839SRobert Olsson #endif
1367a07f5f50SStephen Hemminger 		if (err <= 0)
13682e655571SBen Hutchings 			return err;
136919baf839SRobert Olsson 	}
137019baf839SRobert Olsson 
13712e655571SBen Hutchings 	return 1;
1372a07f5f50SStephen Hemminger }
1373a07f5f50SStephen Hemminger 
137416c6cf8bSStephen Hemminger int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1375ebc0ffaeSEric Dumazet 		     struct fib_result *res, int fib_flags)
137619baf839SRobert Olsson {
137719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
13782e655571SBen Hutchings 	int ret;
137919baf839SRobert Olsson 	struct node *n;
138019baf839SRobert Olsson 	struct tnode *pn;
138119baf839SRobert Olsson 	int pos, bits;
138219baf839SRobert Olsson 	t_key key = ntohl(flp->fl4_dst);
138319baf839SRobert Olsson 	int chopped_off;
138419baf839SRobert Olsson 	t_key cindex = 0;
138519baf839SRobert Olsson 	int current_prefix_length = KEYLENGTH;
138691b9a277SOlof Johansson 	struct tnode *cn;
1387874ffa8fSEric Dumazet 	t_key pref_mismatch;
138891b9a277SOlof Johansson 
13892373ce1cSRobert Olsson 	rcu_read_lock();
139019baf839SRobert Olsson 
13912373ce1cSRobert Olsson 	n = rcu_dereference(t->trie);
139219baf839SRobert Olsson 	if (!n)
139319baf839SRobert Olsson 		goto failed;
139419baf839SRobert Olsson 
139519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
139619baf839SRobert Olsson 	t->stats.gets++;
139719baf839SRobert Olsson #endif
139819baf839SRobert Olsson 
139919baf839SRobert Olsson 	/* Just a leaf? */
140019baf839SRobert Olsson 	if (IS_LEAF(n)) {
14015b470441SDavid S. Miller 		ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1402a07f5f50SStephen Hemminger 		goto found;
140319baf839SRobert Olsson 	}
1404a07f5f50SStephen Hemminger 
140519baf839SRobert Olsson 	pn = (struct tnode *) n;
140619baf839SRobert Olsson 	chopped_off = 0;
140719baf839SRobert Olsson 
140819baf839SRobert Olsson 	while (pn) {
140919baf839SRobert Olsson 		pos = pn->pos;
141019baf839SRobert Olsson 		bits = pn->bits;
141119baf839SRobert Olsson 
141219baf839SRobert Olsson 		if (!chopped_off)
1413ab66b4a7SStephen Hemminger 			cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1414ab66b4a7SStephen Hemminger 						   pos, bits);
141519baf839SRobert Olsson 
1416b902e573SJarek Poplawski 		n = tnode_get_child_rcu(pn, cindex);
141719baf839SRobert Olsson 
141819baf839SRobert Olsson 		if (n == NULL) {
141919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
142019baf839SRobert Olsson 			t->stats.null_node_hit++;
142119baf839SRobert Olsson #endif
142219baf839SRobert Olsson 			goto backtrace;
142319baf839SRobert Olsson 		}
142419baf839SRobert Olsson 
142591b9a277SOlof Johansson 		if (IS_LEAF(n)) {
14265b470441SDavid S. Miller 			ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
14272e655571SBen Hutchings 			if (ret > 0)
142891b9a277SOlof Johansson 				goto backtrace;
1429a07f5f50SStephen Hemminger 			goto found;
143091b9a277SOlof Johansson 		}
143191b9a277SOlof Johansson 
143291b9a277SOlof Johansson 		cn = (struct tnode *)n;
143319baf839SRobert Olsson 
143419baf839SRobert Olsson 		/*
143519baf839SRobert Olsson 		 * It's a tnode, and we can do some extra checks here if we
143619baf839SRobert Olsson 		 * like, to avoid descending into a dead-end branch.
143719baf839SRobert Olsson 		 * This tnode is in the parent's child array at index
143819baf839SRobert Olsson 		 * key[p_pos..p_pos+p_bits] but potentially with some bits
143919baf839SRobert Olsson 		 * chopped off, so in reality the index may be just a
144019baf839SRobert Olsson 		 * subprefix, padded with zero at the end.
144119baf839SRobert Olsson 		 * We can also take a look at any skipped bits in this
144219baf839SRobert Olsson 		 * tnode - everything up to p_pos is supposed to be ok,
144319baf839SRobert Olsson 		 * and the non-chopped bits of the index (se previous
144419baf839SRobert Olsson 		 * paragraph) are also guaranteed ok, but the rest is
144519baf839SRobert Olsson 		 * considered unknown.
144619baf839SRobert Olsson 		 *
144719baf839SRobert Olsson 		 * The skipped bits are key[pos+bits..cn->pos].
144819baf839SRobert Olsson 		 */
144919baf839SRobert Olsson 
145019baf839SRobert Olsson 		/* If current_prefix_length < pos+bits, we are already doing
145119baf839SRobert Olsson 		 * actual prefix  matching, which means everything from
145219baf839SRobert Olsson 		 * pos+(bits-chopped_off) onward must be zero along some
145319baf839SRobert Olsson 		 * branch of this subtree - otherwise there is *no* valid
145419baf839SRobert Olsson 		 * prefix present. Here we can only check the skipped
145519baf839SRobert Olsson 		 * bits. Remember, since we have already indexed into the
145619baf839SRobert Olsson 		 * parent's child array, we know that the bits we chopped of
145719baf839SRobert Olsson 		 * *are* zero.
145819baf839SRobert Olsson 		 */
145919baf839SRobert Olsson 
1460a07f5f50SStephen Hemminger 		/* NOTA BENE: Checking only skipped bits
1461a07f5f50SStephen Hemminger 		   for the new node here */
146219baf839SRobert Olsson 
146319baf839SRobert Olsson 		if (current_prefix_length < pos+bits) {
146419baf839SRobert Olsson 			if (tkey_extract_bits(cn->key, current_prefix_length,
1465a07f5f50SStephen Hemminger 						cn->pos - current_prefix_length)
1466a07f5f50SStephen Hemminger 			    || !(cn->child[0]))
146719baf839SRobert Olsson 				goto backtrace;
146819baf839SRobert Olsson 		}
146919baf839SRobert Olsson 
147019baf839SRobert Olsson 		/*
147119baf839SRobert Olsson 		 * If chopped_off=0, the index is fully validated and we
147219baf839SRobert Olsson 		 * only need to look at the skipped bits for this, the new,
147319baf839SRobert Olsson 		 * tnode. What we actually want to do is to find out if
147419baf839SRobert Olsson 		 * these skipped bits match our key perfectly, or if we will
147519baf839SRobert Olsson 		 * have to count on finding a matching prefix further down,
147619baf839SRobert Olsson 		 * because if we do, we would like to have some way of
147719baf839SRobert Olsson 		 * verifying the existence of such a prefix at this point.
147819baf839SRobert Olsson 		 */
147919baf839SRobert Olsson 
148019baf839SRobert Olsson 		/* The only thing we can do at this point is to verify that
148119baf839SRobert Olsson 		 * any such matching prefix can indeed be a prefix to our
148219baf839SRobert Olsson 		 * key, and if the bits in the node we are inspecting that
148319baf839SRobert Olsson 		 * do not match our key are not ZERO, this cannot be true.
148419baf839SRobert Olsson 		 * Thus, find out where there is a mismatch (before cn->pos)
148519baf839SRobert Olsson 		 * and verify that all the mismatching bits are zero in the
148619baf839SRobert Olsson 		 * new tnode's key.
148719baf839SRobert Olsson 		 */
148819baf839SRobert Olsson 
1489a07f5f50SStephen Hemminger 		/*
1490a07f5f50SStephen Hemminger 		 * Note: We aren't very concerned about the piece of
1491a07f5f50SStephen Hemminger 		 * the key that precede pn->pos+pn->bits, since these
1492a07f5f50SStephen Hemminger 		 * have already been checked. The bits after cn->pos
1493a07f5f50SStephen Hemminger 		 * aren't checked since these are by definition
1494a07f5f50SStephen Hemminger 		 * "unknown" at this point. Thus, what we want to see
1495a07f5f50SStephen Hemminger 		 * is if we are about to enter the "prefix matching"
1496a07f5f50SStephen Hemminger 		 * state, and in that case verify that the skipped
1497a07f5f50SStephen Hemminger 		 * bits that will prevail throughout this subtree are
1498a07f5f50SStephen Hemminger 		 * zero, as they have to be if we are to find a
1499a07f5f50SStephen Hemminger 		 * matching prefix.
150019baf839SRobert Olsson 		 */
150119baf839SRobert Olsson 
1502874ffa8fSEric Dumazet 		pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
150319baf839SRobert Olsson 
1504a07f5f50SStephen Hemminger 		/*
1505a07f5f50SStephen Hemminger 		 * In short: If skipped bits in this node do not match
1506a07f5f50SStephen Hemminger 		 * the search key, enter the "prefix matching"
1507a07f5f50SStephen Hemminger 		 * state.directly.
150819baf839SRobert Olsson 		 */
150919baf839SRobert Olsson 		if (pref_mismatch) {
1510874ffa8fSEric Dumazet 			int mp = KEYLENGTH - fls(pref_mismatch);
151119baf839SRobert Olsson 
1512874ffa8fSEric Dumazet 			if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
151319baf839SRobert Olsson 				goto backtrace;
151419baf839SRobert Olsson 
151519baf839SRobert Olsson 			if (current_prefix_length >= cn->pos)
151619baf839SRobert Olsson 				current_prefix_length = mp;
151719baf839SRobert Olsson 		}
1518a07f5f50SStephen Hemminger 
151919baf839SRobert Olsson 		pn = (struct tnode *)n; /* Descend */
152019baf839SRobert Olsson 		chopped_off = 0;
152119baf839SRobert Olsson 		continue;
152291b9a277SOlof Johansson 
152319baf839SRobert Olsson backtrace:
152419baf839SRobert Olsson 		chopped_off++;
152519baf839SRobert Olsson 
152619baf839SRobert Olsson 		/* As zero don't change the child key (cindex) */
1527a07f5f50SStephen Hemminger 		while ((chopped_off <= pn->bits)
1528a07f5f50SStephen Hemminger 		       && !(cindex & (1<<(chopped_off-1))))
152919baf839SRobert Olsson 			chopped_off++;
153019baf839SRobert Olsson 
153119baf839SRobert Olsson 		/* Decrease current_... with bits chopped off */
153219baf839SRobert Olsson 		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1533a07f5f50SStephen Hemminger 			current_prefix_length = pn->pos + pn->bits
1534a07f5f50SStephen Hemminger 				- chopped_off;
153519baf839SRobert Olsson 
153619baf839SRobert Olsson 		/*
153719baf839SRobert Olsson 		 * Either we do the actual chop off according or if we have
153819baf839SRobert Olsson 		 * chopped off all bits in this tnode walk up to our parent.
153919baf839SRobert Olsson 		 */
154019baf839SRobert Olsson 
154191b9a277SOlof Johansson 		if (chopped_off <= pn->bits) {
154219baf839SRobert Olsson 			cindex &= ~(1 << (chopped_off-1));
154391b9a277SOlof Johansson 		} else {
1544b902e573SJarek Poplawski 			struct tnode *parent = node_parent_rcu((struct node *) pn);
154506801916SStephen Hemminger 			if (!parent)
154619baf839SRobert Olsson 				goto failed;
154719baf839SRobert Olsson 
154819baf839SRobert Olsson 			/* Get Child's index */
154906801916SStephen Hemminger 			cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
155006801916SStephen Hemminger 			pn = parent;
155119baf839SRobert Olsson 			chopped_off = 0;
155219baf839SRobert Olsson 
155319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
155419baf839SRobert Olsson 			t->stats.backtrack++;
155519baf839SRobert Olsson #endif
155619baf839SRobert Olsson 			goto backtrace;
155719baf839SRobert Olsson 		}
155819baf839SRobert Olsson 	}
155919baf839SRobert Olsson failed:
156019baf839SRobert Olsson 	ret = 1;
156119baf839SRobert Olsson found:
15622373ce1cSRobert Olsson 	rcu_read_unlock();
156319baf839SRobert Olsson 	return ret;
156419baf839SRobert Olsson }
156519baf839SRobert Olsson 
156619baf839SRobert Olsson /*
15679195bef7SStephen Hemminger  * Remove the leaf and return parent.
156819baf839SRobert Olsson  */
15699195bef7SStephen Hemminger static void trie_leaf_remove(struct trie *t, struct leaf *l)
15709195bef7SStephen Hemminger {
15719195bef7SStephen Hemminger 	struct tnode *tp = node_parent((struct node *) l);
15729195bef7SStephen Hemminger 
15739195bef7SStephen Hemminger 	pr_debug("entering trie_leaf_remove(%p)\n", l);
157419baf839SRobert Olsson 
157519baf839SRobert Olsson 	if (tp) {
15769195bef7SStephen Hemminger 		t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
157719baf839SRobert Olsson 		put_child(t, (struct tnode *)tp, cindex, NULL);
15787b85576dSJarek Poplawski 		trie_rebalance(t, tp);
157991b9a277SOlof Johansson 	} else
15802373ce1cSRobert Olsson 		rcu_assign_pointer(t->trie, NULL);
158119baf839SRobert Olsson 
1582387a5487SStephen Hemminger 	free_leaf(l);
158319baf839SRobert Olsson }
158419baf839SRobert Olsson 
1585d562f1f8SRobert Olsson /*
1586d562f1f8SRobert Olsson  * Caller must hold RTNL.
1587d562f1f8SRobert Olsson  */
158816c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
158919baf839SRobert Olsson {
159019baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
159119baf839SRobert Olsson 	u32 key, mask;
15924e902c57SThomas Graf 	int plen = cfg->fc_dst_len;
15934e902c57SThomas Graf 	u8 tos = cfg->fc_tos;
159419baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
159519baf839SRobert Olsson 	struct list_head *fa_head;
159619baf839SRobert Olsson 	struct leaf *l;
159791b9a277SOlof Johansson 	struct leaf_info *li;
159891b9a277SOlof Johansson 
159919baf839SRobert Olsson 	if (plen > 32)
160019baf839SRobert Olsson 		return -EINVAL;
160119baf839SRobert Olsson 
16024e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
160319baf839SRobert Olsson 	mask = ntohl(inet_make_mask(plen));
160419baf839SRobert Olsson 
160519baf839SRobert Olsson 	if (key & ~mask)
160619baf839SRobert Olsson 		return -EINVAL;
160719baf839SRobert Olsson 
160819baf839SRobert Olsson 	key = key & mask;
160919baf839SRobert Olsson 	l = fib_find_node(t, key);
161019baf839SRobert Olsson 
161119baf839SRobert Olsson 	if (!l)
161219baf839SRobert Olsson 		return -ESRCH;
161319baf839SRobert Olsson 
161419baf839SRobert Olsson 	fa_head = get_fa_head(l, plen);
161519baf839SRobert Olsson 	fa = fib_find_alias(fa_head, tos, 0);
161619baf839SRobert Olsson 
161719baf839SRobert Olsson 	if (!fa)
161819baf839SRobert Olsson 		return -ESRCH;
161919baf839SRobert Olsson 
16200c7770c7SStephen Hemminger 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
162119baf839SRobert Olsson 
162219baf839SRobert Olsson 	fa_to_delete = NULL;
1623936f6f8eSJulian Anastasov 	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1624936f6f8eSJulian Anastasov 	list_for_each_entry_continue(fa, fa_head, fa_list) {
162519baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
162619baf839SRobert Olsson 
162719baf839SRobert Olsson 		if (fa->fa_tos != tos)
162819baf839SRobert Olsson 			break;
162919baf839SRobert Olsson 
16304e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
16314e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
16324e902c57SThomas Graf 		     fa->fa_scope == cfg->fc_scope) &&
16334e902c57SThomas Graf 		    (!cfg->fc_protocol ||
16344e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
16354e902c57SThomas Graf 		    fib_nh_match(cfg, fi) == 0) {
163619baf839SRobert Olsson 			fa_to_delete = fa;
163719baf839SRobert Olsson 			break;
163819baf839SRobert Olsson 		}
163919baf839SRobert Olsson 	}
164019baf839SRobert Olsson 
164191b9a277SOlof Johansson 	if (!fa_to_delete)
164291b9a277SOlof Johansson 		return -ESRCH;
164319baf839SRobert Olsson 
164419baf839SRobert Olsson 	fa = fa_to_delete;
16454e902c57SThomas Graf 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1646b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
164719baf839SRobert Olsson 
164819baf839SRobert Olsson 	l = fib_find_node(t, key);
1649772cb712SRobert Olsson 	li = find_leaf_info(l, plen);
165019baf839SRobert Olsson 
16512373ce1cSRobert Olsson 	list_del_rcu(&fa->fa_list);
165219baf839SRobert Olsson 
165319baf839SRobert Olsson 	if (list_empty(fa_head)) {
16542373ce1cSRobert Olsson 		hlist_del_rcu(&li->hlist);
165519baf839SRobert Olsson 		free_leaf_info(li);
16562373ce1cSRobert Olsson 	}
165719baf839SRobert Olsson 
165819baf839SRobert Olsson 	if (hlist_empty(&l->list))
16599195bef7SStephen Hemminger 		trie_leaf_remove(t, l);
166019baf839SRobert Olsson 
166119baf839SRobert Olsson 	if (fa->fa_state & FA_S_ACCESSED)
166276e6ebfbSDenis V. Lunev 		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
166319baf839SRobert Olsson 
16642373ce1cSRobert Olsson 	fib_release_info(fa->fa_info);
16652373ce1cSRobert Olsson 	alias_free_mem_rcu(fa);
166619baf839SRobert Olsson 	return 0;
166719baf839SRobert Olsson }
166819baf839SRobert Olsson 
1669ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head)
167019baf839SRobert Olsson {
167119baf839SRobert Olsson 	struct fib_alias *fa, *fa_node;
167219baf839SRobert Olsson 	int found = 0;
167319baf839SRobert Olsson 
167419baf839SRobert Olsson 	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
167519baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
167619baf839SRobert Olsson 
167719baf839SRobert Olsson 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
16782373ce1cSRobert Olsson 			list_del_rcu(&fa->fa_list);
16792373ce1cSRobert Olsson 			fib_release_info(fa->fa_info);
16802373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
168119baf839SRobert Olsson 			found++;
168219baf839SRobert Olsson 		}
168319baf839SRobert Olsson 	}
168419baf839SRobert Olsson 	return found;
168519baf839SRobert Olsson }
168619baf839SRobert Olsson 
1687ef3660ceSStephen Hemminger static int trie_flush_leaf(struct leaf *l)
168819baf839SRobert Olsson {
168919baf839SRobert Olsson 	int found = 0;
169019baf839SRobert Olsson 	struct hlist_head *lih = &l->list;
169119baf839SRobert Olsson 	struct hlist_node *node, *tmp;
169219baf839SRobert Olsson 	struct leaf_info *li = NULL;
169319baf839SRobert Olsson 
169419baf839SRobert Olsson 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1695ef3660ceSStephen Hemminger 		found += trie_flush_list(&li->falh);
169619baf839SRobert Olsson 
169719baf839SRobert Olsson 		if (list_empty(&li->falh)) {
16982373ce1cSRobert Olsson 			hlist_del_rcu(&li->hlist);
169919baf839SRobert Olsson 			free_leaf_info(li);
170019baf839SRobert Olsson 		}
170119baf839SRobert Olsson 	}
170219baf839SRobert Olsson 	return found;
170319baf839SRobert Olsson }
170419baf839SRobert Olsson 
170582cfbb00SStephen Hemminger /*
170682cfbb00SStephen Hemminger  * Scan for the next right leaf starting at node p->child[idx]
170782cfbb00SStephen Hemminger  * Since we have back pointer, no recursion necessary.
170882cfbb00SStephen Hemminger  */
170982cfbb00SStephen Hemminger static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
171019baf839SRobert Olsson {
171182cfbb00SStephen Hemminger 	do {
171282cfbb00SStephen Hemminger 		t_key idx;
171319baf839SRobert Olsson 
171419baf839SRobert Olsson 		if (c)
171582cfbb00SStephen Hemminger 			idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
171619baf839SRobert Olsson 		else
171782cfbb00SStephen Hemminger 			idx = 0;
171819baf839SRobert Olsson 
171982cfbb00SStephen Hemminger 		while (idx < 1u << p->bits) {
172082cfbb00SStephen Hemminger 			c = tnode_get_child_rcu(p, idx++);
17212373ce1cSRobert Olsson 			if (!c)
172291b9a277SOlof Johansson 				continue;
172319baf839SRobert Olsson 
172482cfbb00SStephen Hemminger 			if (IS_LEAF(c)) {
172582cfbb00SStephen Hemminger 				prefetch(p->child[idx]);
17262373ce1cSRobert Olsson 				return (struct leaf *) c;
172719baf839SRobert Olsson 			}
172882cfbb00SStephen Hemminger 
172982cfbb00SStephen Hemminger 			/* Rescan start scanning in new node */
173082cfbb00SStephen Hemminger 			p = (struct tnode *) c;
173182cfbb00SStephen Hemminger 			idx = 0;
173219baf839SRobert Olsson 		}
173382cfbb00SStephen Hemminger 
173482cfbb00SStephen Hemminger 		/* Node empty, walk back up to parent */
173582cfbb00SStephen Hemminger 		c = (struct node *) p;
173682cfbb00SStephen Hemminger 	} while ((p = node_parent_rcu(c)) != NULL);
173782cfbb00SStephen Hemminger 
173882cfbb00SStephen Hemminger 	return NULL; /* Root of trie */
173982cfbb00SStephen Hemminger }
174082cfbb00SStephen Hemminger 
174182cfbb00SStephen Hemminger static struct leaf *trie_firstleaf(struct trie *t)
174282cfbb00SStephen Hemminger {
1743a034ee3cSEric Dumazet 	struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
174482cfbb00SStephen Hemminger 
174582cfbb00SStephen Hemminger 	if (!n)
174682cfbb00SStephen Hemminger 		return NULL;
174782cfbb00SStephen Hemminger 
174882cfbb00SStephen Hemminger 	if (IS_LEAF(n))          /* trie is just a leaf */
174982cfbb00SStephen Hemminger 		return (struct leaf *) n;
175082cfbb00SStephen Hemminger 
175182cfbb00SStephen Hemminger 	return leaf_walk_rcu(n, NULL);
175282cfbb00SStephen Hemminger }
175382cfbb00SStephen Hemminger 
175482cfbb00SStephen Hemminger static struct leaf *trie_nextleaf(struct leaf *l)
175582cfbb00SStephen Hemminger {
175682cfbb00SStephen Hemminger 	struct node *c = (struct node *) l;
1757b902e573SJarek Poplawski 	struct tnode *p = node_parent_rcu(c);
175882cfbb00SStephen Hemminger 
175982cfbb00SStephen Hemminger 	if (!p)
176082cfbb00SStephen Hemminger 		return NULL;	/* trie with just one leaf */
176182cfbb00SStephen Hemminger 
176282cfbb00SStephen Hemminger 	return leaf_walk_rcu(p, c);
176319baf839SRobert Olsson }
176419baf839SRobert Olsson 
176571d67e66SStephen Hemminger static struct leaf *trie_leafindex(struct trie *t, int index)
176671d67e66SStephen Hemminger {
176771d67e66SStephen Hemminger 	struct leaf *l = trie_firstleaf(t);
176871d67e66SStephen Hemminger 
1769ec28cf73SStephen Hemminger 	while (l && index-- > 0)
177071d67e66SStephen Hemminger 		l = trie_nextleaf(l);
1771ec28cf73SStephen Hemminger 
177271d67e66SStephen Hemminger 	return l;
177371d67e66SStephen Hemminger }
177471d67e66SStephen Hemminger 
177571d67e66SStephen Hemminger 
1776d562f1f8SRobert Olsson /*
1777d562f1f8SRobert Olsson  * Caller must hold RTNL.
1778d562f1f8SRobert Olsson  */
177916c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb)
178019baf839SRobert Olsson {
178119baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
17829195bef7SStephen Hemminger 	struct leaf *l, *ll = NULL;
178382cfbb00SStephen Hemminger 	int found = 0;
178419baf839SRobert Olsson 
178582cfbb00SStephen Hemminger 	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1786ef3660ceSStephen Hemminger 		found += trie_flush_leaf(l);
178719baf839SRobert Olsson 
178819baf839SRobert Olsson 		if (ll && hlist_empty(&ll->list))
17899195bef7SStephen Hemminger 			trie_leaf_remove(t, ll);
179019baf839SRobert Olsson 		ll = l;
179119baf839SRobert Olsson 	}
179219baf839SRobert Olsson 
179319baf839SRobert Olsson 	if (ll && hlist_empty(&ll->list))
17949195bef7SStephen Hemminger 		trie_leaf_remove(t, ll);
179519baf839SRobert Olsson 
17960c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
179719baf839SRobert Olsson 	return found;
179819baf839SRobert Olsson }
179919baf839SRobert Olsson 
18004aa2c466SPavel Emelyanov void fib_free_table(struct fib_table *tb)
18014aa2c466SPavel Emelyanov {
18024aa2c466SPavel Emelyanov 	kfree(tb);
18034aa2c466SPavel Emelyanov }
18044aa2c466SPavel Emelyanov 
180516c6cf8bSStephen Hemminger void fib_table_select_default(struct fib_table *tb,
1806a07f5f50SStephen Hemminger 			      const struct flowi *flp,
1807a07f5f50SStephen Hemminger 			      struct fib_result *res)
180819baf839SRobert Olsson {
180919baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
181019baf839SRobert Olsson 	int order, last_idx;
181119baf839SRobert Olsson 	struct fib_info *fi = NULL;
181219baf839SRobert Olsson 	struct fib_info *last_resort;
181319baf839SRobert Olsson 	struct fib_alias *fa = NULL;
181419baf839SRobert Olsson 	struct list_head *fa_head;
181519baf839SRobert Olsson 	struct leaf *l;
181619baf839SRobert Olsson 
181719baf839SRobert Olsson 	last_idx = -1;
181819baf839SRobert Olsson 	last_resort = NULL;
181919baf839SRobert Olsson 	order = -1;
182019baf839SRobert Olsson 
18212373ce1cSRobert Olsson 	rcu_read_lock();
182219baf839SRobert Olsson 
182319baf839SRobert Olsson 	l = fib_find_node(t, 0);
182419baf839SRobert Olsson 	if (!l)
182519baf839SRobert Olsson 		goto out;
182619baf839SRobert Olsson 
182719baf839SRobert Olsson 	fa_head = get_fa_head(l, 0);
182819baf839SRobert Olsson 	if (!fa_head)
182919baf839SRobert Olsson 		goto out;
183019baf839SRobert Olsson 
183119baf839SRobert Olsson 	if (list_empty(fa_head))
183219baf839SRobert Olsson 		goto out;
183319baf839SRobert Olsson 
18342373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
183519baf839SRobert Olsson 		struct fib_info *next_fi = fa->fa_info;
183619baf839SRobert Olsson 
183719baf839SRobert Olsson 		if (fa->fa_scope != res->scope ||
183819baf839SRobert Olsson 		    fa->fa_type != RTN_UNICAST)
183919baf839SRobert Olsson 			continue;
184019baf839SRobert Olsson 
184119baf839SRobert Olsson 		if (next_fi->fib_priority > res->fi->fib_priority)
184219baf839SRobert Olsson 			break;
184319baf839SRobert Olsson 		if (!next_fi->fib_nh[0].nh_gw ||
184419baf839SRobert Olsson 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
184519baf839SRobert Olsson 			continue;
18469b0c290eSEric Dumazet 
18479b0c290eSEric Dumazet 		fib_alias_accessed(fa);
184819baf839SRobert Olsson 
184919baf839SRobert Olsson 		if (fi == NULL) {
185019baf839SRobert Olsson 			if (next_fi != res->fi)
185119baf839SRobert Olsson 				break;
185219baf839SRobert Olsson 		} else if (!fib_detect_death(fi, order, &last_resort,
1853971b893eSDenis V. Lunev 					     &last_idx, tb->tb_default)) {
1854a2bbe682SDenis V. Lunev 			fib_result_assign(res, fi);
1855971b893eSDenis V. Lunev 			tb->tb_default = order;
185619baf839SRobert Olsson 			goto out;
185719baf839SRobert Olsson 		}
185819baf839SRobert Olsson 		fi = next_fi;
185919baf839SRobert Olsson 		order++;
186019baf839SRobert Olsson 	}
186119baf839SRobert Olsson 	if (order <= 0 || fi == NULL) {
1862971b893eSDenis V. Lunev 		tb->tb_default = -1;
186319baf839SRobert Olsson 		goto out;
186419baf839SRobert Olsson 	}
186519baf839SRobert Olsson 
1866971b893eSDenis V. Lunev 	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1867971b893eSDenis V. Lunev 				tb->tb_default)) {
1868a2bbe682SDenis V. Lunev 		fib_result_assign(res, fi);
1869971b893eSDenis V. Lunev 		tb->tb_default = order;
187019baf839SRobert Olsson 		goto out;
187119baf839SRobert Olsson 	}
1872a2bbe682SDenis V. Lunev 	if (last_idx >= 0)
1873a2bbe682SDenis V. Lunev 		fib_result_assign(res, last_resort);
1874971b893eSDenis V. Lunev 	tb->tb_default = last_idx;
1875971b893eSDenis V. Lunev out:
18762373ce1cSRobert Olsson 	rcu_read_unlock();
187719baf839SRobert Olsson }
187819baf839SRobert Olsson 
1879a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1880a07f5f50SStephen Hemminger 			   struct fib_table *tb,
188119baf839SRobert Olsson 			   struct sk_buff *skb, struct netlink_callback *cb)
188219baf839SRobert Olsson {
188319baf839SRobert Olsson 	int i, s_i;
188419baf839SRobert Olsson 	struct fib_alias *fa;
188532ab5f80SAl Viro 	__be32 xkey = htonl(key);
188619baf839SRobert Olsson 
188771d67e66SStephen Hemminger 	s_i = cb->args[5];
188819baf839SRobert Olsson 	i = 0;
188919baf839SRobert Olsson 
18902373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
18912373ce1cSRobert Olsson 
18922373ce1cSRobert Olsson 	list_for_each_entry_rcu(fa, fah, fa_list) {
189319baf839SRobert Olsson 		if (i < s_i) {
189419baf839SRobert Olsson 			i++;
189519baf839SRobert Olsson 			continue;
189619baf839SRobert Olsson 		}
189719baf839SRobert Olsson 
189819baf839SRobert Olsson 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
189919baf839SRobert Olsson 				  cb->nlh->nlmsg_seq,
190019baf839SRobert Olsson 				  RTM_NEWROUTE,
190119baf839SRobert Olsson 				  tb->tb_id,
190219baf839SRobert Olsson 				  fa->fa_type,
190319baf839SRobert Olsson 				  fa->fa_scope,
1904be403ea1SThomas Graf 				  xkey,
190519baf839SRobert Olsson 				  plen,
190619baf839SRobert Olsson 				  fa->fa_tos,
190764347f78SStephen Hemminger 				  fa->fa_info, NLM_F_MULTI) < 0) {
190871d67e66SStephen Hemminger 			cb->args[5] = i;
190919baf839SRobert Olsson 			return -1;
191019baf839SRobert Olsson 		}
191119baf839SRobert Olsson 		i++;
191219baf839SRobert Olsson 	}
191371d67e66SStephen Hemminger 	cb->args[5] = i;
191419baf839SRobert Olsson 	return skb->len;
191519baf839SRobert Olsson }
191619baf839SRobert Olsson 
1917a88ee229SStephen Hemminger static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1918a07f5f50SStephen Hemminger 			struct sk_buff *skb, struct netlink_callback *cb)
191919baf839SRobert Olsson {
1920a88ee229SStephen Hemminger 	struct leaf_info *li;
1921a88ee229SStephen Hemminger 	struct hlist_node *node;
1922a88ee229SStephen Hemminger 	int i, s_i;
192391b9a277SOlof Johansson 
192471d67e66SStephen Hemminger 	s_i = cb->args[4];
1925a88ee229SStephen Hemminger 	i = 0;
192619baf839SRobert Olsson 
1927a88ee229SStephen Hemminger 	/* rcu_read_lock is hold by caller */
1928a88ee229SStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1929a88ee229SStephen Hemminger 		if (i < s_i) {
1930a88ee229SStephen Hemminger 			i++;
193119baf839SRobert Olsson 			continue;
1932a88ee229SStephen Hemminger 		}
193319baf839SRobert Olsson 
1934a88ee229SStephen Hemminger 		if (i > s_i)
193571d67e66SStephen Hemminger 			cb->args[5] = 0;
193619baf839SRobert Olsson 
1937a88ee229SStephen Hemminger 		if (list_empty(&li->falh))
193819baf839SRobert Olsson 			continue;
193919baf839SRobert Olsson 
1940a88ee229SStephen Hemminger 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
194171d67e66SStephen Hemminger 			cb->args[4] = i;
194219baf839SRobert Olsson 			return -1;
194319baf839SRobert Olsson 		}
1944a88ee229SStephen Hemminger 		i++;
194519baf839SRobert Olsson 	}
1946a88ee229SStephen Hemminger 
194771d67e66SStephen Hemminger 	cb->args[4] = i;
194819baf839SRobert Olsson 	return skb->len;
194919baf839SRobert Olsson }
195019baf839SRobert Olsson 
195116c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1952a07f5f50SStephen Hemminger 		   struct netlink_callback *cb)
195319baf839SRobert Olsson {
1954a88ee229SStephen Hemminger 	struct leaf *l;
195519baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
1956d5ce8a0eSStephen Hemminger 	t_key key = cb->args[2];
195771d67e66SStephen Hemminger 	int count = cb->args[3];
195819baf839SRobert Olsson 
19592373ce1cSRobert Olsson 	rcu_read_lock();
1960d5ce8a0eSStephen Hemminger 	/* Dump starting at last key.
1961d5ce8a0eSStephen Hemminger 	 * Note: 0.0.0.0/0 (ie default) is first key.
1962d5ce8a0eSStephen Hemminger 	 */
196371d67e66SStephen Hemminger 	if (count == 0)
1964d5ce8a0eSStephen Hemminger 		l = trie_firstleaf(t);
1965d5ce8a0eSStephen Hemminger 	else {
196671d67e66SStephen Hemminger 		/* Normally, continue from last key, but if that is missing
196771d67e66SStephen Hemminger 		 * fallback to using slow rescan
1968d5ce8a0eSStephen Hemminger 		 */
196971d67e66SStephen Hemminger 		l = fib_find_node(t, key);
197071d67e66SStephen Hemminger 		if (!l)
197171d67e66SStephen Hemminger 			l = trie_leafindex(t, count);
197219baf839SRobert Olsson 	}
1973a88ee229SStephen Hemminger 
1974d5ce8a0eSStephen Hemminger 	while (l) {
1975d5ce8a0eSStephen Hemminger 		cb->args[2] = l->key;
1976a88ee229SStephen Hemminger 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
197771d67e66SStephen Hemminger 			cb->args[3] = count;
19782373ce1cSRobert Olsson 			rcu_read_unlock();
197919baf839SRobert Olsson 			return -1;
198019baf839SRobert Olsson 		}
1981d5ce8a0eSStephen Hemminger 
198271d67e66SStephen Hemminger 		++count;
1983d5ce8a0eSStephen Hemminger 		l = trie_nextleaf(l);
198471d67e66SStephen Hemminger 		memset(&cb->args[4], 0,
198571d67e66SStephen Hemminger 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
1986a88ee229SStephen Hemminger 	}
198771d67e66SStephen Hemminger 	cb->args[3] = count;
1988a88ee229SStephen Hemminger 	rcu_read_unlock();
1989a88ee229SStephen Hemminger 
1990a88ee229SStephen Hemminger 	return skb->len;
1991a88ee229SStephen Hemminger }
199219baf839SRobert Olsson 
19937f9b8052SStephen Hemminger void __init fib_hash_init(void)
19947f9b8052SStephen Hemminger {
1995a07f5f50SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1996a07f5f50SStephen Hemminger 					  sizeof(struct fib_alias),
1997bc3c8c1eSStephen Hemminger 					  0, SLAB_PANIC, NULL);
1998bc3c8c1eSStephen Hemminger 
1999bc3c8c1eSStephen Hemminger 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2000bc3c8c1eSStephen Hemminger 					   max(sizeof(struct leaf),
2001bc3c8c1eSStephen Hemminger 					       sizeof(struct leaf_info)),
2002bc3c8c1eSStephen Hemminger 					   0, SLAB_PANIC, NULL);
20037f9b8052SStephen Hemminger }
200419baf839SRobert Olsson 
20057f9b8052SStephen Hemminger 
20067f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */
20077f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id)
200819baf839SRobert Olsson {
200919baf839SRobert Olsson 	struct fib_table *tb;
201019baf839SRobert Olsson 	struct trie *t;
201119baf839SRobert Olsson 
201219baf839SRobert Olsson 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
201319baf839SRobert Olsson 		     GFP_KERNEL);
201419baf839SRobert Olsson 	if (tb == NULL)
201519baf839SRobert Olsson 		return NULL;
201619baf839SRobert Olsson 
201719baf839SRobert Olsson 	tb->tb_id = id;
2018971b893eSDenis V. Lunev 	tb->tb_default = -1;
201919baf839SRobert Olsson 
202019baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
2021c28a1cf4SStephen Hemminger 	memset(t, 0, sizeof(*t));
202219baf839SRobert Olsson 
202319baf839SRobert Olsson 	if (id == RT_TABLE_LOCAL)
2024a07f5f50SStephen Hemminger 		pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
202519baf839SRobert Olsson 
202619baf839SRobert Olsson 	return tb;
202719baf839SRobert Olsson }
202819baf839SRobert Olsson 
2029cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
2030cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
2031cb7b593cSStephen Hemminger struct fib_trie_iter {
20321c340b2fSDenis V. Lunev 	struct seq_net_private p;
20333d3b2d25SStephen Hemminger 	struct fib_table *tb;
2034cb7b593cSStephen Hemminger 	struct tnode *tnode;
2035a034ee3cSEric Dumazet 	unsigned int index;
2036a034ee3cSEric Dumazet 	unsigned int depth;
2037cb7b593cSStephen Hemminger };
203819baf839SRobert Olsson 
2039cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
204019baf839SRobert Olsson {
2041cb7b593cSStephen Hemminger 	struct tnode *tn = iter->tnode;
2042a034ee3cSEric Dumazet 	unsigned int cindex = iter->index;
2043cb7b593cSStephen Hemminger 	struct tnode *p;
204419baf839SRobert Olsson 
20456640e697SEric W. Biederman 	/* A single entry routing table */
20466640e697SEric W. Biederman 	if (!tn)
20476640e697SEric W. Biederman 		return NULL;
20486640e697SEric W. Biederman 
2049cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2050cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
2051cb7b593cSStephen Hemminger rescan:
2052cb7b593cSStephen Hemminger 	while (cindex < (1<<tn->bits)) {
2053b59cfbf7SEric Dumazet 		struct node *n = tnode_get_child_rcu(tn, cindex);
205419baf839SRobert Olsson 
2055cb7b593cSStephen Hemminger 		if (n) {
205619baf839SRobert Olsson 			if (IS_LEAF(n)) {
2057cb7b593cSStephen Hemminger 				iter->tnode = tn;
2058cb7b593cSStephen Hemminger 				iter->index = cindex + 1;
205991b9a277SOlof Johansson 			} else {
2060cb7b593cSStephen Hemminger 				/* push down one level */
2061cb7b593cSStephen Hemminger 				iter->tnode = (struct tnode *) n;
2062cb7b593cSStephen Hemminger 				iter->index = 0;
2063cb7b593cSStephen Hemminger 				++iter->depth;
206419baf839SRobert Olsson 			}
2065cb7b593cSStephen Hemminger 			return n;
206619baf839SRobert Olsson 		}
206719baf839SRobert Olsson 
2068cb7b593cSStephen Hemminger 		++cindex;
2069cb7b593cSStephen Hemminger 	}
2070cb7b593cSStephen Hemminger 
2071cb7b593cSStephen Hemminger 	/* Current node exhausted, pop back up */
2072b59cfbf7SEric Dumazet 	p = node_parent_rcu((struct node *)tn);
2073cb7b593cSStephen Hemminger 	if (p) {
2074cb7b593cSStephen Hemminger 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2075cb7b593cSStephen Hemminger 		tn = p;
2076cb7b593cSStephen Hemminger 		--iter->depth;
2077cb7b593cSStephen Hemminger 		goto rescan;
2078cb7b593cSStephen Hemminger 	}
2079cb7b593cSStephen Hemminger 
2080cb7b593cSStephen Hemminger 	/* got root? */
2081cb7b593cSStephen Hemminger 	return NULL;
2082cb7b593cSStephen Hemminger }
2083cb7b593cSStephen Hemminger 
2084cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2085cb7b593cSStephen Hemminger 				       struct trie *t)
2086cb7b593cSStephen Hemminger {
20875ddf0eb2SRobert Olsson 	struct node *n;
20885ddf0eb2SRobert Olsson 
20895ddf0eb2SRobert Olsson 	if (!t)
20905ddf0eb2SRobert Olsson 		return NULL;
20915ddf0eb2SRobert Olsson 
20925ddf0eb2SRobert Olsson 	n = rcu_dereference(t->trie);
20933d3b2d25SStephen Hemminger 	if (!n)
20945ddf0eb2SRobert Olsson 		return NULL;
2095cb7b593cSStephen Hemminger 
20966640e697SEric W. Biederman 	if (IS_TNODE(n)) {
2097cb7b593cSStephen Hemminger 		iter->tnode = (struct tnode *) n;
2098cb7b593cSStephen Hemminger 		iter->index = 0;
20991d25cd6cSRobert Olsson 		iter->depth = 1;
21006640e697SEric W. Biederman 	} else {
21016640e697SEric W. Biederman 		iter->tnode = NULL;
21026640e697SEric W. Biederman 		iter->index = 0;
21036640e697SEric W. Biederman 		iter->depth = 0;
21046640e697SEric W. Biederman 	}
21053d3b2d25SStephen Hemminger 
2106cb7b593cSStephen Hemminger 	return n;
2107cb7b593cSStephen Hemminger }
2108cb7b593cSStephen Hemminger 
2109cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
211019baf839SRobert Olsson {
21112373ce1cSRobert Olsson 	struct node *n;
2112cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2113cb7b593cSStephen Hemminger 
2114cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
211519baf839SRobert Olsson 
21162373ce1cSRobert Olsson 	rcu_read_lock();
21173d3b2d25SStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2118cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
211993672292SStephen Hemminger 			struct leaf *l = (struct leaf *)n;
212093672292SStephen Hemminger 			struct leaf_info *li;
212193672292SStephen Hemminger 			struct hlist_node *tmp;
212293672292SStephen Hemminger 
212319baf839SRobert Olsson 			s->leaves++;
2124cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2125cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2126cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
212793672292SStephen Hemminger 
212893672292SStephen Hemminger 			hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
212993672292SStephen Hemminger 				++s->prefixes;
213091b9a277SOlof Johansson 		} else {
2131cb7b593cSStephen Hemminger 			const struct tnode *tn = (const struct tnode *) n;
2132cb7b593cSStephen Hemminger 			int i;
213319baf839SRobert Olsson 
213419baf839SRobert Olsson 			s->tnodes++;
213506ef921dSRobert Olsson 			if (tn->bits < MAX_STAT_DEPTH)
213619baf839SRobert Olsson 				s->nodesizes[tn->bits]++;
213706ef921dSRobert Olsson 
2138cb7b593cSStephen Hemminger 			for (i = 0; i < (1<<tn->bits); i++)
2139cb7b593cSStephen Hemminger 				if (!tn->child[i])
214019baf839SRobert Olsson 					s->nullpointers++;
214119baf839SRobert Olsson 		}
214219baf839SRobert Olsson 	}
21432373ce1cSRobert Olsson 	rcu_read_unlock();
214419baf839SRobert Olsson }
214519baf839SRobert Olsson 
214619baf839SRobert Olsson /*
214719baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
214819baf839SRobert Olsson  */
2149cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
215019baf839SRobert Olsson {
2151a034ee3cSEric Dumazet 	unsigned int i, max, pointers, bytes, avdepth;
215219baf839SRobert Olsson 
215319baf839SRobert Olsson 	if (stat->leaves)
215419baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
215519baf839SRobert Olsson 	else
215619baf839SRobert Olsson 		avdepth = 0;
215719baf839SRobert Olsson 
2158a07f5f50SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n",
2159a07f5f50SStephen Hemminger 		   avdepth / 100, avdepth % 100);
2160cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2161cb7b593cSStephen Hemminger 
2162cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
2163cb7b593cSStephen Hemminger 	bytes = sizeof(struct leaf) * stat->leaves;
216493672292SStephen Hemminger 
216593672292SStephen Hemminger 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
216693672292SStephen Hemminger 	bytes += sizeof(struct leaf_info) * stat->prefixes;
216793672292SStephen Hemminger 
2168187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
216919baf839SRobert Olsson 	bytes += sizeof(struct tnode) * stat->tnodes;
217019baf839SRobert Olsson 
217106ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
217206ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
217319baf839SRobert Olsson 		max--;
217419baf839SRobert Olsson 
2175cb7b593cSStephen Hemminger 	pointers = 0;
217619baf839SRobert Olsson 	for (i = 1; i <= max; i++)
217719baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2178187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
217919baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
218019baf839SRobert Olsson 		}
2181cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2182187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2183cb7b593cSStephen Hemminger 
218419baf839SRobert Olsson 	bytes += sizeof(struct node *) * pointers;
2185187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2186187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
218766a2f7fdSStephen Hemminger }
218819baf839SRobert Olsson 
218919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
219066a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
219166a2f7fdSStephen Hemminger 			    const struct trie_use_stats *stats)
219266a2f7fdSStephen Hemminger {
219366a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
219466a2f7fdSStephen Hemminger 	seq_printf(seq, "gets = %u\n", stats->gets);
219566a2f7fdSStephen Hemminger 	seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2196a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match passed = %u\n",
2197a07f5f50SStephen Hemminger 		   stats->semantic_match_passed);
2198a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match miss = %u\n",
2199a07f5f50SStephen Hemminger 		   stats->semantic_match_miss);
220066a2f7fdSStephen Hemminger 	seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2201a07f5f50SStephen Hemminger 	seq_printf(seq, "skipped node resize = %u\n\n",
2202a07f5f50SStephen Hemminger 		   stats->resize_node_skipped);
220319baf839SRobert Olsson }
220466a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
220566a2f7fdSStephen Hemminger 
22063d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2207d717a9a6SStephen Hemminger {
22083d3b2d25SStephen Hemminger 	if (tb->tb_id == RT_TABLE_LOCAL)
22093d3b2d25SStephen Hemminger 		seq_puts(seq, "Local:\n");
22103d3b2d25SStephen Hemminger 	else if (tb->tb_id == RT_TABLE_MAIN)
22113d3b2d25SStephen Hemminger 		seq_puts(seq, "Main:\n");
22123d3b2d25SStephen Hemminger 	else
22133d3b2d25SStephen Hemminger 		seq_printf(seq, "Id %d:\n", tb->tb_id);
2214d717a9a6SStephen Hemminger }
221519baf839SRobert Olsson 
22163d3b2d25SStephen Hemminger 
221719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
221819baf839SRobert Olsson {
22191c340b2fSDenis V. Lunev 	struct net *net = (struct net *)seq->private;
22203d3b2d25SStephen Hemminger 	unsigned int h;
2221877a9bffSEric W. Biederman 
2222d717a9a6SStephen Hemminger 	seq_printf(seq,
2223a07f5f50SStephen Hemminger 		   "Basic info: size of leaf:"
2224a07f5f50SStephen Hemminger 		   " %Zd bytes, size of tnode: %Zd bytes.\n",
222519baf839SRobert Olsson 		   sizeof(struct leaf), sizeof(struct tnode));
222619baf839SRobert Olsson 
22273d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
22283d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22293d3b2d25SStephen Hemminger 		struct hlist_node *node;
22303d3b2d25SStephen Hemminger 		struct fib_table *tb;
2231cb7b593cSStephen Hemminger 
22323d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
22333d3b2d25SStephen Hemminger 			struct trie *t = (struct trie *) tb->tb_data;
22343d3b2d25SStephen Hemminger 			struct trie_stat stat;
22353d3b2d25SStephen Hemminger 
22363d3b2d25SStephen Hemminger 			if (!t)
22373d3b2d25SStephen Hemminger 				continue;
22383d3b2d25SStephen Hemminger 
22393d3b2d25SStephen Hemminger 			fib_table_print(seq, tb);
22403d3b2d25SStephen Hemminger 
22413d3b2d25SStephen Hemminger 			trie_collect_stats(t, &stat);
22423d3b2d25SStephen Hemminger 			trie_show_stats(seq, &stat);
22433d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
22443d3b2d25SStephen Hemminger 			trie_show_usage(seq, &t->stats);
22453d3b2d25SStephen Hemminger #endif
22463d3b2d25SStephen Hemminger 		}
22473d3b2d25SStephen Hemminger 	}
2248cb7b593cSStephen Hemminger 
224919baf839SRobert Olsson 	return 0;
225019baf839SRobert Olsson }
225119baf839SRobert Olsson 
225219baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file)
225319baf839SRobert Olsson {
2254de05c557SPavel Emelyanov 	return single_open_net(inode, file, fib_triestat_seq_show);
22551c340b2fSDenis V. Lunev }
22561c340b2fSDenis V. Lunev 
22579a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = {
225819baf839SRobert Olsson 	.owner	= THIS_MODULE,
225919baf839SRobert Olsson 	.open	= fib_triestat_seq_open,
226019baf839SRobert Olsson 	.read	= seq_read,
226119baf839SRobert Olsson 	.llseek	= seq_lseek,
2262b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
226319baf839SRobert Olsson };
226419baf839SRobert Olsson 
22651218854aSYOSHIFUJI Hideaki static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
226619baf839SRobert Olsson {
22671218854aSYOSHIFUJI Hideaki 	struct fib_trie_iter *iter = seq->private;
22681218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
2269cb7b593cSStephen Hemminger 	loff_t idx = 0;
22703d3b2d25SStephen Hemminger 	unsigned int h;
22713d3b2d25SStephen Hemminger 
22723d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
22733d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
22743d3b2d25SStephen Hemminger 		struct hlist_node *node;
22753d3b2d25SStephen Hemminger 		struct fib_table *tb;
22763d3b2d25SStephen Hemminger 
22773d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2278cb7b593cSStephen Hemminger 			struct node *n;
2279cb7b593cSStephen Hemminger 
22803d3b2d25SStephen Hemminger 			for (n = fib_trie_get_first(iter,
22813d3b2d25SStephen Hemminger 						    (struct trie *) tb->tb_data);
22823d3b2d25SStephen Hemminger 			     n; n = fib_trie_get_next(iter))
22833d3b2d25SStephen Hemminger 				if (pos == idx++) {
22843d3b2d25SStephen Hemminger 					iter->tb = tb;
2285cb7b593cSStephen Hemminger 					return n;
228619baf839SRobert Olsson 				}
22873d3b2d25SStephen Hemminger 		}
22883d3b2d25SStephen Hemminger 	}
228919baf839SRobert Olsson 
229019baf839SRobert Olsson 	return NULL;
229119baf839SRobert Olsson }
229219baf839SRobert Olsson 
229319baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2294c95aaf9aSStephen Hemminger 	__acquires(RCU)
229519baf839SRobert Olsson {
2296cb7b593cSStephen Hemminger 	rcu_read_lock();
22971218854aSYOSHIFUJI Hideaki 	return fib_trie_get_idx(seq, *pos);
229819baf839SRobert Olsson }
229919baf839SRobert Olsson 
230019baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
230119baf839SRobert Olsson {
2302cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
23031218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
23043d3b2d25SStephen Hemminger 	struct fib_table *tb = iter->tb;
23053d3b2d25SStephen Hemminger 	struct hlist_node *tb_node;
23063d3b2d25SStephen Hemminger 	unsigned int h;
23073d3b2d25SStephen Hemminger 	struct node *n;
2308cb7b593cSStephen Hemminger 
230919baf839SRobert Olsson 	++*pos;
23103d3b2d25SStephen Hemminger 	/* next node in same table */
23113d3b2d25SStephen Hemminger 	n = fib_trie_get_next(iter);
23123d3b2d25SStephen Hemminger 	if (n)
23133d3b2d25SStephen Hemminger 		return n;
231491b9a277SOlof Johansson 
23153d3b2d25SStephen Hemminger 	/* walk rest of this hash chain */
23163d3b2d25SStephen Hemminger 	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
23173d3b2d25SStephen Hemminger 	while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
23183d3b2d25SStephen Hemminger 		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
23193d3b2d25SStephen Hemminger 		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
23203d3b2d25SStephen Hemminger 		if (n)
23213d3b2d25SStephen Hemminger 			goto found;
23223d3b2d25SStephen Hemminger 	}
2323cb7b593cSStephen Hemminger 
23243d3b2d25SStephen Hemminger 	/* new hash chain */
23253d3b2d25SStephen Hemminger 	while (++h < FIB_TABLE_HASHSZ) {
23263d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
23273d3b2d25SStephen Hemminger 		hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
23283d3b2d25SStephen Hemminger 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
23293d3b2d25SStephen Hemminger 			if (n)
23303d3b2d25SStephen Hemminger 				goto found;
23313d3b2d25SStephen Hemminger 		}
23323d3b2d25SStephen Hemminger 	}
2333cb7b593cSStephen Hemminger 	return NULL;
23343d3b2d25SStephen Hemminger 
23353d3b2d25SStephen Hemminger found:
23363d3b2d25SStephen Hemminger 	iter->tb = tb;
23373d3b2d25SStephen Hemminger 	return n;
233819baf839SRobert Olsson }
233919baf839SRobert Olsson 
234019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2341c95aaf9aSStephen Hemminger 	__releases(RCU)
234219baf839SRobert Olsson {
2343cb7b593cSStephen Hemminger 	rcu_read_unlock();
234419baf839SRobert Olsson }
234519baf839SRobert Olsson 
2346cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2347cb7b593cSStephen Hemminger {
2348a034ee3cSEric Dumazet 	while (n-- > 0)
2349a034ee3cSEric Dumazet 		seq_puts(seq, "   ");
2350cb7b593cSStephen Hemminger }
235119baf839SRobert Olsson 
235228d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2353cb7b593cSStephen Hemminger {
2354cb7b593cSStephen Hemminger 	switch (s) {
2355cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2356cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2357cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2358cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2359cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2360cb7b593cSStephen Hemminger 	default:
236128d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2362cb7b593cSStephen Hemminger 		return buf;
2363cb7b593cSStephen Hemminger 	}
2364cb7b593cSStephen Hemminger }
2365cb7b593cSStephen Hemminger 
236636cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = {
2367cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2368cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2369cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2370cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2371cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2372cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2373cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2374cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2375cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2376cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2377cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2378cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2379cb7b593cSStephen Hemminger };
2380cb7b593cSStephen Hemminger 
2381a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2382cb7b593cSStephen Hemminger {
2383cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2384cb7b593cSStephen Hemminger 		return rtn_type_names[t];
238528d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2386cb7b593cSStephen Hemminger 	return buf;
2387cb7b593cSStephen Hemminger }
2388cb7b593cSStephen Hemminger 
2389cb7b593cSStephen Hemminger /* Pretty print the trie */
239019baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
239119baf839SRobert Olsson {
2392cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
2393cb7b593cSStephen Hemminger 	struct node *n = v;
239419baf839SRobert Olsson 
23953d3b2d25SStephen Hemminger 	if (!node_parent_rcu(n))
23963d3b2d25SStephen Hemminger 		fib_table_print(seq, iter->tb);
2397095b8501SRobert Olsson 
2398095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2399095b8501SRobert Olsson 		struct tnode *tn = (struct tnode *) n;
2400ab66b4a7SStephen Hemminger 		__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
2401095b8501SRobert Olsson 
24021d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
2403673d57e7SHarvey Harrison 		seq_printf(seq, "  +-- %pI4/%d %d %d %d\n",
2404673d57e7SHarvey Harrison 			   &prf, tn->pos, tn->bits, tn->full_children,
24051d25cd6cSRobert Olsson 			   tn->empty_children);
24061d25cd6cSRobert Olsson 
2407cb7b593cSStephen Hemminger 	} else {
2408cb7b593cSStephen Hemminger 		struct leaf *l = (struct leaf *) n;
24091328042eSStephen Hemminger 		struct leaf_info *li;
24101328042eSStephen Hemminger 		struct hlist_node *node;
241132ab5f80SAl Viro 		__be32 val = htonl(l->key);
2412cb7b593cSStephen Hemminger 
2413cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2414673d57e7SHarvey Harrison 		seq_printf(seq, "  |-- %pI4\n", &val);
241528d36e37SEric Dumazet 
24161328042eSStephen Hemminger 		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2417cb7b593cSStephen Hemminger 			struct fib_alias *fa;
241828d36e37SEric Dumazet 
2419cb7b593cSStephen Hemminger 			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
242028d36e37SEric Dumazet 				char buf1[32], buf2[32];
242128d36e37SEric Dumazet 
2422cb7b593cSStephen Hemminger 				seq_indent(seq, iter->depth+1);
24231328042eSStephen Hemminger 				seq_printf(seq, "  /%d %s %s", li->plen,
242428d36e37SEric Dumazet 					   rtn_scope(buf1, sizeof(buf1),
242528d36e37SEric Dumazet 						     fa->fa_scope),
242628d36e37SEric Dumazet 					   rtn_type(buf2, sizeof(buf2),
242728d36e37SEric Dumazet 						    fa->fa_type));
2428cb7b593cSStephen Hemminger 				if (fa->fa_tos)
2429b9c4d82aSDenis V. Lunev 					seq_printf(seq, " tos=%d", fa->fa_tos);
2430cb7b593cSStephen Hemminger 				seq_putc(seq, '\n');
2431cb7b593cSStephen Hemminger 			}
2432cb7b593cSStephen Hemminger 		}
2433cb7b593cSStephen Hemminger 	}
243419baf839SRobert Olsson 
243519baf839SRobert Olsson 	return 0;
243619baf839SRobert Olsson }
243719baf839SRobert Olsson 
2438f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
243919baf839SRobert Olsson 	.start  = fib_trie_seq_start,
244019baf839SRobert Olsson 	.next   = fib_trie_seq_next,
244119baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
244219baf839SRobert Olsson 	.show   = fib_trie_seq_show,
244319baf839SRobert Olsson };
244419baf839SRobert Olsson 
244519baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file)
244619baf839SRobert Olsson {
24471c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_trie_seq_ops,
2448cf7732e4SPavel Emelyanov 			    sizeof(struct fib_trie_iter));
244919baf839SRobert Olsson }
245019baf839SRobert Olsson 
24519a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = {
245219baf839SRobert Olsson 	.owner  = THIS_MODULE,
245319baf839SRobert Olsson 	.open   = fib_trie_seq_open,
245419baf839SRobert Olsson 	.read   = seq_read,
245519baf839SRobert Olsson 	.llseek = seq_lseek,
24561c340b2fSDenis V. Lunev 	.release = seq_release_net,
245719baf839SRobert Olsson };
245819baf839SRobert Olsson 
24598315f5d8SStephen Hemminger struct fib_route_iter {
24608315f5d8SStephen Hemminger 	struct seq_net_private p;
24618315f5d8SStephen Hemminger 	struct trie *main_trie;
24628315f5d8SStephen Hemminger 	loff_t	pos;
24638315f5d8SStephen Hemminger 	t_key	key;
24648315f5d8SStephen Hemminger };
24658315f5d8SStephen Hemminger 
24668315f5d8SStephen Hemminger static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
24678315f5d8SStephen Hemminger {
24688315f5d8SStephen Hemminger 	struct leaf *l = NULL;
24698315f5d8SStephen Hemminger 	struct trie *t = iter->main_trie;
24708315f5d8SStephen Hemminger 
24718315f5d8SStephen Hemminger 	/* use cache location of last found key */
24728315f5d8SStephen Hemminger 	if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
24738315f5d8SStephen Hemminger 		pos -= iter->pos;
24748315f5d8SStephen Hemminger 	else {
24758315f5d8SStephen Hemminger 		iter->pos = 0;
24768315f5d8SStephen Hemminger 		l = trie_firstleaf(t);
24778315f5d8SStephen Hemminger 	}
24788315f5d8SStephen Hemminger 
24798315f5d8SStephen Hemminger 	while (l && pos-- > 0) {
24808315f5d8SStephen Hemminger 		iter->pos++;
24818315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
24828315f5d8SStephen Hemminger 	}
24838315f5d8SStephen Hemminger 
24848315f5d8SStephen Hemminger 	if (l)
24858315f5d8SStephen Hemminger 		iter->key = pos;	/* remember it */
24868315f5d8SStephen Hemminger 	else
24878315f5d8SStephen Hemminger 		iter->pos = 0;		/* forget it */
24888315f5d8SStephen Hemminger 
24898315f5d8SStephen Hemminger 	return l;
24908315f5d8SStephen Hemminger }
24918315f5d8SStephen Hemminger 
24928315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
24938315f5d8SStephen Hemminger 	__acquires(RCU)
24948315f5d8SStephen Hemminger {
24958315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
24968315f5d8SStephen Hemminger 	struct fib_table *tb;
24978315f5d8SStephen Hemminger 
24988315f5d8SStephen Hemminger 	rcu_read_lock();
24991218854aSYOSHIFUJI Hideaki 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
25008315f5d8SStephen Hemminger 	if (!tb)
25018315f5d8SStephen Hemminger 		return NULL;
25028315f5d8SStephen Hemminger 
25038315f5d8SStephen Hemminger 	iter->main_trie = (struct trie *) tb->tb_data;
25048315f5d8SStephen Hemminger 	if (*pos == 0)
25058315f5d8SStephen Hemminger 		return SEQ_START_TOKEN;
25068315f5d8SStephen Hemminger 	else
25078315f5d8SStephen Hemminger 		return fib_route_get_idx(iter, *pos - 1);
25088315f5d8SStephen Hemminger }
25098315f5d8SStephen Hemminger 
25108315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
25118315f5d8SStephen Hemminger {
25128315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
25138315f5d8SStephen Hemminger 	struct leaf *l = v;
25148315f5d8SStephen Hemminger 
25158315f5d8SStephen Hemminger 	++*pos;
25168315f5d8SStephen Hemminger 	if (v == SEQ_START_TOKEN) {
25178315f5d8SStephen Hemminger 		iter->pos = 0;
25188315f5d8SStephen Hemminger 		l = trie_firstleaf(iter->main_trie);
25198315f5d8SStephen Hemminger 	} else {
25208315f5d8SStephen Hemminger 		iter->pos++;
25218315f5d8SStephen Hemminger 		l = trie_nextleaf(l);
25228315f5d8SStephen Hemminger 	}
25238315f5d8SStephen Hemminger 
25248315f5d8SStephen Hemminger 	if (l)
25258315f5d8SStephen Hemminger 		iter->key = l->key;
25268315f5d8SStephen Hemminger 	else
25278315f5d8SStephen Hemminger 		iter->pos = 0;
25288315f5d8SStephen Hemminger 	return l;
25298315f5d8SStephen Hemminger }
25308315f5d8SStephen Hemminger 
25318315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v)
25328315f5d8SStephen Hemminger 	__releases(RCU)
25338315f5d8SStephen Hemminger {
25348315f5d8SStephen Hemminger 	rcu_read_unlock();
25358315f5d8SStephen Hemminger }
25368315f5d8SStephen Hemminger 
2537a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2538cb7b593cSStephen Hemminger {
2539a034ee3cSEric Dumazet 	unsigned int flags = 0;
2540cb7b593cSStephen Hemminger 
2541a034ee3cSEric Dumazet 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2542a034ee3cSEric Dumazet 		flags = RTF_REJECT;
2543cb7b593cSStephen Hemminger 	if (fi && fi->fib_nh->nh_gw)
2544cb7b593cSStephen Hemminger 		flags |= RTF_GATEWAY;
254532ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2546cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2547cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2548cb7b593cSStephen Hemminger 	return flags;
2549cb7b593cSStephen Hemminger }
2550cb7b593cSStephen Hemminger 
2551cb7b593cSStephen Hemminger /*
2552cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2553cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2554cb7b593cSStephen Hemminger  *	and needs to be same as fib_hash output to avoid breaking
2555cb7b593cSStephen Hemminger  *	legacy utilities
2556cb7b593cSStephen Hemminger  */
2557cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2558cb7b593cSStephen Hemminger {
2559cb7b593cSStephen Hemminger 	struct leaf *l = v;
25601328042eSStephen Hemminger 	struct leaf_info *li;
25611328042eSStephen Hemminger 	struct hlist_node *node;
2562cb7b593cSStephen Hemminger 
2563cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2564cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2565cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2566cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2567cb7b593cSStephen Hemminger 		return 0;
2568cb7b593cSStephen Hemminger 	}
2569cb7b593cSStephen Hemminger 
25701328042eSStephen Hemminger 	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2571cb7b593cSStephen Hemminger 		struct fib_alias *fa;
257232ab5f80SAl Viro 		__be32 mask, prefix;
2573cb7b593cSStephen Hemminger 
2574cb7b593cSStephen Hemminger 		mask = inet_make_mask(li->plen);
2575cb7b593cSStephen Hemminger 		prefix = htonl(l->key);
2576cb7b593cSStephen Hemminger 
2577cb7b593cSStephen Hemminger 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
25781371e37dSHerbert Xu 			const struct fib_info *fi = fa->fa_info;
2579a034ee3cSEric Dumazet 			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
25805e659e4cSPavel Emelyanov 			int len;
2581cb7b593cSStephen Hemminger 
2582cb7b593cSStephen Hemminger 			if (fa->fa_type == RTN_BROADCAST
2583cb7b593cSStephen Hemminger 			    || fa->fa_type == RTN_MULTICAST)
2584cb7b593cSStephen Hemminger 				continue;
2585cb7b593cSStephen Hemminger 
2586cb7b593cSStephen Hemminger 			if (fi)
25875e659e4cSPavel Emelyanov 				seq_printf(seq,
25885e659e4cSPavel Emelyanov 					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
25895e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2590cb7b593cSStephen Hemminger 					 fi->fib_dev ? fi->fib_dev->name : "*",
2591cb7b593cSStephen Hemminger 					 prefix,
2592cb7b593cSStephen Hemminger 					 fi->fib_nh->nh_gw, flags, 0, 0,
2593cb7b593cSStephen Hemminger 					 fi->fib_priority,
2594cb7b593cSStephen Hemminger 					 mask,
2595a07f5f50SStephen Hemminger 					 (fi->fib_advmss ?
2596a07f5f50SStephen Hemminger 					  fi->fib_advmss + 40 : 0),
2597cb7b593cSStephen Hemminger 					 fi->fib_window,
25985e659e4cSPavel Emelyanov 					 fi->fib_rtt >> 3, &len);
2599cb7b593cSStephen Hemminger 			else
26005e659e4cSPavel Emelyanov 				seq_printf(seq,
26015e659e4cSPavel Emelyanov 					 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
26025e659e4cSPavel Emelyanov 					 "%d\t%08X\t%d\t%u\t%u%n",
2603cb7b593cSStephen Hemminger 					 prefix, 0, flags, 0, 0, 0,
26045e659e4cSPavel Emelyanov 					 mask, 0, 0, 0, &len);
2605cb7b593cSStephen Hemminger 
26065e659e4cSPavel Emelyanov 			seq_printf(seq, "%*s\n", 127 - len, "");
2607cb7b593cSStephen Hemminger 		}
2608cb7b593cSStephen Hemminger 	}
2609cb7b593cSStephen Hemminger 
2610cb7b593cSStephen Hemminger 	return 0;
2611cb7b593cSStephen Hemminger }
2612cb7b593cSStephen Hemminger 
2613f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
26148315f5d8SStephen Hemminger 	.start  = fib_route_seq_start,
26158315f5d8SStephen Hemminger 	.next   = fib_route_seq_next,
26168315f5d8SStephen Hemminger 	.stop   = fib_route_seq_stop,
2617cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
2618cb7b593cSStephen Hemminger };
2619cb7b593cSStephen Hemminger 
2620cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file)
2621cb7b593cSStephen Hemminger {
26221c340b2fSDenis V. Lunev 	return seq_open_net(inode, file, &fib_route_seq_ops,
26238315f5d8SStephen Hemminger 			    sizeof(struct fib_route_iter));
2624cb7b593cSStephen Hemminger }
2625cb7b593cSStephen Hemminger 
26269a32144eSArjan van de Ven static const struct file_operations fib_route_fops = {
2627cb7b593cSStephen Hemminger 	.owner  = THIS_MODULE,
2628cb7b593cSStephen Hemminger 	.open   = fib_route_seq_open,
2629cb7b593cSStephen Hemminger 	.read   = seq_read,
2630cb7b593cSStephen Hemminger 	.llseek = seq_lseek,
26311c340b2fSDenis V. Lunev 	.release = seq_release_net,
2632cb7b593cSStephen Hemminger };
2633cb7b593cSStephen Hemminger 
263461a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
263519baf839SRobert Olsson {
263661a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2637cb7b593cSStephen Hemminger 		goto out1;
2638cb7b593cSStephen Hemminger 
263961a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
264061a02653SDenis V. Lunev 				  &fib_triestat_fops))
2641cb7b593cSStephen Hemminger 		goto out2;
2642cb7b593cSStephen Hemminger 
264361a02653SDenis V. Lunev 	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2644cb7b593cSStephen Hemminger 		goto out3;
2645cb7b593cSStephen Hemminger 
264619baf839SRobert Olsson 	return 0;
2647cb7b593cSStephen Hemminger 
2648cb7b593cSStephen Hemminger out3:
264961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
2650cb7b593cSStephen Hemminger out2:
265161a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
2652cb7b593cSStephen Hemminger out1:
2653cb7b593cSStephen Hemminger 	return -ENOMEM;
265419baf839SRobert Olsson }
265519baf839SRobert Olsson 
265661a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
265719baf839SRobert Olsson {
265861a02653SDenis V. Lunev 	proc_net_remove(net, "fib_trie");
265961a02653SDenis V. Lunev 	proc_net_remove(net, "fib_triestat");
266061a02653SDenis V. Lunev 	proc_net_remove(net, "route");
266119baf839SRobert Olsson }
266219baf839SRobert Olsson 
266319baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
2664