xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 7e24a55b2122746c2eef192296fc84624354f895)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
219baf839SRobert Olsson /*
319baf839SRobert Olsson  *
419baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
519baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
819baf839SRobert Olsson  *     Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1119baf839SRobert Olsson  *
1225985edcSLucas De Marchi  * This work is based on the LPC-trie which is originally described in:
1319baf839SRobert Olsson  *
1419baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1519baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
167a6498ebSAlexander A. Klimov  * https://www.csc.kth.se/~snilsson/software/dyntrie2/
1719baf839SRobert Olsson  *
1819baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
1919baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2219baf839SRobert Olsson  *
2319baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
2419baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
2519baf839SRobert Olsson  *		interface as the means of communication with the user level.
2619baf839SRobert Olsson  *
2719baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
2819baf839SRobert Olsson  *
2919baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3019baf839SRobert Olsson  *
31fd966255SRobert Olsson  * Substantial contributions to this work comes from:
32fd966255SRobert Olsson  *
33fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
34fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
35fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
36fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
3719baf839SRobert Olsson  */
3808009a76SAlexey Dobriyan #include <linux/cache.h>
397c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
401977f032SJiri Slaby #include <linux/bitops.h>
4119baf839SRobert Olsson #include <linux/types.h>
4219baf839SRobert Olsson #include <linux/kernel.h>
4319baf839SRobert Olsson #include <linux/mm.h>
4419baf839SRobert Olsson #include <linux/string.h>
4519baf839SRobert Olsson #include <linux/socket.h>
4619baf839SRobert Olsson #include <linux/sockios.h>
4719baf839SRobert Olsson #include <linux/errno.h>
4819baf839SRobert Olsson #include <linux/in.h>
4919baf839SRobert Olsson #include <linux/inet.h>
50cd8787abSStephen Hemminger #include <linux/inetdevice.h>
5119baf839SRobert Olsson #include <linux/netdevice.h>
5219baf839SRobert Olsson #include <linux/if_arp.h>
5319baf839SRobert Olsson #include <linux/proc_fs.h>
542373ce1cSRobert Olsson #include <linux/rcupdate.h>
5519baf839SRobert Olsson #include <linux/skbuff.h>
5619baf839SRobert Olsson #include <linux/netlink.h>
5719baf839SRobert Olsson #include <linux/init.h>
5819baf839SRobert Olsson #include <linux/list.h>
595a0e3ad6STejun Heo #include <linux/slab.h>
60bc3b2d7fSPaul Gortmaker #include <linux/export.h>
61ffa915d0SDavid S. Miller #include <linux/vmalloc.h>
62b90eb754SJiri Pirko #include <linux/notifier.h>
63457c4cbcSEric W. Biederman #include <net/net_namespace.h>
64f55fbb6aSGuillaume Nault #include <net/inet_dscp.h>
6519baf839SRobert Olsson #include <net/ip.h>
6619baf839SRobert Olsson #include <net/protocol.h>
6719baf839SRobert Olsson #include <net/route.h>
6819baf839SRobert Olsson #include <net/tcp.h>
6919baf839SRobert Olsson #include <net/sock.h>
7019baf839SRobert Olsson #include <net/ip_fib.h>
7104b1d4e5SIdo Schimmel #include <net/fib_notifier.h>
72f6d3c192SDavid Ahern #include <trace/events/fib.h>
7319baf839SRobert Olsson #include "fib_lookup.h"
7419baf839SRobert Olsson 
call_fib_entry_notifier(struct notifier_block * nb,enum fib_event_type event_type,u32 dst,int dst_len,struct fib_alias * fa,struct netlink_ext_ack * extack)757c550dafSJiri Pirko static int call_fib_entry_notifier(struct notifier_block *nb,
76c3852ef7SIdo Schimmel 				   enum fib_event_type event_type, u32 dst,
77b7a59557SJiri Pirko 				   int dst_len, struct fib_alias *fa,
78b7a59557SJiri Pirko 				   struct netlink_ext_ack *extack)
79c3852ef7SIdo Schimmel {
80c3852ef7SIdo Schimmel 	struct fib_entry_notifier_info info = {
81b7a59557SJiri Pirko 		.info.extack = extack,
82c3852ef7SIdo Schimmel 		.dst = dst,
83c3852ef7SIdo Schimmel 		.dst_len = dst_len,
846eba87c7SDavid Ahern 		.fi = fa->fa_info,
85568a3f33SGuillaume Nault 		.dscp = fa->fa_dscp,
866eba87c7SDavid Ahern 		.type = fa->fa_type,
876eba87c7SDavid Ahern 		.tb_id = fa->tb_id,
88c3852ef7SIdo Schimmel 	};
897c550dafSJiri Pirko 	return call_fib4_notifier(nb, event_type, &info.info);
90c3852ef7SIdo Schimmel }
91c3852ef7SIdo Schimmel 
call_fib_entry_notifiers(struct net * net,enum fib_event_type event_type,u32 dst,int dst_len,struct fib_alias * fa,struct netlink_ext_ack * extack)92b90eb754SJiri Pirko static int call_fib_entry_notifiers(struct net *net,
93b90eb754SJiri Pirko 				    enum fib_event_type event_type, u32 dst,
946c31e5a9SDavid Ahern 				    int dst_len, struct fib_alias *fa,
956c31e5a9SDavid Ahern 				    struct netlink_ext_ack *extack)
96b90eb754SJiri Pirko {
97b90eb754SJiri Pirko 	struct fib_entry_notifier_info info = {
986c31e5a9SDavid Ahern 		.info.extack = extack,
99b90eb754SJiri Pirko 		.dst = dst,
100b90eb754SJiri Pirko 		.dst_len = dst_len,
1016eba87c7SDavid Ahern 		.fi = fa->fa_info,
102568a3f33SGuillaume Nault 		.dscp = fa->fa_dscp,
1036eba87c7SDavid Ahern 		.type = fa->fa_type,
1046eba87c7SDavid Ahern 		.tb_id = fa->tb_id,
105b90eb754SJiri Pirko 	};
10604b1d4e5SIdo Schimmel 	return call_fib4_notifiers(net, event_type, &info.info);
107b90eb754SJiri Pirko }
108b90eb754SJiri Pirko 
10906ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
11019baf839SRobert Olsson 
11119baf839SRobert Olsson #define KEYLENGTH	(8*sizeof(t_key))
11295f60ea3SAlexander Duyck #define KEY_MAX		((t_key)~0)
11319baf839SRobert Olsson 
11419baf839SRobert Olsson typedef unsigned int t_key;
11519baf839SRobert Olsson 
11688bae714SAlexander Duyck #define IS_TRIE(n)	((n)->pos >= KEYLENGTH)
11764c9b6fbSAlexander Duyck #define IS_TNODE(n)	((n)->bits)
11864c9b6fbSAlexander Duyck #define IS_LEAF(n)	(!(n)->bits)
1192373ce1cSRobert Olsson 
12035c6edacSAlexander Duyck struct key_vector {
12141b489fdSAlexander Duyck 	t_key key;
12241b489fdSAlexander Duyck 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
12341b489fdSAlexander Duyck 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
12441b489fdSAlexander Duyck 	unsigned char slen;
12541b489fdSAlexander Duyck 	union {
12641b489fdSAlexander Duyck 		/* This list pointer if valid if (pos | bits) == 0 (LEAF) */
12779e5ad2cSAlexander Duyck 		struct hlist_head leaf;
12841b489fdSAlexander Duyck 		/* This array is valid if (pos | bits) > 0 (TNODE) */
129764f8485SKees Cook 		DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode);
13019baf839SRobert Olsson 	};
131adaf9816SAlexander Duyck };
13219baf839SRobert Olsson 
133dc35dbedSAlexander Duyck struct tnode {
13456ca2adfSAlexander Duyck 	struct rcu_head rcu;
1356e22d174SAlexander Duyck 	t_key empty_children;		/* KEYLENGTH bits needed */
1366e22d174SAlexander Duyck 	t_key full_children;		/* KEYLENGTH bits needed */
137f23e59fbSAlexander Duyck 	struct key_vector __rcu *parent;
138dc35dbedSAlexander Duyck 	struct key_vector kv[1];
13956ca2adfSAlexander Duyck #define tn_bits kv[0].bits
140dc35dbedSAlexander Duyck };
141dc35dbedSAlexander Duyck 
142dc35dbedSAlexander Duyck #define TNODE_SIZE(n)	offsetof(struct tnode, kv[0].tnode[n])
14341b489fdSAlexander Duyck #define LEAF_SIZE	TNODE_SIZE(1)
14441b489fdSAlexander Duyck 
14519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
14619baf839SRobert Olsson struct trie_use_stats {
14719baf839SRobert Olsson 	unsigned int gets;
14819baf839SRobert Olsson 	unsigned int backtrack;
14919baf839SRobert Olsson 	unsigned int semantic_match_passed;
15019baf839SRobert Olsson 	unsigned int semantic_match_miss;
15119baf839SRobert Olsson 	unsigned int null_node_hit;
1522f36895aSRobert Olsson 	unsigned int resize_node_skipped;
15319baf839SRobert Olsson };
15419baf839SRobert Olsson #endif
15519baf839SRobert Olsson 
15619baf839SRobert Olsson struct trie_stat {
15719baf839SRobert Olsson 	unsigned int totdepth;
15819baf839SRobert Olsson 	unsigned int maxdepth;
15919baf839SRobert Olsson 	unsigned int tnodes;
16019baf839SRobert Olsson 	unsigned int leaves;
16119baf839SRobert Olsson 	unsigned int nullpointers;
16293672292SStephen Hemminger 	unsigned int prefixes;
16306ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
16419baf839SRobert Olsson };
16519baf839SRobert Olsson 
16619baf839SRobert Olsson struct trie {
16788bae714SAlexander Duyck 	struct key_vector kv[1];
16819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
1698274a97aSAlexander Duyck 	struct trie_use_stats __percpu *stats;
17019baf839SRobert Olsson #endif
17119baf839SRobert Olsson };
17219baf839SRobert Olsson 
17388bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn);
1749ab948a9SDavid Ahern static unsigned int tnode_free_size;
175c3059477SJarek Poplawski 
176c3059477SJarek Poplawski /*
1779ab948a9SDavid Ahern  * synchronize_rcu after call_rcu for outstanding dirty memory; it should be
1789ab948a9SDavid Ahern  * especially useful before resizing the root node with PREEMPT_NONE configs;
1799ab948a9SDavid Ahern  * the value was obtained experimentally, aiming to avoid visible slowdown.
180c3059477SJarek Poplawski  */
1819ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem = 512 * 1024;
1829ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
1839ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
18419baf839SRobert Olsson 
18508009a76SAlexey Dobriyan static struct kmem_cache *fn_alias_kmem __ro_after_init;
18608009a76SAlexey Dobriyan static struct kmem_cache *trie_leaf_kmem __ro_after_init;
18719baf839SRobert Olsson 
tn_info(struct key_vector * kv)18856ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv)
18956ca2adfSAlexander Duyck {
19056ca2adfSAlexander Duyck 	return container_of(kv, struct tnode, kv[0]);
19156ca2adfSAlexander Duyck }
19256ca2adfSAlexander Duyck 
19364c9b6fbSAlexander Duyck /* caller must hold RTNL */
194f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
195754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
1960a5c0475SEric Dumazet 
19764c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */
198f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
199754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
2000a5c0475SEric Dumazet 
20164c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */
node_set_parent(struct key_vector * n,struct key_vector * tp)20235c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
20306801916SStephen Hemminger {
204adaf9816SAlexander Duyck 	if (n)
205f23e59fbSAlexander Duyck 		rcu_assign_pointer(tn_info(n)->parent, tp);
20664c9b6fbSAlexander Duyck }
20764c9b6fbSAlexander Duyck 
208f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
20964c9b6fbSAlexander Duyck 
21064c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a
21164c9b6fbSAlexander Duyck  * leaf this will return 0 meaning none of the children are accessible.
21264c9b6fbSAlexander Duyck  */
child_length(const struct key_vector * tn)2132e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn)
21464c9b6fbSAlexander Duyck {
21564c9b6fbSAlexander Duyck 	return (1ul << tn->bits) & ~(1ul);
21606801916SStephen Hemminger }
2172373ce1cSRobert Olsson 
21888bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
21988bae714SAlexander Duyck 
get_index(t_key key,struct key_vector * kv)2202e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv)
2212e1ac88aSAlexander Duyck {
2222e1ac88aSAlexander Duyck 	unsigned long index = key ^ kv->key;
2232e1ac88aSAlexander Duyck 
22488bae714SAlexander Duyck 	if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
22588bae714SAlexander Duyck 		return 0;
22688bae714SAlexander Duyck 
2272e1ac88aSAlexander Duyck 	return index >> kv->pos;
2282e1ac88aSAlexander Duyck }
2292e1ac88aSAlexander Duyck 
230e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is
231e9b44019SAlexander Duyck  * necessary. Every node in the trie has a key associated with it, but not
232e9b44019SAlexander Duyck  * all of the bits in that key are significant.
233e9b44019SAlexander Duyck  *
234e9b44019SAlexander Duyck  * Consider a node 'n' and its parent 'tp'.
235e9b44019SAlexander Duyck  *
236e9b44019SAlexander Duyck  * If n is a leaf, every bit in its key is significant. Its presence is
237e9b44019SAlexander Duyck  * necessitated by path compression, since during a tree traversal (when
238e9b44019SAlexander Duyck  * searching for a leaf - unless we are doing an insertion) we will completely
239e9b44019SAlexander Duyck  * ignore all skipped bits we encounter. Thus we need to verify, at the end of
240e9b44019SAlexander Duyck  * a potentially successful search, that we have indeed been walking the
241e9b44019SAlexander Duyck  * correct key path.
242e9b44019SAlexander Duyck  *
243e9b44019SAlexander Duyck  * Note that we can never "miss" the correct key in the tree if present by
244e9b44019SAlexander Duyck  * following the wrong path. Path compression ensures that segments of the key
245e9b44019SAlexander Duyck  * that are the same for all keys with a given prefix are skipped, but the
246e9b44019SAlexander Duyck  * skipped part *is* identical for each node in the subtrie below the skipped
247e9b44019SAlexander Duyck  * bit! trie_insert() in this implementation takes care of that.
248e9b44019SAlexander Duyck  *
249e9b44019SAlexander Duyck  * if n is an internal node - a 'tnode' here, the various parts of its key
250e9b44019SAlexander Duyck  * have many different meanings.
251e9b44019SAlexander Duyck  *
252e9b44019SAlexander Duyck  * Example:
253e9b44019SAlexander Duyck  * _________________________________________________________________
254e9b44019SAlexander Duyck  * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
255e9b44019SAlexander Duyck  * -----------------------------------------------------------------
256e9b44019SAlexander Duyck  *  31  30  29  28  27  26  25  24  23  22  21  20  19  18  17  16
257e9b44019SAlexander Duyck  *
258e9b44019SAlexander Duyck  * _________________________________________________________________
259e9b44019SAlexander Duyck  * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
260e9b44019SAlexander Duyck  * -----------------------------------------------------------------
261e9b44019SAlexander Duyck  *  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
262e9b44019SAlexander Duyck  *
263e9b44019SAlexander Duyck  * tp->pos = 22
264e9b44019SAlexander Duyck  * tp->bits = 3
265e9b44019SAlexander Duyck  * n->pos = 13
266e9b44019SAlexander Duyck  * n->bits = 4
267e9b44019SAlexander Duyck  *
268e9b44019SAlexander Duyck  * First, let's just ignore the bits that come before the parent tp, that is
269e9b44019SAlexander Duyck  * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
270e9b44019SAlexander Duyck  * point we do not use them for anything.
271e9b44019SAlexander Duyck  *
272e9b44019SAlexander Duyck  * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
273e9b44019SAlexander Duyck  * index into the parent's child array. That is, they will be used to find
274e9b44019SAlexander Duyck  * 'n' among tp's children.
275e9b44019SAlexander Duyck  *
27698a384ecSXunlei Pang  * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
277e9b44019SAlexander Duyck  * for the node n.
278e9b44019SAlexander Duyck  *
279e9b44019SAlexander Duyck  * All the bits we have seen so far are significant to the node n. The rest
280e9b44019SAlexander Duyck  * of the bits are really not needed or indeed known in n->key.
281e9b44019SAlexander Duyck  *
282e9b44019SAlexander Duyck  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
283e9b44019SAlexander Duyck  * n's child array, and will of course be different for each child.
284e9b44019SAlexander Duyck  *
28598a384ecSXunlei Pang  * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown
286e9b44019SAlexander Duyck  * at this point.
28719baf839SRobert Olsson  */
28819baf839SRobert Olsson 
289f5026fabSDenis V. Lunev static const int halve_threshold = 25;
290f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
291345aa031SJarek Poplawski static const int halve_threshold_root = 15;
29280b71b80SJens Låås static const int inflate_threshold_root = 30;
2932373ce1cSRobert Olsson 
__alias_free_mem(struct rcu_head * head)2942373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
2952373ce1cSRobert Olsson {
2962373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
2972373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
2982373ce1cSRobert Olsson }
2992373ce1cSRobert Olsson 
alias_free_mem_rcu(struct fib_alias * fa)3002373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3012373ce1cSRobert Olsson {
3022373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3032373ce1cSRobert Olsson }
3042373ce1cSRobert Olsson 
3051de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \
30635c6edacSAlexander Duyck 	ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
30737fd30f2SAlexander Duyck 
__node_free_rcu(struct rcu_head * head)30837fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head)
3092373ce1cSRobert Olsson {
31056ca2adfSAlexander Duyck 	struct tnode *n = container_of(head, struct tnode, rcu);
31137fd30f2SAlexander Duyck 
31256ca2adfSAlexander Duyck 	if (!n->tn_bits)
31337fd30f2SAlexander Duyck 		kmem_cache_free(trie_leaf_kmem, n);
31437fd30f2SAlexander Duyck 	else
3151d5cfdb0STetsuo Handa 		kvfree(n);
3162373ce1cSRobert Olsson }
3172373ce1cSRobert Olsson 
31856ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
319387a5487SStephen Hemminger 
tnode_alloc(int bits)320dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits)
3212373ce1cSRobert Olsson {
3221de3d87bSAlexander Duyck 	size_t size;
3231de3d87bSAlexander Duyck 
3241de3d87bSAlexander Duyck 	/* verify bits is within bounds */
3251de3d87bSAlexander Duyck 	if (bits > TNODE_VMALLOC_MAX)
3261de3d87bSAlexander Duyck 		return NULL;
3271de3d87bSAlexander Duyck 
3281de3d87bSAlexander Duyck 	/* determine size and verify it is non-zero and didn't overflow */
3291de3d87bSAlexander Duyck 	size = TNODE_SIZE(1ul << bits);
3301de3d87bSAlexander Duyck 
3312373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3328d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
33315be75cdSStephen Hemminger 	else
3347a1c8e5aSEric Dumazet 		return vzalloc(size);
33515be75cdSStephen Hemminger }
3362373ce1cSRobert Olsson 
empty_child_inc(struct key_vector * n)33735c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n)
33895f60ea3SAlexander Duyck {
33925cec756SMatthias Kaehlcke 	tn_info(n)->empty_children++;
34025cec756SMatthias Kaehlcke 
34125cec756SMatthias Kaehlcke 	if (!tn_info(n)->empty_children)
34225cec756SMatthias Kaehlcke 		tn_info(n)->full_children++;
34395f60ea3SAlexander Duyck }
34495f60ea3SAlexander Duyck 
empty_child_dec(struct key_vector * n)34535c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n)
34695f60ea3SAlexander Duyck {
34725cec756SMatthias Kaehlcke 	if (!tn_info(n)->empty_children)
34825cec756SMatthias Kaehlcke 		tn_info(n)->full_children--;
34925cec756SMatthias Kaehlcke 
35025cec756SMatthias Kaehlcke 	tn_info(n)->empty_children--;
35195f60ea3SAlexander Duyck }
35295f60ea3SAlexander Duyck 
leaf_new(t_key key,struct fib_alias * fa)35335c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
35419baf839SRobert Olsson {
355f38b24c9SFiro Yang 	struct key_vector *l;
356f38b24c9SFiro Yang 	struct tnode *kv;
357dc35dbedSAlexander Duyck 
358f38b24c9SFiro Yang 	kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
359dc35dbedSAlexander Duyck 	if (!kv)
360dc35dbedSAlexander Duyck 		return NULL;
361dc35dbedSAlexander Duyck 
362dc35dbedSAlexander Duyck 	/* initialize key vector */
363f38b24c9SFiro Yang 	l = kv->kv;
36464c9b6fbSAlexander Duyck 	l->key = key;
365e9b44019SAlexander Duyck 	l->pos = 0;
36664c9b6fbSAlexander Duyck 	l->bits = 0;
367dc35dbedSAlexander Duyck 	l->slen = fa->fa_slen;
36864c9b6fbSAlexander Duyck 
369d5d6487cSAlexander Duyck 	/* link leaf to fib alias */
37079e5ad2cSAlexander Duyck 	INIT_HLIST_HEAD(&l->leaf);
371d5d6487cSAlexander Duyck 	hlist_add_head(&fa->fa_list, &l->leaf);
372dc35dbedSAlexander Duyck 
37319baf839SRobert Olsson 	return l;
37419baf839SRobert Olsson }
37519baf839SRobert Olsson 
tnode_new(t_key key,int pos,int bits)37635c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits)
37719baf839SRobert Olsson {
37864c9b6fbSAlexander Duyck 	unsigned int shift = pos + bits;
379f38b24c9SFiro Yang 	struct key_vector *tn;
380f38b24c9SFiro Yang 	struct tnode *tnode;
38164c9b6fbSAlexander Duyck 
38264c9b6fbSAlexander Duyck 	/* verify bits and pos their msb bits clear and values are valid */
38364c9b6fbSAlexander Duyck 	BUG_ON(!bits || (shift > KEYLENGTH));
38419baf839SRobert Olsson 
385f38b24c9SFiro Yang 	tnode = tnode_alloc(bits);
386dc35dbedSAlexander Duyck 	if (!tnode)
387dc35dbedSAlexander Duyck 		return NULL;
388dc35dbedSAlexander Duyck 
389f38b24c9SFiro Yang 	pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
390f38b24c9SFiro Yang 		 sizeof(struct key_vector *) << bits);
391f38b24c9SFiro Yang 
39295f60ea3SAlexander Duyck 	if (bits == KEYLENGTH)
3936e22d174SAlexander Duyck 		tnode->full_children = 1;
39495f60ea3SAlexander Duyck 	else
3956e22d174SAlexander Duyck 		tnode->empty_children = 1ul << bits;
396c877efb2SStephen Hemminger 
397f38b24c9SFiro Yang 	tn = tnode->kv;
398dc35dbedSAlexander Duyck 	tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
399dc35dbedSAlexander Duyck 	tn->pos = pos;
400dc35dbedSAlexander Duyck 	tn->bits = bits;
401dc35dbedSAlexander Duyck 	tn->slen = pos;
402dc35dbedSAlexander Duyck 
40319baf839SRobert Olsson 	return tn;
40419baf839SRobert Olsson }
40519baf839SRobert Olsson 
406e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node
40719baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
40819baf839SRobert Olsson  */
tnode_full(struct key_vector * tn,struct key_vector * n)40935c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
41019baf839SRobert Olsson {
411e9b44019SAlexander Duyck 	return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
41219baf839SRobert Olsson }
41319baf839SRobert Olsson 
414ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value.
41519baf839SRobert Olsson  * Update the value of full_children and empty_children.
41619baf839SRobert Olsson  */
put_child(struct key_vector * tn,unsigned long i,struct key_vector * n)41735c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i,
41835c6edacSAlexander Duyck 		      struct key_vector *n)
41919baf839SRobert Olsson {
420754baf8dSAlexander Duyck 	struct key_vector *chi = get_child(tn, i);
421ff181ed8SAlexander Duyck 	int isfull, wasfull;
42219baf839SRobert Olsson 
4232e1ac88aSAlexander Duyck 	BUG_ON(i >= child_length(tn));
4240c7770c7SStephen Hemminger 
42595f60ea3SAlexander Duyck 	/* update emptyChildren, overflow into fullChildren */
42600db4124SIan Morris 	if (!n && chi)
42795f60ea3SAlexander Duyck 		empty_child_inc(tn);
42800db4124SIan Morris 	if (n && !chi)
42995f60ea3SAlexander Duyck 		empty_child_dec(tn);
43019baf839SRobert Olsson 
43119baf839SRobert Olsson 	/* update fullChildren */
43219baf839SRobert Olsson 	wasfull = tnode_full(tn, chi);
43319baf839SRobert Olsson 	isfull = tnode_full(tn, n);
434ff181ed8SAlexander Duyck 
43519baf839SRobert Olsson 	if (wasfull && !isfull)
4366e22d174SAlexander Duyck 		tn_info(tn)->full_children--;
43719baf839SRobert Olsson 	else if (!wasfull && isfull)
4386e22d174SAlexander Duyck 		tn_info(tn)->full_children++;
43991b9a277SOlof Johansson 
4405405afd1SAlexander Duyck 	if (n && (tn->slen < n->slen))
4415405afd1SAlexander Duyck 		tn->slen = n->slen;
4425405afd1SAlexander Duyck 
44341b489fdSAlexander Duyck 	rcu_assign_pointer(tn->tnode[i], n);
44419baf839SRobert Olsson }
44519baf839SRobert Olsson 
update_children(struct key_vector * tn)44635c6edacSAlexander Duyck static void update_children(struct key_vector *tn)
44769fa57b1SAlexander Duyck {
44869fa57b1SAlexander Duyck 	unsigned long i;
44969fa57b1SAlexander Duyck 
45069fa57b1SAlexander Duyck 	/* update all of the child parent pointers */
4512e1ac88aSAlexander Duyck 	for (i = child_length(tn); i;) {
452754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(tn, --i);
45369fa57b1SAlexander Duyck 
45469fa57b1SAlexander Duyck 		if (!inode)
45569fa57b1SAlexander Duyck 			continue;
45669fa57b1SAlexander Duyck 
45769fa57b1SAlexander Duyck 		/* Either update the children of a tnode that
45869fa57b1SAlexander Duyck 		 * already belongs to us or update the child
45969fa57b1SAlexander Duyck 		 * to point to ourselves.
46069fa57b1SAlexander Duyck 		 */
46169fa57b1SAlexander Duyck 		if (node_parent(inode) == tn)
46269fa57b1SAlexander Duyck 			update_children(inode);
46369fa57b1SAlexander Duyck 		else
46469fa57b1SAlexander Duyck 			node_set_parent(inode, tn);
46569fa57b1SAlexander Duyck 	}
46669fa57b1SAlexander Duyck }
46769fa57b1SAlexander Duyck 
put_child_root(struct key_vector * tp,t_key key,struct key_vector * n)46888bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key,
46988bae714SAlexander Duyck 				  struct key_vector *n)
470836a0123SAlexander Duyck {
47188bae714SAlexander Duyck 	if (IS_TRIE(tp))
47288bae714SAlexander Duyck 		rcu_assign_pointer(tp->tnode[0], n);
473836a0123SAlexander Duyck 	else
47488bae714SAlexander Duyck 		put_child(tp, get_index(key, tp), n);
475836a0123SAlexander Duyck }
476836a0123SAlexander Duyck 
tnode_free_init(struct key_vector * tn)47735c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn)
4780a5c0475SEric Dumazet {
47956ca2adfSAlexander Duyck 	tn_info(tn)->rcu.next = NULL;
4800a5c0475SEric Dumazet }
481fc86a93bSAlexander Duyck 
tnode_free_append(struct key_vector * tn,struct key_vector * n)48235c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn,
48335c6edacSAlexander Duyck 				     struct key_vector *n)
484fc86a93bSAlexander Duyck {
48556ca2adfSAlexander Duyck 	tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
48656ca2adfSAlexander Duyck 	tn_info(tn)->rcu.next = &tn_info(n)->rcu;
487fc86a93bSAlexander Duyck }
488fc86a93bSAlexander Duyck 
tnode_free(struct key_vector * tn)48935c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn)
490fc86a93bSAlexander Duyck {
49156ca2adfSAlexander Duyck 	struct callback_head *head = &tn_info(tn)->rcu;
492fc86a93bSAlexander Duyck 
493fc86a93bSAlexander Duyck 	while (head) {
494fc86a93bSAlexander Duyck 		head = head->next;
49541b489fdSAlexander Duyck 		tnode_free_size += TNODE_SIZE(1ul << tn->bits);
49637fd30f2SAlexander Duyck 		node_free(tn);
497fc86a93bSAlexander Duyck 
49856ca2adfSAlexander Duyck 		tn = container_of(head, struct tnode, rcu)->kv;
499fc86a93bSAlexander Duyck 	}
500fc86a93bSAlexander Duyck 
50173318c4bSKuniyuki Iwashima 	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
502fc86a93bSAlexander Duyck 		tnode_free_size = 0;
503fc86a93bSAlexander Duyck 		synchronize_rcu();
504fc86a93bSAlexander Duyck 	}
5050a5c0475SEric Dumazet }
5060a5c0475SEric Dumazet 
replace(struct trie * t,struct key_vector * oldtnode,struct key_vector * tn)50788bae714SAlexander Duyck static struct key_vector *replace(struct trie *t,
50835c6edacSAlexander Duyck 				  struct key_vector *oldtnode,
50935c6edacSAlexander Duyck 				  struct key_vector *tn)
51069fa57b1SAlexander Duyck {
51135c6edacSAlexander Duyck 	struct key_vector *tp = node_parent(oldtnode);
51269fa57b1SAlexander Duyck 	unsigned long i;
51369fa57b1SAlexander Duyck 
51469fa57b1SAlexander Duyck 	/* setup the parent pointer out of and back into this node */
51569fa57b1SAlexander Duyck 	NODE_INIT_PARENT(tn, tp);
51688bae714SAlexander Duyck 	put_child_root(tp, tn->key, tn);
51769fa57b1SAlexander Duyck 
51869fa57b1SAlexander Duyck 	/* update all of the child parent pointers */
51969fa57b1SAlexander Duyck 	update_children(tn);
52069fa57b1SAlexander Duyck 
52169fa57b1SAlexander Duyck 	/* all pointers should be clean so we are done */
52269fa57b1SAlexander Duyck 	tnode_free(oldtnode);
52369fa57b1SAlexander Duyck 
52469fa57b1SAlexander Duyck 	/* resize children now that oldtnode is freed */
5252e1ac88aSAlexander Duyck 	for (i = child_length(tn); i;) {
526754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(tn, --i);
52769fa57b1SAlexander Duyck 
52869fa57b1SAlexander Duyck 		/* resize child node */
52969fa57b1SAlexander Duyck 		if (tnode_full(tn, inode))
53088bae714SAlexander Duyck 			tn = resize(t, inode);
53169fa57b1SAlexander Duyck 	}
5328d8e810cSAlexander Duyck 
53388bae714SAlexander Duyck 	return tp;
53469fa57b1SAlexander Duyck }
53569fa57b1SAlexander Duyck 
inflate(struct trie * t,struct key_vector * oldtnode)53688bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t,
53735c6edacSAlexander Duyck 				  struct key_vector *oldtnode)
53819baf839SRobert Olsson {
53935c6edacSAlexander Duyck 	struct key_vector *tn;
54069fa57b1SAlexander Duyck 	unsigned long i;
541e9b44019SAlexander Duyck 	t_key m;
54219baf839SRobert Olsson 
5430c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
54419baf839SRobert Olsson 
545e9b44019SAlexander Duyck 	tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
5462f80b3c8SRobert Olsson 	if (!tn)
5478d8e810cSAlexander Duyck 		goto notnode;
5482f36895aSRobert Olsson 
54969fa57b1SAlexander Duyck 	/* prepare oldtnode to be freed */
55069fa57b1SAlexander Duyck 	tnode_free_init(oldtnode);
55169fa57b1SAlexander Duyck 
55212c081a5SAlexander Duyck 	/* Assemble all of the pointers in our cluster, in this case that
55312c081a5SAlexander Duyck 	 * represents all of the pointers out of our allocated nodes that
55412c081a5SAlexander Duyck 	 * point to existing tnodes and the links between our allocated
55512c081a5SAlexander Duyck 	 * nodes.
5562f36895aSRobert Olsson 	 */
5572e1ac88aSAlexander Duyck 	for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
558754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(oldtnode, --i);
55935c6edacSAlexander Duyck 		struct key_vector *node0, *node1;
56069fa57b1SAlexander Duyck 		unsigned long j, k;
56119baf839SRobert Olsson 
56219baf839SRobert Olsson 		/* An empty child */
56351456b29SIan Morris 		if (!inode)
56419baf839SRobert Olsson 			continue;
56519baf839SRobert Olsson 
56619baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
567adaf9816SAlexander Duyck 		if (!tnode_full(oldtnode, inode)) {
568e9b44019SAlexander Duyck 			put_child(tn, get_index(inode->key, tn), inode);
56919baf839SRobert Olsson 			continue;
57019baf839SRobert Olsson 		}
57119baf839SRobert Olsson 
57269fa57b1SAlexander Duyck 		/* drop the node in the old tnode free list */
57369fa57b1SAlexander Duyck 		tnode_free_append(oldtnode, inode);
57469fa57b1SAlexander Duyck 
57519baf839SRobert Olsson 		/* An internal node with two children */
57619baf839SRobert Olsson 		if (inode->bits == 1) {
577754baf8dSAlexander Duyck 			put_child(tn, 2 * i + 1, get_child(inode, 1));
578754baf8dSAlexander Duyck 			put_child(tn, 2 * i, get_child(inode, 0));
57991b9a277SOlof Johansson 			continue;
58019baf839SRobert Olsson 		}
58119baf839SRobert Olsson 
58219baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
58312c081a5SAlexander Duyck 		 * ones, 'node0' and 'node1', each with half of the
58419baf839SRobert Olsson 		 * original children. The two new nodes will have
58519baf839SRobert Olsson 		 * a position one bit further down the key and this
58619baf839SRobert Olsson 		 * means that the "significant" part of their keys
58719baf839SRobert Olsson 		 * (see the discussion near the top of this file)
58819baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
58912c081a5SAlexander Duyck 		 * node0's key and "1" in node1's key. Since we are
59019baf839SRobert Olsson 		 * moving the key position by one step, the bit that
59119baf839SRobert Olsson 		 * we are moving away from - the bit at position
59212c081a5SAlexander Duyck 		 * (tn->pos) - is the one that will differ between
59312c081a5SAlexander Duyck 		 * node0 and node1. So... we synthesize that bit in the
59419baf839SRobert Olsson 		 * two new keys.
59519baf839SRobert Olsson 		 */
59612c081a5SAlexander Duyck 		node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
59712c081a5SAlexander Duyck 		if (!node1)
59812c081a5SAlexander Duyck 			goto nomem;
59969fa57b1SAlexander Duyck 		node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
60019baf839SRobert Olsson 
60169fa57b1SAlexander Duyck 		tnode_free_append(tn, node1);
60212c081a5SAlexander Duyck 		if (!node0)
60312c081a5SAlexander Duyck 			goto nomem;
60412c081a5SAlexander Duyck 		tnode_free_append(tn, node0);
6052f36895aSRobert Olsson 
60612c081a5SAlexander Duyck 		/* populate child pointers in new nodes */
6072e1ac88aSAlexander Duyck 		for (k = child_length(inode), j = k / 2; j;) {
608754baf8dSAlexander Duyck 			put_child(node1, --j, get_child(inode, --k));
609754baf8dSAlexander Duyck 			put_child(node0, j, get_child(inode, j));
610754baf8dSAlexander Duyck 			put_child(node1, --j, get_child(inode, --k));
611754baf8dSAlexander Duyck 			put_child(node0, j, get_child(inode, j));
61219baf839SRobert Olsson 		}
613ff181ed8SAlexander Duyck 
61412c081a5SAlexander Duyck 		/* link new nodes to parent */
61512c081a5SAlexander Duyck 		NODE_INIT_PARENT(node1, tn);
61612c081a5SAlexander Duyck 		NODE_INIT_PARENT(node0, tn);
61712c081a5SAlexander Duyck 
61812c081a5SAlexander Duyck 		/* link parent to nodes */
61912c081a5SAlexander Duyck 		put_child(tn, 2 * i + 1, node1);
62012c081a5SAlexander Duyck 		put_child(tn, 2 * i, node0);
62112c081a5SAlexander Duyck 	}
62212c081a5SAlexander Duyck 
62369fa57b1SAlexander Duyck 	/* setup the parent pointers into and out of this node */
6248d8e810cSAlexander Duyck 	return replace(t, oldtnode, tn);
625ff181ed8SAlexander Duyck nomem:
626fc86a93bSAlexander Duyck 	/* all pointers should be clean so we are done */
627fc86a93bSAlexander Duyck 	tnode_free(tn);
6288d8e810cSAlexander Duyck notnode:
6298d8e810cSAlexander Duyck 	return NULL;
630ff181ed8SAlexander Duyck }
631ff181ed8SAlexander Duyck 
halve(struct trie * t,struct key_vector * oldtnode)63288bae714SAlexander Duyck static struct key_vector *halve(struct trie *t,
63335c6edacSAlexander Duyck 				struct key_vector *oldtnode)
63419baf839SRobert Olsson {
63535c6edacSAlexander Duyck 	struct key_vector *tn;
63612c081a5SAlexander Duyck 	unsigned long i;
63719baf839SRobert Olsson 
6380c7770c7SStephen Hemminger 	pr_debug("In halve\n");
63919baf839SRobert Olsson 
640e9b44019SAlexander Duyck 	tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
6412f80b3c8SRobert Olsson 	if (!tn)
6428d8e810cSAlexander Duyck 		goto notnode;
6432f36895aSRobert Olsson 
64469fa57b1SAlexander Duyck 	/* prepare oldtnode to be freed */
64569fa57b1SAlexander Duyck 	tnode_free_init(oldtnode);
64669fa57b1SAlexander Duyck 
64712c081a5SAlexander Duyck 	/* Assemble all of the pointers in our cluster, in this case that
64812c081a5SAlexander Duyck 	 * represents all of the pointers out of our allocated nodes that
64912c081a5SAlexander Duyck 	 * point to existing tnodes and the links between our allocated
65012c081a5SAlexander Duyck 	 * nodes.
6512f36895aSRobert Olsson 	 */
6522e1ac88aSAlexander Duyck 	for (i = child_length(oldtnode); i;) {
653754baf8dSAlexander Duyck 		struct key_vector *node1 = get_child(oldtnode, --i);
654754baf8dSAlexander Duyck 		struct key_vector *node0 = get_child(oldtnode, --i);
65535c6edacSAlexander Duyck 		struct key_vector *inode;
6562f36895aSRobert Olsson 
65712c081a5SAlexander Duyck 		/* At least one of the children is empty */
65812c081a5SAlexander Duyck 		if (!node1 || !node0) {
65912c081a5SAlexander Duyck 			put_child(tn, i / 2, node1 ? : node0);
66012c081a5SAlexander Duyck 			continue;
66112c081a5SAlexander Duyck 		}
6622f36895aSRobert Olsson 
6632f36895aSRobert Olsson 		/* Two nonempty children */
66412c081a5SAlexander Duyck 		inode = tnode_new(node0->key, oldtnode->pos, 1);
6658d8e810cSAlexander Duyck 		if (!inode)
6668d8e810cSAlexander Duyck 			goto nomem;
66712c081a5SAlexander Duyck 		tnode_free_append(tn, inode);
6682f80b3c8SRobert Olsson 
66912c081a5SAlexander Duyck 		/* initialize pointers out of node */
67012c081a5SAlexander Duyck 		put_child(inode, 1, node1);
67112c081a5SAlexander Duyck 		put_child(inode, 0, node0);
67212c081a5SAlexander Duyck 		NODE_INIT_PARENT(inode, tn);
67312c081a5SAlexander Duyck 
67412c081a5SAlexander Duyck 		/* link parent to node */
67512c081a5SAlexander Duyck 		put_child(tn, i / 2, inode);
6762f36895aSRobert Olsson 	}
6772f36895aSRobert Olsson 
67869fa57b1SAlexander Duyck 	/* setup the parent pointers into and out of this node */
6798d8e810cSAlexander Duyck 	return replace(t, oldtnode, tn);
6808d8e810cSAlexander Duyck nomem:
6818d8e810cSAlexander Duyck 	/* all pointers should be clean so we are done */
6828d8e810cSAlexander Duyck 	tnode_free(tn);
6838d8e810cSAlexander Duyck notnode:
6848d8e810cSAlexander Duyck 	return NULL;
6852f80b3c8SRobert Olsson }
68619baf839SRobert Olsson 
collapse(struct trie * t,struct key_vector * oldtnode)68788bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t,
68888bae714SAlexander Duyck 				   struct key_vector *oldtnode)
68995f60ea3SAlexander Duyck {
69035c6edacSAlexander Duyck 	struct key_vector *n, *tp;
69195f60ea3SAlexander Duyck 	unsigned long i;
69295f60ea3SAlexander Duyck 
69395f60ea3SAlexander Duyck 	/* scan the tnode looking for that one child that might still exist */
6942e1ac88aSAlexander Duyck 	for (n = NULL, i = child_length(oldtnode); !n && i;)
695754baf8dSAlexander Duyck 		n = get_child(oldtnode, --i);
69695f60ea3SAlexander Duyck 
69795f60ea3SAlexander Duyck 	/* compress one level */
69895f60ea3SAlexander Duyck 	tp = node_parent(oldtnode);
69988bae714SAlexander Duyck 	put_child_root(tp, oldtnode->key, n);
70095f60ea3SAlexander Duyck 	node_set_parent(n, tp);
70195f60ea3SAlexander Duyck 
70295f60ea3SAlexander Duyck 	/* drop dead node */
70395f60ea3SAlexander Duyck 	node_free(oldtnode);
70488bae714SAlexander Duyck 
70588bae714SAlexander Duyck 	return tp;
70695f60ea3SAlexander Duyck }
70795f60ea3SAlexander Duyck 
update_suffix(struct key_vector * tn)70835c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn)
7095405afd1SAlexander Duyck {
7105405afd1SAlexander Duyck 	unsigned char slen = tn->pos;
7115405afd1SAlexander Duyck 	unsigned long stride, i;
712a52ca62cSAlexander Duyck 	unsigned char slen_max;
713a52ca62cSAlexander Duyck 
714a52ca62cSAlexander Duyck 	/* only vector 0 can have a suffix length greater than or equal to
715a52ca62cSAlexander Duyck 	 * tn->pos + tn->bits, the second highest node will have a suffix
716a52ca62cSAlexander Duyck 	 * length at most of tn->pos + tn->bits - 1
717a52ca62cSAlexander Duyck 	 */
718a52ca62cSAlexander Duyck 	slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
7195405afd1SAlexander Duyck 
7205405afd1SAlexander Duyck 	/* search though the list of children looking for nodes that might
7215405afd1SAlexander Duyck 	 * have a suffix greater than the one we currently have.  This is
7225405afd1SAlexander Duyck 	 * why we start with a stride of 2 since a stride of 1 would
7235405afd1SAlexander Duyck 	 * represent the nodes with suffix length equal to tn->pos
7245405afd1SAlexander Duyck 	 */
7252e1ac88aSAlexander Duyck 	for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
726754baf8dSAlexander Duyck 		struct key_vector *n = get_child(tn, i);
7275405afd1SAlexander Duyck 
7285405afd1SAlexander Duyck 		if (!n || (n->slen <= slen))
7295405afd1SAlexander Duyck 			continue;
7305405afd1SAlexander Duyck 
7315405afd1SAlexander Duyck 		/* update stride and slen based on new value */
7325405afd1SAlexander Duyck 		stride <<= (n->slen - slen);
7335405afd1SAlexander Duyck 		slen = n->slen;
7345405afd1SAlexander Duyck 		i &= ~(stride - 1);
7355405afd1SAlexander Duyck 
736a52ca62cSAlexander Duyck 		/* stop searching if we have hit the maximum possible value */
737a52ca62cSAlexander Duyck 		if (slen >= slen_max)
7385405afd1SAlexander Duyck 			break;
7395405afd1SAlexander Duyck 	}
7405405afd1SAlexander Duyck 
7415405afd1SAlexander Duyck 	tn->slen = slen;
7425405afd1SAlexander Duyck 
7435405afd1SAlexander Duyck 	return slen;
7445405afd1SAlexander Duyck }
7455405afd1SAlexander Duyck 
746f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
747cf3637bbSAlexander Duyck  * the Helsinki University of Technology and Matti Tikkanen of Nokia
748cf3637bbSAlexander Duyck  * Telecommunications, page 6:
749cf3637bbSAlexander Duyck  * "A node is doubled if the ratio of non-empty children to all
750cf3637bbSAlexander Duyck  * children in the *doubled* node is at least 'high'."
751cf3637bbSAlexander Duyck  *
752cf3637bbSAlexander Duyck  * 'high' in this instance is the variable 'inflate_threshold'. It
753cf3637bbSAlexander Duyck  * is expressed as a percentage, so we multiply it with
7542e1ac88aSAlexander Duyck  * child_length() and instead of multiplying by 2 (since the
755cf3637bbSAlexander Duyck  * child array will be doubled by inflate()) and multiplying
756cf3637bbSAlexander Duyck  * the left-hand side by 100 (to handle the percentage thing) we
757cf3637bbSAlexander Duyck  * multiply the left-hand side by 50.
758cf3637bbSAlexander Duyck  *
7592e1ac88aSAlexander Duyck  * The left-hand side may look a bit weird: child_length(tn)
760cf3637bbSAlexander Duyck  * - tn->empty_children is of course the number of non-null children
761cf3637bbSAlexander Duyck  * in the current node. tn->full_children is the number of "full"
762cf3637bbSAlexander Duyck  * children, that is non-null tnodes with a skip value of 0.
763cf3637bbSAlexander Duyck  * All of those will be doubled in the resulting inflated tnode, so
764cf3637bbSAlexander Duyck  * we just count them one extra time here.
765cf3637bbSAlexander Duyck  *
766cf3637bbSAlexander Duyck  * A clearer way to write this would be:
767cf3637bbSAlexander Duyck  *
768cf3637bbSAlexander Duyck  * to_be_doubled = tn->full_children;
7692e1ac88aSAlexander Duyck  * not_to_be_doubled = child_length(tn) - tn->empty_children -
770cf3637bbSAlexander Duyck  *     tn->full_children;
771cf3637bbSAlexander Duyck  *
7722e1ac88aSAlexander Duyck  * new_child_length = child_length(tn) * 2;
773cf3637bbSAlexander Duyck  *
774cf3637bbSAlexander Duyck  * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
775cf3637bbSAlexander Duyck  *      new_child_length;
776cf3637bbSAlexander Duyck  * if (new_fill_factor >= inflate_threshold)
777cf3637bbSAlexander Duyck  *
778cf3637bbSAlexander Duyck  * ...and so on, tho it would mess up the while () loop.
779cf3637bbSAlexander Duyck  *
780cf3637bbSAlexander Duyck  * anyway,
781cf3637bbSAlexander Duyck  * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
782cf3637bbSAlexander Duyck  *      inflate_threshold
783cf3637bbSAlexander Duyck  *
784cf3637bbSAlexander Duyck  * avoid a division:
785cf3637bbSAlexander Duyck  * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
786cf3637bbSAlexander Duyck  *      inflate_threshold * new_child_length
787cf3637bbSAlexander Duyck  *
788cf3637bbSAlexander Duyck  * expand not_to_be_doubled and to_be_doubled, and shorten:
7892e1ac88aSAlexander Duyck  * 100 * (child_length(tn) - tn->empty_children +
790cf3637bbSAlexander Duyck  *    tn->full_children) >= inflate_threshold * new_child_length
791cf3637bbSAlexander Duyck  *
792cf3637bbSAlexander Duyck  * expand new_child_length:
7932e1ac88aSAlexander Duyck  * 100 * (child_length(tn) - tn->empty_children +
794cf3637bbSAlexander Duyck  *    tn->full_children) >=
7952e1ac88aSAlexander Duyck  *      inflate_threshold * child_length(tn) * 2
796cf3637bbSAlexander Duyck  *
797cf3637bbSAlexander Duyck  * shorten again:
7982e1ac88aSAlexander Duyck  * 50 * (tn->full_children + child_length(tn) -
799cf3637bbSAlexander Duyck  *    tn->empty_children) >= inflate_threshold *
8002e1ac88aSAlexander Duyck  *    child_length(tn)
801cf3637bbSAlexander Duyck  *
802cf3637bbSAlexander Duyck  */
should_inflate(struct key_vector * tp,struct key_vector * tn)80335c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
804f05a4819SAlexander Duyck {
8052e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
806f05a4819SAlexander Duyck 	unsigned long threshold = used;
807cf3637bbSAlexander Duyck 
808cf3637bbSAlexander Duyck 	/* Keep root node larger */
80988bae714SAlexander Duyck 	threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
8106e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
8116e22d174SAlexander Duyck 	used += tn_info(tn)->full_children;
812cf3637bbSAlexander Duyck 
81395f60ea3SAlexander Duyck 	/* if bits == KEYLENGTH then pos = 0, and will fail below */
81495f60ea3SAlexander Duyck 
81595f60ea3SAlexander Duyck 	return (used > 1) && tn->pos && ((50 * used) >= threshold);
816cf3637bbSAlexander Duyck }
817cf3637bbSAlexander Duyck 
should_halve(struct key_vector * tp,struct key_vector * tn)81835c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
819f05a4819SAlexander Duyck {
8202e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
821f05a4819SAlexander Duyck 	unsigned long threshold = used;
822cf3637bbSAlexander Duyck 
823f05a4819SAlexander Duyck 	/* Keep root node larger */
82488bae714SAlexander Duyck 	threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
8256e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
826f05a4819SAlexander Duyck 
82795f60ea3SAlexander Duyck 	/* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */
82895f60ea3SAlexander Duyck 
82995f60ea3SAlexander Duyck 	return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
83095f60ea3SAlexander Duyck }
83195f60ea3SAlexander Duyck 
should_collapse(struct key_vector * tn)83235c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn)
83395f60ea3SAlexander Duyck {
8342e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
83595f60ea3SAlexander Duyck 
8366e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
83795f60ea3SAlexander Duyck 
83895f60ea3SAlexander Duyck 	/* account for bits == KEYLENGTH case */
8396e22d174SAlexander Duyck 	if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
84095f60ea3SAlexander Duyck 		used -= KEY_MAX;
84195f60ea3SAlexander Duyck 
84295f60ea3SAlexander Duyck 	/* One child or none, time to drop us from the trie */
84395f60ea3SAlexander Duyck 	return used < 2;
844f05a4819SAlexander Duyck }
845f05a4819SAlexander Duyck 
846f05a4819SAlexander Duyck #define MAX_WORK 10
resize(struct trie * t,struct key_vector * tn)84788bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn)
848f05a4819SAlexander Duyck {
8498d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8508d8e810cSAlexander Duyck 	struct trie_use_stats __percpu *stats = t->stats;
8518d8e810cSAlexander Duyck #endif
85235c6edacSAlexander Duyck 	struct key_vector *tp = node_parent(tn);
85388bae714SAlexander Duyck 	unsigned long cindex = get_index(tn->key, tp);
854a80e89d4SAlexander Duyck 	int max_work = MAX_WORK;
855f05a4819SAlexander Duyck 
856f05a4819SAlexander Duyck 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
857f05a4819SAlexander Duyck 		 tn, inflate_threshold, halve_threshold);
858f05a4819SAlexander Duyck 
859ff181ed8SAlexander Duyck 	/* track the tnode via the pointer from the parent instead of
860ff181ed8SAlexander Duyck 	 * doing it ourselves.  This way we can let RCU fully do its
861ff181ed8SAlexander Duyck 	 * thing without us interfering
862ff181ed8SAlexander Duyck 	 */
86388bae714SAlexander Duyck 	BUG_ON(tn != get_child(tp, cindex));
864ff181ed8SAlexander Duyck 
865f05a4819SAlexander Duyck 	/* Double as long as the resulting node has a number of
866f05a4819SAlexander Duyck 	 * nonempty nodes that are above the threshold.
867f05a4819SAlexander Duyck 	 */
868b6f15f82SAlexander Duyck 	while (should_inflate(tp, tn) && max_work) {
86988bae714SAlexander Duyck 		tp = inflate(t, tn);
87088bae714SAlexander Duyck 		if (!tp) {
871cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8728d8e810cSAlexander Duyck 			this_cpu_inc(stats->resize_node_skipped);
873cf3637bbSAlexander Duyck #endif
874cf3637bbSAlexander Duyck 			break;
875cf3637bbSAlexander Duyck 		}
876ff181ed8SAlexander Duyck 
877b6f15f82SAlexander Duyck 		max_work--;
87888bae714SAlexander Duyck 		tn = get_child(tp, cindex);
879cf3637bbSAlexander Duyck 	}
880cf3637bbSAlexander Duyck 
881b6f15f82SAlexander Duyck 	/* update parent in case inflate failed */
882b6f15f82SAlexander Duyck 	tp = node_parent(tn);
883b6f15f82SAlexander Duyck 
884cf3637bbSAlexander Duyck 	/* Return if at least one inflate is run */
885cf3637bbSAlexander Duyck 	if (max_work != MAX_WORK)
886b6f15f82SAlexander Duyck 		return tp;
887cf3637bbSAlexander Duyck 
888f05a4819SAlexander Duyck 	/* Halve as long as the number of empty children in this
889cf3637bbSAlexander Duyck 	 * node is above threshold.
890cf3637bbSAlexander Duyck 	 */
891b6f15f82SAlexander Duyck 	while (should_halve(tp, tn) && max_work) {
89288bae714SAlexander Duyck 		tp = halve(t, tn);
89388bae714SAlexander Duyck 		if (!tp) {
894cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8958d8e810cSAlexander Duyck 			this_cpu_inc(stats->resize_node_skipped);
896cf3637bbSAlexander Duyck #endif
897cf3637bbSAlexander Duyck 			break;
898cf3637bbSAlexander Duyck 		}
899cf3637bbSAlexander Duyck 
900b6f15f82SAlexander Duyck 		max_work--;
90188bae714SAlexander Duyck 		tn = get_child(tp, cindex);
902ff181ed8SAlexander Duyck 	}
903cf3637bbSAlexander Duyck 
904cf3637bbSAlexander Duyck 	/* Only one child remains */
90588bae714SAlexander Duyck 	if (should_collapse(tn))
90688bae714SAlexander Duyck 		return collapse(t, tn);
90788bae714SAlexander Duyck 
908b6f15f82SAlexander Duyck 	/* update parent in case halve failed */
909a52ca62cSAlexander Duyck 	return node_parent(tn);
910cf3637bbSAlexander Duyck }
9118d8e810cSAlexander Duyck 
node_pull_suffix(struct key_vector * tn,unsigned char slen)9121a239173SAlexander Duyck static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
91319baf839SRobert Olsson {
9141a239173SAlexander Duyck 	unsigned char node_slen = tn->slen;
9151a239173SAlexander Duyck 
9161a239173SAlexander Duyck 	while ((node_slen > tn->pos) && (node_slen > slen)) {
9171a239173SAlexander Duyck 		slen = update_suffix(tn);
9181a239173SAlexander Duyck 		if (node_slen == slen)
9195405afd1SAlexander Duyck 			break;
9201a239173SAlexander Duyck 
9211a239173SAlexander Duyck 		tn = node_parent(tn);
9221a239173SAlexander Duyck 		node_slen = tn->slen;
9235405afd1SAlexander Duyck 	}
9245405afd1SAlexander Duyck }
9255405afd1SAlexander Duyck 
node_push_suffix(struct key_vector * tn,unsigned char slen)9261a239173SAlexander Duyck static void node_push_suffix(struct key_vector *tn, unsigned char slen)
9275405afd1SAlexander Duyck {
9281a239173SAlexander Duyck 	while (tn->slen < slen) {
9291a239173SAlexander Duyck 		tn->slen = slen;
9305405afd1SAlexander Duyck 		tn = node_parent(tn);
9315405afd1SAlexander Duyck 	}
9325405afd1SAlexander Duyck }
9335405afd1SAlexander Duyck 
9342373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
fib_find_node(struct trie * t,struct key_vector ** tp,u32 key)93535c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t,
93635c6edacSAlexander Duyck 					struct key_vector **tp, u32 key)
93719baf839SRobert Olsson {
93888bae714SAlexander Duyck 	struct key_vector *pn, *n = t->kv;
93988bae714SAlexander Duyck 	unsigned long index = 0;
94019baf839SRobert Olsson 
94188bae714SAlexander Duyck 	do {
94288bae714SAlexander Duyck 		pn = n;
94388bae714SAlexander Duyck 		n = get_child_rcu(n, index);
94488bae714SAlexander Duyck 
94588bae714SAlexander Duyck 		if (!n)
94688bae714SAlexander Duyck 			break;
94788bae714SAlexander Duyck 
94888bae714SAlexander Duyck 		index = get_cindex(key, n);
94919baf839SRobert Olsson 
950939afb06SAlexander Duyck 		/* This bit of code is a bit tricky but it combines multiple
951939afb06SAlexander Duyck 		 * checks into a single check.  The prefix consists of the
952939afb06SAlexander Duyck 		 * prefix plus zeros for the bits in the cindex. The index
953939afb06SAlexander Duyck 		 * is the difference between the key and this value.  From
954939afb06SAlexander Duyck 		 * this we can actually derive several pieces of data.
955d4a975e8SAlexander Duyck 		 *   if (index >= (1ul << bits))
956939afb06SAlexander Duyck 		 *     we have a mismatch in skip bits and failed
957b3832117SAlexander Duyck 		 *   else
958b3832117SAlexander Duyck 		 *     we know the value is cindex
959d4a975e8SAlexander Duyck 		 *
960d4a975e8SAlexander Duyck 		 * This check is safe even if bits == KEYLENGTH due to the
961d4a975e8SAlexander Duyck 		 * fact that we can only allocate a node with 32 bits if a
962d4a975e8SAlexander Duyck 		 * long is greater than 32 bits.
963939afb06SAlexander Duyck 		 */
964d4a975e8SAlexander Duyck 		if (index >= (1ul << n->bits)) {
965d4a975e8SAlexander Duyck 			n = NULL;
966d4a975e8SAlexander Duyck 			break;
967d4a975e8SAlexander Duyck 		}
968939afb06SAlexander Duyck 
96988bae714SAlexander Duyck 		/* keep searching until we find a perfect match leaf or NULL */
97088bae714SAlexander Duyck 	} while (IS_TNODE(n));
971939afb06SAlexander Duyck 
97235c6edacSAlexander Duyck 	*tp = pn;
973d4a975e8SAlexander Duyck 
974939afb06SAlexander Duyck 	return n;
97519baf839SRobert Olsson }
97619baf839SRobert Olsson 
97732ccf110SGuillaume Nault /* Return the first fib alias matching DSCP with
97802525368SAlexander Duyck  * priority less than or equal to PRIO.
979b5fc0430SIdo Schimmel  * If 'find_first' is set, return the first matching
98032ccf110SGuillaume Nault  * fib alias, regardless of DSCP and priority.
98102525368SAlexander Duyck  */
fib_find_alias(struct hlist_head * fah,u8 slen,dscp_t dscp,u32 prio,u32 tb_id,bool find_first)98279e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
98332ccf110SGuillaume Nault 					dscp_t dscp, u32 prio, u32 tb_id,
984b5fc0430SIdo Schimmel 					bool find_first)
98502525368SAlexander Duyck {
98602525368SAlexander Duyck 	struct fib_alias *fa;
98702525368SAlexander Duyck 
98802525368SAlexander Duyck 	if (!fah)
98902525368SAlexander Duyck 		return NULL;
99002525368SAlexander Duyck 
99156315f9eSAlexander Duyck 	hlist_for_each_entry(fa, fah, fa_list) {
99232ccf110SGuillaume Nault 		/* Avoid Sparse warning when using dscp_t in inequalities */
99332ccf110SGuillaume Nault 		u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
99432ccf110SGuillaume Nault 		u8 __dscp = inet_dscp_to_dsfield(dscp);
99532ccf110SGuillaume Nault 
99679e5ad2cSAlexander Duyck 		if (fa->fa_slen < slen)
99779e5ad2cSAlexander Duyck 			continue;
99879e5ad2cSAlexander Duyck 		if (fa->fa_slen != slen)
99979e5ad2cSAlexander Duyck 			break;
10000b65bd97SAlexander Duyck 		if (fa->tb_id > tb_id)
10010b65bd97SAlexander Duyck 			continue;
10020b65bd97SAlexander Duyck 		if (fa->tb_id != tb_id)
10030b65bd97SAlexander Duyck 			break;
1004b5fc0430SIdo Schimmel 		if (find_first)
1005b5fc0430SIdo Schimmel 			return fa;
100632ccf110SGuillaume Nault 		if (__fa_dscp > __dscp)
100702525368SAlexander Duyck 			continue;
100832ccf110SGuillaume Nault 		if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
100902525368SAlexander Duyck 			return fa;
101002525368SAlexander Duyck 	}
101102525368SAlexander Duyck 
101202525368SAlexander Duyck 	return NULL;
101302525368SAlexander Duyck }
101402525368SAlexander Duyck 
101590b93f1bSIdo Schimmel static struct fib_alias *
fib_find_matching_alias(struct net * net,const struct fib_rt_info * fri)101690b93f1bSIdo Schimmel fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
101790b93f1bSIdo Schimmel {
101890b93f1bSIdo Schimmel 	u8 slen = KEYLENGTH - fri->dst_len;
101990b93f1bSIdo Schimmel 	struct key_vector *l, *tp;
102090b93f1bSIdo Schimmel 	struct fib_table *tb;
102190b93f1bSIdo Schimmel 	struct fib_alias *fa;
102290b93f1bSIdo Schimmel 	struct trie *t;
102390b93f1bSIdo Schimmel 
102490b93f1bSIdo Schimmel 	tb = fib_get_table(net, fri->tb_id);
102590b93f1bSIdo Schimmel 	if (!tb)
102690b93f1bSIdo Schimmel 		return NULL;
102790b93f1bSIdo Schimmel 
102890b93f1bSIdo Schimmel 	t = (struct trie *)tb->tb_data;
102990b93f1bSIdo Schimmel 	l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
103090b93f1bSIdo Schimmel 	if (!l)
103190b93f1bSIdo Schimmel 		return NULL;
103290b93f1bSIdo Schimmel 
103390b93f1bSIdo Schimmel 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
103490b93f1bSIdo Schimmel 		if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
1035888ade8fSGuillaume Nault 		    fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
1036888ade8fSGuillaume Nault 		    fa->fa_type == fri->type)
103790b93f1bSIdo Schimmel 			return fa;
103890b93f1bSIdo Schimmel 	}
103990b93f1bSIdo Schimmel 
104090b93f1bSIdo Schimmel 	return NULL;
104190b93f1bSIdo Schimmel }
104290b93f1bSIdo Schimmel 
fib_alias_hw_flags_set(struct net * net,const struct fib_rt_info * fri)104390b93f1bSIdo Schimmel void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
104490b93f1bSIdo Schimmel {
104596b9bd8cSKuniyuki Iwashima 	u8 fib_notify_on_flag_change;
104690b93f1bSIdo Schimmel 	struct fib_alias *fa_match;
1047680aea08SAmit Cohen 	struct sk_buff *skb;
1048680aea08SAmit Cohen 	int err;
104990b93f1bSIdo Schimmel 
105090b93f1bSIdo Schimmel 	rcu_read_lock();
105190b93f1bSIdo Schimmel 
105290b93f1bSIdo Schimmel 	fa_match = fib_find_matching_alias(net, fri);
105390b93f1bSIdo Schimmel 	if (!fa_match)
105490b93f1bSIdo Schimmel 		goto out;
105590b93f1bSIdo Schimmel 
10569fcf986cSEric Dumazet 	/* These are paired with the WRITE_ONCE() happening in this function.
10579fcf986cSEric Dumazet 	 * The reason is that we are only protected by RCU at this point.
10589fcf986cSEric Dumazet 	 */
10599fcf986cSEric Dumazet 	if (READ_ONCE(fa_match->offload) == fri->offload &&
10609fcf986cSEric Dumazet 	    READ_ONCE(fa_match->trap) == fri->trap &&
10619fcf986cSEric Dumazet 	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1062680aea08SAmit Cohen 		goto out;
1063680aea08SAmit Cohen 
10649fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->offload, fri->offload);
10659fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->trap, fri->trap);
1066648106c3SAmit Cohen 
106796b9bd8cSKuniyuki Iwashima 	fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
106896b9bd8cSKuniyuki Iwashima 
1069648106c3SAmit Cohen 	/* 2 means send notifications only if offload_failed was changed. */
107096b9bd8cSKuniyuki Iwashima 	if (fib_notify_on_flag_change == 2 &&
10719fcf986cSEric Dumazet 	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1072648106c3SAmit Cohen 		goto out;
1073648106c3SAmit Cohen 
10749fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
107590b93f1bSIdo Schimmel 
107696b9bd8cSKuniyuki Iwashima 	if (!fib_notify_on_flag_change)
1077680aea08SAmit Cohen 		goto out;
1078680aea08SAmit Cohen 
1079680aea08SAmit Cohen 	skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
1080680aea08SAmit Cohen 	if (!skb) {
1081680aea08SAmit Cohen 		err = -ENOBUFS;
1082680aea08SAmit Cohen 		goto errout;
1083680aea08SAmit Cohen 	}
1084680aea08SAmit Cohen 
1085680aea08SAmit Cohen 	err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
1086680aea08SAmit Cohen 	if (err < 0) {
1087680aea08SAmit Cohen 		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
1088680aea08SAmit Cohen 		WARN_ON(err == -EMSGSIZE);
1089680aea08SAmit Cohen 		kfree_skb(skb);
1090680aea08SAmit Cohen 		goto errout;
1091680aea08SAmit Cohen 	}
1092680aea08SAmit Cohen 
1093680aea08SAmit Cohen 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
1094680aea08SAmit Cohen 	goto out;
1095680aea08SAmit Cohen 
1096680aea08SAmit Cohen errout:
1097680aea08SAmit Cohen 	rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
109890b93f1bSIdo Schimmel out:
109990b93f1bSIdo Schimmel 	rcu_read_unlock();
110090b93f1bSIdo Schimmel }
110190b93f1bSIdo Schimmel EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
110290b93f1bSIdo Schimmel 
trie_rebalance(struct trie * t,struct key_vector * tn)110335c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn)
110419baf839SRobert Olsson {
110588bae714SAlexander Duyck 	while (!IS_TRIE(tn))
110688bae714SAlexander Duyck 		tn = resize(t, tn);
110719baf839SRobert Olsson }
110819baf839SRobert Olsson 
fib_insert_node(struct trie * t,struct key_vector * tp,struct fib_alias * new,t_key key)110935c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp,
1110d5d6487cSAlexander Duyck 			   struct fib_alias *new, t_key key)
111119baf839SRobert Olsson {
111235c6edacSAlexander Duyck 	struct key_vector *n, *l;
1113836a0123SAlexander Duyck 
1114d5d6487cSAlexander Duyck 	l = leaf_new(key, new);
111579e5ad2cSAlexander Duyck 	if (!l)
11168d8e810cSAlexander Duyck 		goto noleaf;
1117d5d6487cSAlexander Duyck 
1118d5d6487cSAlexander Duyck 	/* retrieve child from parent node */
1119754baf8dSAlexander Duyck 	n = get_child(tp, get_index(key, tp));
112019baf839SRobert Olsson 
1121836a0123SAlexander Duyck 	/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
1122836a0123SAlexander Duyck 	 *
112319baf839SRobert Olsson 	 *  Add a new tnode here
112419baf839SRobert Olsson 	 *  first tnode need some special handling
1125836a0123SAlexander Duyck 	 *  leaves us in position for handling as case 3
112619baf839SRobert Olsson 	 */
112719baf839SRobert Olsson 	if (n) {
112835c6edacSAlexander Duyck 		struct key_vector *tn;
1129f835e471SRobert Olsson 
1130e9b44019SAlexander Duyck 		tn = tnode_new(key, __fls(key ^ n->key), 1);
11318d8e810cSAlexander Duyck 		if (!tn)
11328d8e810cSAlexander Duyck 			goto notnode;
113319baf839SRobert Olsson 
1134836a0123SAlexander Duyck 		/* initialize routes out of node */
1135836a0123SAlexander Duyck 		NODE_INIT_PARENT(tn, tp);
1136836a0123SAlexander Duyck 		put_child(tn, get_index(key, tn) ^ 1, n);
113719baf839SRobert Olsson 
1138836a0123SAlexander Duyck 		/* start adding routes into the node */
113988bae714SAlexander Duyck 		put_child_root(tp, key, tn);
1140836a0123SAlexander Duyck 		node_set_parent(n, tn);
114119baf839SRobert Olsson 
1142836a0123SAlexander Duyck 		/* parent now has a NULL spot where the leaf can go */
1143e962f302SAlexander Duyck 		tp = tn;
114419baf839SRobert Olsson 	}
114591b9a277SOlof Johansson 
1146836a0123SAlexander Duyck 	/* Case 3: n is NULL, and will just insert a new leaf */
1147a52ca62cSAlexander Duyck 	node_push_suffix(tp, new->fa_slen);
1148836a0123SAlexander Duyck 	NODE_INIT_PARENT(l, tp);
114988bae714SAlexander Duyck 	put_child_root(tp, key, l);
11507b85576dSJarek Poplawski 	trie_rebalance(t, tp);
1151d5d6487cSAlexander Duyck 
1152d5d6487cSAlexander Duyck 	return 0;
11538d8e810cSAlexander Duyck notnode:
11548d8e810cSAlexander Duyck 	node_free(l);
11558d8e810cSAlexander Duyck noleaf:
11568d8e810cSAlexander Duyck 	return -ENOMEM;
1157d5d6487cSAlexander Duyck }
1158d5d6487cSAlexander Duyck 
fib_insert_alias(struct trie * t,struct key_vector * tp,struct key_vector * l,struct fib_alias * new,struct fib_alias * fa,t_key key)115935c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp,
116035c6edacSAlexander Duyck 			    struct key_vector *l, struct fib_alias *new,
1161d5d6487cSAlexander Duyck 			    struct fib_alias *fa, t_key key)
1162d5d6487cSAlexander Duyck {
1163d5d6487cSAlexander Duyck 	if (!l)
1164d5d6487cSAlexander Duyck 		return fib_insert_node(t, tp, new, key);
1165d5d6487cSAlexander Duyck 
1166d5d6487cSAlexander Duyck 	if (fa) {
1167d5d6487cSAlexander Duyck 		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1168836a0123SAlexander Duyck 	} else {
1169d5d6487cSAlexander Duyck 		struct fib_alias *last;
1170d5d6487cSAlexander Duyck 
1171d5d6487cSAlexander Duyck 		hlist_for_each_entry(last, &l->leaf, fa_list) {
1172d5d6487cSAlexander Duyck 			if (new->fa_slen < last->fa_slen)
1173d5d6487cSAlexander Duyck 				break;
11740b65bd97SAlexander Duyck 			if ((new->fa_slen == last->fa_slen) &&
11750b65bd97SAlexander Duyck 			    (new->tb_id > last->tb_id))
11760b65bd97SAlexander Duyck 				break;
1177d5d6487cSAlexander Duyck 			fa = last;
1178836a0123SAlexander Duyck 		}
1179836a0123SAlexander Duyck 
1180d5d6487cSAlexander Duyck 		if (fa)
1181d5d6487cSAlexander Duyck 			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1182d5d6487cSAlexander Duyck 		else
1183d5d6487cSAlexander Duyck 			hlist_add_head_rcu(&new->fa_list, &l->leaf);
118419baf839SRobert Olsson 	}
118519baf839SRobert Olsson 
1186d5d6487cSAlexander Duyck 	/* if we added to the tail node then we need to update slen */
1187d5d6487cSAlexander Duyck 	if (l->slen < new->fa_slen) {
1188d5d6487cSAlexander Duyck 		l->slen = new->fa_slen;
11891a239173SAlexander Duyck 		node_push_suffix(tp, new->fa_slen);
1190d5d6487cSAlexander Duyck 	}
1191d5d6487cSAlexander Duyck 
1192d5d6487cSAlexander Duyck 	return 0;
1193d5d6487cSAlexander Duyck }
1194d5d6487cSAlexander Duyck 
fib_valid_key_len(u32 key,u8 plen,struct netlink_ext_ack * extack)119578055998SDavid Ahern static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1196ba277e8eSDavid Ahern {
119778055998SDavid Ahern 	if (plen > KEYLENGTH) {
119878055998SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
1199ba277e8eSDavid Ahern 		return false;
120078055998SDavid Ahern 	}
1201ba277e8eSDavid Ahern 
120278055998SDavid Ahern 	if ((plen < KEYLENGTH) && (key << plen)) {
120378055998SDavid Ahern 		NL_SET_ERR_MSG(extack,
120478055998SDavid Ahern 			       "Invalid prefix for given prefix length");
1205ba277e8eSDavid Ahern 		return false;
120678055998SDavid Ahern 	}
1207ba277e8eSDavid Ahern 
1208ba277e8eSDavid Ahern 	return true;
1209ba277e8eSDavid Ahern }
1210ba277e8eSDavid Ahern 
1211a6c76c17SIdo Schimmel static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1212a6c76c17SIdo Schimmel 			     struct key_vector *l, struct fib_alias *old);
1213a6c76c17SIdo Schimmel 
1214d5d6487cSAlexander Duyck /* Caller must hold RTNL. */
fib_table_insert(struct net * net,struct fib_table * tb,struct fib_config * cfg,struct netlink_ext_ack * extack)1215b90eb754SJiri Pirko int fib_table_insert(struct net *net, struct fib_table *tb,
12166d8422a1SDavid Ahern 		     struct fib_config *cfg, struct netlink_ext_ack *extack)
121719baf839SRobert Olsson {
121819baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
121919baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
122035c6edacSAlexander Duyck 	struct key_vector *l, *tp;
1221b93e1fa7SGuillaume Nault 	u16 nlflags = NLM_F_EXCL;
122219baf839SRobert Olsson 	struct fib_info *fi;
122379e5ad2cSAlexander Duyck 	u8 plen = cfg->fc_dst_len;
122479e5ad2cSAlexander Duyck 	u8 slen = KEYLENGTH - plen;
122532ccf110SGuillaume Nault 	dscp_t dscp;
1226d4a975e8SAlexander Duyck 	u32 key;
122719baf839SRobert Olsson 	int err;
122819baf839SRobert Olsson 
12294e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
123019baf839SRobert Olsson 
123178055998SDavid Ahern 	if (!fib_valid_key_len(key, plen, extack))
123219baf839SRobert Olsson 		return -EINVAL;
123319baf839SRobert Olsson 
1234ba277e8eSDavid Ahern 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1235ba277e8eSDavid Ahern 
12366d8422a1SDavid Ahern 	fi = fib_create_info(cfg, extack);
12374e902c57SThomas Graf 	if (IS_ERR(fi)) {
12384e902c57SThomas Graf 		err = PTR_ERR(fi);
123919baf839SRobert Olsson 		goto err;
12404e902c57SThomas Graf 	}
124119baf839SRobert Olsson 
124232ccf110SGuillaume Nault 	dscp = cfg->fc_dscp;
1243d4a975e8SAlexander Duyck 	l = fib_find_node(t, &tp, key);
124432ccf110SGuillaume Nault 	fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
1245b5fc0430SIdo Schimmel 				tb->tb_id, false) : NULL;
124619baf839SRobert Olsson 
124719baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
124832ccf110SGuillaume Nault 	 * with the same keys [prefix,dscp,priority], if such key already
124919baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
125019baf839SRobert Olsson 	 *
125119baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
125256315f9eSAlexander Duyck 	 * insert to the tail of the section matching the suffix length
125356315f9eSAlexander Duyck 	 * of the new alias.
125419baf839SRobert Olsson 	 */
125519baf839SRobert Olsson 
125632ccf110SGuillaume Nault 	if (fa && fa->fa_dscp == dscp &&
1257936f6f8eSJulian Anastasov 	    fa->fa_info->fib_priority == fi->fib_priority) {
1258936f6f8eSJulian Anastasov 		struct fib_alias *fa_first, *fa_match;
125919baf839SRobert Olsson 
126019baf839SRobert Olsson 		err = -EEXIST;
12614e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
126219baf839SRobert Olsson 			goto out;
126319baf839SRobert Olsson 
1264b93e1fa7SGuillaume Nault 		nlflags &= ~NLM_F_EXCL;
1265b93e1fa7SGuillaume Nault 
1266936f6f8eSJulian Anastasov 		/* We have 2 goals:
1267936f6f8eSJulian Anastasov 		 * 1. Find exact match for type, scope, fib_info to avoid
1268936f6f8eSJulian Anastasov 		 * duplicate routes
1269936f6f8eSJulian Anastasov 		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1270936f6f8eSJulian Anastasov 		 */
1271936f6f8eSJulian Anastasov 		fa_match = NULL;
1272936f6f8eSJulian Anastasov 		fa_first = fa;
127356315f9eSAlexander Duyck 		hlist_for_each_entry_from(fa, fa_list) {
12740b65bd97SAlexander Duyck 			if ((fa->fa_slen != slen) ||
12750b65bd97SAlexander Duyck 			    (fa->tb_id != tb->tb_id) ||
127632ccf110SGuillaume Nault 			    (fa->fa_dscp != dscp))
1277936f6f8eSJulian Anastasov 				break;
1278936f6f8eSJulian Anastasov 			if (fa->fa_info->fib_priority != fi->fib_priority)
1279936f6f8eSJulian Anastasov 				break;
1280936f6f8eSJulian Anastasov 			if (fa->fa_type == cfg->fc_type &&
1281936f6f8eSJulian Anastasov 			    fa->fa_info == fi) {
1282936f6f8eSJulian Anastasov 				fa_match = fa;
1283936f6f8eSJulian Anastasov 				break;
1284936f6f8eSJulian Anastasov 			}
1285936f6f8eSJulian Anastasov 		}
1286936f6f8eSJulian Anastasov 
12874e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
128819baf839SRobert Olsson 			struct fib_info *fi_drop;
128919baf839SRobert Olsson 			u8 state;
129019baf839SRobert Olsson 
1291b93e1fa7SGuillaume Nault 			nlflags |= NLM_F_REPLACE;
1292936f6f8eSJulian Anastasov 			fa = fa_first;
1293936f6f8eSJulian Anastasov 			if (fa_match) {
1294936f6f8eSJulian Anastasov 				if (fa == fa_match)
1295936f6f8eSJulian Anastasov 					err = 0;
12966725033fSJoonwoo Park 				goto out;
1297936f6f8eSJulian Anastasov 			}
12982373ce1cSRobert Olsson 			err = -ENOBUFS;
1299e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
130051456b29SIan Morris 			if (!new_fa)
13012373ce1cSRobert Olsson 				goto out;
130219baf839SRobert Olsson 
130319baf839SRobert Olsson 			fi_drop = fa->fa_info;
130432ccf110SGuillaume Nault 			new_fa->fa_dscp = fa->fa_dscp;
13052373ce1cSRobert Olsson 			new_fa->fa_info = fi;
13064e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
130719baf839SRobert Olsson 			state = fa->fa_state;
1308936f6f8eSJulian Anastasov 			new_fa->fa_state = state & ~FA_S_ACCESSED;
13099b6ebad5SAlexander Duyck 			new_fa->fa_slen = fa->fa_slen;
1310d4e64c29SMichal Kubeček 			new_fa->tb_id = tb->tb_id;
13112392debcSJulian Anastasov 			new_fa->fa_default = -1;
131290b93f1bSIdo Schimmel 			new_fa->offload = 0;
131390b93f1bSIdo Schimmel 			new_fa->trap = 0;
131436c5100eSAmit Cohen 			new_fa->offload_failed = 0;
131519baf839SRobert Olsson 
13166324d0faSIdo Schimmel 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
13176324d0faSIdo Schimmel 
1318ee3936d6SIdo Schimmel 			if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
13196324d0faSIdo Schimmel 					   tb->tb_id, true) == new_fa) {
1320ee3936d6SIdo Schimmel 				enum fib_event_type fib_event;
1321ee3936d6SIdo Schimmel 
1322446f7391SIdo Schimmel 				fib_event = FIB_EVENT_ENTRY_REPLACE;
1323ee3936d6SIdo Schimmel 				err = call_fib_entry_notifiers(net, fib_event,
1324ee3936d6SIdo Schimmel 							       key, plen,
1325ee3936d6SIdo Schimmel 							       new_fa, extack);
13266324d0faSIdo Schimmel 				if (err) {
13276324d0faSIdo Schimmel 					hlist_replace_rcu(&new_fa->fa_list,
13286324d0faSIdo Schimmel 							  &fa->fa_list);
1329ee3936d6SIdo Schimmel 					goto out_free_new_fa;
1330ee3936d6SIdo Schimmel 				}
13316324d0faSIdo Schimmel 			}
1332c1d7ee67SDavid Ahern 
13335b7d616dSIdo Schimmel 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
13345b7d616dSIdo Schimmel 				  tb->tb_id, &cfg->fc_nlinfo, nlflags);
13355b7d616dSIdo Schimmel 
13362373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
133719baf839SRobert Olsson 
133819baf839SRobert Olsson 			fib_release_info(fi_drop);
133919baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
13404ccfe6d4SNicolas Dichtel 				rt_cache_flush(cfg->fc_nlinfo.nl_net);
1341b90eb754SJiri Pirko 
134219baf839SRobert Olsson 			goto succeeded;
134319baf839SRobert Olsson 		}
134419baf839SRobert Olsson 		/* Error if we find a perfect match which
134519baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
134619baf839SRobert Olsson 		 * information.
134719baf839SRobert Olsson 		 */
1348936f6f8eSJulian Anastasov 		if (fa_match)
134919baf839SRobert Olsson 			goto out;
1350a07f5f50SStephen Hemminger 
1351446f7391SIdo Schimmel 		if (cfg->fc_nlflags & NLM_F_APPEND)
1352b93e1fa7SGuillaume Nault 			nlflags |= NLM_F_APPEND;
1353446f7391SIdo Schimmel 		else
1354936f6f8eSJulian Anastasov 			fa = fa_first;
135519baf839SRobert Olsson 	}
135619baf839SRobert Olsson 	err = -ENOENT;
13574e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
135819baf839SRobert Olsson 		goto out;
135919baf839SRobert Olsson 
1360b93e1fa7SGuillaume Nault 	nlflags |= NLM_F_CREATE;
136119baf839SRobert Olsson 	err = -ENOBUFS;
1362e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
136351456b29SIan Morris 	if (!new_fa)
136419baf839SRobert Olsson 		goto out;
136519baf839SRobert Olsson 
136619baf839SRobert Olsson 	new_fa->fa_info = fi;
136732ccf110SGuillaume Nault 	new_fa->fa_dscp = dscp;
13684e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
136919baf839SRobert Olsson 	new_fa->fa_state = 0;
137079e5ad2cSAlexander Duyck 	new_fa->fa_slen = slen;
13710ddcf43dSAlexander Duyck 	new_fa->tb_id = tb->tb_id;
13722392debcSJulian Anastasov 	new_fa->fa_default = -1;
137390b93f1bSIdo Schimmel 	new_fa->offload = 0;
137490b93f1bSIdo Schimmel 	new_fa->trap = 0;
137536c5100eSAmit Cohen 	new_fa->offload_failed = 0;
137619baf839SRobert Olsson 
13779b6ebad5SAlexander Duyck 	/* Insert new entry to the list. */
1378d5d6487cSAlexander Duyck 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1379d5d6487cSAlexander Duyck 	if (err)
1380a6c76c17SIdo Schimmel 		goto out_free_new_fa;
1381a6c76c17SIdo Schimmel 
1382a6c76c17SIdo Schimmel 	/* The alias was already inserted, so the node must exist. */
1383a6c76c17SIdo Schimmel 	l = l ? l : fib_find_node(t, &tp, key);
1384568fe849SZiyang Xuan 	if (WARN_ON_ONCE(!l)) {
1385568fe849SZiyang Xuan 		err = -ENOENT;
1386a6c76c17SIdo Schimmel 		goto out_free_new_fa;
1387568fe849SZiyang Xuan 	}
1388a6c76c17SIdo Schimmel 
1389a8674f75SIdo Schimmel 	if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
1390a8674f75SIdo Schimmel 	    new_fa) {
1391a8674f75SIdo Schimmel 		enum fib_event_type fib_event;
1392a8674f75SIdo Schimmel 
1393446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_REPLACE;
1394a8674f75SIdo Schimmel 		err = call_fib_entry_notifiers(net, fib_event, key, plen,
1395a8674f75SIdo Schimmel 					       new_fa, extack);
1396a8674f75SIdo Schimmel 		if (err)
1397a8674f75SIdo Schimmel 			goto out_remove_new_fa;
1398a8674f75SIdo Schimmel 	}
139919baf839SRobert Olsson 
140021d8c49eSDavid S. Miller 	if (!plen)
140121d8c49eSDavid S. Miller 		tb->tb_num_default++;
140221d8c49eSDavid S. Miller 
14034ccfe6d4SNicolas Dichtel 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
14040ddcf43dSAlexander Duyck 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1405a2bb6d7dSRoopa Prabhu 		  &cfg->fc_nlinfo, nlflags);
140619baf839SRobert Olsson succeeded:
140719baf839SRobert Olsson 	return 0;
1408f835e471SRobert Olsson 
1409a6c76c17SIdo Schimmel out_remove_new_fa:
1410a6c76c17SIdo Schimmel 	fib_remove_alias(t, tp, l, new_fa);
1411f835e471SRobert Olsson out_free_new_fa:
1412f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
141319baf839SRobert Olsson out:
141419baf839SRobert Olsson 	fib_release_info(fi);
141591b9a277SOlof Johansson err:
141619baf839SRobert Olsson 	return err;
141719baf839SRobert Olsson }
141819baf839SRobert Olsson 
prefix_mismatch(t_key key,struct key_vector * n)141935c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
14209f9e636dSAlexander Duyck {
14219f9e636dSAlexander Duyck 	t_key prefix = n->key;
14229f9e636dSAlexander Duyck 
14239f9e636dSAlexander Duyck 	return (key ^ prefix) & (prefix | -prefix);
14249f9e636dSAlexander Duyck }
14259f9e636dSAlexander Duyck 
fib_lookup_good_nhc(const struct fib_nh_common * nhc,int fib_flags,const struct flowi4 * flp)1426af7888adSDavid Ahern bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1427af7888adSDavid Ahern 			 const struct flowi4 *flp)
1428af7888adSDavid Ahern {
1429af7888adSDavid Ahern 	if (nhc->nhc_flags & RTNH_F_DEAD)
1430af7888adSDavid Ahern 		return false;
1431af7888adSDavid Ahern 
1432af7888adSDavid Ahern 	if (ip_ignore_linkdown(nhc->nhc_dev) &&
1433af7888adSDavid Ahern 	    nhc->nhc_flags & RTNH_F_LINKDOWN &&
1434af7888adSDavid Ahern 	    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1435af7888adSDavid Ahern 		return false;
1436af7888adSDavid Ahern 
143740867d74SDavid Ahern 	if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
1438af7888adSDavid Ahern 		return false;
1439af7888adSDavid Ahern 
1440af7888adSDavid Ahern 	return true;
1441af7888adSDavid Ahern }
1442af7888adSDavid Ahern 
1443345e9b54SAlexander Duyck /* should be called with rcu_read_lock */
fib_table_lookup(struct fib_table * tb,const struct flowi4 * flp,struct fib_result * res,int fib_flags)144422bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1445ebc0ffaeSEric Dumazet 		     struct fib_result *res, int fib_flags)
144619baf839SRobert Olsson {
144719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
14488274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
14498274a97aSAlexander Duyck 	struct trie_use_stats __percpu *stats = t->stats;
14508274a97aSAlexander Duyck #endif
14519f9e636dSAlexander Duyck 	const t_key key = ntohl(flp->daddr);
145235c6edacSAlexander Duyck 	struct key_vector *n, *pn;
145379e5ad2cSAlexander Duyck 	struct fib_alias *fa;
145471e8b67dSAlexander Duyck 	unsigned long index;
14559f9e636dSAlexander Duyck 	t_key cindex;
145619baf839SRobert Olsson 
145788bae714SAlexander Duyck 	pn = t->kv;
145888bae714SAlexander Duyck 	cindex = 0;
145988bae714SAlexander Duyck 
146088bae714SAlexander Duyck 	n = get_child_rcu(pn, cindex);
14619f323973SDavid Ahern 	if (!n) {
14629f323973SDavid Ahern 		trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1463345e9b54SAlexander Duyck 		return -EAGAIN;
14649f323973SDavid Ahern 	}
146519baf839SRobert Olsson 
146619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
14678274a97aSAlexander Duyck 	this_cpu_inc(stats->gets);
146819baf839SRobert Olsson #endif
146919baf839SRobert Olsson 
14709f9e636dSAlexander Duyck 	/* Step 1: Travel to the longest prefix match in the trie */
14719f9e636dSAlexander Duyck 	for (;;) {
147288bae714SAlexander Duyck 		index = get_cindex(key, n);
14739f9e636dSAlexander Duyck 
14749f9e636dSAlexander Duyck 		/* This bit of code is a bit tricky but it combines multiple
14759f9e636dSAlexander Duyck 		 * checks into a single check.  The prefix consists of the
14769f9e636dSAlexander Duyck 		 * prefix plus zeros for the "bits" in the prefix. The index
14779f9e636dSAlexander Duyck 		 * is the difference between the key and this value.  From
14789f9e636dSAlexander Duyck 		 * this we can actually derive several pieces of data.
147971e8b67dSAlexander Duyck 		 *   if (index >= (1ul << bits))
14809f9e636dSAlexander Duyck 		 *     we have a mismatch in skip bits and failed
1481b3832117SAlexander Duyck 		 *   else
1482b3832117SAlexander Duyck 		 *     we know the value is cindex
148371e8b67dSAlexander Duyck 		 *
148471e8b67dSAlexander Duyck 		 * This check is safe even if bits == KEYLENGTH due to the
148571e8b67dSAlexander Duyck 		 * fact that we can only allocate a node with 32 bits if a
148671e8b67dSAlexander Duyck 		 * long is greater than 32 bits.
14879f9e636dSAlexander Duyck 		 */
148871e8b67dSAlexander Duyck 		if (index >= (1ul << n->bits))
14899f9e636dSAlexander Duyck 			break;
14909f9e636dSAlexander Duyck 
14919f9e636dSAlexander Duyck 		/* we have found a leaf. Prefixes have already been compared */
14929f9e636dSAlexander Duyck 		if (IS_LEAF(n))
1493a07f5f50SStephen Hemminger 			goto found;
14949f9e636dSAlexander Duyck 
14959f9e636dSAlexander Duyck 		/* only record pn and cindex if we are going to be chopping
14969f9e636dSAlexander Duyck 		 * bits later.  Otherwise we are just wasting cycles.
14979f9e636dSAlexander Duyck 		 */
14985405afd1SAlexander Duyck 		if (n->slen > n->pos) {
14999f9e636dSAlexander Duyck 			pn = n;
15009f9e636dSAlexander Duyck 			cindex = index;
150119baf839SRobert Olsson 		}
1502a07f5f50SStephen Hemminger 
1503754baf8dSAlexander Duyck 		n = get_child_rcu(n, index);
15049f9e636dSAlexander Duyck 		if (unlikely(!n))
15059f9e636dSAlexander Duyck 			goto backtrace;
15069f9e636dSAlexander Duyck 	}
150719baf839SRobert Olsson 
15089f9e636dSAlexander Duyck 	/* Step 2: Sort out leaves and begin backtracing for longest prefix */
15099f9e636dSAlexander Duyck 	for (;;) {
15109f9e636dSAlexander Duyck 		/* record the pointer where our next node pointer is stored */
151135c6edacSAlexander Duyck 		struct key_vector __rcu **cptr = n->tnode;
151219baf839SRobert Olsson 
15139f9e636dSAlexander Duyck 		/* This test verifies that none of the bits that differ
15149f9e636dSAlexander Duyck 		 * between the key and the prefix exist in the region of
15159f9e636dSAlexander Duyck 		 * the lsb and higher in the prefix.
15169f9e636dSAlexander Duyck 		 */
15175405afd1SAlexander Duyck 		if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
15189f9e636dSAlexander Duyck 			goto backtrace;
151919baf839SRobert Olsson 
15209f9e636dSAlexander Duyck 		/* exit out and process leaf */
15219f9e636dSAlexander Duyck 		if (unlikely(IS_LEAF(n)))
15229f9e636dSAlexander Duyck 			break;
152319baf839SRobert Olsson 
15249f9e636dSAlexander Duyck 		/* Don't bother recording parent info.  Since we are in
15259f9e636dSAlexander Duyck 		 * prefix match mode we will have to come back to wherever
15269f9e636dSAlexander Duyck 		 * we started this traversal anyway
15279f9e636dSAlexander Duyck 		 */
15289f9e636dSAlexander Duyck 
15299f9e636dSAlexander Duyck 		while ((n = rcu_dereference(*cptr)) == NULL) {
15309f9e636dSAlexander Duyck backtrace:
153119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15329f9e636dSAlexander Duyck 			if (!n)
15338274a97aSAlexander Duyck 				this_cpu_inc(stats->null_node_hit);
153419baf839SRobert Olsson #endif
15359f9e636dSAlexander Duyck 			/* If we are at cindex 0 there are no more bits for
15369f9e636dSAlexander Duyck 			 * us to strip at this level so we must ascend back
15379f9e636dSAlexander Duyck 			 * up one level to see if there are any more bits to
15389f9e636dSAlexander Duyck 			 * be stripped there.
153919baf839SRobert Olsson 			 */
15409f9e636dSAlexander Duyck 			while (!cindex) {
15419f9e636dSAlexander Duyck 				t_key pkey = pn->key;
154219baf839SRobert Olsson 
154388bae714SAlexander Duyck 				/* If we don't have a parent then there is
154488bae714SAlexander Duyck 				 * nothing for us to do as we do not have any
154588bae714SAlexander Duyck 				 * further nodes to parse.
154688bae714SAlexander Duyck 				 */
15479f323973SDavid Ahern 				if (IS_TRIE(pn)) {
15489f323973SDavid Ahern 					trace_fib_table_lookup(tb->tb_id, flp,
15499f323973SDavid Ahern 							       NULL, -EAGAIN);
1550345e9b54SAlexander Duyck 					return -EAGAIN;
15519f323973SDavid Ahern 				}
155219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15538274a97aSAlexander Duyck 				this_cpu_inc(stats->backtrack);
155419baf839SRobert Olsson #endif
15559f9e636dSAlexander Duyck 				/* Get Child's index */
155688bae714SAlexander Duyck 				pn = node_parent_rcu(pn);
15579f9e636dSAlexander Duyck 				cindex = get_index(pkey, pn);
15589f9e636dSAlexander Duyck 			}
15599f9e636dSAlexander Duyck 
15609f9e636dSAlexander Duyck 			/* strip the least significant bit from the cindex */
15619f9e636dSAlexander Duyck 			cindex &= cindex - 1;
15629f9e636dSAlexander Duyck 
15639f9e636dSAlexander Duyck 			/* grab pointer for next child node */
156441b489fdSAlexander Duyck 			cptr = &pn->tnode[cindex];
156519baf839SRobert Olsson 		}
156619baf839SRobert Olsson 	}
15679f9e636dSAlexander Duyck 
156819baf839SRobert Olsson found:
156971e8b67dSAlexander Duyck 	/* this line carries forward the xor from earlier in the function */
157071e8b67dSAlexander Duyck 	index = key ^ n->key;
157171e8b67dSAlexander Duyck 
15729f9e636dSAlexander Duyck 	/* Step 3: Process the leaf, if that fails fall back to backtracing */
157379e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1574345e9b54SAlexander Duyck 		struct fib_info *fi = fa->fa_info;
1575af7888adSDavid Ahern 		struct fib_nh_common *nhc;
1576345e9b54SAlexander Duyck 		int nhsel, err;
1577345e9b54SAlexander Duyck 
1578a5829f53SAlexander Duyck 		if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1579a5829f53SAlexander Duyck 			if (index >= (1ul << fa->fa_slen))
15809b6ebad5SAlexander Duyck 				continue;
1581a5829f53SAlexander Duyck 		}
158232ccf110SGuillaume Nault 		if (fa->fa_dscp &&
158332ccf110SGuillaume Nault 		    inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
1584345e9b54SAlexander Duyck 			continue;
1585fce92af1SEric Dumazet 		/* Paired with WRITE_ONCE() in fib_release_info() */
1586fce92af1SEric Dumazet 		if (READ_ONCE(fi->fib_dead))
1587345e9b54SAlexander Duyck 			continue;
1588345e9b54SAlexander Duyck 		if (fa->fa_info->fib_scope < flp->flowi4_scope)
1589345e9b54SAlexander Duyck 			continue;
1590345e9b54SAlexander Duyck 		fib_alias_accessed(fa);
1591345e9b54SAlexander Duyck 		err = fib_props[fa->fa_type].error;
1592345e9b54SAlexander Duyck 		if (unlikely(err < 0)) {
15934c7e8084SDavid Ahern out_reject:
1594345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1595345e9b54SAlexander Duyck 			this_cpu_inc(stats->semantic_match_passed);
1596345e9b54SAlexander Duyck #endif
15979f323973SDavid Ahern 			trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1598345e9b54SAlexander Duyck 			return err;
1599345e9b54SAlexander Duyck 		}
1600345e9b54SAlexander Duyck 		if (fi->fib_flags & RTNH_F_DEAD)
1601345e9b54SAlexander Duyck 			continue;
16024c7e8084SDavid Ahern 
1603af7888adSDavid Ahern 		if (unlikely(fi->nh)) {
1604af7888adSDavid Ahern 			if (nexthop_is_blackhole(fi->nh)) {
16054c7e8084SDavid Ahern 				err = fib_props[RTN_BLACKHOLE].error;
16064c7e8084SDavid Ahern 				goto out_reject;
16074c7e8084SDavid Ahern 			}
16084c7e8084SDavid Ahern 
1609af7888adSDavid Ahern 			nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1610af7888adSDavid Ahern 						     &nhsel);
1611af7888adSDavid Ahern 			if (nhc)
1612af7888adSDavid Ahern 				goto set_result;
1613af7888adSDavid Ahern 			goto miss;
1614613d09b3SDavid Ahern 		}
1615345e9b54SAlexander Duyck 
1616af7888adSDavid Ahern 		for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1617af7888adSDavid Ahern 			nhc = fib_info_nhc(fi, nhsel);
1618af7888adSDavid Ahern 
1619af7888adSDavid Ahern 			if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
1620af7888adSDavid Ahern 				continue;
1621af7888adSDavid Ahern set_result:
1622345e9b54SAlexander Duyck 			if (!(fib_flags & FIB_LOOKUP_NOREF))
16230029c0deSReshetova, Elena 				refcount_inc(&fi->fib_clntref);
1624345e9b54SAlexander Duyck 
16256ffd9034SDavid Ahern 			res->prefix = htonl(n->key);
16269b6ebad5SAlexander Duyck 			res->prefixlen = KEYLENGTH - fa->fa_slen;
1627345e9b54SAlexander Duyck 			res->nh_sel = nhsel;
1628eba618abSDavid Ahern 			res->nhc = nhc;
1629345e9b54SAlexander Duyck 			res->type = fa->fa_type;
1630345e9b54SAlexander Duyck 			res->scope = fi->fib_scope;
1631*015d29dbSIdo Schimmel 			res->dscp = fa->fa_dscp;
1632345e9b54SAlexander Duyck 			res->fi = fi;
1633345e9b54SAlexander Duyck 			res->table = tb;
163479e5ad2cSAlexander Duyck 			res->fa_head = &n->leaf;
1635345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1636345e9b54SAlexander Duyck 			this_cpu_inc(stats->semantic_match_passed);
1637345e9b54SAlexander Duyck #endif
1638eba618abSDavid Ahern 			trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1639f6d3c192SDavid Ahern 
1640345e9b54SAlexander Duyck 			return err;
1641345e9b54SAlexander Duyck 		}
1642345e9b54SAlexander Duyck 	}
1643af7888adSDavid Ahern miss:
1644345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1645345e9b54SAlexander Duyck 	this_cpu_inc(stats->semantic_match_miss);
1646345e9b54SAlexander Duyck #endif
16479f9e636dSAlexander Duyck 	goto backtrace;
164819baf839SRobert Olsson }
16496fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup);
165019baf839SRobert Olsson 
fib_remove_alias(struct trie * t,struct key_vector * tp,struct key_vector * l,struct fib_alias * old)165135c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp,
165235c6edacSAlexander Duyck 			     struct key_vector *l, struct fib_alias *old)
1653d5d6487cSAlexander Duyck {
1654d5d6487cSAlexander Duyck 	/* record the location of the previous list_info entry */
1655d5d6487cSAlexander Duyck 	struct hlist_node **pprev = old->fa_list.pprev;
1656d5d6487cSAlexander Duyck 	struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1657d5d6487cSAlexander Duyck 
1658d5d6487cSAlexander Duyck 	/* remove the fib_alias from the list */
1659d5d6487cSAlexander Duyck 	hlist_del_rcu(&old->fa_list);
1660d5d6487cSAlexander Duyck 
1661d5d6487cSAlexander Duyck 	/* if we emptied the list this leaf will be freed and we can sort
1662d5d6487cSAlexander Duyck 	 * out parent suffix lengths as a part of trie_rebalance
1663d562f1f8SRobert Olsson 	 */
1664d5d6487cSAlexander Duyck 	if (hlist_empty(&l->leaf)) {
1665a52ca62cSAlexander Duyck 		if (tp->slen == l->slen)
1666a52ca62cSAlexander Duyck 			node_pull_suffix(tp, tp->pos);
166788bae714SAlexander Duyck 		put_child_root(tp, l->key, NULL);
1668d5d6487cSAlexander Duyck 		node_free(l);
1669d5d6487cSAlexander Duyck 		trie_rebalance(t, tp);
1670d5d6487cSAlexander Duyck 		return;
1671d5d6487cSAlexander Duyck 	}
1672d5d6487cSAlexander Duyck 
1673d5d6487cSAlexander Duyck 	/* only access fa if it is pointing at the last valid hlist_node */
1674d5d6487cSAlexander Duyck 	if (*pprev)
1675d5d6487cSAlexander Duyck 		return;
1676d5d6487cSAlexander Duyck 
1677d5d6487cSAlexander Duyck 	/* update the trie with the latest suffix length */
1678d5d6487cSAlexander Duyck 	l->slen = fa->fa_slen;
16791a239173SAlexander Duyck 	node_pull_suffix(tp, fa->fa_slen);
1680d5d6487cSAlexander Duyck }
1681d5d6487cSAlexander Duyck 
fib_notify_alias_delete(struct net * net,u32 key,struct hlist_head * fah,struct fib_alias * fa_to_delete,struct netlink_ext_ack * extack)1682f613b6e2SIdo Schimmel static void fib_notify_alias_delete(struct net *net, u32 key,
1683f613b6e2SIdo Schimmel 				    struct hlist_head *fah,
1684f613b6e2SIdo Schimmel 				    struct fib_alias *fa_to_delete,
1685f613b6e2SIdo Schimmel 				    struct netlink_ext_ack *extack)
1686f613b6e2SIdo Schimmel {
1687f613b6e2SIdo Schimmel 	struct fib_alias *fa_next, *fa_to_notify;
1688f613b6e2SIdo Schimmel 	u32 tb_id = fa_to_delete->tb_id;
1689f613b6e2SIdo Schimmel 	u8 slen = fa_to_delete->fa_slen;
1690f613b6e2SIdo Schimmel 	enum fib_event_type fib_event;
1691f613b6e2SIdo Schimmel 
1692f613b6e2SIdo Schimmel 	/* Do not notify if we do not care about the route. */
1693f613b6e2SIdo Schimmel 	if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
1694f613b6e2SIdo Schimmel 		return;
1695f613b6e2SIdo Schimmel 
1696f613b6e2SIdo Schimmel 	/* Determine if the route should be replaced by the next route in the
1697f613b6e2SIdo Schimmel 	 * list.
1698f613b6e2SIdo Schimmel 	 */
1699f613b6e2SIdo Schimmel 	fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
1700f613b6e2SIdo Schimmel 				   struct fib_alias, fa_list);
1701f613b6e2SIdo Schimmel 	if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
1702446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_REPLACE;
1703f613b6e2SIdo Schimmel 		fa_to_notify = fa_next;
1704f613b6e2SIdo Schimmel 	} else {
1705446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_DEL;
1706f613b6e2SIdo Schimmel 		fa_to_notify = fa_to_delete;
1707f613b6e2SIdo Schimmel 	}
1708f613b6e2SIdo Schimmel 	call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
1709f613b6e2SIdo Schimmel 				 fa_to_notify, extack);
1710f613b6e2SIdo Schimmel }
1711f613b6e2SIdo Schimmel 
1712d5d6487cSAlexander Duyck /* Caller must hold RTNL. */
fib_table_delete(struct net * net,struct fib_table * tb,struct fib_config * cfg,struct netlink_ext_ack * extack)1713b90eb754SJiri Pirko int fib_table_delete(struct net *net, struct fib_table *tb,
171478055998SDavid Ahern 		     struct fib_config *cfg, struct netlink_ext_ack *extack)
171519baf839SRobert Olsson {
171619baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
171719baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
171835c6edacSAlexander Duyck 	struct key_vector *l, *tp;
171979e5ad2cSAlexander Duyck 	u8 plen = cfg->fc_dst_len;
172079e5ad2cSAlexander Duyck 	u8 slen = KEYLENGTH - plen;
172132ccf110SGuillaume Nault 	dscp_t dscp;
1722d4a975e8SAlexander Duyck 	u32 key;
172391b9a277SOlof Johansson 
17244e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
172519baf839SRobert Olsson 
172678055998SDavid Ahern 	if (!fib_valid_key_len(key, plen, extack))
172719baf839SRobert Olsson 		return -EINVAL;
172819baf839SRobert Olsson 
1729d4a975e8SAlexander Duyck 	l = fib_find_node(t, &tp, key);
173019baf839SRobert Olsson 	if (!l)
173119baf839SRobert Olsson 		return -ESRCH;
173219baf839SRobert Olsson 
173332ccf110SGuillaume Nault 	dscp = cfg->fc_dscp;
173432ccf110SGuillaume Nault 	fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
173519baf839SRobert Olsson 	if (!fa)
173619baf839SRobert Olsson 		return -ESRCH;
173719baf839SRobert Olsson 
173832ccf110SGuillaume Nault 	pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
173932ccf110SGuillaume Nault 		 inet_dscp_to_dsfield(dscp), t);
174019baf839SRobert Olsson 
174119baf839SRobert Olsson 	fa_to_delete = NULL;
174256315f9eSAlexander Duyck 	hlist_for_each_entry_from(fa, fa_list) {
174319baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
174419baf839SRobert Olsson 
17450b65bd97SAlexander Duyck 		if ((fa->fa_slen != slen) ||
17460b65bd97SAlexander Duyck 		    (fa->tb_id != tb->tb_id) ||
174732ccf110SGuillaume Nault 		    (fa->fa_dscp != dscp))
174819baf839SRobert Olsson 			break;
174919baf839SRobert Olsson 
17504e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
17514e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
175237e826c5SDavid S. Miller 		     fa->fa_info->fib_scope == cfg->fc_scope) &&
175374cb3c10SJulian Anastasov 		    (!cfg->fc_prefsrc ||
175474cb3c10SJulian Anastasov 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
17554e902c57SThomas Graf 		    (!cfg->fc_protocol ||
17564e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
1757faee6769SAlexander Aring 		    fib_nh_match(net, cfg, fi, extack) == 0 &&
17585f9ae3d9SXin Long 		    fib_metrics_match(cfg, fi)) {
175919baf839SRobert Olsson 			fa_to_delete = fa;
176019baf839SRobert Olsson 			break;
176119baf839SRobert Olsson 		}
176219baf839SRobert Olsson 	}
176319baf839SRobert Olsson 
176491b9a277SOlof Johansson 	if (!fa_to_delete)
176591b9a277SOlof Johansson 		return -ESRCH;
176619baf839SRobert Olsson 
1767f613b6e2SIdo Schimmel 	fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
1768d5d6487cSAlexander Duyck 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1769b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
177019baf839SRobert Olsson 
177121d8c49eSDavid S. Miller 	if (!plen)
177221d8c49eSDavid S. Miller 		tb->tb_num_default--;
177321d8c49eSDavid S. Miller 
1774d5d6487cSAlexander Duyck 	fib_remove_alias(t, tp, l, fa_to_delete);
17757289e6ddSAlexander Duyck 
1776d5d6487cSAlexander Duyck 	if (fa_to_delete->fa_state & FA_S_ACCESSED)
17774ccfe6d4SNicolas Dichtel 		rt_cache_flush(cfg->fc_nlinfo.nl_net);
177819baf839SRobert Olsson 
1779d5d6487cSAlexander Duyck 	fib_release_info(fa_to_delete->fa_info);
1780d5d6487cSAlexander Duyck 	alias_free_mem_rcu(fa_to_delete);
178119baf839SRobert Olsson 	return 0;
178219baf839SRobert Olsson }
178319baf839SRobert Olsson 
17848be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */
leaf_walk_rcu(struct key_vector ** tn,t_key key)178535c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
178619baf839SRobert Olsson {
178735c6edacSAlexander Duyck 	struct key_vector *pn, *n = *tn;
17888be33e95SAlexander Duyck 	unsigned long cindex;
178919baf839SRobert Olsson 
17908be33e95SAlexander Duyck 	/* this loop is meant to try and find the key in the trie */
179188bae714SAlexander Duyck 	do {
179288bae714SAlexander Duyck 		/* record parent and next child index */
179388bae714SAlexander Duyck 		pn = n;
1794c2229fe1SAlexander Duyck 		cindex = (key > pn->key) ? get_index(key, pn) : 0;
179588bae714SAlexander Duyck 
179688bae714SAlexander Duyck 		if (cindex >> pn->bits)
179788bae714SAlexander Duyck 			break;
179888bae714SAlexander Duyck 
179988bae714SAlexander Duyck 		/* descend into the next child */
180088bae714SAlexander Duyck 		n = get_child_rcu(pn, cindex++);
180188bae714SAlexander Duyck 		if (!n)
180288bae714SAlexander Duyck 			break;
18038be33e95SAlexander Duyck 
18048be33e95SAlexander Duyck 		/* guarantee forward progress on the keys */
18058be33e95SAlexander Duyck 		if (IS_LEAF(n) && (n->key >= key))
18068be33e95SAlexander Duyck 			goto found;
180788bae714SAlexander Duyck 	} while (IS_TNODE(n));
18088be33e95SAlexander Duyck 
18098be33e95SAlexander Duyck 	/* this loop will search for the next leaf with a greater key */
181088bae714SAlexander Duyck 	while (!IS_TRIE(pn)) {
18118be33e95SAlexander Duyck 		/* if we exhausted the parent node we will need to climb */
18128be33e95SAlexander Duyck 		if (cindex >= (1ul << pn->bits)) {
18138be33e95SAlexander Duyck 			t_key pkey = pn->key;
18148be33e95SAlexander Duyck 
18158be33e95SAlexander Duyck 			pn = node_parent_rcu(pn);
18168be33e95SAlexander Duyck 			cindex = get_index(pkey, pn) + 1;
18178be33e95SAlexander Duyck 			continue;
18188be33e95SAlexander Duyck 		}
18198be33e95SAlexander Duyck 
18208be33e95SAlexander Duyck 		/* grab the next available node */
1821754baf8dSAlexander Duyck 		n = get_child_rcu(pn, cindex++);
18228be33e95SAlexander Duyck 		if (!n)
182391b9a277SOlof Johansson 			continue;
182419baf839SRobert Olsson 
18258be33e95SAlexander Duyck 		/* no need to compare keys since we bumped the index */
18268be33e95SAlexander Duyck 		if (IS_LEAF(n))
18278be33e95SAlexander Duyck 			goto found;
182882cfbb00SStephen Hemminger 
182982cfbb00SStephen Hemminger 		/* Rescan start scanning in new node */
18308be33e95SAlexander Duyck 		pn = n;
18318be33e95SAlexander Duyck 		cindex = 0;
183219baf839SRobert Olsson 	}
183382cfbb00SStephen Hemminger 
18348be33e95SAlexander Duyck 	*tn = pn;
183582cfbb00SStephen Hemminger 	return NULL; /* Root of trie */
18368be33e95SAlexander Duyck found:
18378be33e95SAlexander Duyck 	/* if we are at the limit for keys just return NULL for the tnode */
183888bae714SAlexander Duyck 	*tn = pn;
1839adaf9816SAlexander Duyck 	return n;
184082cfbb00SStephen Hemminger }
184182cfbb00SStephen Hemminger 
fib_trie_free(struct fib_table * tb)18420ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb)
18430ddcf43dSAlexander Duyck {
18440ddcf43dSAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
18450ddcf43dSAlexander Duyck 	struct key_vector *pn = t->kv;
18460ddcf43dSAlexander Duyck 	unsigned long cindex = 1;
18470ddcf43dSAlexander Duyck 	struct hlist_node *tmp;
18480ddcf43dSAlexander Duyck 	struct fib_alias *fa;
18490ddcf43dSAlexander Duyck 
18500ddcf43dSAlexander Duyck 	/* walk trie in reverse order and free everything */
18510ddcf43dSAlexander Duyck 	for (;;) {
18520ddcf43dSAlexander Duyck 		struct key_vector *n;
18530ddcf43dSAlexander Duyck 
18540ddcf43dSAlexander Duyck 		if (!(cindex--)) {
18550ddcf43dSAlexander Duyck 			t_key pkey = pn->key;
18560ddcf43dSAlexander Duyck 
18570ddcf43dSAlexander Duyck 			if (IS_TRIE(pn))
18580ddcf43dSAlexander Duyck 				break;
18590ddcf43dSAlexander Duyck 
18600ddcf43dSAlexander Duyck 			n = pn;
18610ddcf43dSAlexander Duyck 			pn = node_parent(pn);
18620ddcf43dSAlexander Duyck 
18630ddcf43dSAlexander Duyck 			/* drop emptied tnode */
18640ddcf43dSAlexander Duyck 			put_child_root(pn, n->key, NULL);
18650ddcf43dSAlexander Duyck 			node_free(n);
18660ddcf43dSAlexander Duyck 
18670ddcf43dSAlexander Duyck 			cindex = get_index(pkey, pn);
18680ddcf43dSAlexander Duyck 
18690ddcf43dSAlexander Duyck 			continue;
18700ddcf43dSAlexander Duyck 		}
18710ddcf43dSAlexander Duyck 
18720ddcf43dSAlexander Duyck 		/* grab the next available node */
18730ddcf43dSAlexander Duyck 		n = get_child(pn, cindex);
18740ddcf43dSAlexander Duyck 		if (!n)
18750ddcf43dSAlexander Duyck 			continue;
18760ddcf43dSAlexander Duyck 
18770ddcf43dSAlexander Duyck 		if (IS_TNODE(n)) {
18780ddcf43dSAlexander Duyck 			/* record pn and cindex for leaf walking */
18790ddcf43dSAlexander Duyck 			pn = n;
18800ddcf43dSAlexander Duyck 			cindex = 1ul << n->bits;
18810ddcf43dSAlexander Duyck 
18820ddcf43dSAlexander Duyck 			continue;
18830ddcf43dSAlexander Duyck 		}
18840ddcf43dSAlexander Duyck 
18850ddcf43dSAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
18860ddcf43dSAlexander Duyck 			hlist_del_rcu(&fa->fa_list);
18870ddcf43dSAlexander Duyck 			alias_free_mem_rcu(fa);
18880ddcf43dSAlexander Duyck 		}
18890ddcf43dSAlexander Duyck 
18900ddcf43dSAlexander Duyck 		put_child_root(pn, n->key, NULL);
18910ddcf43dSAlexander Duyck 		node_free(n);
18920ddcf43dSAlexander Duyck 	}
18930ddcf43dSAlexander Duyck 
18940ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
18950ddcf43dSAlexander Duyck 	free_percpu(t->stats);
18960ddcf43dSAlexander Duyck #endif
18970ddcf43dSAlexander Duyck 	kfree(tb);
18980ddcf43dSAlexander Duyck }
18990ddcf43dSAlexander Duyck 
fib_trie_unmerge(struct fib_table * oldtb)19000ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
19010ddcf43dSAlexander Duyck {
19020ddcf43dSAlexander Duyck 	struct trie *ot = (struct trie *)oldtb->tb_data;
19030ddcf43dSAlexander Duyck 	struct key_vector *l, *tp = ot->kv;
19040ddcf43dSAlexander Duyck 	struct fib_table *local_tb;
19050ddcf43dSAlexander Duyck 	struct fib_alias *fa;
19060ddcf43dSAlexander Duyck 	struct trie *lt;
19070ddcf43dSAlexander Duyck 	t_key key = 0;
19080ddcf43dSAlexander Duyck 
19090ddcf43dSAlexander Duyck 	if (oldtb->tb_data == oldtb->__data)
19100ddcf43dSAlexander Duyck 		return oldtb;
19110ddcf43dSAlexander Duyck 
19120ddcf43dSAlexander Duyck 	local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
19130ddcf43dSAlexander Duyck 	if (!local_tb)
19140ddcf43dSAlexander Duyck 		return NULL;
19150ddcf43dSAlexander Duyck 
19160ddcf43dSAlexander Duyck 	lt = (struct trie *)local_tb->tb_data;
19170ddcf43dSAlexander Duyck 
19180ddcf43dSAlexander Duyck 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
19190ddcf43dSAlexander Duyck 		struct key_vector *local_l = NULL, *local_tp;
19200ddcf43dSAlexander Duyck 
192183f35228SIdo Schimmel 		hlist_for_each_entry(fa, &l->leaf, fa_list) {
19220ddcf43dSAlexander Duyck 			struct fib_alias *new_fa;
19230ddcf43dSAlexander Duyck 
19240ddcf43dSAlexander Duyck 			if (local_tb->tb_id != fa->tb_id)
19250ddcf43dSAlexander Duyck 				continue;
19260ddcf43dSAlexander Duyck 
19270ddcf43dSAlexander Duyck 			/* clone fa for new local table */
19280ddcf43dSAlexander Duyck 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
19290ddcf43dSAlexander Duyck 			if (!new_fa)
19300ddcf43dSAlexander Duyck 				goto out;
19310ddcf43dSAlexander Duyck 
19320ddcf43dSAlexander Duyck 			memcpy(new_fa, fa, sizeof(*fa));
19330ddcf43dSAlexander Duyck 
19340ddcf43dSAlexander Duyck 			/* insert clone into table */
19350ddcf43dSAlexander Duyck 			if (!local_l)
19360ddcf43dSAlexander Duyck 				local_l = fib_find_node(lt, &local_tp, l->key);
19370ddcf43dSAlexander Duyck 
19380ddcf43dSAlexander Duyck 			if (fib_insert_alias(lt, local_tp, local_l, new_fa,
19393114cdfeSAlexander Duyck 					     NULL, l->key)) {
19403114cdfeSAlexander Duyck 				kmem_cache_free(fn_alias_kmem, new_fa);
19410ddcf43dSAlexander Duyck 				goto out;
19420ddcf43dSAlexander Duyck 			}
19433114cdfeSAlexander Duyck 		}
19440ddcf43dSAlexander Duyck 
19450ddcf43dSAlexander Duyck 		/* stop loop if key wrapped back to 0 */
19460ddcf43dSAlexander Duyck 		key = l->key + 1;
19470ddcf43dSAlexander Duyck 		if (key < l->key)
19480ddcf43dSAlexander Duyck 			break;
19490ddcf43dSAlexander Duyck 	}
19500ddcf43dSAlexander Duyck 
19510ddcf43dSAlexander Duyck 	return local_tb;
19520ddcf43dSAlexander Duyck out:
19530ddcf43dSAlexander Duyck 	fib_trie_free(local_tb);
19540ddcf43dSAlexander Duyck 
19550ddcf43dSAlexander Duyck 	return NULL;
19560ddcf43dSAlexander Duyck }
19570ddcf43dSAlexander Duyck 
19583b709334SAlexander Duyck /* Caller must hold RTNL */
fib_table_flush_external(struct fib_table * tb)19593b709334SAlexander Duyck void fib_table_flush_external(struct fib_table *tb)
19603b709334SAlexander Duyck {
19613b709334SAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
19623b709334SAlexander Duyck 	struct key_vector *pn = t->kv;
19633b709334SAlexander Duyck 	unsigned long cindex = 1;
19643b709334SAlexander Duyck 	struct hlist_node *tmp;
19653b709334SAlexander Duyck 	struct fib_alias *fa;
19663b709334SAlexander Duyck 
19673b709334SAlexander Duyck 	/* walk trie in reverse order */
19683b709334SAlexander Duyck 	for (;;) {
19693b709334SAlexander Duyck 		unsigned char slen = 0;
19703b709334SAlexander Duyck 		struct key_vector *n;
19713b709334SAlexander Duyck 
19723b709334SAlexander Duyck 		if (!(cindex--)) {
19733b709334SAlexander Duyck 			t_key pkey = pn->key;
19743b709334SAlexander Duyck 
19753b709334SAlexander Duyck 			/* cannot resize the trie vector */
19763b709334SAlexander Duyck 			if (IS_TRIE(pn))
19773b709334SAlexander Duyck 				break;
19783b709334SAlexander Duyck 
1979a52ca62cSAlexander Duyck 			/* update the suffix to address pulled leaves */
1980a52ca62cSAlexander Duyck 			if (pn->slen > pn->pos)
1981a52ca62cSAlexander Duyck 				update_suffix(pn);
1982a52ca62cSAlexander Duyck 
19833b709334SAlexander Duyck 			/* resize completed node */
19843b709334SAlexander Duyck 			pn = resize(t, pn);
19853b709334SAlexander Duyck 			cindex = get_index(pkey, pn);
19863b709334SAlexander Duyck 
19873b709334SAlexander Duyck 			continue;
19883b709334SAlexander Duyck 		}
19893b709334SAlexander Duyck 
19903b709334SAlexander Duyck 		/* grab the next available node */
19913b709334SAlexander Duyck 		n = get_child(pn, cindex);
19923b709334SAlexander Duyck 		if (!n)
19933b709334SAlexander Duyck 			continue;
19943b709334SAlexander Duyck 
19953b709334SAlexander Duyck 		if (IS_TNODE(n)) {
19963b709334SAlexander Duyck 			/* record pn and cindex for leaf walking */
19973b709334SAlexander Duyck 			pn = n;
19983b709334SAlexander Duyck 			cindex = 1ul << n->bits;
19993b709334SAlexander Duyck 
20003b709334SAlexander Duyck 			continue;
20013b709334SAlexander Duyck 		}
20023b709334SAlexander Duyck 
20033b709334SAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
20043b709334SAlexander Duyck 			/* if alias was cloned to local then we just
20053b709334SAlexander Duyck 			 * need to remove the local copy from main
20063b709334SAlexander Duyck 			 */
20073b709334SAlexander Duyck 			if (tb->tb_id != fa->tb_id) {
20083b709334SAlexander Duyck 				hlist_del_rcu(&fa->fa_list);
20093b709334SAlexander Duyck 				alias_free_mem_rcu(fa);
20103b709334SAlexander Duyck 				continue;
20113b709334SAlexander Duyck 			}
20123b709334SAlexander Duyck 
20133b709334SAlexander Duyck 			/* record local slen */
20143b709334SAlexander Duyck 			slen = fa->fa_slen;
20153b709334SAlexander Duyck 		}
20163b709334SAlexander Duyck 
20173b709334SAlexander Duyck 		/* update leaf slen */
20183b709334SAlexander Duyck 		n->slen = slen;
20193b709334SAlexander Duyck 
20203b709334SAlexander Duyck 		if (hlist_empty(&n->leaf)) {
20213b709334SAlexander Duyck 			put_child_root(pn, n->key, NULL);
20223b709334SAlexander Duyck 			node_free(n);
20233b709334SAlexander Duyck 		}
20243b709334SAlexander Duyck 	}
20253b709334SAlexander Duyck }
20263b709334SAlexander Duyck 
20278be33e95SAlexander Duyck /* Caller must hold RTNL. */
fib_table_flush(struct net * net,struct fib_table * tb,bool flush_all)2028f97f4dd8SIdo Schimmel int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
202919baf839SRobert Olsson {
203019baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
20314b2b6060SHangbin Liu 	struct nl_info info = { .nl_net = net };
203288bae714SAlexander Duyck 	struct key_vector *pn = t->kv;
203388bae714SAlexander Duyck 	unsigned long cindex = 1;
20347289e6ddSAlexander Duyck 	struct hlist_node *tmp;
20357289e6ddSAlexander Duyck 	struct fib_alias *fa;
203682cfbb00SStephen Hemminger 	int found = 0;
203719baf839SRobert Olsson 
20387289e6ddSAlexander Duyck 	/* walk trie in reverse order */
203988bae714SAlexander Duyck 	for (;;) {
204088bae714SAlexander Duyck 		unsigned char slen = 0;
204188bae714SAlexander Duyck 		struct key_vector *n;
204288bae714SAlexander Duyck 
204388bae714SAlexander Duyck 		if (!(cindex--)) {
20447289e6ddSAlexander Duyck 			t_key pkey = pn->key;
20457289e6ddSAlexander Duyck 
204688bae714SAlexander Duyck 			/* cannot resize the trie vector */
204788bae714SAlexander Duyck 			if (IS_TRIE(pn))
204888bae714SAlexander Duyck 				break;
20497289e6ddSAlexander Duyck 
2050a52ca62cSAlexander Duyck 			/* update the suffix to address pulled leaves */
2051a52ca62cSAlexander Duyck 			if (pn->slen > pn->pos)
2052a52ca62cSAlexander Duyck 				update_suffix(pn);
2053a52ca62cSAlexander Duyck 
20547289e6ddSAlexander Duyck 			/* resize completed node */
205588bae714SAlexander Duyck 			pn = resize(t, pn);
20567289e6ddSAlexander Duyck 			cindex = get_index(pkey, pn);
205788bae714SAlexander Duyck 
205888bae714SAlexander Duyck 			continue;
205964c62723SAlexander Duyck 		}
206064c62723SAlexander Duyck 
20617289e6ddSAlexander Duyck 		/* grab the next available node */
2062754baf8dSAlexander Duyck 		n = get_child(pn, cindex);
206388bae714SAlexander Duyck 		if (!n)
206488bae714SAlexander Duyck 			continue;
206519baf839SRobert Olsson 
206688bae714SAlexander Duyck 		if (IS_TNODE(n)) {
206788bae714SAlexander Duyck 			/* record pn and cindex for leaf walking */
206888bae714SAlexander Duyck 			pn = n;
206988bae714SAlexander Duyck 			cindex = 1ul << n->bits;
207088bae714SAlexander Duyck 
207188bae714SAlexander Duyck 			continue;
207288bae714SAlexander Duyck 		}
20737289e6ddSAlexander Duyck 
20747289e6ddSAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
20757289e6ddSAlexander Duyck 			struct fib_info *fi = fa->fa_info;
20767289e6ddSAlexander Duyck 
2077f97f4dd8SIdo Schimmel 			if (!fi || tb->tb_id != fa->tb_id ||
2078f97f4dd8SIdo Schimmel 			    (!(fi->fib_flags & RTNH_F_DEAD) &&
2079f97f4dd8SIdo Schimmel 			     !fib_props[fa->fa_type].error)) {
2080f97f4dd8SIdo Schimmel 				slen = fa->fa_slen;
2081f97f4dd8SIdo Schimmel 				continue;
2082f97f4dd8SIdo Schimmel 			}
2083f97f4dd8SIdo Schimmel 
2084f97f4dd8SIdo Schimmel 			/* Do not flush error routes if network namespace is
2085f97f4dd8SIdo Schimmel 			 * not being dismantled
2086f97f4dd8SIdo Schimmel 			 */
2087f97f4dd8SIdo Schimmel 			if (!flush_all && fib_props[fa->fa_type].error) {
208888bae714SAlexander Duyck 				slen = fa->fa_slen;
208988bae714SAlexander Duyck 				continue;
209088bae714SAlexander Duyck 			}
209188bae714SAlexander Duyck 
2092525bc345SIdo Schimmel 			fib_notify_alias_delete(net, n->key, &n->leaf, fa,
2093525bc345SIdo Schimmel 						NULL);
20944b2b6060SHangbin Liu 			if (fi->pfsrc_removed)
20954b2b6060SHangbin Liu 				rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
20964b2b6060SHangbin Liu 					  KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
20977289e6ddSAlexander Duyck 			hlist_del_rcu(&fa->fa_list);
20987289e6ddSAlexander Duyck 			fib_release_info(fa->fa_info);
20997289e6ddSAlexander Duyck 			alias_free_mem_rcu(fa);
21007289e6ddSAlexander Duyck 			found++;
21017289e6ddSAlexander Duyck 		}
21027289e6ddSAlexander Duyck 
21037289e6ddSAlexander Duyck 		/* update leaf slen */
21047289e6ddSAlexander Duyck 		n->slen = slen;
21057289e6ddSAlexander Duyck 
21067289e6ddSAlexander Duyck 		if (hlist_empty(&n->leaf)) {
210788bae714SAlexander Duyck 			put_child_root(pn, n->key, NULL);
21087289e6ddSAlexander Duyck 			node_free(n);
21097289e6ddSAlexander Duyck 		}
211088bae714SAlexander Duyck 	}
21117289e6ddSAlexander Duyck 
21120c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
211319baf839SRobert Olsson 	return found;
211419baf839SRobert Olsson }
211519baf839SRobert Olsson 
21161bff1a0cSDavid Ahern /* derived from fib_trie_free */
__fib_info_notify_update(struct net * net,struct fib_table * tb,struct nl_info * info)21171bff1a0cSDavid Ahern static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
21181bff1a0cSDavid Ahern 				     struct nl_info *info)
21191bff1a0cSDavid Ahern {
21201bff1a0cSDavid Ahern 	struct trie *t = (struct trie *)tb->tb_data;
21211bff1a0cSDavid Ahern 	struct key_vector *pn = t->kv;
21221bff1a0cSDavid Ahern 	unsigned long cindex = 1;
21231bff1a0cSDavid Ahern 	struct fib_alias *fa;
21241bff1a0cSDavid Ahern 
21251bff1a0cSDavid Ahern 	for (;;) {
21261bff1a0cSDavid Ahern 		struct key_vector *n;
21271bff1a0cSDavid Ahern 
21281bff1a0cSDavid Ahern 		if (!(cindex--)) {
21291bff1a0cSDavid Ahern 			t_key pkey = pn->key;
21301bff1a0cSDavid Ahern 
21311bff1a0cSDavid Ahern 			if (IS_TRIE(pn))
21321bff1a0cSDavid Ahern 				break;
21331bff1a0cSDavid Ahern 
21341bff1a0cSDavid Ahern 			pn = node_parent(pn);
21351bff1a0cSDavid Ahern 			cindex = get_index(pkey, pn);
21361bff1a0cSDavid Ahern 			continue;
21371bff1a0cSDavid Ahern 		}
21381bff1a0cSDavid Ahern 
21391bff1a0cSDavid Ahern 		/* grab the next available node */
21401bff1a0cSDavid Ahern 		n = get_child(pn, cindex);
21411bff1a0cSDavid Ahern 		if (!n)
21421bff1a0cSDavid Ahern 			continue;
21431bff1a0cSDavid Ahern 
21441bff1a0cSDavid Ahern 		if (IS_TNODE(n)) {
21451bff1a0cSDavid Ahern 			/* record pn and cindex for leaf walking */
21461bff1a0cSDavid Ahern 			pn = n;
21471bff1a0cSDavid Ahern 			cindex = 1ul << n->bits;
21481bff1a0cSDavid Ahern 
21491bff1a0cSDavid Ahern 			continue;
21501bff1a0cSDavid Ahern 		}
21511bff1a0cSDavid Ahern 
21521bff1a0cSDavid Ahern 		hlist_for_each_entry(fa, &n->leaf, fa_list) {
21531bff1a0cSDavid Ahern 			struct fib_info *fi = fa->fa_info;
21541bff1a0cSDavid Ahern 
21551bff1a0cSDavid Ahern 			if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
21561bff1a0cSDavid Ahern 				continue;
21571bff1a0cSDavid Ahern 
21581bff1a0cSDavid Ahern 			rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
21591bff1a0cSDavid Ahern 				  KEYLENGTH - fa->fa_slen, tb->tb_id,
21601bff1a0cSDavid Ahern 				  info, NLM_F_REPLACE);
21611bff1a0cSDavid Ahern 		}
21621bff1a0cSDavid Ahern 	}
21631bff1a0cSDavid Ahern }
21641bff1a0cSDavid Ahern 
fib_info_notify_update(struct net * net,struct nl_info * info)21651bff1a0cSDavid Ahern void fib_info_notify_update(struct net *net, struct nl_info *info)
21661bff1a0cSDavid Ahern {
21671bff1a0cSDavid Ahern 	unsigned int h;
21681bff1a0cSDavid Ahern 
21691bff1a0cSDavid Ahern 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
21701bff1a0cSDavid Ahern 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
21711bff1a0cSDavid Ahern 		struct fib_table *tb;
21721bff1a0cSDavid Ahern 
21737f6f32bbSIdo Schimmel 		hlist_for_each_entry_rcu(tb, head, tb_hlist,
21747f6f32bbSIdo Schimmel 					 lockdep_rtnl_is_held())
21751bff1a0cSDavid Ahern 			__fib_info_notify_update(net, tb, info);
21761bff1a0cSDavid Ahern 	}
21771bff1a0cSDavid Ahern }
21781bff1a0cSDavid Ahern 
fib_leaf_notify(struct key_vector * l,struct fib_table * tb,struct notifier_block * nb,struct netlink_ext_ack * extack)217955c894f7SJiri Pirko static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2180b7a59557SJiri Pirko 			   struct notifier_block *nb,
2181b7a59557SJiri Pirko 			   struct netlink_ext_ack *extack)
2182c3852ef7SIdo Schimmel {
2183c3852ef7SIdo Schimmel 	struct fib_alias *fa;
218420d15652SIdo Schimmel 	int last_slen = -1;
218555c894f7SJiri Pirko 	int err;
2186c3852ef7SIdo Schimmel 
2187c3852ef7SIdo Schimmel 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2188c3852ef7SIdo Schimmel 		struct fib_info *fi = fa->fa_info;
2189c3852ef7SIdo Schimmel 
2190c3852ef7SIdo Schimmel 		if (!fi)
2191c3852ef7SIdo Schimmel 			continue;
2192c3852ef7SIdo Schimmel 
2193c3852ef7SIdo Schimmel 		/* local and main table can share the same trie,
2194c3852ef7SIdo Schimmel 		 * so don't notify twice for the same entry.
2195c3852ef7SIdo Schimmel 		 */
2196c3852ef7SIdo Schimmel 		if (tb->tb_id != fa->tb_id)
2197c3852ef7SIdo Schimmel 			continue;
2198c3852ef7SIdo Schimmel 
219920d15652SIdo Schimmel 		if (fa->fa_slen == last_slen)
220020d15652SIdo Schimmel 			continue;
220120d15652SIdo Schimmel 
220220d15652SIdo Schimmel 		last_slen = fa->fa_slen;
2203446f7391SIdo Schimmel 		err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
220420d15652SIdo Schimmel 					      l->key, KEYLENGTH - fa->fa_slen,
220520d15652SIdo Schimmel 					      fa, extack);
220620d15652SIdo Schimmel 		if (err)
220720d15652SIdo Schimmel 			return err;
2208c3852ef7SIdo Schimmel 	}
220955c894f7SJiri Pirko 	return 0;
2210c3852ef7SIdo Schimmel }
2211c3852ef7SIdo Schimmel 
fib_table_notify(struct fib_table * tb,struct notifier_block * nb,struct netlink_ext_ack * extack)2212b7a59557SJiri Pirko static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2213b7a59557SJiri Pirko 			    struct netlink_ext_ack *extack)
2214c3852ef7SIdo Schimmel {
2215c3852ef7SIdo Schimmel 	struct trie *t = (struct trie *)tb->tb_data;
2216c3852ef7SIdo Schimmel 	struct key_vector *l, *tp = t->kv;
2217c3852ef7SIdo Schimmel 	t_key key = 0;
221855c894f7SJiri Pirko 	int err;
2219c3852ef7SIdo Schimmel 
2220c3852ef7SIdo Schimmel 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2221b7a59557SJiri Pirko 		err = fib_leaf_notify(l, tb, nb, extack);
222255c894f7SJiri Pirko 		if (err)
222355c894f7SJiri Pirko 			return err;
2224c3852ef7SIdo Schimmel 
2225c3852ef7SIdo Schimmel 		key = l->key + 1;
2226c3852ef7SIdo Schimmel 		/* stop in case of wrap around */
2227c3852ef7SIdo Schimmel 		if (key < l->key)
2228c3852ef7SIdo Schimmel 			break;
2229c3852ef7SIdo Schimmel 	}
223055c894f7SJiri Pirko 	return 0;
2231c3852ef7SIdo Schimmel }
2232c3852ef7SIdo Schimmel 
fib_notify(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)2233b7a59557SJiri Pirko int fib_notify(struct net *net, struct notifier_block *nb,
2234b7a59557SJiri Pirko 	       struct netlink_ext_ack *extack)
2235c3852ef7SIdo Schimmel {
2236c3852ef7SIdo Schimmel 	unsigned int h;
223755c894f7SJiri Pirko 	int err;
2238c3852ef7SIdo Schimmel 
2239c3852ef7SIdo Schimmel 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2240c3852ef7SIdo Schimmel 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2241c3852ef7SIdo Schimmel 		struct fib_table *tb;
2242c3852ef7SIdo Schimmel 
224355c894f7SJiri Pirko 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2244b7a59557SJiri Pirko 			err = fib_table_notify(tb, nb, extack);
224555c894f7SJiri Pirko 			if (err)
224655c894f7SJiri Pirko 				return err;
2247c3852ef7SIdo Schimmel 		}
2248c3852ef7SIdo Schimmel 	}
224955c894f7SJiri Pirko 	return 0;
225055c894f7SJiri Pirko }
2251c3852ef7SIdo Schimmel 
__trie_free_rcu(struct rcu_head * head)2252a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head)
22534aa2c466SPavel Emelyanov {
2254a7e53531SAlexander Duyck 	struct fib_table *tb = container_of(head, struct fib_table, rcu);
22558274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
22568274a97aSAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
22578274a97aSAlexander Duyck 
22580ddcf43dSAlexander Duyck 	if (tb->tb_data == tb->__data)
22598274a97aSAlexander Duyck 		free_percpu(t->stats);
22608274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */
22614aa2c466SPavel Emelyanov 	kfree(tb);
22624aa2c466SPavel Emelyanov }
22634aa2c466SPavel Emelyanov 
fib_free_table(struct fib_table * tb)2264a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb)
2265a7e53531SAlexander Duyck {
2266a7e53531SAlexander Duyck 	call_rcu(&tb->rcu, __trie_free_rcu);
2267a7e53531SAlexander Duyck }
2268a7e53531SAlexander Duyck 
fn_trie_dump_leaf(struct key_vector * l,struct fib_table * tb,struct sk_buff * skb,struct netlink_callback * cb,struct fib_dump_filter * filter)226935c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
227018a8021aSDavid Ahern 			     struct sk_buff *skb, struct netlink_callback *cb,
227118a8021aSDavid Ahern 			     struct fib_dump_filter *filter)
227219baf839SRobert Olsson {
227318a8021aSDavid Ahern 	unsigned int flags = NLM_F_MULTI;
227479e5ad2cSAlexander Duyck 	__be32 xkey = htonl(l->key);
2275ee28906fSStefano Brivio 	int i, s_i, i_fa, s_fa, err;
227619baf839SRobert Olsson 	struct fib_alias *fa;
227719baf839SRobert Olsson 
2278ee28906fSStefano Brivio 	if (filter->filter_set ||
2279ee28906fSStefano Brivio 	    !filter->dump_exceptions || !filter->dump_routes)
228018a8021aSDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
228118a8021aSDavid Ahern 
228279e5ad2cSAlexander Duyck 	s_i = cb->args[4];
2283ee28906fSStefano Brivio 	s_fa = cb->args[5];
228419baf839SRobert Olsson 	i = 0;
228519baf839SRobert Olsson 
22862373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
228779e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2288ee28906fSStefano Brivio 		struct fib_info *fi = fa->fa_info;
2289f6c5775fSDavid Ahern 
229018a8021aSDavid Ahern 		if (i < s_i)
229118a8021aSDavid Ahern 			goto next;
229219baf839SRobert Olsson 
2293ee28906fSStefano Brivio 		i_fa = 0;
2294ee28906fSStefano Brivio 
229518a8021aSDavid Ahern 		if (tb->tb_id != fa->tb_id)
229618a8021aSDavid Ahern 			goto next;
229718a8021aSDavid Ahern 
229818a8021aSDavid Ahern 		if (filter->filter_set) {
229918a8021aSDavid Ahern 			if (filter->rt_type && fa->fa_type != filter->rt_type)
230018a8021aSDavid Ahern 				goto next;
230118a8021aSDavid Ahern 
230218a8021aSDavid Ahern 			if ((filter->protocol &&
2303ee28906fSStefano Brivio 			     fi->fib_protocol != filter->protocol))
230418a8021aSDavid Ahern 				goto next;
230518a8021aSDavid Ahern 
230618a8021aSDavid Ahern 			if (filter->dev &&
2307ee28906fSStefano Brivio 			    !fib_info_nh_uses_dev(fi, filter->dev))
230818a8021aSDavid Ahern 				goto next;
23090ddcf43dSAlexander Duyck 		}
23100ddcf43dSAlexander Duyck 
2311885b8b4dSStefano Brivio 		if (filter->dump_routes) {
2312885b8b4dSStefano Brivio 			if (!s_fa) {
23131e301fd0SIdo Schimmel 				struct fib_rt_info fri;
23141e301fd0SIdo Schimmel 
23151e301fd0SIdo Schimmel 				fri.fi = fi;
23161e301fd0SIdo Schimmel 				fri.tb_id = tb->tb_id;
23171e301fd0SIdo Schimmel 				fri.dst = xkey;
23181e301fd0SIdo Schimmel 				fri.dst_len = KEYLENGTH - fa->fa_slen;
2319888ade8fSGuillaume Nault 				fri.dscp = fa->fa_dscp;
23201e301fd0SIdo Schimmel 				fri.type = fa->fa_type;
23219fcf986cSEric Dumazet 				fri.offload = READ_ONCE(fa->offload);
23229fcf986cSEric Dumazet 				fri.trap = READ_ONCE(fa->trap);
23239fcf986cSEric Dumazet 				fri.offload_failed = READ_ONCE(fa->offload_failed);
2324885b8b4dSStefano Brivio 				err = fib_dump_info(skb,
2325885b8b4dSStefano Brivio 						    NETLINK_CB(cb->skb).portid,
2326885b8b4dSStefano Brivio 						    cb->nlh->nlmsg_seq,
23271e301fd0SIdo Schimmel 						    RTM_NEWROUTE, &fri, flags);
2328ee28906fSStefano Brivio 				if (err < 0)
2329ee28906fSStefano Brivio 					goto stop;
2330885b8b4dSStefano Brivio 			}
2331885b8b4dSStefano Brivio 
2332ee28906fSStefano Brivio 			i_fa++;
233319baf839SRobert Olsson 		}
2334ee28906fSStefano Brivio 
2335ee28906fSStefano Brivio 		if (filter->dump_exceptions) {
2336ee28906fSStefano Brivio 			err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2337e93fb3e9SJohn Fastabend 						 &i_fa, s_fa, flags);
2338ee28906fSStefano Brivio 			if (err < 0)
2339ee28906fSStefano Brivio 				goto stop;
2340ee28906fSStefano Brivio 		}
2341ee28906fSStefano Brivio 
234218a8021aSDavid Ahern next:
2343a88ee229SStephen Hemminger 		i++;
234419baf839SRobert Olsson 	}
2345a88ee229SStephen Hemminger 
234671d67e66SStephen Hemminger 	cb->args[4] = i;
234719baf839SRobert Olsson 	return skb->len;
2348ee28906fSStefano Brivio 
2349ee28906fSStefano Brivio stop:
2350ee28906fSStefano Brivio 	cb->args[4] = i;
2351ee28906fSStefano Brivio 	cb->args[5] = i_fa;
2352ee28906fSStefano Brivio 	return err;
235319baf839SRobert Olsson }
235419baf839SRobert Olsson 
2355a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */
fib_table_dump(struct fib_table * tb,struct sk_buff * skb,struct netlink_callback * cb,struct fib_dump_filter * filter)235616c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
235718a8021aSDavid Ahern 		   struct netlink_callback *cb, struct fib_dump_filter *filter)
235819baf839SRobert Olsson {
235919baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
236088bae714SAlexander Duyck 	struct key_vector *l, *tp = t->kv;
2361d5ce8a0eSStephen Hemminger 	/* Dump starting at last key.
2362d5ce8a0eSStephen Hemminger 	 * Note: 0.0.0.0/0 (ie default) is first key.
2363d5ce8a0eSStephen Hemminger 	 */
23648be33e95SAlexander Duyck 	int count = cb->args[2];
23658be33e95SAlexander Duyck 	t_key key = cb->args[3];
2366a88ee229SStephen Hemminger 
23679827c063SDavid Ahern 	/* First time here, count and key are both always 0. Count > 0
23689827c063SDavid Ahern 	 * and key == 0 means the dump has wrapped around and we are done.
23699827c063SDavid Ahern 	 */
23709827c063SDavid Ahern 	if (count && !key)
23719827c063SDavid Ahern 		return skb->len;
23729827c063SDavid Ahern 
23738be33e95SAlexander Duyck 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2374f6c5775fSDavid Ahern 		int err;
2375f6c5775fSDavid Ahern 
237618a8021aSDavid Ahern 		err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2377f6c5775fSDavid Ahern 		if (err < 0) {
23788be33e95SAlexander Duyck 			cb->args[3] = key;
23798be33e95SAlexander Duyck 			cb->args[2] = count;
2380f6c5775fSDavid Ahern 			return err;
238119baf839SRobert Olsson 		}
2382d5ce8a0eSStephen Hemminger 
238371d67e66SStephen Hemminger 		++count;
23848be33e95SAlexander Duyck 		key = l->key + 1;
23858be33e95SAlexander Duyck 
238671d67e66SStephen Hemminger 		memset(&cb->args[4], 0,
238771d67e66SStephen Hemminger 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
23888be33e95SAlexander Duyck 
23898be33e95SAlexander Duyck 		/* stop loop if key wrapped back to 0 */
23908be33e95SAlexander Duyck 		if (key < l->key)
23918be33e95SAlexander Duyck 			break;
2392a88ee229SStephen Hemminger 	}
23938be33e95SAlexander Duyck 
23948be33e95SAlexander Duyck 	cb->args[3] = key;
23958be33e95SAlexander Duyck 	cb->args[2] = count;
23968be33e95SAlexander Duyck 
2397a88ee229SStephen Hemminger 	return skb->len;
2398a88ee229SStephen Hemminger }
239919baf839SRobert Olsson 
fib_trie_init(void)24005348ba85SDavid S. Miller void __init fib_trie_init(void)
24017f9b8052SStephen Hemminger {
2402a07f5f50SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2403a07f5f50SStephen Hemminger 					  sizeof(struct fib_alias),
24046126891cSVasily Averin 					  0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
2405bc3c8c1eSStephen Hemminger 
2406bc3c8c1eSStephen Hemminger 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
240741b489fdSAlexander Duyck 					   LEAF_SIZE,
24086126891cSVasily Averin 					   0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
24097f9b8052SStephen Hemminger }
241019baf839SRobert Olsson 
fib_trie_table(u32 id,struct fib_table * alias)24110ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
241219baf839SRobert Olsson {
241319baf839SRobert Olsson 	struct fib_table *tb;
241419baf839SRobert Olsson 	struct trie *t;
24150ddcf43dSAlexander Duyck 	size_t sz = sizeof(*tb);
241619baf839SRobert Olsson 
24170ddcf43dSAlexander Duyck 	if (!alias)
24180ddcf43dSAlexander Duyck 		sz += sizeof(struct trie);
24190ddcf43dSAlexander Duyck 
24200ddcf43dSAlexander Duyck 	tb = kzalloc(sz, GFP_KERNEL);
242151456b29SIan Morris 	if (!tb)
242219baf839SRobert Olsson 		return NULL;
242319baf839SRobert Olsson 
242419baf839SRobert Olsson 	tb->tb_id = id;
242521d8c49eSDavid S. Miller 	tb->tb_num_default = 0;
24260ddcf43dSAlexander Duyck 	tb->tb_data = (alias ? alias->__data : tb->__data);
24270ddcf43dSAlexander Duyck 
24280ddcf43dSAlexander Duyck 	if (alias)
24290ddcf43dSAlexander Duyck 		return tb;
243019baf839SRobert Olsson 
243119baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
243288bae714SAlexander Duyck 	t->kv[0].pos = KEYLENGTH;
243388bae714SAlexander Duyck 	t->kv[0].slen = KEYLENGTH;
24348274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
24358274a97aSAlexander Duyck 	t->stats = alloc_percpu(struct trie_use_stats);
24368274a97aSAlexander Duyck 	if (!t->stats) {
24378274a97aSAlexander Duyck 		kfree(tb);
24388274a97aSAlexander Duyck 		tb = NULL;
24398274a97aSAlexander Duyck 	}
24408274a97aSAlexander Duyck #endif
244119baf839SRobert Olsson 
244219baf839SRobert Olsson 	return tb;
244319baf839SRobert Olsson }
244419baf839SRobert Olsson 
2445cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
2446cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
2447cb7b593cSStephen Hemminger struct fib_trie_iter {
24481c340b2fSDenis V. Lunev 	struct seq_net_private p;
24493d3b2d25SStephen Hemminger 	struct fib_table *tb;
245035c6edacSAlexander Duyck 	struct key_vector *tnode;
2451a034ee3cSEric Dumazet 	unsigned int index;
2452a034ee3cSEric Dumazet 	unsigned int depth;
2453cb7b593cSStephen Hemminger };
245419baf839SRobert Olsson 
fib_trie_get_next(struct fib_trie_iter * iter)245535c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
245619baf839SRobert Olsson {
245798293e8dSAlexander Duyck 	unsigned long cindex = iter->index;
245888bae714SAlexander Duyck 	struct key_vector *pn = iter->tnode;
245988bae714SAlexander Duyck 	t_key pkey;
24606640e697SEric W. Biederman 
2461cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2462cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
246319baf839SRobert Olsson 
246488bae714SAlexander Duyck 	while (!IS_TRIE(pn)) {
246588bae714SAlexander Duyck 		while (cindex < child_length(pn)) {
246688bae714SAlexander Duyck 			struct key_vector *n = get_child_rcu(pn, cindex++);
246788bae714SAlexander Duyck 
246888bae714SAlexander Duyck 			if (!n)
246988bae714SAlexander Duyck 				continue;
247088bae714SAlexander Duyck 
247119baf839SRobert Olsson 			if (IS_LEAF(n)) {
247288bae714SAlexander Duyck 				iter->tnode = pn;
247388bae714SAlexander Duyck 				iter->index = cindex;
247491b9a277SOlof Johansson 			} else {
2475cb7b593cSStephen Hemminger 				/* push down one level */
2476adaf9816SAlexander Duyck 				iter->tnode = n;
2477cb7b593cSStephen Hemminger 				iter->index = 0;
2478cb7b593cSStephen Hemminger 				++iter->depth;
247919baf839SRobert Olsson 			}
248088bae714SAlexander Duyck 
2481cb7b593cSStephen Hemminger 			return n;
248219baf839SRobert Olsson 		}
248319baf839SRobert Olsson 
2484cb7b593cSStephen Hemminger 		/* Current node exhausted, pop back up */
248588bae714SAlexander Duyck 		pkey = pn->key;
248688bae714SAlexander Duyck 		pn = node_parent_rcu(pn);
248788bae714SAlexander Duyck 		cindex = get_index(pkey, pn) + 1;
2488cb7b593cSStephen Hemminger 		--iter->depth;
2489cb7b593cSStephen Hemminger 	}
2490cb7b593cSStephen Hemminger 
249188bae714SAlexander Duyck 	/* record root node so further searches know we are done */
249288bae714SAlexander Duyck 	iter->tnode = pn;
249388bae714SAlexander Duyck 	iter->index = 0;
249488bae714SAlexander Duyck 
2495cb7b593cSStephen Hemminger 	return NULL;
2496cb7b593cSStephen Hemminger }
2497cb7b593cSStephen Hemminger 
fib_trie_get_first(struct fib_trie_iter * iter,struct trie * t)249835c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2499cb7b593cSStephen Hemminger 					     struct trie *t)
2500cb7b593cSStephen Hemminger {
2501f38b24c9SFiro Yang 	struct key_vector *n, *pn;
25025ddf0eb2SRobert Olsson 
25035ddf0eb2SRobert Olsson 	if (!t)
25045ddf0eb2SRobert Olsson 		return NULL;
25055ddf0eb2SRobert Olsson 
2506f38b24c9SFiro Yang 	pn = t->kv;
250788bae714SAlexander Duyck 	n = rcu_dereference(pn->tnode[0]);
25083d3b2d25SStephen Hemminger 	if (!n)
25095ddf0eb2SRobert Olsson 		return NULL;
2510cb7b593cSStephen Hemminger 
25116640e697SEric W. Biederman 	if (IS_TNODE(n)) {
2512adaf9816SAlexander Duyck 		iter->tnode = n;
2513cb7b593cSStephen Hemminger 		iter->index = 0;
25141d25cd6cSRobert Olsson 		iter->depth = 1;
25156640e697SEric W. Biederman 	} else {
251688bae714SAlexander Duyck 		iter->tnode = pn;
25176640e697SEric W. Biederman 		iter->index = 0;
25186640e697SEric W. Biederman 		iter->depth = 0;
25196640e697SEric W. Biederman 	}
25203d3b2d25SStephen Hemminger 
2521cb7b593cSStephen Hemminger 	return n;
2522cb7b593cSStephen Hemminger }
2523cb7b593cSStephen Hemminger 
trie_collect_stats(struct trie * t,struct trie_stat * s)2524cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
252519baf839SRobert Olsson {
252635c6edacSAlexander Duyck 	struct key_vector *n;
2527cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2528cb7b593cSStephen Hemminger 
2529cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
253019baf839SRobert Olsson 
25312373ce1cSRobert Olsson 	rcu_read_lock();
25323d3b2d25SStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2533cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
253479e5ad2cSAlexander Duyck 			struct fib_alias *fa;
253593672292SStephen Hemminger 
253619baf839SRobert Olsson 			s->leaves++;
2537cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2538cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2539cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
254093672292SStephen Hemminger 
254179e5ad2cSAlexander Duyck 			hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
254293672292SStephen Hemminger 				++s->prefixes;
254391b9a277SOlof Johansson 		} else {
254419baf839SRobert Olsson 			s->tnodes++;
2545adaf9816SAlexander Duyck 			if (n->bits < MAX_STAT_DEPTH)
2546adaf9816SAlexander Duyck 				s->nodesizes[n->bits]++;
25476e22d174SAlexander Duyck 			s->nullpointers += tn_info(n)->empty_children;
254819baf839SRobert Olsson 		}
254998293e8dSAlexander Duyck 	}
25502373ce1cSRobert Olsson 	rcu_read_unlock();
255119baf839SRobert Olsson }
255219baf839SRobert Olsson 
255319baf839SRobert Olsson /*
255419baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
255519baf839SRobert Olsson  */
trie_show_stats(struct seq_file * seq,struct trie_stat * stat)2556cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
255719baf839SRobert Olsson {
2558a034ee3cSEric Dumazet 	unsigned int i, max, pointers, bytes, avdepth;
255919baf839SRobert Olsson 
256019baf839SRobert Olsson 	if (stat->leaves)
256119baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
256219baf839SRobert Olsson 	else
256319baf839SRobert Olsson 		avdepth = 0;
256419baf839SRobert Olsson 
2565a07f5f50SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n",
2566a07f5f50SStephen Hemminger 		   avdepth / 100, avdepth % 100);
2567cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2568cb7b593cSStephen Hemminger 
2569cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
257041b489fdSAlexander Duyck 	bytes = LEAF_SIZE * stat->leaves;
257193672292SStephen Hemminger 
257293672292SStephen Hemminger 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
257379e5ad2cSAlexander Duyck 	bytes += sizeof(struct fib_alias) * stat->prefixes;
257493672292SStephen Hemminger 
2575187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
257641b489fdSAlexander Duyck 	bytes += TNODE_SIZE(0) * stat->tnodes;
257719baf839SRobert Olsson 
257806ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
257906ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
258019baf839SRobert Olsson 		max--;
258119baf839SRobert Olsson 
2582cb7b593cSStephen Hemminger 	pointers = 0;
2583f585a991SJerry Snitselaar 	for (i = 1; i < max; i++)
258419baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2585187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
258619baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
258719baf839SRobert Olsson 		}
2588cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2589187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2590cb7b593cSStephen Hemminger 
259135c6edacSAlexander Duyck 	bytes += sizeof(struct key_vector *) * pointers;
2592187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2593187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
259466a2f7fdSStephen Hemminger }
259519baf839SRobert Olsson 
259619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
trie_show_usage(struct seq_file * seq,const struct trie_use_stats __percpu * stats)259766a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
25988274a97aSAlexander Duyck 			    const struct trie_use_stats __percpu *stats)
259966a2f7fdSStephen Hemminger {
26008274a97aSAlexander Duyck 	struct trie_use_stats s = { 0 };
26018274a97aSAlexander Duyck 	int cpu;
26028274a97aSAlexander Duyck 
26038274a97aSAlexander Duyck 	/* loop through all of the CPUs and gather up the stats */
26048274a97aSAlexander Duyck 	for_each_possible_cpu(cpu) {
26058274a97aSAlexander Duyck 		const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
26068274a97aSAlexander Duyck 
26078274a97aSAlexander Duyck 		s.gets += pcpu->gets;
26088274a97aSAlexander Duyck 		s.backtrack += pcpu->backtrack;
26098274a97aSAlexander Duyck 		s.semantic_match_passed += pcpu->semantic_match_passed;
26108274a97aSAlexander Duyck 		s.semantic_match_miss += pcpu->semantic_match_miss;
26118274a97aSAlexander Duyck 		s.null_node_hit += pcpu->null_node_hit;
26128274a97aSAlexander Duyck 		s.resize_node_skipped += pcpu->resize_node_skipped;
26138274a97aSAlexander Duyck 	}
26148274a97aSAlexander Duyck 
261566a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
26168274a97aSAlexander Duyck 	seq_printf(seq, "gets = %u\n", s.gets);
26178274a97aSAlexander Duyck 	seq_printf(seq, "backtracks = %u\n", s.backtrack);
2618a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match passed = %u\n",
26198274a97aSAlexander Duyck 		   s.semantic_match_passed);
26208274a97aSAlexander Duyck 	seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
26218274a97aSAlexander Duyck 	seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
26228274a97aSAlexander Duyck 	seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
262319baf839SRobert Olsson }
262466a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
262566a2f7fdSStephen Hemminger 
fib_table_print(struct seq_file * seq,struct fib_table * tb)26263d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2627d717a9a6SStephen Hemminger {
26283d3b2d25SStephen Hemminger 	if (tb->tb_id == RT_TABLE_LOCAL)
26293d3b2d25SStephen Hemminger 		seq_puts(seq, "Local:\n");
26303d3b2d25SStephen Hemminger 	else if (tb->tb_id == RT_TABLE_MAIN)
26313d3b2d25SStephen Hemminger 		seq_puts(seq, "Main:\n");
26323d3b2d25SStephen Hemminger 	else
26333d3b2d25SStephen Hemminger 		seq_printf(seq, "Id %d:\n", tb->tb_id);
2634d717a9a6SStephen Hemminger }
263519baf839SRobert Olsson 
26363d3b2d25SStephen Hemminger 
fib_triestat_seq_show(struct seq_file * seq,void * v)263719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
263819baf839SRobert Olsson {
26392e47eeceSYu Zhe 	struct net *net = seq->private;
26403d3b2d25SStephen Hemminger 	unsigned int h;
2641877a9bffSEric W. Biederman 
2642d717a9a6SStephen Hemminger 	seq_printf(seq,
2643a07f5f50SStephen Hemminger 		   "Basic info: size of leaf:"
26445b5e0928SAlexey Dobriyan 		   " %zd bytes, size of tnode: %zd bytes.\n",
264541b489fdSAlexander Duyck 		   LEAF_SIZE, TNODE_SIZE(0));
264619baf839SRobert Olsson 
2647fbe4e0c1SQian Cai 	rcu_read_lock();
26483d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
26493d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
26503d3b2d25SStephen Hemminger 		struct fib_table *tb;
2651cb7b593cSStephen Hemminger 
2652b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
26533d3b2d25SStephen Hemminger 			struct trie *t = (struct trie *) tb->tb_data;
26543d3b2d25SStephen Hemminger 			struct trie_stat stat;
26553d3b2d25SStephen Hemminger 
26563d3b2d25SStephen Hemminger 			if (!t)
26573d3b2d25SStephen Hemminger 				continue;
26583d3b2d25SStephen Hemminger 
26593d3b2d25SStephen Hemminger 			fib_table_print(seq, tb);
26603d3b2d25SStephen Hemminger 
26613d3b2d25SStephen Hemminger 			trie_collect_stats(t, &stat);
26623d3b2d25SStephen Hemminger 			trie_show_stats(seq, &stat);
26633d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
26648274a97aSAlexander Duyck 			trie_show_usage(seq, t->stats);
26653d3b2d25SStephen Hemminger #endif
26663d3b2d25SStephen Hemminger 		}
2667fbe4e0c1SQian Cai 		cond_resched_rcu();
26683d3b2d25SStephen Hemminger 	}
2669fbe4e0c1SQian Cai 	rcu_read_unlock();
2670cb7b593cSStephen Hemminger 
267119baf839SRobert Olsson 	return 0;
267219baf839SRobert Olsson }
267319baf839SRobert Olsson 
fib_trie_get_idx(struct seq_file * seq,loff_t pos)267435c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
267519baf839SRobert Olsson {
26761218854aSYOSHIFUJI Hideaki 	struct fib_trie_iter *iter = seq->private;
26771218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
2678cb7b593cSStephen Hemminger 	loff_t idx = 0;
26793d3b2d25SStephen Hemminger 	unsigned int h;
26803d3b2d25SStephen Hemminger 
26813d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
26823d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
26833d3b2d25SStephen Hemminger 		struct fib_table *tb;
26843d3b2d25SStephen Hemminger 
2685b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
268635c6edacSAlexander Duyck 			struct key_vector *n;
2687cb7b593cSStephen Hemminger 
26883d3b2d25SStephen Hemminger 			for (n = fib_trie_get_first(iter,
26893d3b2d25SStephen Hemminger 						    (struct trie *) tb->tb_data);
26903d3b2d25SStephen Hemminger 			     n; n = fib_trie_get_next(iter))
26913d3b2d25SStephen Hemminger 				if (pos == idx++) {
26923d3b2d25SStephen Hemminger 					iter->tb = tb;
2693cb7b593cSStephen Hemminger 					return n;
269419baf839SRobert Olsson 				}
26953d3b2d25SStephen Hemminger 		}
26963d3b2d25SStephen Hemminger 	}
269719baf839SRobert Olsson 
269819baf839SRobert Olsson 	return NULL;
269919baf839SRobert Olsson }
270019baf839SRobert Olsson 
fib_trie_seq_start(struct seq_file * seq,loff_t * pos)270119baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2702c95aaf9aSStephen Hemminger 	__acquires(RCU)
270319baf839SRobert Olsson {
2704cb7b593cSStephen Hemminger 	rcu_read_lock();
27051218854aSYOSHIFUJI Hideaki 	return fib_trie_get_idx(seq, *pos);
270619baf839SRobert Olsson }
270719baf839SRobert Olsson 
fib_trie_seq_next(struct seq_file * seq,void * v,loff_t * pos)270819baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
270919baf839SRobert Olsson {
2710cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
27111218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
27123d3b2d25SStephen Hemminger 	struct fib_table *tb = iter->tb;
27133d3b2d25SStephen Hemminger 	struct hlist_node *tb_node;
27143d3b2d25SStephen Hemminger 	unsigned int h;
271535c6edacSAlexander Duyck 	struct key_vector *n;
2716cb7b593cSStephen Hemminger 
271719baf839SRobert Olsson 	++*pos;
27183d3b2d25SStephen Hemminger 	/* next node in same table */
27193d3b2d25SStephen Hemminger 	n = fib_trie_get_next(iter);
27203d3b2d25SStephen Hemminger 	if (n)
27213d3b2d25SStephen Hemminger 		return n;
272291b9a277SOlof Johansson 
27233d3b2d25SStephen Hemminger 	/* walk rest of this hash chain */
27243d3b2d25SStephen Hemminger 	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
27250a5c0475SEric Dumazet 	while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
27263d3b2d25SStephen Hemminger 		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
27273d3b2d25SStephen Hemminger 		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
27283d3b2d25SStephen Hemminger 		if (n)
27293d3b2d25SStephen Hemminger 			goto found;
27303d3b2d25SStephen Hemminger 	}
2731cb7b593cSStephen Hemminger 
27323d3b2d25SStephen Hemminger 	/* new hash chain */
27333d3b2d25SStephen Hemminger 	while (++h < FIB_TABLE_HASHSZ) {
27343d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2735b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
27363d3b2d25SStephen Hemminger 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
27373d3b2d25SStephen Hemminger 			if (n)
27383d3b2d25SStephen Hemminger 				goto found;
27393d3b2d25SStephen Hemminger 		}
27403d3b2d25SStephen Hemminger 	}
2741cb7b593cSStephen Hemminger 	return NULL;
27423d3b2d25SStephen Hemminger 
27433d3b2d25SStephen Hemminger found:
27443d3b2d25SStephen Hemminger 	iter->tb = tb;
27453d3b2d25SStephen Hemminger 	return n;
274619baf839SRobert Olsson }
274719baf839SRobert Olsson 
fib_trie_seq_stop(struct seq_file * seq,void * v)274819baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2749c95aaf9aSStephen Hemminger 	__releases(RCU)
275019baf839SRobert Olsson {
2751cb7b593cSStephen Hemminger 	rcu_read_unlock();
275219baf839SRobert Olsson }
275319baf839SRobert Olsson 
seq_indent(struct seq_file * seq,int n)2754cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2755cb7b593cSStephen Hemminger {
2756a034ee3cSEric Dumazet 	while (n-- > 0)
2757a034ee3cSEric Dumazet 		seq_puts(seq, "   ");
2758cb7b593cSStephen Hemminger }
275919baf839SRobert Olsson 
rtn_scope(char * buf,size_t len,enum rt_scope_t s)276028d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2761cb7b593cSStephen Hemminger {
2762cb7b593cSStephen Hemminger 	switch (s) {
2763cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2764cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2765cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2766cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2767cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2768cb7b593cSStephen Hemminger 	default:
276928d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2770cb7b593cSStephen Hemminger 		return buf;
2771cb7b593cSStephen Hemminger 	}
2772cb7b593cSStephen Hemminger }
2773cb7b593cSStephen Hemminger 
277436cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = {
2775cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2776cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2777cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2778cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2779cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2780cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2781cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2782cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2783cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2784cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2785cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2786cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2787cb7b593cSStephen Hemminger };
2788cb7b593cSStephen Hemminger 
rtn_type(char * buf,size_t len,unsigned int t)2789a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2790cb7b593cSStephen Hemminger {
2791cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2792cb7b593cSStephen Hemminger 		return rtn_type_names[t];
279328d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2794cb7b593cSStephen Hemminger 	return buf;
2795cb7b593cSStephen Hemminger }
2796cb7b593cSStephen Hemminger 
2797cb7b593cSStephen Hemminger /* Pretty print the trie */
fib_trie_seq_show(struct seq_file * seq,void * v)279819baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
279919baf839SRobert Olsson {
2800cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
280135c6edacSAlexander Duyck 	struct key_vector *n = v;
280219baf839SRobert Olsson 
280388bae714SAlexander Duyck 	if (IS_TRIE(node_parent_rcu(n)))
28043d3b2d25SStephen Hemminger 		fib_table_print(seq, iter->tb);
2805095b8501SRobert Olsson 
2806095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2807adaf9816SAlexander Duyck 		__be32 prf = htonl(n->key);
2808095b8501SRobert Olsson 
28091d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
2810e9b44019SAlexander Duyck 		seq_printf(seq, "  +-- %pI4/%zu %u %u %u\n",
2811e9b44019SAlexander Duyck 			   &prf, KEYLENGTH - n->pos - n->bits, n->bits,
28126e22d174SAlexander Duyck 			   tn_info(n)->full_children,
28136e22d174SAlexander Duyck 			   tn_info(n)->empty_children);
2814cb7b593cSStephen Hemminger 	} else {
2815adaf9816SAlexander Duyck 		__be32 val = htonl(n->key);
281679e5ad2cSAlexander Duyck 		struct fib_alias *fa;
2817cb7b593cSStephen Hemminger 
2818cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2819673d57e7SHarvey Harrison 		seq_printf(seq, "  |-- %pI4\n", &val);
282028d36e37SEric Dumazet 
282179e5ad2cSAlexander Duyck 		hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
282228d36e37SEric Dumazet 			char buf1[32], buf2[32];
282328d36e37SEric Dumazet 
2824cb7b593cSStephen Hemminger 			seq_indent(seq, iter->depth + 1);
28255786ec60SAlexander Duyck 			seq_printf(seq, "  /%zu %s %s",
28269b6ebad5SAlexander Duyck 				   KEYLENGTH - fa->fa_slen,
282728d36e37SEric Dumazet 				   rtn_scope(buf1, sizeof(buf1),
282837e826c5SDavid S. Miller 					     fa->fa_info->fib_scope),
282928d36e37SEric Dumazet 				   rtn_type(buf2, sizeof(buf2),
283028d36e37SEric Dumazet 					    fa->fa_type));
283132ccf110SGuillaume Nault 			if (fa->fa_dscp)
283232ccf110SGuillaume Nault 				seq_printf(seq, " tos=%d",
283332ccf110SGuillaume Nault 					   inet_dscp_to_dsfield(fa->fa_dscp));
2834cb7b593cSStephen Hemminger 			seq_putc(seq, '\n');
2835cb7b593cSStephen Hemminger 		}
2836cb7b593cSStephen Hemminger 	}
283719baf839SRobert Olsson 
283819baf839SRobert Olsson 	return 0;
283919baf839SRobert Olsson }
284019baf839SRobert Olsson 
2841f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
284219baf839SRobert Olsson 	.start  = fib_trie_seq_start,
284319baf839SRobert Olsson 	.next   = fib_trie_seq_next,
284419baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
284519baf839SRobert Olsson 	.show   = fib_trie_seq_show,
284619baf839SRobert Olsson };
284719baf839SRobert Olsson 
28488315f5d8SStephen Hemminger struct fib_route_iter {
28498315f5d8SStephen Hemminger 	struct seq_net_private p;
28508be33e95SAlexander Duyck 	struct fib_table *main_tb;
285135c6edacSAlexander Duyck 	struct key_vector *tnode;
28528315f5d8SStephen Hemminger 	loff_t	pos;
28538315f5d8SStephen Hemminger 	t_key	key;
28548315f5d8SStephen Hemminger };
28558315f5d8SStephen Hemminger 
fib_route_get_idx(struct fib_route_iter * iter,loff_t pos)285635c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
285735c6edacSAlexander Duyck 					    loff_t pos)
28588315f5d8SStephen Hemminger {
285935c6edacSAlexander Duyck 	struct key_vector *l, **tp = &iter->tnode;
28608be33e95SAlexander Duyck 	t_key key;
28618315f5d8SStephen Hemminger 
2862fd0285a3SAlexander Duyck 	/* use cached location of previously found key */
28638be33e95SAlexander Duyck 	if (iter->pos > 0 && pos >= iter->pos) {
28648be33e95SAlexander Duyck 		key = iter->key;
28658be33e95SAlexander Duyck 	} else {
2866fd0285a3SAlexander Duyck 		iter->pos = 1;
28678be33e95SAlexander Duyck 		key = 0;
28688315f5d8SStephen Hemminger 	}
28698315f5d8SStephen Hemminger 
2870fd0285a3SAlexander Duyck 	pos -= iter->pos;
2871fd0285a3SAlexander Duyck 
2872fd0285a3SAlexander Duyck 	while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
28738be33e95SAlexander Duyck 		key = l->key + 1;
28748315f5d8SStephen Hemminger 		iter->pos++;
28758be33e95SAlexander Duyck 		l = NULL;
28768be33e95SAlexander Duyck 
28778be33e95SAlexander Duyck 		/* handle unlikely case of a key wrap */
28788be33e95SAlexander Duyck 		if (!key)
28798be33e95SAlexander Duyck 			break;
28808315f5d8SStephen Hemminger 	}
28818315f5d8SStephen Hemminger 
28828315f5d8SStephen Hemminger 	if (l)
2883fd0285a3SAlexander Duyck 		iter->key = l->key;	/* remember it */
28848315f5d8SStephen Hemminger 	else
28858315f5d8SStephen Hemminger 		iter->pos = 0;		/* forget it */
28868315f5d8SStephen Hemminger 
28878315f5d8SStephen Hemminger 	return l;
28888315f5d8SStephen Hemminger }
28898315f5d8SStephen Hemminger 
fib_route_seq_start(struct seq_file * seq,loff_t * pos)28908315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
28918315f5d8SStephen Hemminger 	__acquires(RCU)
28928315f5d8SStephen Hemminger {
28938315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
28948315f5d8SStephen Hemminger 	struct fib_table *tb;
28958be33e95SAlexander Duyck 	struct trie *t;
28968315f5d8SStephen Hemminger 
28978315f5d8SStephen Hemminger 	rcu_read_lock();
28988be33e95SAlexander Duyck 
28991218854aSYOSHIFUJI Hideaki 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
29008315f5d8SStephen Hemminger 	if (!tb)
29018315f5d8SStephen Hemminger 		return NULL;
29028315f5d8SStephen Hemminger 
29038be33e95SAlexander Duyck 	iter->main_tb = tb;
290494d9f1c5SDavid Forster 	t = (struct trie *)tb->tb_data;
290594d9f1c5SDavid Forster 	iter->tnode = t->kv;
29068be33e95SAlexander Duyck 
29078be33e95SAlexander Duyck 	if (*pos != 0)
29088be33e95SAlexander Duyck 		return fib_route_get_idx(iter, *pos);
29098be33e95SAlexander Duyck 
29108be33e95SAlexander Duyck 	iter->pos = 0;
2911fd0285a3SAlexander Duyck 	iter->key = KEY_MAX;
29128be33e95SAlexander Duyck 
29138315f5d8SStephen Hemminger 	return SEQ_START_TOKEN;
29148315f5d8SStephen Hemminger }
29158315f5d8SStephen Hemminger 
fib_route_seq_next(struct seq_file * seq,void * v,loff_t * pos)29168315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
29178315f5d8SStephen Hemminger {
29188315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
291935c6edacSAlexander Duyck 	struct key_vector *l = NULL;
2920fd0285a3SAlexander Duyck 	t_key key = iter->key + 1;
29218315f5d8SStephen Hemminger 
29228315f5d8SStephen Hemminger 	++*pos;
29238be33e95SAlexander Duyck 
29248be33e95SAlexander Duyck 	/* only allow key of 0 for start of sequence */
29258be33e95SAlexander Duyck 	if ((v == SEQ_START_TOKEN) || key)
29268be33e95SAlexander Duyck 		l = leaf_walk_rcu(&iter->tnode, key);
29278be33e95SAlexander Duyck 
29288be33e95SAlexander Duyck 	if (l) {
2929fd0285a3SAlexander Duyck 		iter->key = l->key;
29308315f5d8SStephen Hemminger 		iter->pos++;
29318be33e95SAlexander Duyck 	} else {
29328be33e95SAlexander Duyck 		iter->pos = 0;
29338315f5d8SStephen Hemminger 	}
29348315f5d8SStephen Hemminger 
29358315f5d8SStephen Hemminger 	return l;
29368315f5d8SStephen Hemminger }
29378315f5d8SStephen Hemminger 
fib_route_seq_stop(struct seq_file * seq,void * v)29388315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v)
29398315f5d8SStephen Hemminger 	__releases(RCU)
29408315f5d8SStephen Hemminger {
29418315f5d8SStephen Hemminger 	rcu_read_unlock();
29428315f5d8SStephen Hemminger }
29438315f5d8SStephen Hemminger 
fib_flag_trans(int type,__be32 mask,struct fib_info * fi)29445481d73fSDavid Ahern static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2945cb7b593cSStephen Hemminger {
2946a034ee3cSEric Dumazet 	unsigned int flags = 0;
2947cb7b593cSStephen Hemminger 
2948a034ee3cSEric Dumazet 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2949a034ee3cSEric Dumazet 		flags = RTF_REJECT;
29505481d73fSDavid Ahern 	if (fi) {
2951dcb1ecb5SDavid Ahern 		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
29525481d73fSDavid Ahern 
2953dcb1ecb5SDavid Ahern 		if (nhc->nhc_gw.ipv4)
2954cb7b593cSStephen Hemminger 			flags |= RTF_GATEWAY;
29555481d73fSDavid Ahern 	}
295632ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2957cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2958cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2959cb7b593cSStephen Hemminger 	return flags;
2960cb7b593cSStephen Hemminger }
2961cb7b593cSStephen Hemminger 
2962cb7b593cSStephen Hemminger /*
2963cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2964cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2965cb7b593cSStephen Hemminger  *	and needs to be same as fib_hash output to avoid breaking
2966cb7b593cSStephen Hemminger  *	legacy utilities
2967cb7b593cSStephen Hemminger  */
fib_route_seq_show(struct seq_file * seq,void * v)2968cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2969cb7b593cSStephen Hemminger {
2970654eff45SAlexander Duyck 	struct fib_route_iter *iter = seq->private;
2971654eff45SAlexander Duyck 	struct fib_table *tb = iter->main_tb;
297279e5ad2cSAlexander Duyck 	struct fib_alias *fa;
297335c6edacSAlexander Duyck 	struct key_vector *l = v;
29749b6ebad5SAlexander Duyck 	__be32 prefix;
2975cb7b593cSStephen Hemminger 
2976cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2977cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2978cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2979cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2980cb7b593cSStephen Hemminger 		return 0;
2981cb7b593cSStephen Hemminger 	}
2982cb7b593cSStephen Hemminger 
29839b6ebad5SAlexander Duyck 	prefix = htonl(l->key);
29849b6ebad5SAlexander Duyck 
298579e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
29865481d73fSDavid Ahern 		struct fib_info *fi = fa->fa_info;
29879b6ebad5SAlexander Duyck 		__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2988a034ee3cSEric Dumazet 		unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2989cb7b593cSStephen Hemminger 
299079e5ad2cSAlexander Duyck 		if ((fa->fa_type == RTN_BROADCAST) ||
299179e5ad2cSAlexander Duyck 		    (fa->fa_type == RTN_MULTICAST))
2992cb7b593cSStephen Hemminger 			continue;
2993cb7b593cSStephen Hemminger 
2994654eff45SAlexander Duyck 		if (fa->tb_id != tb->tb_id)
2995654eff45SAlexander Duyck 			continue;
2996654eff45SAlexander Duyck 
2997652586dfSTetsuo Handa 		seq_setwidth(seq, 127);
2998652586dfSTetsuo Handa 
29995481d73fSDavid Ahern 		if (fi) {
3000dcb1ecb5SDavid Ahern 			struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
3001dcb1ecb5SDavid Ahern 			__be32 gw = 0;
3002dcb1ecb5SDavid Ahern 
3003dcb1ecb5SDavid Ahern 			if (nhc->nhc_gw_family == AF_INET)
3004dcb1ecb5SDavid Ahern 				gw = nhc->nhc_gw.ipv4;
30055481d73fSDavid Ahern 
30065e659e4cSPavel Emelyanov 			seq_printf(seq,
30075e659e4cSPavel Emelyanov 				   "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
3008652586dfSTetsuo Handa 				   "%d\t%08X\t%d\t%u\t%u",
3009dcb1ecb5SDavid Ahern 				   nhc->nhc_dev ? nhc->nhc_dev->name : "*",
3010dcb1ecb5SDavid Ahern 				   prefix, gw, flags, 0, 0,
3011cb7b593cSStephen Hemminger 				   fi->fib_priority,
3012cb7b593cSStephen Hemminger 				   mask,
3013a07f5f50SStephen Hemminger 				   (fi->fib_advmss ?
3014a07f5f50SStephen Hemminger 				    fi->fib_advmss + 40 : 0),
3015cb7b593cSStephen Hemminger 				   fi->fib_window,
3016652586dfSTetsuo Handa 				   fi->fib_rtt >> 3);
30175481d73fSDavid Ahern 		} else {
30185e659e4cSPavel Emelyanov 			seq_printf(seq,
30195e659e4cSPavel Emelyanov 				   "*\t%08X\t%08X\t%04X\t%d\t%u\t"
3020652586dfSTetsuo Handa 				   "%d\t%08X\t%d\t%u\t%u",
3021cb7b593cSStephen Hemminger 				   prefix, 0, flags, 0, 0, 0,
3022652586dfSTetsuo Handa 				   mask, 0, 0, 0);
30235481d73fSDavid Ahern 		}
3024652586dfSTetsuo Handa 		seq_pad(seq, '\n');
3025cb7b593cSStephen Hemminger 	}
3026cb7b593cSStephen Hemminger 
3027cb7b593cSStephen Hemminger 	return 0;
3028cb7b593cSStephen Hemminger }
3029cb7b593cSStephen Hemminger 
3030f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
30318315f5d8SStephen Hemminger 	.start  = fib_route_seq_start,
30328315f5d8SStephen Hemminger 	.next   = fib_route_seq_next,
30338315f5d8SStephen Hemminger 	.stop   = fib_route_seq_stop,
3034cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
3035cb7b593cSStephen Hemminger };
3036cb7b593cSStephen Hemminger 
fib_proc_init(struct net * net)303761a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
303819baf839SRobert Olsson {
3039c3506372SChristoph Hellwig 	if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
3040c3506372SChristoph Hellwig 			sizeof(struct fib_trie_iter)))
3041cb7b593cSStephen Hemminger 		goto out1;
3042cb7b593cSStephen Hemminger 
30433617d949SChristoph Hellwig 	if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
30443617d949SChristoph Hellwig 			fib_triestat_seq_show, NULL))
3045cb7b593cSStephen Hemminger 		goto out2;
3046cb7b593cSStephen Hemminger 
3047c3506372SChristoph Hellwig 	if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
3048c3506372SChristoph Hellwig 			sizeof(struct fib_route_iter)))
3049cb7b593cSStephen Hemminger 		goto out3;
3050cb7b593cSStephen Hemminger 
305119baf839SRobert Olsson 	return 0;
3052cb7b593cSStephen Hemminger 
3053cb7b593cSStephen Hemminger out3:
3054ece31ffdSGao feng 	remove_proc_entry("fib_triestat", net->proc_net);
3055cb7b593cSStephen Hemminger out2:
3056ece31ffdSGao feng 	remove_proc_entry("fib_trie", net->proc_net);
3057cb7b593cSStephen Hemminger out1:
3058cb7b593cSStephen Hemminger 	return -ENOMEM;
305919baf839SRobert Olsson }
306019baf839SRobert Olsson 
fib_proc_exit(struct net * net)306161a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
306219baf839SRobert Olsson {
3063ece31ffdSGao feng 	remove_proc_entry("fib_trie", net->proc_net);
3064ece31ffdSGao feng 	remove_proc_entry("fib_triestat", net->proc_net);
3065ece31ffdSGao feng 	remove_proc_entry("route", net->proc_net);
306619baf839SRobert Olsson }
306719baf839SRobert Olsson 
306819baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
3069