xref: /openbmc/linux/net/ipv4/fib_trie.c (revision 4b2b6060)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
219baf839SRobert Olsson /*
319baf839SRobert Olsson  *
419baf839SRobert Olsson  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
519baf839SRobert Olsson  *     & Swedish University of Agricultural Sciences.
619baf839SRobert Olsson  *
719baf839SRobert Olsson  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
819baf839SRobert Olsson  *     Agricultural Sciences.
919baf839SRobert Olsson  *
1019baf839SRobert Olsson  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
1119baf839SRobert Olsson  *
1225985edcSLucas De Marchi  * This work is based on the LPC-trie which is originally described in:
1319baf839SRobert Olsson  *
1419baf839SRobert Olsson  * An experimental study of compression methods for dynamic tries
1519baf839SRobert Olsson  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
167a6498ebSAlexander A. Klimov  * https://www.csc.kth.se/~snilsson/software/dyntrie2/
1719baf839SRobert Olsson  *
1819baf839SRobert Olsson  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
1919baf839SRobert Olsson  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2019baf839SRobert Olsson  *
2119baf839SRobert Olsson  * Code from fib_hash has been reused which includes the following header:
2219baf839SRobert Olsson  *
2319baf839SRobert Olsson  * INET		An implementation of the TCP/IP protocol suite for the LINUX
2419baf839SRobert Olsson  *		operating system.  INET is implemented using the  BSD Socket
2519baf839SRobert Olsson  *		interface as the means of communication with the user level.
2619baf839SRobert Olsson  *
2719baf839SRobert Olsson  *		IPv4 FIB: lookup engine and maintenance routines.
2819baf839SRobert Olsson  *
2919baf839SRobert Olsson  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
3019baf839SRobert Olsson  *
31fd966255SRobert Olsson  * Substantial contributions to this work comes from:
32fd966255SRobert Olsson  *
33fd966255SRobert Olsson  *		David S. Miller, <davem@davemloft.net>
34fd966255SRobert Olsson  *		Stephen Hemminger <shemminger@osdl.org>
35fd966255SRobert Olsson  *		Paul E. McKenney <paulmck@us.ibm.com>
36fd966255SRobert Olsson  *		Patrick McHardy <kaber@trash.net>
3719baf839SRobert Olsson  */
3808009a76SAlexey Dobriyan #include <linux/cache.h>
397c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
401977f032SJiri Slaby #include <linux/bitops.h>
4119baf839SRobert Olsson #include <linux/types.h>
4219baf839SRobert Olsson #include <linux/kernel.h>
4319baf839SRobert Olsson #include <linux/mm.h>
4419baf839SRobert Olsson #include <linux/string.h>
4519baf839SRobert Olsson #include <linux/socket.h>
4619baf839SRobert Olsson #include <linux/sockios.h>
4719baf839SRobert Olsson #include <linux/errno.h>
4819baf839SRobert Olsson #include <linux/in.h>
4919baf839SRobert Olsson #include <linux/inet.h>
50cd8787abSStephen Hemminger #include <linux/inetdevice.h>
5119baf839SRobert Olsson #include <linux/netdevice.h>
5219baf839SRobert Olsson #include <linux/if_arp.h>
5319baf839SRobert Olsson #include <linux/proc_fs.h>
542373ce1cSRobert Olsson #include <linux/rcupdate.h>
5519baf839SRobert Olsson #include <linux/skbuff.h>
5619baf839SRobert Olsson #include <linux/netlink.h>
5719baf839SRobert Olsson #include <linux/init.h>
5819baf839SRobert Olsson #include <linux/list.h>
595a0e3ad6STejun Heo #include <linux/slab.h>
60bc3b2d7fSPaul Gortmaker #include <linux/export.h>
61ffa915d0SDavid S. Miller #include <linux/vmalloc.h>
62b90eb754SJiri Pirko #include <linux/notifier.h>
63457c4cbcSEric W. Biederman #include <net/net_namespace.h>
64f55fbb6aSGuillaume Nault #include <net/inet_dscp.h>
6519baf839SRobert Olsson #include <net/ip.h>
6619baf839SRobert Olsson #include <net/protocol.h>
6719baf839SRobert Olsson #include <net/route.h>
6819baf839SRobert Olsson #include <net/tcp.h>
6919baf839SRobert Olsson #include <net/sock.h>
7019baf839SRobert Olsson #include <net/ip_fib.h>
7104b1d4e5SIdo Schimmel #include <net/fib_notifier.h>
72f6d3c192SDavid Ahern #include <trace/events/fib.h>
7319baf839SRobert Olsson #include "fib_lookup.h"
7419baf839SRobert Olsson 
call_fib_entry_notifier(struct notifier_block * nb,enum fib_event_type event_type,u32 dst,int dst_len,struct fib_alias * fa,struct netlink_ext_ack * extack)757c550dafSJiri Pirko static int call_fib_entry_notifier(struct notifier_block *nb,
76c3852ef7SIdo Schimmel 				   enum fib_event_type event_type, u32 dst,
77b7a59557SJiri Pirko 				   int dst_len, struct fib_alias *fa,
78b7a59557SJiri Pirko 				   struct netlink_ext_ack *extack)
79c3852ef7SIdo Schimmel {
80c3852ef7SIdo Schimmel 	struct fib_entry_notifier_info info = {
81b7a59557SJiri Pirko 		.info.extack = extack,
82c3852ef7SIdo Schimmel 		.dst = dst,
83c3852ef7SIdo Schimmel 		.dst_len = dst_len,
846eba87c7SDavid Ahern 		.fi = fa->fa_info,
85568a3f33SGuillaume Nault 		.dscp = fa->fa_dscp,
866eba87c7SDavid Ahern 		.type = fa->fa_type,
876eba87c7SDavid Ahern 		.tb_id = fa->tb_id,
88c3852ef7SIdo Schimmel 	};
897c550dafSJiri Pirko 	return call_fib4_notifier(nb, event_type, &info.info);
90c3852ef7SIdo Schimmel }
91c3852ef7SIdo Schimmel 
call_fib_entry_notifiers(struct net * net,enum fib_event_type event_type,u32 dst,int dst_len,struct fib_alias * fa,struct netlink_ext_ack * extack)92b90eb754SJiri Pirko static int call_fib_entry_notifiers(struct net *net,
93b90eb754SJiri Pirko 				    enum fib_event_type event_type, u32 dst,
946c31e5a9SDavid Ahern 				    int dst_len, struct fib_alias *fa,
956c31e5a9SDavid Ahern 				    struct netlink_ext_ack *extack)
96b90eb754SJiri Pirko {
97b90eb754SJiri Pirko 	struct fib_entry_notifier_info info = {
986c31e5a9SDavid Ahern 		.info.extack = extack,
99b90eb754SJiri Pirko 		.dst = dst,
100b90eb754SJiri Pirko 		.dst_len = dst_len,
1016eba87c7SDavid Ahern 		.fi = fa->fa_info,
102568a3f33SGuillaume Nault 		.dscp = fa->fa_dscp,
1036eba87c7SDavid Ahern 		.type = fa->fa_type,
1046eba87c7SDavid Ahern 		.tb_id = fa->tb_id,
105b90eb754SJiri Pirko 	};
10604b1d4e5SIdo Schimmel 	return call_fib4_notifiers(net, event_type, &info.info);
107b90eb754SJiri Pirko }
108b90eb754SJiri Pirko 
10906ef921dSRobert Olsson #define MAX_STAT_DEPTH 32
11019baf839SRobert Olsson 
11119baf839SRobert Olsson #define KEYLENGTH	(8*sizeof(t_key))
11295f60ea3SAlexander Duyck #define KEY_MAX		((t_key)~0)
11319baf839SRobert Olsson 
11419baf839SRobert Olsson typedef unsigned int t_key;
11519baf839SRobert Olsson 
11688bae714SAlexander Duyck #define IS_TRIE(n)	((n)->pos >= KEYLENGTH)
11764c9b6fbSAlexander Duyck #define IS_TNODE(n)	((n)->bits)
11864c9b6fbSAlexander Duyck #define IS_LEAF(n)	(!(n)->bits)
1192373ce1cSRobert Olsson 
12035c6edacSAlexander Duyck struct key_vector {
12141b489fdSAlexander Duyck 	t_key key;
12241b489fdSAlexander Duyck 	unsigned char pos;		/* 2log(KEYLENGTH) bits needed */
12341b489fdSAlexander Duyck 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
12441b489fdSAlexander Duyck 	unsigned char slen;
12541b489fdSAlexander Duyck 	union {
12641b489fdSAlexander Duyck 		/* This list pointer if valid if (pos | bits) == 0 (LEAF) */
12779e5ad2cSAlexander Duyck 		struct hlist_head leaf;
12841b489fdSAlexander Duyck 		/* This array is valid if (pos | bits) > 0 (TNODE) */
129764f8485SKees Cook 		DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode);
13019baf839SRobert Olsson 	};
131adaf9816SAlexander Duyck };
13219baf839SRobert Olsson 
133dc35dbedSAlexander Duyck struct tnode {
13456ca2adfSAlexander Duyck 	struct rcu_head rcu;
1356e22d174SAlexander Duyck 	t_key empty_children;		/* KEYLENGTH bits needed */
1366e22d174SAlexander Duyck 	t_key full_children;		/* KEYLENGTH bits needed */
137f23e59fbSAlexander Duyck 	struct key_vector __rcu *parent;
138dc35dbedSAlexander Duyck 	struct key_vector kv[1];
13956ca2adfSAlexander Duyck #define tn_bits kv[0].bits
140dc35dbedSAlexander Duyck };
141dc35dbedSAlexander Duyck 
142dc35dbedSAlexander Duyck #define TNODE_SIZE(n)	offsetof(struct tnode, kv[0].tnode[n])
14341b489fdSAlexander Duyck #define LEAF_SIZE	TNODE_SIZE(1)
14441b489fdSAlexander Duyck 
14519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
14619baf839SRobert Olsson struct trie_use_stats {
14719baf839SRobert Olsson 	unsigned int gets;
14819baf839SRobert Olsson 	unsigned int backtrack;
14919baf839SRobert Olsson 	unsigned int semantic_match_passed;
15019baf839SRobert Olsson 	unsigned int semantic_match_miss;
15119baf839SRobert Olsson 	unsigned int null_node_hit;
1522f36895aSRobert Olsson 	unsigned int resize_node_skipped;
15319baf839SRobert Olsson };
15419baf839SRobert Olsson #endif
15519baf839SRobert Olsson 
15619baf839SRobert Olsson struct trie_stat {
15719baf839SRobert Olsson 	unsigned int totdepth;
15819baf839SRobert Olsson 	unsigned int maxdepth;
15919baf839SRobert Olsson 	unsigned int tnodes;
16019baf839SRobert Olsson 	unsigned int leaves;
16119baf839SRobert Olsson 	unsigned int nullpointers;
16293672292SStephen Hemminger 	unsigned int prefixes;
16306ef921dSRobert Olsson 	unsigned int nodesizes[MAX_STAT_DEPTH];
16419baf839SRobert Olsson };
16519baf839SRobert Olsson 
16619baf839SRobert Olsson struct trie {
16788bae714SAlexander Duyck 	struct key_vector kv[1];
16819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
1698274a97aSAlexander Duyck 	struct trie_use_stats __percpu *stats;
17019baf839SRobert Olsson #endif
17119baf839SRobert Olsson };
17219baf839SRobert Olsson 
17388bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn);
1749ab948a9SDavid Ahern static unsigned int tnode_free_size;
175c3059477SJarek Poplawski 
176c3059477SJarek Poplawski /*
1779ab948a9SDavid Ahern  * synchronize_rcu after call_rcu for outstanding dirty memory; it should be
1789ab948a9SDavid Ahern  * especially useful before resizing the root node with PREEMPT_NONE configs;
1799ab948a9SDavid Ahern  * the value was obtained experimentally, aiming to avoid visible slowdown.
180c3059477SJarek Poplawski  */
1819ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem = 512 * 1024;
1829ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
1839ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
18419baf839SRobert Olsson 
18508009a76SAlexey Dobriyan static struct kmem_cache *fn_alias_kmem __ro_after_init;
18608009a76SAlexey Dobriyan static struct kmem_cache *trie_leaf_kmem __ro_after_init;
18719baf839SRobert Olsson 
tn_info(struct key_vector * kv)18856ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv)
18956ca2adfSAlexander Duyck {
19056ca2adfSAlexander Duyck 	return container_of(kv, struct tnode, kv[0]);
19156ca2adfSAlexander Duyck }
19256ca2adfSAlexander Duyck 
19364c9b6fbSAlexander Duyck /* caller must hold RTNL */
194f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
195754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
1960a5c0475SEric Dumazet 
19764c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */
198f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
199754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
2000a5c0475SEric Dumazet 
20164c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */
node_set_parent(struct key_vector * n,struct key_vector * tp)20235c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
20306801916SStephen Hemminger {
204adaf9816SAlexander Duyck 	if (n)
205f23e59fbSAlexander Duyck 		rcu_assign_pointer(tn_info(n)->parent, tp);
20664c9b6fbSAlexander Duyck }
20764c9b6fbSAlexander Duyck 
208f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
20964c9b6fbSAlexander Duyck 
21064c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a
21164c9b6fbSAlexander Duyck  * leaf this will return 0 meaning none of the children are accessible.
21264c9b6fbSAlexander Duyck  */
child_length(const struct key_vector * tn)2132e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn)
21464c9b6fbSAlexander Duyck {
21564c9b6fbSAlexander Duyck 	return (1ul << tn->bits) & ~(1ul);
21606801916SStephen Hemminger }
2172373ce1cSRobert Olsson 
21888bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
21988bae714SAlexander Duyck 
get_index(t_key key,struct key_vector * kv)2202e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv)
2212e1ac88aSAlexander Duyck {
2222e1ac88aSAlexander Duyck 	unsigned long index = key ^ kv->key;
2232e1ac88aSAlexander Duyck 
22488bae714SAlexander Duyck 	if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
22588bae714SAlexander Duyck 		return 0;
22688bae714SAlexander Duyck 
2272e1ac88aSAlexander Duyck 	return index >> kv->pos;
2282e1ac88aSAlexander Duyck }
2292e1ac88aSAlexander Duyck 
230e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is
231e9b44019SAlexander Duyck  * necessary. Every node in the trie has a key associated with it, but not
232e9b44019SAlexander Duyck  * all of the bits in that key are significant.
233e9b44019SAlexander Duyck  *
234e9b44019SAlexander Duyck  * Consider a node 'n' and its parent 'tp'.
235e9b44019SAlexander Duyck  *
236e9b44019SAlexander Duyck  * If n is a leaf, every bit in its key is significant. Its presence is
237e9b44019SAlexander Duyck  * necessitated by path compression, since during a tree traversal (when
238e9b44019SAlexander Duyck  * searching for a leaf - unless we are doing an insertion) we will completely
239e9b44019SAlexander Duyck  * ignore all skipped bits we encounter. Thus we need to verify, at the end of
240e9b44019SAlexander Duyck  * a potentially successful search, that we have indeed been walking the
241e9b44019SAlexander Duyck  * correct key path.
242e9b44019SAlexander Duyck  *
243e9b44019SAlexander Duyck  * Note that we can never "miss" the correct key in the tree if present by
244e9b44019SAlexander Duyck  * following the wrong path. Path compression ensures that segments of the key
245e9b44019SAlexander Duyck  * that are the same for all keys with a given prefix are skipped, but the
246e9b44019SAlexander Duyck  * skipped part *is* identical for each node in the subtrie below the skipped
247e9b44019SAlexander Duyck  * bit! trie_insert() in this implementation takes care of that.
248e9b44019SAlexander Duyck  *
249e9b44019SAlexander Duyck  * if n is an internal node - a 'tnode' here, the various parts of its key
250e9b44019SAlexander Duyck  * have many different meanings.
251e9b44019SAlexander Duyck  *
252e9b44019SAlexander Duyck  * Example:
253e9b44019SAlexander Duyck  * _________________________________________________________________
254e9b44019SAlexander Duyck  * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
255e9b44019SAlexander Duyck  * -----------------------------------------------------------------
256e9b44019SAlexander Duyck  *  31  30  29  28  27  26  25  24  23  22  21  20  19  18  17  16
257e9b44019SAlexander Duyck  *
258e9b44019SAlexander Duyck  * _________________________________________________________________
259e9b44019SAlexander Duyck  * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
260e9b44019SAlexander Duyck  * -----------------------------------------------------------------
261e9b44019SAlexander Duyck  *  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
262e9b44019SAlexander Duyck  *
263e9b44019SAlexander Duyck  * tp->pos = 22
264e9b44019SAlexander Duyck  * tp->bits = 3
265e9b44019SAlexander Duyck  * n->pos = 13
266e9b44019SAlexander Duyck  * n->bits = 4
267e9b44019SAlexander Duyck  *
268e9b44019SAlexander Duyck  * First, let's just ignore the bits that come before the parent tp, that is
269e9b44019SAlexander Duyck  * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
270e9b44019SAlexander Duyck  * point we do not use them for anything.
271e9b44019SAlexander Duyck  *
272e9b44019SAlexander Duyck  * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
273e9b44019SAlexander Duyck  * index into the parent's child array. That is, they will be used to find
274e9b44019SAlexander Duyck  * 'n' among tp's children.
275e9b44019SAlexander Duyck  *
27698a384ecSXunlei Pang  * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
277e9b44019SAlexander Duyck  * for the node n.
278e9b44019SAlexander Duyck  *
279e9b44019SAlexander Duyck  * All the bits we have seen so far are significant to the node n. The rest
280e9b44019SAlexander Duyck  * of the bits are really not needed or indeed known in n->key.
281e9b44019SAlexander Duyck  *
282e9b44019SAlexander Duyck  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
283e9b44019SAlexander Duyck  * n's child array, and will of course be different for each child.
284e9b44019SAlexander Duyck  *
28598a384ecSXunlei Pang  * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown
286e9b44019SAlexander Duyck  * at this point.
28719baf839SRobert Olsson  */
28819baf839SRobert Olsson 
289f5026fabSDenis V. Lunev static const int halve_threshold = 25;
290f5026fabSDenis V. Lunev static const int inflate_threshold = 50;
291345aa031SJarek Poplawski static const int halve_threshold_root = 15;
29280b71b80SJens Låås static const int inflate_threshold_root = 30;
2932373ce1cSRobert Olsson 
__alias_free_mem(struct rcu_head * head)2942373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head)
2952373ce1cSRobert Olsson {
2962373ce1cSRobert Olsson 	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
2972373ce1cSRobert Olsson 	kmem_cache_free(fn_alias_kmem, fa);
2982373ce1cSRobert Olsson }
2992373ce1cSRobert Olsson 
alias_free_mem_rcu(struct fib_alias * fa)3002373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa)
3012373ce1cSRobert Olsson {
3022373ce1cSRobert Olsson 	call_rcu(&fa->rcu, __alias_free_mem);
3032373ce1cSRobert Olsson }
3042373ce1cSRobert Olsson 
3051de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \
30635c6edacSAlexander Duyck 	ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
30737fd30f2SAlexander Duyck 
__node_free_rcu(struct rcu_head * head)30837fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head)
3092373ce1cSRobert Olsson {
31056ca2adfSAlexander Duyck 	struct tnode *n = container_of(head, struct tnode, rcu);
31137fd30f2SAlexander Duyck 
31256ca2adfSAlexander Duyck 	if (!n->tn_bits)
31337fd30f2SAlexander Duyck 		kmem_cache_free(trie_leaf_kmem, n);
31437fd30f2SAlexander Duyck 	else
3151d5cfdb0STetsuo Handa 		kvfree(n);
3162373ce1cSRobert Olsson }
3172373ce1cSRobert Olsson 
31856ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
319387a5487SStephen Hemminger 
tnode_alloc(int bits)320dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits)
3212373ce1cSRobert Olsson {
3221de3d87bSAlexander Duyck 	size_t size;
3231de3d87bSAlexander Duyck 
3241de3d87bSAlexander Duyck 	/* verify bits is within bounds */
3251de3d87bSAlexander Duyck 	if (bits > TNODE_VMALLOC_MAX)
3261de3d87bSAlexander Duyck 		return NULL;
3271de3d87bSAlexander Duyck 
3281de3d87bSAlexander Duyck 	/* determine size and verify it is non-zero and didn't overflow */
3291de3d87bSAlexander Duyck 	size = TNODE_SIZE(1ul << bits);
3301de3d87bSAlexander Duyck 
3312373ce1cSRobert Olsson 	if (size <= PAGE_SIZE)
3328d965444SEric Dumazet 		return kzalloc(size, GFP_KERNEL);
33315be75cdSStephen Hemminger 	else
3347a1c8e5aSEric Dumazet 		return vzalloc(size);
33515be75cdSStephen Hemminger }
3362373ce1cSRobert Olsson 
empty_child_inc(struct key_vector * n)33735c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n)
33895f60ea3SAlexander Duyck {
33925cec756SMatthias Kaehlcke 	tn_info(n)->empty_children++;
34025cec756SMatthias Kaehlcke 
34125cec756SMatthias Kaehlcke 	if (!tn_info(n)->empty_children)
34225cec756SMatthias Kaehlcke 		tn_info(n)->full_children++;
34395f60ea3SAlexander Duyck }
34495f60ea3SAlexander Duyck 
empty_child_dec(struct key_vector * n)34535c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n)
34695f60ea3SAlexander Duyck {
34725cec756SMatthias Kaehlcke 	if (!tn_info(n)->empty_children)
34825cec756SMatthias Kaehlcke 		tn_info(n)->full_children--;
34925cec756SMatthias Kaehlcke 
35025cec756SMatthias Kaehlcke 	tn_info(n)->empty_children--;
35195f60ea3SAlexander Duyck }
35295f60ea3SAlexander Duyck 
leaf_new(t_key key,struct fib_alias * fa)35335c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
35419baf839SRobert Olsson {
355f38b24c9SFiro Yang 	struct key_vector *l;
356f38b24c9SFiro Yang 	struct tnode *kv;
357dc35dbedSAlexander Duyck 
358f38b24c9SFiro Yang 	kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
359dc35dbedSAlexander Duyck 	if (!kv)
360dc35dbedSAlexander Duyck 		return NULL;
361dc35dbedSAlexander Duyck 
362dc35dbedSAlexander Duyck 	/* initialize key vector */
363f38b24c9SFiro Yang 	l = kv->kv;
36464c9b6fbSAlexander Duyck 	l->key = key;
365e9b44019SAlexander Duyck 	l->pos = 0;
36664c9b6fbSAlexander Duyck 	l->bits = 0;
367dc35dbedSAlexander Duyck 	l->slen = fa->fa_slen;
36864c9b6fbSAlexander Duyck 
369d5d6487cSAlexander Duyck 	/* link leaf to fib alias */
37079e5ad2cSAlexander Duyck 	INIT_HLIST_HEAD(&l->leaf);
371d5d6487cSAlexander Duyck 	hlist_add_head(&fa->fa_list, &l->leaf);
372dc35dbedSAlexander Duyck 
37319baf839SRobert Olsson 	return l;
37419baf839SRobert Olsson }
37519baf839SRobert Olsson 
tnode_new(t_key key,int pos,int bits)37635c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits)
37719baf839SRobert Olsson {
37864c9b6fbSAlexander Duyck 	unsigned int shift = pos + bits;
379f38b24c9SFiro Yang 	struct key_vector *tn;
380f38b24c9SFiro Yang 	struct tnode *tnode;
38164c9b6fbSAlexander Duyck 
38264c9b6fbSAlexander Duyck 	/* verify bits and pos their msb bits clear and values are valid */
38364c9b6fbSAlexander Duyck 	BUG_ON(!bits || (shift > KEYLENGTH));
38419baf839SRobert Olsson 
385f38b24c9SFiro Yang 	tnode = tnode_alloc(bits);
386dc35dbedSAlexander Duyck 	if (!tnode)
387dc35dbedSAlexander Duyck 		return NULL;
388dc35dbedSAlexander Duyck 
389f38b24c9SFiro Yang 	pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
390f38b24c9SFiro Yang 		 sizeof(struct key_vector *) << bits);
391f38b24c9SFiro Yang 
39295f60ea3SAlexander Duyck 	if (bits == KEYLENGTH)
3936e22d174SAlexander Duyck 		tnode->full_children = 1;
39495f60ea3SAlexander Duyck 	else
3956e22d174SAlexander Duyck 		tnode->empty_children = 1ul << bits;
396c877efb2SStephen Hemminger 
397f38b24c9SFiro Yang 	tn = tnode->kv;
398dc35dbedSAlexander Duyck 	tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
399dc35dbedSAlexander Duyck 	tn->pos = pos;
400dc35dbedSAlexander Duyck 	tn->bits = bits;
401dc35dbedSAlexander Duyck 	tn->slen = pos;
402dc35dbedSAlexander Duyck 
40319baf839SRobert Olsson 	return tn;
40419baf839SRobert Olsson }
40519baf839SRobert Olsson 
406e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node
40719baf839SRobert Olsson  * and no bits are skipped. See discussion in dyntree paper p. 6
40819baf839SRobert Olsson  */
tnode_full(struct key_vector * tn,struct key_vector * n)40935c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
41019baf839SRobert Olsson {
411e9b44019SAlexander Duyck 	return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
41219baf839SRobert Olsson }
41319baf839SRobert Olsson 
414ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value.
41519baf839SRobert Olsson  * Update the value of full_children and empty_children.
41619baf839SRobert Olsson  */
put_child(struct key_vector * tn,unsigned long i,struct key_vector * n)41735c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i,
41835c6edacSAlexander Duyck 		      struct key_vector *n)
41919baf839SRobert Olsson {
420754baf8dSAlexander Duyck 	struct key_vector *chi = get_child(tn, i);
421ff181ed8SAlexander Duyck 	int isfull, wasfull;
42219baf839SRobert Olsson 
4232e1ac88aSAlexander Duyck 	BUG_ON(i >= child_length(tn));
4240c7770c7SStephen Hemminger 
42595f60ea3SAlexander Duyck 	/* update emptyChildren, overflow into fullChildren */
42600db4124SIan Morris 	if (!n && chi)
42795f60ea3SAlexander Duyck 		empty_child_inc(tn);
42800db4124SIan Morris 	if (n && !chi)
42995f60ea3SAlexander Duyck 		empty_child_dec(tn);
43019baf839SRobert Olsson 
43119baf839SRobert Olsson 	/* update fullChildren */
43219baf839SRobert Olsson 	wasfull = tnode_full(tn, chi);
43319baf839SRobert Olsson 	isfull = tnode_full(tn, n);
434ff181ed8SAlexander Duyck 
43519baf839SRobert Olsson 	if (wasfull && !isfull)
4366e22d174SAlexander Duyck 		tn_info(tn)->full_children--;
43719baf839SRobert Olsson 	else if (!wasfull && isfull)
4386e22d174SAlexander Duyck 		tn_info(tn)->full_children++;
43991b9a277SOlof Johansson 
4405405afd1SAlexander Duyck 	if (n && (tn->slen < n->slen))
4415405afd1SAlexander Duyck 		tn->slen = n->slen;
4425405afd1SAlexander Duyck 
44341b489fdSAlexander Duyck 	rcu_assign_pointer(tn->tnode[i], n);
44419baf839SRobert Olsson }
44519baf839SRobert Olsson 
update_children(struct key_vector * tn)44635c6edacSAlexander Duyck static void update_children(struct key_vector *tn)
44769fa57b1SAlexander Duyck {
44869fa57b1SAlexander Duyck 	unsigned long i;
44969fa57b1SAlexander Duyck 
45069fa57b1SAlexander Duyck 	/* update all of the child parent pointers */
4512e1ac88aSAlexander Duyck 	for (i = child_length(tn); i;) {
452754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(tn, --i);
45369fa57b1SAlexander Duyck 
45469fa57b1SAlexander Duyck 		if (!inode)
45569fa57b1SAlexander Duyck 			continue;
45669fa57b1SAlexander Duyck 
45769fa57b1SAlexander Duyck 		/* Either update the children of a tnode that
45869fa57b1SAlexander Duyck 		 * already belongs to us or update the child
45969fa57b1SAlexander Duyck 		 * to point to ourselves.
46069fa57b1SAlexander Duyck 		 */
46169fa57b1SAlexander Duyck 		if (node_parent(inode) == tn)
46269fa57b1SAlexander Duyck 			update_children(inode);
46369fa57b1SAlexander Duyck 		else
46469fa57b1SAlexander Duyck 			node_set_parent(inode, tn);
46569fa57b1SAlexander Duyck 	}
46669fa57b1SAlexander Duyck }
46769fa57b1SAlexander Duyck 
put_child_root(struct key_vector * tp,t_key key,struct key_vector * n)46888bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key,
46988bae714SAlexander Duyck 				  struct key_vector *n)
470836a0123SAlexander Duyck {
47188bae714SAlexander Duyck 	if (IS_TRIE(tp))
47288bae714SAlexander Duyck 		rcu_assign_pointer(tp->tnode[0], n);
473836a0123SAlexander Duyck 	else
47488bae714SAlexander Duyck 		put_child(tp, get_index(key, tp), n);
475836a0123SAlexander Duyck }
476836a0123SAlexander Duyck 
tnode_free_init(struct key_vector * tn)47735c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn)
4780a5c0475SEric Dumazet {
47956ca2adfSAlexander Duyck 	tn_info(tn)->rcu.next = NULL;
4800a5c0475SEric Dumazet }
481fc86a93bSAlexander Duyck 
tnode_free_append(struct key_vector * tn,struct key_vector * n)48235c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn,
48335c6edacSAlexander Duyck 				     struct key_vector *n)
484fc86a93bSAlexander Duyck {
48556ca2adfSAlexander Duyck 	tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
48656ca2adfSAlexander Duyck 	tn_info(tn)->rcu.next = &tn_info(n)->rcu;
487fc86a93bSAlexander Duyck }
488fc86a93bSAlexander Duyck 
tnode_free(struct key_vector * tn)48935c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn)
490fc86a93bSAlexander Duyck {
49156ca2adfSAlexander Duyck 	struct callback_head *head = &tn_info(tn)->rcu;
492fc86a93bSAlexander Duyck 
493fc86a93bSAlexander Duyck 	while (head) {
494fc86a93bSAlexander Duyck 		head = head->next;
49541b489fdSAlexander Duyck 		tnode_free_size += TNODE_SIZE(1ul << tn->bits);
49637fd30f2SAlexander Duyck 		node_free(tn);
497fc86a93bSAlexander Duyck 
49856ca2adfSAlexander Duyck 		tn = container_of(head, struct tnode, rcu)->kv;
499fc86a93bSAlexander Duyck 	}
500fc86a93bSAlexander Duyck 
50173318c4bSKuniyuki Iwashima 	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
502fc86a93bSAlexander Duyck 		tnode_free_size = 0;
503fc86a93bSAlexander Duyck 		synchronize_rcu();
504fc86a93bSAlexander Duyck 	}
5050a5c0475SEric Dumazet }
5060a5c0475SEric Dumazet 
replace(struct trie * t,struct key_vector * oldtnode,struct key_vector * tn)50788bae714SAlexander Duyck static struct key_vector *replace(struct trie *t,
50835c6edacSAlexander Duyck 				  struct key_vector *oldtnode,
50935c6edacSAlexander Duyck 				  struct key_vector *tn)
51069fa57b1SAlexander Duyck {
51135c6edacSAlexander Duyck 	struct key_vector *tp = node_parent(oldtnode);
51269fa57b1SAlexander Duyck 	unsigned long i;
51369fa57b1SAlexander Duyck 
51469fa57b1SAlexander Duyck 	/* setup the parent pointer out of and back into this node */
51569fa57b1SAlexander Duyck 	NODE_INIT_PARENT(tn, tp);
51688bae714SAlexander Duyck 	put_child_root(tp, tn->key, tn);
51769fa57b1SAlexander Duyck 
51869fa57b1SAlexander Duyck 	/* update all of the child parent pointers */
51969fa57b1SAlexander Duyck 	update_children(tn);
52069fa57b1SAlexander Duyck 
52169fa57b1SAlexander Duyck 	/* all pointers should be clean so we are done */
52269fa57b1SAlexander Duyck 	tnode_free(oldtnode);
52369fa57b1SAlexander Duyck 
52469fa57b1SAlexander Duyck 	/* resize children now that oldtnode is freed */
5252e1ac88aSAlexander Duyck 	for (i = child_length(tn); i;) {
526754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(tn, --i);
52769fa57b1SAlexander Duyck 
52869fa57b1SAlexander Duyck 		/* resize child node */
52969fa57b1SAlexander Duyck 		if (tnode_full(tn, inode))
53088bae714SAlexander Duyck 			tn = resize(t, inode);
53169fa57b1SAlexander Duyck 	}
5328d8e810cSAlexander Duyck 
53388bae714SAlexander Duyck 	return tp;
53469fa57b1SAlexander Duyck }
53569fa57b1SAlexander Duyck 
inflate(struct trie * t,struct key_vector * oldtnode)53688bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t,
53735c6edacSAlexander Duyck 				  struct key_vector *oldtnode)
53819baf839SRobert Olsson {
53935c6edacSAlexander Duyck 	struct key_vector *tn;
54069fa57b1SAlexander Duyck 	unsigned long i;
541e9b44019SAlexander Duyck 	t_key m;
54219baf839SRobert Olsson 
5430c7770c7SStephen Hemminger 	pr_debug("In inflate\n");
54419baf839SRobert Olsson 
545e9b44019SAlexander Duyck 	tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
5462f80b3c8SRobert Olsson 	if (!tn)
5478d8e810cSAlexander Duyck 		goto notnode;
5482f36895aSRobert Olsson 
54969fa57b1SAlexander Duyck 	/* prepare oldtnode to be freed */
55069fa57b1SAlexander Duyck 	tnode_free_init(oldtnode);
55169fa57b1SAlexander Duyck 
55212c081a5SAlexander Duyck 	/* Assemble all of the pointers in our cluster, in this case that
55312c081a5SAlexander Duyck 	 * represents all of the pointers out of our allocated nodes that
55412c081a5SAlexander Duyck 	 * point to existing tnodes and the links between our allocated
55512c081a5SAlexander Duyck 	 * nodes.
5562f36895aSRobert Olsson 	 */
5572e1ac88aSAlexander Duyck 	for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
558754baf8dSAlexander Duyck 		struct key_vector *inode = get_child(oldtnode, --i);
55935c6edacSAlexander Duyck 		struct key_vector *node0, *node1;
56069fa57b1SAlexander Duyck 		unsigned long j, k;
56119baf839SRobert Olsson 
56219baf839SRobert Olsson 		/* An empty child */
56351456b29SIan Morris 		if (!inode)
56419baf839SRobert Olsson 			continue;
56519baf839SRobert Olsson 
56619baf839SRobert Olsson 		/* A leaf or an internal node with skipped bits */
567adaf9816SAlexander Duyck 		if (!tnode_full(oldtnode, inode)) {
568e9b44019SAlexander Duyck 			put_child(tn, get_index(inode->key, tn), inode);
56919baf839SRobert Olsson 			continue;
57019baf839SRobert Olsson 		}
57119baf839SRobert Olsson 
57269fa57b1SAlexander Duyck 		/* drop the node in the old tnode free list */
57369fa57b1SAlexander Duyck 		tnode_free_append(oldtnode, inode);
57469fa57b1SAlexander Duyck 
57519baf839SRobert Olsson 		/* An internal node with two children */
57619baf839SRobert Olsson 		if (inode->bits == 1) {
577754baf8dSAlexander Duyck 			put_child(tn, 2 * i + 1, get_child(inode, 1));
578754baf8dSAlexander Duyck 			put_child(tn, 2 * i, get_child(inode, 0));
57991b9a277SOlof Johansson 			continue;
58019baf839SRobert Olsson 		}
58119baf839SRobert Olsson 
58219baf839SRobert Olsson 		/* We will replace this node 'inode' with two new
58312c081a5SAlexander Duyck 		 * ones, 'node0' and 'node1', each with half of the
58419baf839SRobert Olsson 		 * original children. The two new nodes will have
58519baf839SRobert Olsson 		 * a position one bit further down the key and this
58619baf839SRobert Olsson 		 * means that the "significant" part of their keys
58719baf839SRobert Olsson 		 * (see the discussion near the top of this file)
58819baf839SRobert Olsson 		 * will differ by one bit, which will be "0" in
58912c081a5SAlexander Duyck 		 * node0's key and "1" in node1's key. Since we are
59019baf839SRobert Olsson 		 * moving the key position by one step, the bit that
59119baf839SRobert Olsson 		 * we are moving away from - the bit at position
59212c081a5SAlexander Duyck 		 * (tn->pos) - is the one that will differ between
59312c081a5SAlexander Duyck 		 * node0 and node1. So... we synthesize that bit in the
59419baf839SRobert Olsson 		 * two new keys.
59519baf839SRobert Olsson 		 */
59612c081a5SAlexander Duyck 		node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
59712c081a5SAlexander Duyck 		if (!node1)
59812c081a5SAlexander Duyck 			goto nomem;
59969fa57b1SAlexander Duyck 		node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
60019baf839SRobert Olsson 
60169fa57b1SAlexander Duyck 		tnode_free_append(tn, node1);
60212c081a5SAlexander Duyck 		if (!node0)
60312c081a5SAlexander Duyck 			goto nomem;
60412c081a5SAlexander Duyck 		tnode_free_append(tn, node0);
6052f36895aSRobert Olsson 
60612c081a5SAlexander Duyck 		/* populate child pointers in new nodes */
6072e1ac88aSAlexander Duyck 		for (k = child_length(inode), j = k / 2; j;) {
608754baf8dSAlexander Duyck 			put_child(node1, --j, get_child(inode, --k));
609754baf8dSAlexander Duyck 			put_child(node0, j, get_child(inode, j));
610754baf8dSAlexander Duyck 			put_child(node1, --j, get_child(inode, --k));
611754baf8dSAlexander Duyck 			put_child(node0, j, get_child(inode, j));
61219baf839SRobert Olsson 		}
613ff181ed8SAlexander Duyck 
61412c081a5SAlexander Duyck 		/* link new nodes to parent */
61512c081a5SAlexander Duyck 		NODE_INIT_PARENT(node1, tn);
61612c081a5SAlexander Duyck 		NODE_INIT_PARENT(node0, tn);
61712c081a5SAlexander Duyck 
61812c081a5SAlexander Duyck 		/* link parent to nodes */
61912c081a5SAlexander Duyck 		put_child(tn, 2 * i + 1, node1);
62012c081a5SAlexander Duyck 		put_child(tn, 2 * i, node0);
62112c081a5SAlexander Duyck 	}
62212c081a5SAlexander Duyck 
62369fa57b1SAlexander Duyck 	/* setup the parent pointers into and out of this node */
6248d8e810cSAlexander Duyck 	return replace(t, oldtnode, tn);
625ff181ed8SAlexander Duyck nomem:
626fc86a93bSAlexander Duyck 	/* all pointers should be clean so we are done */
627fc86a93bSAlexander Duyck 	tnode_free(tn);
6288d8e810cSAlexander Duyck notnode:
6298d8e810cSAlexander Duyck 	return NULL;
630ff181ed8SAlexander Duyck }
631ff181ed8SAlexander Duyck 
halve(struct trie * t,struct key_vector * oldtnode)63288bae714SAlexander Duyck static struct key_vector *halve(struct trie *t,
63335c6edacSAlexander Duyck 				struct key_vector *oldtnode)
63419baf839SRobert Olsson {
63535c6edacSAlexander Duyck 	struct key_vector *tn;
63612c081a5SAlexander Duyck 	unsigned long i;
63719baf839SRobert Olsson 
6380c7770c7SStephen Hemminger 	pr_debug("In halve\n");
63919baf839SRobert Olsson 
640e9b44019SAlexander Duyck 	tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
6412f80b3c8SRobert Olsson 	if (!tn)
6428d8e810cSAlexander Duyck 		goto notnode;
6432f36895aSRobert Olsson 
64469fa57b1SAlexander Duyck 	/* prepare oldtnode to be freed */
64569fa57b1SAlexander Duyck 	tnode_free_init(oldtnode);
64669fa57b1SAlexander Duyck 
64712c081a5SAlexander Duyck 	/* Assemble all of the pointers in our cluster, in this case that
64812c081a5SAlexander Duyck 	 * represents all of the pointers out of our allocated nodes that
64912c081a5SAlexander Duyck 	 * point to existing tnodes and the links between our allocated
65012c081a5SAlexander Duyck 	 * nodes.
6512f36895aSRobert Olsson 	 */
6522e1ac88aSAlexander Duyck 	for (i = child_length(oldtnode); i;) {
653754baf8dSAlexander Duyck 		struct key_vector *node1 = get_child(oldtnode, --i);
654754baf8dSAlexander Duyck 		struct key_vector *node0 = get_child(oldtnode, --i);
65535c6edacSAlexander Duyck 		struct key_vector *inode;
6562f36895aSRobert Olsson 
65712c081a5SAlexander Duyck 		/* At least one of the children is empty */
65812c081a5SAlexander Duyck 		if (!node1 || !node0) {
65912c081a5SAlexander Duyck 			put_child(tn, i / 2, node1 ? : node0);
66012c081a5SAlexander Duyck 			continue;
66112c081a5SAlexander Duyck 		}
6622f36895aSRobert Olsson 
6632f36895aSRobert Olsson 		/* Two nonempty children */
66412c081a5SAlexander Duyck 		inode = tnode_new(node0->key, oldtnode->pos, 1);
6658d8e810cSAlexander Duyck 		if (!inode)
6668d8e810cSAlexander Duyck 			goto nomem;
66712c081a5SAlexander Duyck 		tnode_free_append(tn, inode);
6682f80b3c8SRobert Olsson 
66912c081a5SAlexander Duyck 		/* initialize pointers out of node */
67012c081a5SAlexander Duyck 		put_child(inode, 1, node1);
67112c081a5SAlexander Duyck 		put_child(inode, 0, node0);
67212c081a5SAlexander Duyck 		NODE_INIT_PARENT(inode, tn);
67312c081a5SAlexander Duyck 
67412c081a5SAlexander Duyck 		/* link parent to node */
67512c081a5SAlexander Duyck 		put_child(tn, i / 2, inode);
6762f36895aSRobert Olsson 	}
6772f36895aSRobert Olsson 
67869fa57b1SAlexander Duyck 	/* setup the parent pointers into and out of this node */
6798d8e810cSAlexander Duyck 	return replace(t, oldtnode, tn);
6808d8e810cSAlexander Duyck nomem:
6818d8e810cSAlexander Duyck 	/* all pointers should be clean so we are done */
6828d8e810cSAlexander Duyck 	tnode_free(tn);
6838d8e810cSAlexander Duyck notnode:
6848d8e810cSAlexander Duyck 	return NULL;
6852f80b3c8SRobert Olsson }
68619baf839SRobert Olsson 
collapse(struct trie * t,struct key_vector * oldtnode)68788bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t,
68888bae714SAlexander Duyck 				   struct key_vector *oldtnode)
68995f60ea3SAlexander Duyck {
69035c6edacSAlexander Duyck 	struct key_vector *n, *tp;
69195f60ea3SAlexander Duyck 	unsigned long i;
69295f60ea3SAlexander Duyck 
69395f60ea3SAlexander Duyck 	/* scan the tnode looking for that one child that might still exist */
6942e1ac88aSAlexander Duyck 	for (n = NULL, i = child_length(oldtnode); !n && i;)
695754baf8dSAlexander Duyck 		n = get_child(oldtnode, --i);
69695f60ea3SAlexander Duyck 
69795f60ea3SAlexander Duyck 	/* compress one level */
69895f60ea3SAlexander Duyck 	tp = node_parent(oldtnode);
69988bae714SAlexander Duyck 	put_child_root(tp, oldtnode->key, n);
70095f60ea3SAlexander Duyck 	node_set_parent(n, tp);
70195f60ea3SAlexander Duyck 
70295f60ea3SAlexander Duyck 	/* drop dead node */
70395f60ea3SAlexander Duyck 	node_free(oldtnode);
70488bae714SAlexander Duyck 
70588bae714SAlexander Duyck 	return tp;
70695f60ea3SAlexander Duyck }
70795f60ea3SAlexander Duyck 
update_suffix(struct key_vector * tn)70835c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn)
7095405afd1SAlexander Duyck {
7105405afd1SAlexander Duyck 	unsigned char slen = tn->pos;
7115405afd1SAlexander Duyck 	unsigned long stride, i;
712a52ca62cSAlexander Duyck 	unsigned char slen_max;
713a52ca62cSAlexander Duyck 
714a52ca62cSAlexander Duyck 	/* only vector 0 can have a suffix length greater than or equal to
715a52ca62cSAlexander Duyck 	 * tn->pos + tn->bits, the second highest node will have a suffix
716a52ca62cSAlexander Duyck 	 * length at most of tn->pos + tn->bits - 1
717a52ca62cSAlexander Duyck 	 */
718a52ca62cSAlexander Duyck 	slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
7195405afd1SAlexander Duyck 
7205405afd1SAlexander Duyck 	/* search though the list of children looking for nodes that might
7215405afd1SAlexander Duyck 	 * have a suffix greater than the one we currently have.  This is
7225405afd1SAlexander Duyck 	 * why we start with a stride of 2 since a stride of 1 would
7235405afd1SAlexander Duyck 	 * represent the nodes with suffix length equal to tn->pos
7245405afd1SAlexander Duyck 	 */
7252e1ac88aSAlexander Duyck 	for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
726754baf8dSAlexander Duyck 		struct key_vector *n = get_child(tn, i);
7275405afd1SAlexander Duyck 
7285405afd1SAlexander Duyck 		if (!n || (n->slen <= slen))
7295405afd1SAlexander Duyck 			continue;
7305405afd1SAlexander Duyck 
7315405afd1SAlexander Duyck 		/* update stride and slen based on new value */
7325405afd1SAlexander Duyck 		stride <<= (n->slen - slen);
7335405afd1SAlexander Duyck 		slen = n->slen;
7345405afd1SAlexander Duyck 		i &= ~(stride - 1);
7355405afd1SAlexander Duyck 
736a52ca62cSAlexander Duyck 		/* stop searching if we have hit the maximum possible value */
737a52ca62cSAlexander Duyck 		if (slen >= slen_max)
7385405afd1SAlexander Duyck 			break;
7395405afd1SAlexander Duyck 	}
7405405afd1SAlexander Duyck 
7415405afd1SAlexander Duyck 	tn->slen = slen;
7425405afd1SAlexander Duyck 
7435405afd1SAlexander Duyck 	return slen;
7445405afd1SAlexander Duyck }
7455405afd1SAlexander Duyck 
746f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
747cf3637bbSAlexander Duyck  * the Helsinki University of Technology and Matti Tikkanen of Nokia
748cf3637bbSAlexander Duyck  * Telecommunications, page 6:
749cf3637bbSAlexander Duyck  * "A node is doubled if the ratio of non-empty children to all
750cf3637bbSAlexander Duyck  * children in the *doubled* node is at least 'high'."
751cf3637bbSAlexander Duyck  *
752cf3637bbSAlexander Duyck  * 'high' in this instance is the variable 'inflate_threshold'. It
753cf3637bbSAlexander Duyck  * is expressed as a percentage, so we multiply it with
7542e1ac88aSAlexander Duyck  * child_length() and instead of multiplying by 2 (since the
755cf3637bbSAlexander Duyck  * child array will be doubled by inflate()) and multiplying
756cf3637bbSAlexander Duyck  * the left-hand side by 100 (to handle the percentage thing) we
757cf3637bbSAlexander Duyck  * multiply the left-hand side by 50.
758cf3637bbSAlexander Duyck  *
7592e1ac88aSAlexander Duyck  * The left-hand side may look a bit weird: child_length(tn)
760cf3637bbSAlexander Duyck  * - tn->empty_children is of course the number of non-null children
761cf3637bbSAlexander Duyck  * in the current node. tn->full_children is the number of "full"
762cf3637bbSAlexander Duyck  * children, that is non-null tnodes with a skip value of 0.
763cf3637bbSAlexander Duyck  * All of those will be doubled in the resulting inflated tnode, so
764cf3637bbSAlexander Duyck  * we just count them one extra time here.
765cf3637bbSAlexander Duyck  *
766cf3637bbSAlexander Duyck  * A clearer way to write this would be:
767cf3637bbSAlexander Duyck  *
768cf3637bbSAlexander Duyck  * to_be_doubled = tn->full_children;
7692e1ac88aSAlexander Duyck  * not_to_be_doubled = child_length(tn) - tn->empty_children -
770cf3637bbSAlexander Duyck  *     tn->full_children;
771cf3637bbSAlexander Duyck  *
7722e1ac88aSAlexander Duyck  * new_child_length = child_length(tn) * 2;
773cf3637bbSAlexander Duyck  *
774cf3637bbSAlexander Duyck  * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
775cf3637bbSAlexander Duyck  *      new_child_length;
776cf3637bbSAlexander Duyck  * if (new_fill_factor >= inflate_threshold)
777cf3637bbSAlexander Duyck  *
778cf3637bbSAlexander Duyck  * ...and so on, tho it would mess up the while () loop.
779cf3637bbSAlexander Duyck  *
780cf3637bbSAlexander Duyck  * anyway,
781cf3637bbSAlexander Duyck  * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
782cf3637bbSAlexander Duyck  *      inflate_threshold
783cf3637bbSAlexander Duyck  *
784cf3637bbSAlexander Duyck  * avoid a division:
785cf3637bbSAlexander Duyck  * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
786cf3637bbSAlexander Duyck  *      inflate_threshold * new_child_length
787cf3637bbSAlexander Duyck  *
788cf3637bbSAlexander Duyck  * expand not_to_be_doubled and to_be_doubled, and shorten:
7892e1ac88aSAlexander Duyck  * 100 * (child_length(tn) - tn->empty_children +
790cf3637bbSAlexander Duyck  *    tn->full_children) >= inflate_threshold * new_child_length
791cf3637bbSAlexander Duyck  *
792cf3637bbSAlexander Duyck  * expand new_child_length:
7932e1ac88aSAlexander Duyck  * 100 * (child_length(tn) - tn->empty_children +
794cf3637bbSAlexander Duyck  *    tn->full_children) >=
7952e1ac88aSAlexander Duyck  *      inflate_threshold * child_length(tn) * 2
796cf3637bbSAlexander Duyck  *
797cf3637bbSAlexander Duyck  * shorten again:
7982e1ac88aSAlexander Duyck  * 50 * (tn->full_children + child_length(tn) -
799cf3637bbSAlexander Duyck  *    tn->empty_children) >= inflate_threshold *
8002e1ac88aSAlexander Duyck  *    child_length(tn)
801cf3637bbSAlexander Duyck  *
802cf3637bbSAlexander Duyck  */
should_inflate(struct key_vector * tp,struct key_vector * tn)80335c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
804f05a4819SAlexander Duyck {
8052e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
806f05a4819SAlexander Duyck 	unsigned long threshold = used;
807cf3637bbSAlexander Duyck 
808cf3637bbSAlexander Duyck 	/* Keep root node larger */
80988bae714SAlexander Duyck 	threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
8106e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
8116e22d174SAlexander Duyck 	used += tn_info(tn)->full_children;
812cf3637bbSAlexander Duyck 
81395f60ea3SAlexander Duyck 	/* if bits == KEYLENGTH then pos = 0, and will fail below */
81495f60ea3SAlexander Duyck 
81595f60ea3SAlexander Duyck 	return (used > 1) && tn->pos && ((50 * used) >= threshold);
816cf3637bbSAlexander Duyck }
817cf3637bbSAlexander Duyck 
should_halve(struct key_vector * tp,struct key_vector * tn)81835c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
819f05a4819SAlexander Duyck {
8202e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
821f05a4819SAlexander Duyck 	unsigned long threshold = used;
822cf3637bbSAlexander Duyck 
823f05a4819SAlexander Duyck 	/* Keep root node larger */
82488bae714SAlexander Duyck 	threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
8256e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
826f05a4819SAlexander Duyck 
82795f60ea3SAlexander Duyck 	/* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */
82895f60ea3SAlexander Duyck 
82995f60ea3SAlexander Duyck 	return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
83095f60ea3SAlexander Duyck }
83195f60ea3SAlexander Duyck 
should_collapse(struct key_vector * tn)83235c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn)
83395f60ea3SAlexander Duyck {
8342e1ac88aSAlexander Duyck 	unsigned long used = child_length(tn);
83595f60ea3SAlexander Duyck 
8366e22d174SAlexander Duyck 	used -= tn_info(tn)->empty_children;
83795f60ea3SAlexander Duyck 
83895f60ea3SAlexander Duyck 	/* account for bits == KEYLENGTH case */
8396e22d174SAlexander Duyck 	if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
84095f60ea3SAlexander Duyck 		used -= KEY_MAX;
84195f60ea3SAlexander Duyck 
84295f60ea3SAlexander Duyck 	/* One child or none, time to drop us from the trie */
84395f60ea3SAlexander Duyck 	return used < 2;
844f05a4819SAlexander Duyck }
845f05a4819SAlexander Duyck 
846f05a4819SAlexander Duyck #define MAX_WORK 10
resize(struct trie * t,struct key_vector * tn)84788bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn)
848f05a4819SAlexander Duyck {
8498d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8508d8e810cSAlexander Duyck 	struct trie_use_stats __percpu *stats = t->stats;
8518d8e810cSAlexander Duyck #endif
85235c6edacSAlexander Duyck 	struct key_vector *tp = node_parent(tn);
85388bae714SAlexander Duyck 	unsigned long cindex = get_index(tn->key, tp);
854a80e89d4SAlexander Duyck 	int max_work = MAX_WORK;
855f05a4819SAlexander Duyck 
856f05a4819SAlexander Duyck 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
857f05a4819SAlexander Duyck 		 tn, inflate_threshold, halve_threshold);
858f05a4819SAlexander Duyck 
859ff181ed8SAlexander Duyck 	/* track the tnode via the pointer from the parent instead of
860ff181ed8SAlexander Duyck 	 * doing it ourselves.  This way we can let RCU fully do its
861ff181ed8SAlexander Duyck 	 * thing without us interfering
862ff181ed8SAlexander Duyck 	 */
86388bae714SAlexander Duyck 	BUG_ON(tn != get_child(tp, cindex));
864ff181ed8SAlexander Duyck 
865f05a4819SAlexander Duyck 	/* Double as long as the resulting node has a number of
866f05a4819SAlexander Duyck 	 * nonempty nodes that are above the threshold.
867f05a4819SAlexander Duyck 	 */
868b6f15f82SAlexander Duyck 	while (should_inflate(tp, tn) && max_work) {
86988bae714SAlexander Duyck 		tp = inflate(t, tn);
87088bae714SAlexander Duyck 		if (!tp) {
871cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8728d8e810cSAlexander Duyck 			this_cpu_inc(stats->resize_node_skipped);
873cf3637bbSAlexander Duyck #endif
874cf3637bbSAlexander Duyck 			break;
875cf3637bbSAlexander Duyck 		}
876ff181ed8SAlexander Duyck 
877b6f15f82SAlexander Duyck 		max_work--;
87888bae714SAlexander Duyck 		tn = get_child(tp, cindex);
879cf3637bbSAlexander Duyck 	}
880cf3637bbSAlexander Duyck 
881b6f15f82SAlexander Duyck 	/* update parent in case inflate failed */
882b6f15f82SAlexander Duyck 	tp = node_parent(tn);
883b6f15f82SAlexander Duyck 
884cf3637bbSAlexander Duyck 	/* Return if at least one inflate is run */
885cf3637bbSAlexander Duyck 	if (max_work != MAX_WORK)
886b6f15f82SAlexander Duyck 		return tp;
887cf3637bbSAlexander Duyck 
888f05a4819SAlexander Duyck 	/* Halve as long as the number of empty children in this
889cf3637bbSAlexander Duyck 	 * node is above threshold.
890cf3637bbSAlexander Duyck 	 */
891b6f15f82SAlexander Duyck 	while (should_halve(tp, tn) && max_work) {
89288bae714SAlexander Duyck 		tp = halve(t, tn);
89388bae714SAlexander Duyck 		if (!tp) {
894cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
8958d8e810cSAlexander Duyck 			this_cpu_inc(stats->resize_node_skipped);
896cf3637bbSAlexander Duyck #endif
897cf3637bbSAlexander Duyck 			break;
898cf3637bbSAlexander Duyck 		}
899cf3637bbSAlexander Duyck 
900b6f15f82SAlexander Duyck 		max_work--;
90188bae714SAlexander Duyck 		tn = get_child(tp, cindex);
902ff181ed8SAlexander Duyck 	}
903cf3637bbSAlexander Duyck 
904cf3637bbSAlexander Duyck 	/* Only one child remains */
90588bae714SAlexander Duyck 	if (should_collapse(tn))
90688bae714SAlexander Duyck 		return collapse(t, tn);
90788bae714SAlexander Duyck 
908b6f15f82SAlexander Duyck 	/* update parent in case halve failed */
909a52ca62cSAlexander Duyck 	return node_parent(tn);
910cf3637bbSAlexander Duyck }
9118d8e810cSAlexander Duyck 
node_pull_suffix(struct key_vector * tn,unsigned char slen)9121a239173SAlexander Duyck static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
91319baf839SRobert Olsson {
9141a239173SAlexander Duyck 	unsigned char node_slen = tn->slen;
9151a239173SAlexander Duyck 
9161a239173SAlexander Duyck 	while ((node_slen > tn->pos) && (node_slen > slen)) {
9171a239173SAlexander Duyck 		slen = update_suffix(tn);
9181a239173SAlexander Duyck 		if (node_slen == slen)
9195405afd1SAlexander Duyck 			break;
9201a239173SAlexander Duyck 
9211a239173SAlexander Duyck 		tn = node_parent(tn);
9221a239173SAlexander Duyck 		node_slen = tn->slen;
9235405afd1SAlexander Duyck 	}
9245405afd1SAlexander Duyck }
9255405afd1SAlexander Duyck 
node_push_suffix(struct key_vector * tn,unsigned char slen)9261a239173SAlexander Duyck static void node_push_suffix(struct key_vector *tn, unsigned char slen)
9275405afd1SAlexander Duyck {
9281a239173SAlexander Duyck 	while (tn->slen < slen) {
9291a239173SAlexander Duyck 		tn->slen = slen;
9305405afd1SAlexander Duyck 		tn = node_parent(tn);
9315405afd1SAlexander Duyck 	}
9325405afd1SAlexander Duyck }
9335405afd1SAlexander Duyck 
9342373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */
fib_find_node(struct trie * t,struct key_vector ** tp,u32 key)93535c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t,
93635c6edacSAlexander Duyck 					struct key_vector **tp, u32 key)
93719baf839SRobert Olsson {
93888bae714SAlexander Duyck 	struct key_vector *pn, *n = t->kv;
93988bae714SAlexander Duyck 	unsigned long index = 0;
94019baf839SRobert Olsson 
94188bae714SAlexander Duyck 	do {
94288bae714SAlexander Duyck 		pn = n;
94388bae714SAlexander Duyck 		n = get_child_rcu(n, index);
94488bae714SAlexander Duyck 
94588bae714SAlexander Duyck 		if (!n)
94688bae714SAlexander Duyck 			break;
94788bae714SAlexander Duyck 
94888bae714SAlexander Duyck 		index = get_cindex(key, n);
94919baf839SRobert Olsson 
950939afb06SAlexander Duyck 		/* This bit of code is a bit tricky but it combines multiple
951939afb06SAlexander Duyck 		 * checks into a single check.  The prefix consists of the
952939afb06SAlexander Duyck 		 * prefix plus zeros for the bits in the cindex. The index
953939afb06SAlexander Duyck 		 * is the difference between the key and this value.  From
954939afb06SAlexander Duyck 		 * this we can actually derive several pieces of data.
955d4a975e8SAlexander Duyck 		 *   if (index >= (1ul << bits))
956939afb06SAlexander Duyck 		 *     we have a mismatch in skip bits and failed
957b3832117SAlexander Duyck 		 *   else
958b3832117SAlexander Duyck 		 *     we know the value is cindex
959d4a975e8SAlexander Duyck 		 *
960d4a975e8SAlexander Duyck 		 * This check is safe even if bits == KEYLENGTH due to the
961d4a975e8SAlexander Duyck 		 * fact that we can only allocate a node with 32 bits if a
962d4a975e8SAlexander Duyck 		 * long is greater than 32 bits.
963939afb06SAlexander Duyck 		 */
964d4a975e8SAlexander Duyck 		if (index >= (1ul << n->bits)) {
965d4a975e8SAlexander Duyck 			n = NULL;
966d4a975e8SAlexander Duyck 			break;
967d4a975e8SAlexander Duyck 		}
968939afb06SAlexander Duyck 
96988bae714SAlexander Duyck 		/* keep searching until we find a perfect match leaf or NULL */
97088bae714SAlexander Duyck 	} while (IS_TNODE(n));
971939afb06SAlexander Duyck 
97235c6edacSAlexander Duyck 	*tp = pn;
973d4a975e8SAlexander Duyck 
974939afb06SAlexander Duyck 	return n;
97519baf839SRobert Olsson }
97619baf839SRobert Olsson 
97732ccf110SGuillaume Nault /* Return the first fib alias matching DSCP with
97802525368SAlexander Duyck  * priority less than or equal to PRIO.
979b5fc0430SIdo Schimmel  * If 'find_first' is set, return the first matching
98032ccf110SGuillaume Nault  * fib alias, regardless of DSCP and priority.
98102525368SAlexander Duyck  */
fib_find_alias(struct hlist_head * fah,u8 slen,dscp_t dscp,u32 prio,u32 tb_id,bool find_first)98279e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
98332ccf110SGuillaume Nault 					dscp_t dscp, u32 prio, u32 tb_id,
984b5fc0430SIdo Schimmel 					bool find_first)
98502525368SAlexander Duyck {
98602525368SAlexander Duyck 	struct fib_alias *fa;
98702525368SAlexander Duyck 
98802525368SAlexander Duyck 	if (!fah)
98902525368SAlexander Duyck 		return NULL;
99002525368SAlexander Duyck 
99156315f9eSAlexander Duyck 	hlist_for_each_entry(fa, fah, fa_list) {
99232ccf110SGuillaume Nault 		/* Avoid Sparse warning when using dscp_t in inequalities */
99332ccf110SGuillaume Nault 		u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
99432ccf110SGuillaume Nault 		u8 __dscp = inet_dscp_to_dsfield(dscp);
99532ccf110SGuillaume Nault 
99679e5ad2cSAlexander Duyck 		if (fa->fa_slen < slen)
99779e5ad2cSAlexander Duyck 			continue;
99879e5ad2cSAlexander Duyck 		if (fa->fa_slen != slen)
99979e5ad2cSAlexander Duyck 			break;
10000b65bd97SAlexander Duyck 		if (fa->tb_id > tb_id)
10010b65bd97SAlexander Duyck 			continue;
10020b65bd97SAlexander Duyck 		if (fa->tb_id != tb_id)
10030b65bd97SAlexander Duyck 			break;
1004b5fc0430SIdo Schimmel 		if (find_first)
1005b5fc0430SIdo Schimmel 			return fa;
100632ccf110SGuillaume Nault 		if (__fa_dscp > __dscp)
100702525368SAlexander Duyck 			continue;
100832ccf110SGuillaume Nault 		if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
100902525368SAlexander Duyck 			return fa;
101002525368SAlexander Duyck 	}
101102525368SAlexander Duyck 
101202525368SAlexander Duyck 	return NULL;
101302525368SAlexander Duyck }
101402525368SAlexander Duyck 
101590b93f1bSIdo Schimmel static struct fib_alias *
fib_find_matching_alias(struct net * net,const struct fib_rt_info * fri)101690b93f1bSIdo Schimmel fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
101790b93f1bSIdo Schimmel {
101890b93f1bSIdo Schimmel 	u8 slen = KEYLENGTH - fri->dst_len;
101990b93f1bSIdo Schimmel 	struct key_vector *l, *tp;
102090b93f1bSIdo Schimmel 	struct fib_table *tb;
102190b93f1bSIdo Schimmel 	struct fib_alias *fa;
102290b93f1bSIdo Schimmel 	struct trie *t;
102390b93f1bSIdo Schimmel 
102490b93f1bSIdo Schimmel 	tb = fib_get_table(net, fri->tb_id);
102590b93f1bSIdo Schimmel 	if (!tb)
102690b93f1bSIdo Schimmel 		return NULL;
102790b93f1bSIdo Schimmel 
102890b93f1bSIdo Schimmel 	t = (struct trie *)tb->tb_data;
102990b93f1bSIdo Schimmel 	l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
103090b93f1bSIdo Schimmel 	if (!l)
103190b93f1bSIdo Schimmel 		return NULL;
103290b93f1bSIdo Schimmel 
103390b93f1bSIdo Schimmel 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
103490b93f1bSIdo Schimmel 		if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
1035888ade8fSGuillaume Nault 		    fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
1036888ade8fSGuillaume Nault 		    fa->fa_type == fri->type)
103790b93f1bSIdo Schimmel 			return fa;
103890b93f1bSIdo Schimmel 	}
103990b93f1bSIdo Schimmel 
104090b93f1bSIdo Schimmel 	return NULL;
104190b93f1bSIdo Schimmel }
104290b93f1bSIdo Schimmel 
fib_alias_hw_flags_set(struct net * net,const struct fib_rt_info * fri)104390b93f1bSIdo Schimmel void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
104490b93f1bSIdo Schimmel {
104596b9bd8cSKuniyuki Iwashima 	u8 fib_notify_on_flag_change;
104690b93f1bSIdo Schimmel 	struct fib_alias *fa_match;
1047680aea08SAmit Cohen 	struct sk_buff *skb;
1048680aea08SAmit Cohen 	int err;
104990b93f1bSIdo Schimmel 
105090b93f1bSIdo Schimmel 	rcu_read_lock();
105190b93f1bSIdo Schimmel 
105290b93f1bSIdo Schimmel 	fa_match = fib_find_matching_alias(net, fri);
105390b93f1bSIdo Schimmel 	if (!fa_match)
105490b93f1bSIdo Schimmel 		goto out;
105590b93f1bSIdo Schimmel 
10569fcf986cSEric Dumazet 	/* These are paired with the WRITE_ONCE() happening in this function.
10579fcf986cSEric Dumazet 	 * The reason is that we are only protected by RCU at this point.
10589fcf986cSEric Dumazet 	 */
10599fcf986cSEric Dumazet 	if (READ_ONCE(fa_match->offload) == fri->offload &&
10609fcf986cSEric Dumazet 	    READ_ONCE(fa_match->trap) == fri->trap &&
10619fcf986cSEric Dumazet 	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1062680aea08SAmit Cohen 		goto out;
1063680aea08SAmit Cohen 
10649fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->offload, fri->offload);
10659fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->trap, fri->trap);
1066648106c3SAmit Cohen 
106796b9bd8cSKuniyuki Iwashima 	fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
106896b9bd8cSKuniyuki Iwashima 
1069648106c3SAmit Cohen 	/* 2 means send notifications only if offload_failed was changed. */
107096b9bd8cSKuniyuki Iwashima 	if (fib_notify_on_flag_change == 2 &&
10719fcf986cSEric Dumazet 	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1072648106c3SAmit Cohen 		goto out;
1073648106c3SAmit Cohen 
10749fcf986cSEric Dumazet 	WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
107590b93f1bSIdo Schimmel 
107696b9bd8cSKuniyuki Iwashima 	if (!fib_notify_on_flag_change)
1077680aea08SAmit Cohen 		goto out;
1078680aea08SAmit Cohen 
1079680aea08SAmit Cohen 	skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
1080680aea08SAmit Cohen 	if (!skb) {
1081680aea08SAmit Cohen 		err = -ENOBUFS;
1082680aea08SAmit Cohen 		goto errout;
1083680aea08SAmit Cohen 	}
1084680aea08SAmit Cohen 
1085680aea08SAmit Cohen 	err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
1086680aea08SAmit Cohen 	if (err < 0) {
1087680aea08SAmit Cohen 		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
1088680aea08SAmit Cohen 		WARN_ON(err == -EMSGSIZE);
1089680aea08SAmit Cohen 		kfree_skb(skb);
1090680aea08SAmit Cohen 		goto errout;
1091680aea08SAmit Cohen 	}
1092680aea08SAmit Cohen 
1093680aea08SAmit Cohen 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
1094680aea08SAmit Cohen 	goto out;
1095680aea08SAmit Cohen 
1096680aea08SAmit Cohen errout:
1097680aea08SAmit Cohen 	rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
109890b93f1bSIdo Schimmel out:
109990b93f1bSIdo Schimmel 	rcu_read_unlock();
110090b93f1bSIdo Schimmel }
110190b93f1bSIdo Schimmel EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
110290b93f1bSIdo Schimmel 
trie_rebalance(struct trie * t,struct key_vector * tn)110335c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn)
110419baf839SRobert Olsson {
110588bae714SAlexander Duyck 	while (!IS_TRIE(tn))
110688bae714SAlexander Duyck 		tn = resize(t, tn);
110719baf839SRobert Olsson }
110819baf839SRobert Olsson 
fib_insert_node(struct trie * t,struct key_vector * tp,struct fib_alias * new,t_key key)110935c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp,
1110d5d6487cSAlexander Duyck 			   struct fib_alias *new, t_key key)
111119baf839SRobert Olsson {
111235c6edacSAlexander Duyck 	struct key_vector *n, *l;
1113836a0123SAlexander Duyck 
1114d5d6487cSAlexander Duyck 	l = leaf_new(key, new);
111579e5ad2cSAlexander Duyck 	if (!l)
11168d8e810cSAlexander Duyck 		goto noleaf;
1117d5d6487cSAlexander Duyck 
1118d5d6487cSAlexander Duyck 	/* retrieve child from parent node */
1119754baf8dSAlexander Duyck 	n = get_child(tp, get_index(key, tp));
112019baf839SRobert Olsson 
1121836a0123SAlexander Duyck 	/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
1122836a0123SAlexander Duyck 	 *
112319baf839SRobert Olsson 	 *  Add a new tnode here
112419baf839SRobert Olsson 	 *  first tnode need some special handling
1125836a0123SAlexander Duyck 	 *  leaves us in position for handling as case 3
112619baf839SRobert Olsson 	 */
112719baf839SRobert Olsson 	if (n) {
112835c6edacSAlexander Duyck 		struct key_vector *tn;
1129f835e471SRobert Olsson 
1130e9b44019SAlexander Duyck 		tn = tnode_new(key, __fls(key ^ n->key), 1);
11318d8e810cSAlexander Duyck 		if (!tn)
11328d8e810cSAlexander Duyck 			goto notnode;
113319baf839SRobert Olsson 
1134836a0123SAlexander Duyck 		/* initialize routes out of node */
1135836a0123SAlexander Duyck 		NODE_INIT_PARENT(tn, tp);
1136836a0123SAlexander Duyck 		put_child(tn, get_index(key, tn) ^ 1, n);
113719baf839SRobert Olsson 
1138836a0123SAlexander Duyck 		/* start adding routes into the node */
113988bae714SAlexander Duyck 		put_child_root(tp, key, tn);
1140836a0123SAlexander Duyck 		node_set_parent(n, tn);
114119baf839SRobert Olsson 
1142836a0123SAlexander Duyck 		/* parent now has a NULL spot where the leaf can go */
1143e962f302SAlexander Duyck 		tp = tn;
114419baf839SRobert Olsson 	}
114591b9a277SOlof Johansson 
1146836a0123SAlexander Duyck 	/* Case 3: n is NULL, and will just insert a new leaf */
1147a52ca62cSAlexander Duyck 	node_push_suffix(tp, new->fa_slen);
1148836a0123SAlexander Duyck 	NODE_INIT_PARENT(l, tp);
114988bae714SAlexander Duyck 	put_child_root(tp, key, l);
11507b85576dSJarek Poplawski 	trie_rebalance(t, tp);
1151d5d6487cSAlexander Duyck 
1152d5d6487cSAlexander Duyck 	return 0;
11538d8e810cSAlexander Duyck notnode:
11548d8e810cSAlexander Duyck 	node_free(l);
11558d8e810cSAlexander Duyck noleaf:
11568d8e810cSAlexander Duyck 	return -ENOMEM;
1157d5d6487cSAlexander Duyck }
1158d5d6487cSAlexander Duyck 
fib_insert_alias(struct trie * t,struct key_vector * tp,struct key_vector * l,struct fib_alias * new,struct fib_alias * fa,t_key key)115935c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp,
116035c6edacSAlexander Duyck 			    struct key_vector *l, struct fib_alias *new,
1161d5d6487cSAlexander Duyck 			    struct fib_alias *fa, t_key key)
1162d5d6487cSAlexander Duyck {
1163d5d6487cSAlexander Duyck 	if (!l)
1164d5d6487cSAlexander Duyck 		return fib_insert_node(t, tp, new, key);
1165d5d6487cSAlexander Duyck 
1166d5d6487cSAlexander Duyck 	if (fa) {
1167d5d6487cSAlexander Duyck 		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1168836a0123SAlexander Duyck 	} else {
1169d5d6487cSAlexander Duyck 		struct fib_alias *last;
1170d5d6487cSAlexander Duyck 
1171d5d6487cSAlexander Duyck 		hlist_for_each_entry(last, &l->leaf, fa_list) {
1172d5d6487cSAlexander Duyck 			if (new->fa_slen < last->fa_slen)
1173d5d6487cSAlexander Duyck 				break;
11740b65bd97SAlexander Duyck 			if ((new->fa_slen == last->fa_slen) &&
11750b65bd97SAlexander Duyck 			    (new->tb_id > last->tb_id))
11760b65bd97SAlexander Duyck 				break;
1177d5d6487cSAlexander Duyck 			fa = last;
1178836a0123SAlexander Duyck 		}
1179836a0123SAlexander Duyck 
1180d5d6487cSAlexander Duyck 		if (fa)
1181d5d6487cSAlexander Duyck 			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1182d5d6487cSAlexander Duyck 		else
1183d5d6487cSAlexander Duyck 			hlist_add_head_rcu(&new->fa_list, &l->leaf);
118419baf839SRobert Olsson 	}
118519baf839SRobert Olsson 
1186d5d6487cSAlexander Duyck 	/* if we added to the tail node then we need to update slen */
1187d5d6487cSAlexander Duyck 	if (l->slen < new->fa_slen) {
1188d5d6487cSAlexander Duyck 		l->slen = new->fa_slen;
11891a239173SAlexander Duyck 		node_push_suffix(tp, new->fa_slen);
1190d5d6487cSAlexander Duyck 	}
1191d5d6487cSAlexander Duyck 
1192d5d6487cSAlexander Duyck 	return 0;
1193d5d6487cSAlexander Duyck }
1194d5d6487cSAlexander Duyck 
fib_valid_key_len(u32 key,u8 plen,struct netlink_ext_ack * extack)119578055998SDavid Ahern static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1196ba277e8eSDavid Ahern {
119778055998SDavid Ahern 	if (plen > KEYLENGTH) {
119878055998SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
1199ba277e8eSDavid Ahern 		return false;
120078055998SDavid Ahern 	}
1201ba277e8eSDavid Ahern 
120278055998SDavid Ahern 	if ((plen < KEYLENGTH) && (key << plen)) {
120378055998SDavid Ahern 		NL_SET_ERR_MSG(extack,
120478055998SDavid Ahern 			       "Invalid prefix for given prefix length");
1205ba277e8eSDavid Ahern 		return false;
120678055998SDavid Ahern 	}
1207ba277e8eSDavid Ahern 
1208ba277e8eSDavid Ahern 	return true;
1209ba277e8eSDavid Ahern }
1210ba277e8eSDavid Ahern 
1211a6c76c17SIdo Schimmel static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1212a6c76c17SIdo Schimmel 			     struct key_vector *l, struct fib_alias *old);
1213a6c76c17SIdo Schimmel 
1214d5d6487cSAlexander Duyck /* Caller must hold RTNL. */
fib_table_insert(struct net * net,struct fib_table * tb,struct fib_config * cfg,struct netlink_ext_ack * extack)1215b90eb754SJiri Pirko int fib_table_insert(struct net *net, struct fib_table *tb,
12166d8422a1SDavid Ahern 		     struct fib_config *cfg, struct netlink_ext_ack *extack)
121719baf839SRobert Olsson {
121819baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
121919baf839SRobert Olsson 	struct fib_alias *fa, *new_fa;
122035c6edacSAlexander Duyck 	struct key_vector *l, *tp;
1221b93e1fa7SGuillaume Nault 	u16 nlflags = NLM_F_EXCL;
122219baf839SRobert Olsson 	struct fib_info *fi;
122379e5ad2cSAlexander Duyck 	u8 plen = cfg->fc_dst_len;
122479e5ad2cSAlexander Duyck 	u8 slen = KEYLENGTH - plen;
122532ccf110SGuillaume Nault 	dscp_t dscp;
1226d4a975e8SAlexander Duyck 	u32 key;
122719baf839SRobert Olsson 	int err;
122819baf839SRobert Olsson 
12294e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
123019baf839SRobert Olsson 
123178055998SDavid Ahern 	if (!fib_valid_key_len(key, plen, extack))
123219baf839SRobert Olsson 		return -EINVAL;
123319baf839SRobert Olsson 
1234ba277e8eSDavid Ahern 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1235ba277e8eSDavid Ahern 
12366d8422a1SDavid Ahern 	fi = fib_create_info(cfg, extack);
12374e902c57SThomas Graf 	if (IS_ERR(fi)) {
12384e902c57SThomas Graf 		err = PTR_ERR(fi);
123919baf839SRobert Olsson 		goto err;
12404e902c57SThomas Graf 	}
124119baf839SRobert Olsson 
124232ccf110SGuillaume Nault 	dscp = cfg->fc_dscp;
1243d4a975e8SAlexander Duyck 	l = fib_find_node(t, &tp, key);
124432ccf110SGuillaume Nault 	fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
1245b5fc0430SIdo Schimmel 				tb->tb_id, false) : NULL;
124619baf839SRobert Olsson 
124719baf839SRobert Olsson 	/* Now fa, if non-NULL, points to the first fib alias
124832ccf110SGuillaume Nault 	 * with the same keys [prefix,dscp,priority], if such key already
124919baf839SRobert Olsson 	 * exists or to the node before which we will insert new one.
125019baf839SRobert Olsson 	 *
125119baf839SRobert Olsson 	 * If fa is NULL, we will need to allocate a new one and
125256315f9eSAlexander Duyck 	 * insert to the tail of the section matching the suffix length
125356315f9eSAlexander Duyck 	 * of the new alias.
125419baf839SRobert Olsson 	 */
125519baf839SRobert Olsson 
125632ccf110SGuillaume Nault 	if (fa && fa->fa_dscp == dscp &&
1257936f6f8eSJulian Anastasov 	    fa->fa_info->fib_priority == fi->fib_priority) {
1258936f6f8eSJulian Anastasov 		struct fib_alias *fa_first, *fa_match;
125919baf839SRobert Olsson 
126019baf839SRobert Olsson 		err = -EEXIST;
12614e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_EXCL)
126219baf839SRobert Olsson 			goto out;
126319baf839SRobert Olsson 
1264b93e1fa7SGuillaume Nault 		nlflags &= ~NLM_F_EXCL;
1265b93e1fa7SGuillaume Nault 
1266936f6f8eSJulian Anastasov 		/* We have 2 goals:
1267936f6f8eSJulian Anastasov 		 * 1. Find exact match for type, scope, fib_info to avoid
1268936f6f8eSJulian Anastasov 		 * duplicate routes
1269936f6f8eSJulian Anastasov 		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1270936f6f8eSJulian Anastasov 		 */
1271936f6f8eSJulian Anastasov 		fa_match = NULL;
1272936f6f8eSJulian Anastasov 		fa_first = fa;
127356315f9eSAlexander Duyck 		hlist_for_each_entry_from(fa, fa_list) {
12740b65bd97SAlexander Duyck 			if ((fa->fa_slen != slen) ||
12750b65bd97SAlexander Duyck 			    (fa->tb_id != tb->tb_id) ||
127632ccf110SGuillaume Nault 			    (fa->fa_dscp != dscp))
1277936f6f8eSJulian Anastasov 				break;
1278936f6f8eSJulian Anastasov 			if (fa->fa_info->fib_priority != fi->fib_priority)
1279936f6f8eSJulian Anastasov 				break;
1280936f6f8eSJulian Anastasov 			if (fa->fa_type == cfg->fc_type &&
1281936f6f8eSJulian Anastasov 			    fa->fa_info == fi) {
1282936f6f8eSJulian Anastasov 				fa_match = fa;
1283936f6f8eSJulian Anastasov 				break;
1284936f6f8eSJulian Anastasov 			}
1285936f6f8eSJulian Anastasov 		}
1286936f6f8eSJulian Anastasov 
12874e902c57SThomas Graf 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
128819baf839SRobert Olsson 			struct fib_info *fi_drop;
128919baf839SRobert Olsson 			u8 state;
129019baf839SRobert Olsson 
1291b93e1fa7SGuillaume Nault 			nlflags |= NLM_F_REPLACE;
1292936f6f8eSJulian Anastasov 			fa = fa_first;
1293936f6f8eSJulian Anastasov 			if (fa_match) {
1294936f6f8eSJulian Anastasov 				if (fa == fa_match)
1295936f6f8eSJulian Anastasov 					err = 0;
12966725033fSJoonwoo Park 				goto out;
1297936f6f8eSJulian Anastasov 			}
12982373ce1cSRobert Olsson 			err = -ENOBUFS;
1299e94b1766SChristoph Lameter 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
130051456b29SIan Morris 			if (!new_fa)
13012373ce1cSRobert Olsson 				goto out;
130219baf839SRobert Olsson 
130319baf839SRobert Olsson 			fi_drop = fa->fa_info;
130432ccf110SGuillaume Nault 			new_fa->fa_dscp = fa->fa_dscp;
13052373ce1cSRobert Olsson 			new_fa->fa_info = fi;
13064e902c57SThomas Graf 			new_fa->fa_type = cfg->fc_type;
130719baf839SRobert Olsson 			state = fa->fa_state;
1308936f6f8eSJulian Anastasov 			new_fa->fa_state = state & ~FA_S_ACCESSED;
13099b6ebad5SAlexander Duyck 			new_fa->fa_slen = fa->fa_slen;
1310d4e64c29SMichal Kubeček 			new_fa->tb_id = tb->tb_id;
13112392debcSJulian Anastasov 			new_fa->fa_default = -1;
131290b93f1bSIdo Schimmel 			new_fa->offload = 0;
131390b93f1bSIdo Schimmel 			new_fa->trap = 0;
131436c5100eSAmit Cohen 			new_fa->offload_failed = 0;
131519baf839SRobert Olsson 
13166324d0faSIdo Schimmel 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
13176324d0faSIdo Schimmel 
1318ee3936d6SIdo Schimmel 			if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
13196324d0faSIdo Schimmel 					   tb->tb_id, true) == new_fa) {
1320ee3936d6SIdo Schimmel 				enum fib_event_type fib_event;
1321ee3936d6SIdo Schimmel 
1322446f7391SIdo Schimmel 				fib_event = FIB_EVENT_ENTRY_REPLACE;
1323ee3936d6SIdo Schimmel 				err = call_fib_entry_notifiers(net, fib_event,
1324ee3936d6SIdo Schimmel 							       key, plen,
1325ee3936d6SIdo Schimmel 							       new_fa, extack);
13266324d0faSIdo Schimmel 				if (err) {
13276324d0faSIdo Schimmel 					hlist_replace_rcu(&new_fa->fa_list,
13286324d0faSIdo Schimmel 							  &fa->fa_list);
1329ee3936d6SIdo Schimmel 					goto out_free_new_fa;
1330ee3936d6SIdo Schimmel 				}
13316324d0faSIdo Schimmel 			}
1332c1d7ee67SDavid Ahern 
13335b7d616dSIdo Schimmel 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
13345b7d616dSIdo Schimmel 				  tb->tb_id, &cfg->fc_nlinfo, nlflags);
13355b7d616dSIdo Schimmel 
13362373ce1cSRobert Olsson 			alias_free_mem_rcu(fa);
133719baf839SRobert Olsson 
133819baf839SRobert Olsson 			fib_release_info(fi_drop);
133919baf839SRobert Olsson 			if (state & FA_S_ACCESSED)
13404ccfe6d4SNicolas Dichtel 				rt_cache_flush(cfg->fc_nlinfo.nl_net);
1341b90eb754SJiri Pirko 
134219baf839SRobert Olsson 			goto succeeded;
134319baf839SRobert Olsson 		}
134419baf839SRobert Olsson 		/* Error if we find a perfect match which
134519baf839SRobert Olsson 		 * uses the same scope, type, and nexthop
134619baf839SRobert Olsson 		 * information.
134719baf839SRobert Olsson 		 */
1348936f6f8eSJulian Anastasov 		if (fa_match)
134919baf839SRobert Olsson 			goto out;
1350a07f5f50SStephen Hemminger 
1351446f7391SIdo Schimmel 		if (cfg->fc_nlflags & NLM_F_APPEND)
1352b93e1fa7SGuillaume Nault 			nlflags |= NLM_F_APPEND;
1353446f7391SIdo Schimmel 		else
1354936f6f8eSJulian Anastasov 			fa = fa_first;
135519baf839SRobert Olsson 	}
135619baf839SRobert Olsson 	err = -ENOENT;
13574e902c57SThomas Graf 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
135819baf839SRobert Olsson 		goto out;
135919baf839SRobert Olsson 
1360b93e1fa7SGuillaume Nault 	nlflags |= NLM_F_CREATE;
136119baf839SRobert Olsson 	err = -ENOBUFS;
1362e94b1766SChristoph Lameter 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
136351456b29SIan Morris 	if (!new_fa)
136419baf839SRobert Olsson 		goto out;
136519baf839SRobert Olsson 
136619baf839SRobert Olsson 	new_fa->fa_info = fi;
136732ccf110SGuillaume Nault 	new_fa->fa_dscp = dscp;
13684e902c57SThomas Graf 	new_fa->fa_type = cfg->fc_type;
136919baf839SRobert Olsson 	new_fa->fa_state = 0;
137079e5ad2cSAlexander Duyck 	new_fa->fa_slen = slen;
13710ddcf43dSAlexander Duyck 	new_fa->tb_id = tb->tb_id;
13722392debcSJulian Anastasov 	new_fa->fa_default = -1;
137390b93f1bSIdo Schimmel 	new_fa->offload = 0;
137490b93f1bSIdo Schimmel 	new_fa->trap = 0;
137536c5100eSAmit Cohen 	new_fa->offload_failed = 0;
137619baf839SRobert Olsson 
13779b6ebad5SAlexander Duyck 	/* Insert new entry to the list. */
1378d5d6487cSAlexander Duyck 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1379d5d6487cSAlexander Duyck 	if (err)
1380a6c76c17SIdo Schimmel 		goto out_free_new_fa;
1381a6c76c17SIdo Schimmel 
1382a6c76c17SIdo Schimmel 	/* The alias was already inserted, so the node must exist. */
1383a6c76c17SIdo Schimmel 	l = l ? l : fib_find_node(t, &tp, key);
1384568fe849SZiyang Xuan 	if (WARN_ON_ONCE(!l)) {
1385568fe849SZiyang Xuan 		err = -ENOENT;
1386a6c76c17SIdo Schimmel 		goto out_free_new_fa;
1387568fe849SZiyang Xuan 	}
1388a6c76c17SIdo Schimmel 
1389a8674f75SIdo Schimmel 	if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
1390a8674f75SIdo Schimmel 	    new_fa) {
1391a8674f75SIdo Schimmel 		enum fib_event_type fib_event;
1392a8674f75SIdo Schimmel 
1393446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_REPLACE;
1394a8674f75SIdo Schimmel 		err = call_fib_entry_notifiers(net, fib_event, key, plen,
1395a8674f75SIdo Schimmel 					       new_fa, extack);
1396a8674f75SIdo Schimmel 		if (err)
1397a8674f75SIdo Schimmel 			goto out_remove_new_fa;
1398a8674f75SIdo Schimmel 	}
139919baf839SRobert Olsson 
140021d8c49eSDavid S. Miller 	if (!plen)
140121d8c49eSDavid S. Miller 		tb->tb_num_default++;
140221d8c49eSDavid S. Miller 
14034ccfe6d4SNicolas Dichtel 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
14040ddcf43dSAlexander Duyck 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1405a2bb6d7dSRoopa Prabhu 		  &cfg->fc_nlinfo, nlflags);
140619baf839SRobert Olsson succeeded:
140719baf839SRobert Olsson 	return 0;
1408f835e471SRobert Olsson 
1409a6c76c17SIdo Schimmel out_remove_new_fa:
1410a6c76c17SIdo Schimmel 	fib_remove_alias(t, tp, l, new_fa);
1411f835e471SRobert Olsson out_free_new_fa:
1412f835e471SRobert Olsson 	kmem_cache_free(fn_alias_kmem, new_fa);
141319baf839SRobert Olsson out:
141419baf839SRobert Olsson 	fib_release_info(fi);
141591b9a277SOlof Johansson err:
141619baf839SRobert Olsson 	return err;
141719baf839SRobert Olsson }
141819baf839SRobert Olsson 
prefix_mismatch(t_key key,struct key_vector * n)141935c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
14209f9e636dSAlexander Duyck {
14219f9e636dSAlexander Duyck 	t_key prefix = n->key;
14229f9e636dSAlexander Duyck 
14239f9e636dSAlexander Duyck 	return (key ^ prefix) & (prefix | -prefix);
14249f9e636dSAlexander Duyck }
14259f9e636dSAlexander Duyck 
fib_lookup_good_nhc(const struct fib_nh_common * nhc,int fib_flags,const struct flowi4 * flp)1426af7888adSDavid Ahern bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1427af7888adSDavid Ahern 			 const struct flowi4 *flp)
1428af7888adSDavid Ahern {
1429af7888adSDavid Ahern 	if (nhc->nhc_flags & RTNH_F_DEAD)
1430af7888adSDavid Ahern 		return false;
1431af7888adSDavid Ahern 
1432af7888adSDavid Ahern 	if (ip_ignore_linkdown(nhc->nhc_dev) &&
1433af7888adSDavid Ahern 	    nhc->nhc_flags & RTNH_F_LINKDOWN &&
1434af7888adSDavid Ahern 	    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1435af7888adSDavid Ahern 		return false;
1436af7888adSDavid Ahern 
143740867d74SDavid Ahern 	if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
1438af7888adSDavid Ahern 		return false;
1439af7888adSDavid Ahern 
1440af7888adSDavid Ahern 	return true;
1441af7888adSDavid Ahern }
1442af7888adSDavid Ahern 
1443345e9b54SAlexander Duyck /* should be called with rcu_read_lock */
fib_table_lookup(struct fib_table * tb,const struct flowi4 * flp,struct fib_result * res,int fib_flags)144422bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1445ebc0ffaeSEric Dumazet 		     struct fib_result *res, int fib_flags)
144619baf839SRobert Olsson {
144719baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
14488274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
14498274a97aSAlexander Duyck 	struct trie_use_stats __percpu *stats = t->stats;
14508274a97aSAlexander Duyck #endif
14519f9e636dSAlexander Duyck 	const t_key key = ntohl(flp->daddr);
145235c6edacSAlexander Duyck 	struct key_vector *n, *pn;
145379e5ad2cSAlexander Duyck 	struct fib_alias *fa;
145471e8b67dSAlexander Duyck 	unsigned long index;
14559f9e636dSAlexander Duyck 	t_key cindex;
145619baf839SRobert Olsson 
145788bae714SAlexander Duyck 	pn = t->kv;
145888bae714SAlexander Duyck 	cindex = 0;
145988bae714SAlexander Duyck 
146088bae714SAlexander Duyck 	n = get_child_rcu(pn, cindex);
14619f323973SDavid Ahern 	if (!n) {
14629f323973SDavid Ahern 		trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1463345e9b54SAlexander Duyck 		return -EAGAIN;
14649f323973SDavid Ahern 	}
146519baf839SRobert Olsson 
146619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
14678274a97aSAlexander Duyck 	this_cpu_inc(stats->gets);
146819baf839SRobert Olsson #endif
146919baf839SRobert Olsson 
14709f9e636dSAlexander Duyck 	/* Step 1: Travel to the longest prefix match in the trie */
14719f9e636dSAlexander Duyck 	for (;;) {
147288bae714SAlexander Duyck 		index = get_cindex(key, n);
14739f9e636dSAlexander Duyck 
14749f9e636dSAlexander Duyck 		/* This bit of code is a bit tricky but it combines multiple
14759f9e636dSAlexander Duyck 		 * checks into a single check.  The prefix consists of the
14769f9e636dSAlexander Duyck 		 * prefix plus zeros for the "bits" in the prefix. The index
14779f9e636dSAlexander Duyck 		 * is the difference between the key and this value.  From
14789f9e636dSAlexander Duyck 		 * this we can actually derive several pieces of data.
147971e8b67dSAlexander Duyck 		 *   if (index >= (1ul << bits))
14809f9e636dSAlexander Duyck 		 *     we have a mismatch in skip bits and failed
1481b3832117SAlexander Duyck 		 *   else
1482b3832117SAlexander Duyck 		 *     we know the value is cindex
148371e8b67dSAlexander Duyck 		 *
148471e8b67dSAlexander Duyck 		 * This check is safe even if bits == KEYLENGTH due to the
148571e8b67dSAlexander Duyck 		 * fact that we can only allocate a node with 32 bits if a
148671e8b67dSAlexander Duyck 		 * long is greater than 32 bits.
14879f9e636dSAlexander Duyck 		 */
148871e8b67dSAlexander Duyck 		if (index >= (1ul << n->bits))
14899f9e636dSAlexander Duyck 			break;
14909f9e636dSAlexander Duyck 
14919f9e636dSAlexander Duyck 		/* we have found a leaf. Prefixes have already been compared */
14929f9e636dSAlexander Duyck 		if (IS_LEAF(n))
1493a07f5f50SStephen Hemminger 			goto found;
14949f9e636dSAlexander Duyck 
14959f9e636dSAlexander Duyck 		/* only record pn and cindex if we are going to be chopping
14969f9e636dSAlexander Duyck 		 * bits later.  Otherwise we are just wasting cycles.
14979f9e636dSAlexander Duyck 		 */
14985405afd1SAlexander Duyck 		if (n->slen > n->pos) {
14999f9e636dSAlexander Duyck 			pn = n;
15009f9e636dSAlexander Duyck 			cindex = index;
150119baf839SRobert Olsson 		}
1502a07f5f50SStephen Hemminger 
1503754baf8dSAlexander Duyck 		n = get_child_rcu(n, index);
15049f9e636dSAlexander Duyck 		if (unlikely(!n))
15059f9e636dSAlexander Duyck 			goto backtrace;
15069f9e636dSAlexander Duyck 	}
150719baf839SRobert Olsson 
15089f9e636dSAlexander Duyck 	/* Step 2: Sort out leaves and begin backtracing for longest prefix */
15099f9e636dSAlexander Duyck 	for (;;) {
15109f9e636dSAlexander Duyck 		/* record the pointer where our next node pointer is stored */
151135c6edacSAlexander Duyck 		struct key_vector __rcu **cptr = n->tnode;
151219baf839SRobert Olsson 
15139f9e636dSAlexander Duyck 		/* This test verifies that none of the bits that differ
15149f9e636dSAlexander Duyck 		 * between the key and the prefix exist in the region of
15159f9e636dSAlexander Duyck 		 * the lsb and higher in the prefix.
15169f9e636dSAlexander Duyck 		 */
15175405afd1SAlexander Duyck 		if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
15189f9e636dSAlexander Duyck 			goto backtrace;
151919baf839SRobert Olsson 
15209f9e636dSAlexander Duyck 		/* exit out and process leaf */
15219f9e636dSAlexander Duyck 		if (unlikely(IS_LEAF(n)))
15229f9e636dSAlexander Duyck 			break;
152319baf839SRobert Olsson 
15249f9e636dSAlexander Duyck 		/* Don't bother recording parent info.  Since we are in
15259f9e636dSAlexander Duyck 		 * prefix match mode we will have to come back to wherever
15269f9e636dSAlexander Duyck 		 * we started this traversal anyway
15279f9e636dSAlexander Duyck 		 */
15289f9e636dSAlexander Duyck 
15299f9e636dSAlexander Duyck 		while ((n = rcu_dereference(*cptr)) == NULL) {
15309f9e636dSAlexander Duyck backtrace:
153119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15329f9e636dSAlexander Duyck 			if (!n)
15338274a97aSAlexander Duyck 				this_cpu_inc(stats->null_node_hit);
153419baf839SRobert Olsson #endif
15359f9e636dSAlexander Duyck 			/* If we are at cindex 0 there are no more bits for
15369f9e636dSAlexander Duyck 			 * us to strip at this level so we must ascend back
15379f9e636dSAlexander Duyck 			 * up one level to see if there are any more bits to
15389f9e636dSAlexander Duyck 			 * be stripped there.
153919baf839SRobert Olsson 			 */
15409f9e636dSAlexander Duyck 			while (!cindex) {
15419f9e636dSAlexander Duyck 				t_key pkey = pn->key;
154219baf839SRobert Olsson 
154388bae714SAlexander Duyck 				/* If we don't have a parent then there is
154488bae714SAlexander Duyck 				 * nothing for us to do as we do not have any
154588bae714SAlexander Duyck 				 * further nodes to parse.
154688bae714SAlexander Duyck 				 */
15479f323973SDavid Ahern 				if (IS_TRIE(pn)) {
15489f323973SDavid Ahern 					trace_fib_table_lookup(tb->tb_id, flp,
15499f323973SDavid Ahern 							       NULL, -EAGAIN);
1550345e9b54SAlexander Duyck 					return -EAGAIN;
15519f323973SDavid Ahern 				}
155219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
15538274a97aSAlexander Duyck 				this_cpu_inc(stats->backtrack);
155419baf839SRobert Olsson #endif
15559f9e636dSAlexander Duyck 				/* Get Child's index */
155688bae714SAlexander Duyck 				pn = node_parent_rcu(pn);
15579f9e636dSAlexander Duyck 				cindex = get_index(pkey, pn);
15589f9e636dSAlexander Duyck 			}
15599f9e636dSAlexander Duyck 
15609f9e636dSAlexander Duyck 			/* strip the least significant bit from the cindex */
15619f9e636dSAlexander Duyck 			cindex &= cindex - 1;
15629f9e636dSAlexander Duyck 
15639f9e636dSAlexander Duyck 			/* grab pointer for next child node */
156441b489fdSAlexander Duyck 			cptr = &pn->tnode[cindex];
156519baf839SRobert Olsson 		}
156619baf839SRobert Olsson 	}
15679f9e636dSAlexander Duyck 
156819baf839SRobert Olsson found:
156971e8b67dSAlexander Duyck 	/* this line carries forward the xor from earlier in the function */
157071e8b67dSAlexander Duyck 	index = key ^ n->key;
157171e8b67dSAlexander Duyck 
15729f9e636dSAlexander Duyck 	/* Step 3: Process the leaf, if that fails fall back to backtracing */
157379e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1574345e9b54SAlexander Duyck 		struct fib_info *fi = fa->fa_info;
1575af7888adSDavid Ahern 		struct fib_nh_common *nhc;
1576345e9b54SAlexander Duyck 		int nhsel, err;
1577345e9b54SAlexander Duyck 
1578a5829f53SAlexander Duyck 		if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1579a5829f53SAlexander Duyck 			if (index >= (1ul << fa->fa_slen))
15809b6ebad5SAlexander Duyck 				continue;
1581a5829f53SAlexander Duyck 		}
158232ccf110SGuillaume Nault 		if (fa->fa_dscp &&
158332ccf110SGuillaume Nault 		    inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
1584345e9b54SAlexander Duyck 			continue;
1585fce92af1SEric Dumazet 		/* Paired with WRITE_ONCE() in fib_release_info() */
1586fce92af1SEric Dumazet 		if (READ_ONCE(fi->fib_dead))
1587345e9b54SAlexander Duyck 			continue;
1588345e9b54SAlexander Duyck 		if (fa->fa_info->fib_scope < flp->flowi4_scope)
1589345e9b54SAlexander Duyck 			continue;
1590345e9b54SAlexander Duyck 		fib_alias_accessed(fa);
1591345e9b54SAlexander Duyck 		err = fib_props[fa->fa_type].error;
1592345e9b54SAlexander Duyck 		if (unlikely(err < 0)) {
15934c7e8084SDavid Ahern out_reject:
1594345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1595345e9b54SAlexander Duyck 			this_cpu_inc(stats->semantic_match_passed);
1596345e9b54SAlexander Duyck #endif
15979f323973SDavid Ahern 			trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1598345e9b54SAlexander Duyck 			return err;
1599345e9b54SAlexander Duyck 		}
1600345e9b54SAlexander Duyck 		if (fi->fib_flags & RTNH_F_DEAD)
1601345e9b54SAlexander Duyck 			continue;
16024c7e8084SDavid Ahern 
1603af7888adSDavid Ahern 		if (unlikely(fi->nh)) {
1604af7888adSDavid Ahern 			if (nexthop_is_blackhole(fi->nh)) {
16054c7e8084SDavid Ahern 				err = fib_props[RTN_BLACKHOLE].error;
16064c7e8084SDavid Ahern 				goto out_reject;
16074c7e8084SDavid Ahern 			}
16084c7e8084SDavid Ahern 
1609af7888adSDavid Ahern 			nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1610af7888adSDavid Ahern 						     &nhsel);
1611af7888adSDavid Ahern 			if (nhc)
1612af7888adSDavid Ahern 				goto set_result;
1613af7888adSDavid Ahern 			goto miss;
1614613d09b3SDavid Ahern 		}
1615345e9b54SAlexander Duyck 
1616af7888adSDavid Ahern 		for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1617af7888adSDavid Ahern 			nhc = fib_info_nhc(fi, nhsel);
1618af7888adSDavid Ahern 
1619af7888adSDavid Ahern 			if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
1620af7888adSDavid Ahern 				continue;
1621af7888adSDavid Ahern set_result:
1622345e9b54SAlexander Duyck 			if (!(fib_flags & FIB_LOOKUP_NOREF))
16230029c0deSReshetova, Elena 				refcount_inc(&fi->fib_clntref);
1624345e9b54SAlexander Duyck 
16256ffd9034SDavid Ahern 			res->prefix = htonl(n->key);
16269b6ebad5SAlexander Duyck 			res->prefixlen = KEYLENGTH - fa->fa_slen;
1627345e9b54SAlexander Duyck 			res->nh_sel = nhsel;
1628eba618abSDavid Ahern 			res->nhc = nhc;
1629345e9b54SAlexander Duyck 			res->type = fa->fa_type;
1630345e9b54SAlexander Duyck 			res->scope = fi->fib_scope;
1631345e9b54SAlexander Duyck 			res->fi = fi;
1632345e9b54SAlexander Duyck 			res->table = tb;
163379e5ad2cSAlexander Duyck 			res->fa_head = &n->leaf;
1634345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1635345e9b54SAlexander Duyck 			this_cpu_inc(stats->semantic_match_passed);
1636345e9b54SAlexander Duyck #endif
1637eba618abSDavid Ahern 			trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1638f6d3c192SDavid Ahern 
1639345e9b54SAlexander Duyck 			return err;
1640345e9b54SAlexander Duyck 		}
1641345e9b54SAlexander Duyck 	}
1642af7888adSDavid Ahern miss:
1643345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
1644345e9b54SAlexander Duyck 	this_cpu_inc(stats->semantic_match_miss);
1645345e9b54SAlexander Duyck #endif
16469f9e636dSAlexander Duyck 	goto backtrace;
164719baf839SRobert Olsson }
16486fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup);
164919baf839SRobert Olsson 
fib_remove_alias(struct trie * t,struct key_vector * tp,struct key_vector * l,struct fib_alias * old)165035c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp,
165135c6edacSAlexander Duyck 			     struct key_vector *l, struct fib_alias *old)
1652d5d6487cSAlexander Duyck {
1653d5d6487cSAlexander Duyck 	/* record the location of the previous list_info entry */
1654d5d6487cSAlexander Duyck 	struct hlist_node **pprev = old->fa_list.pprev;
1655d5d6487cSAlexander Duyck 	struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1656d5d6487cSAlexander Duyck 
1657d5d6487cSAlexander Duyck 	/* remove the fib_alias from the list */
1658d5d6487cSAlexander Duyck 	hlist_del_rcu(&old->fa_list);
1659d5d6487cSAlexander Duyck 
1660d5d6487cSAlexander Duyck 	/* if we emptied the list this leaf will be freed and we can sort
1661d5d6487cSAlexander Duyck 	 * out parent suffix lengths as a part of trie_rebalance
1662d562f1f8SRobert Olsson 	 */
1663d5d6487cSAlexander Duyck 	if (hlist_empty(&l->leaf)) {
1664a52ca62cSAlexander Duyck 		if (tp->slen == l->slen)
1665a52ca62cSAlexander Duyck 			node_pull_suffix(tp, tp->pos);
166688bae714SAlexander Duyck 		put_child_root(tp, l->key, NULL);
1667d5d6487cSAlexander Duyck 		node_free(l);
1668d5d6487cSAlexander Duyck 		trie_rebalance(t, tp);
1669d5d6487cSAlexander Duyck 		return;
1670d5d6487cSAlexander Duyck 	}
1671d5d6487cSAlexander Duyck 
1672d5d6487cSAlexander Duyck 	/* only access fa if it is pointing at the last valid hlist_node */
1673d5d6487cSAlexander Duyck 	if (*pprev)
1674d5d6487cSAlexander Duyck 		return;
1675d5d6487cSAlexander Duyck 
1676d5d6487cSAlexander Duyck 	/* update the trie with the latest suffix length */
1677d5d6487cSAlexander Duyck 	l->slen = fa->fa_slen;
16781a239173SAlexander Duyck 	node_pull_suffix(tp, fa->fa_slen);
1679d5d6487cSAlexander Duyck }
1680d5d6487cSAlexander Duyck 
fib_notify_alias_delete(struct net * net,u32 key,struct hlist_head * fah,struct fib_alias * fa_to_delete,struct netlink_ext_ack * extack)1681f613b6e2SIdo Schimmel static void fib_notify_alias_delete(struct net *net, u32 key,
1682f613b6e2SIdo Schimmel 				    struct hlist_head *fah,
1683f613b6e2SIdo Schimmel 				    struct fib_alias *fa_to_delete,
1684f613b6e2SIdo Schimmel 				    struct netlink_ext_ack *extack)
1685f613b6e2SIdo Schimmel {
1686f613b6e2SIdo Schimmel 	struct fib_alias *fa_next, *fa_to_notify;
1687f613b6e2SIdo Schimmel 	u32 tb_id = fa_to_delete->tb_id;
1688f613b6e2SIdo Schimmel 	u8 slen = fa_to_delete->fa_slen;
1689f613b6e2SIdo Schimmel 	enum fib_event_type fib_event;
1690f613b6e2SIdo Schimmel 
1691f613b6e2SIdo Schimmel 	/* Do not notify if we do not care about the route. */
1692f613b6e2SIdo Schimmel 	if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
1693f613b6e2SIdo Schimmel 		return;
1694f613b6e2SIdo Schimmel 
1695f613b6e2SIdo Schimmel 	/* Determine if the route should be replaced by the next route in the
1696f613b6e2SIdo Schimmel 	 * list.
1697f613b6e2SIdo Schimmel 	 */
1698f613b6e2SIdo Schimmel 	fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
1699f613b6e2SIdo Schimmel 				   struct fib_alias, fa_list);
1700f613b6e2SIdo Schimmel 	if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
1701446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_REPLACE;
1702f613b6e2SIdo Schimmel 		fa_to_notify = fa_next;
1703f613b6e2SIdo Schimmel 	} else {
1704446f7391SIdo Schimmel 		fib_event = FIB_EVENT_ENTRY_DEL;
1705f613b6e2SIdo Schimmel 		fa_to_notify = fa_to_delete;
1706f613b6e2SIdo Schimmel 	}
1707f613b6e2SIdo Schimmel 	call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
1708f613b6e2SIdo Schimmel 				 fa_to_notify, extack);
1709f613b6e2SIdo Schimmel }
1710f613b6e2SIdo Schimmel 
1711d5d6487cSAlexander Duyck /* Caller must hold RTNL. */
fib_table_delete(struct net * net,struct fib_table * tb,struct fib_config * cfg,struct netlink_ext_ack * extack)1712b90eb754SJiri Pirko int fib_table_delete(struct net *net, struct fib_table *tb,
171378055998SDavid Ahern 		     struct fib_config *cfg, struct netlink_ext_ack *extack)
171419baf839SRobert Olsson {
171519baf839SRobert Olsson 	struct trie *t = (struct trie *) tb->tb_data;
171619baf839SRobert Olsson 	struct fib_alias *fa, *fa_to_delete;
171735c6edacSAlexander Duyck 	struct key_vector *l, *tp;
171879e5ad2cSAlexander Duyck 	u8 plen = cfg->fc_dst_len;
171979e5ad2cSAlexander Duyck 	u8 slen = KEYLENGTH - plen;
172032ccf110SGuillaume Nault 	dscp_t dscp;
1721d4a975e8SAlexander Duyck 	u32 key;
172291b9a277SOlof Johansson 
17234e902c57SThomas Graf 	key = ntohl(cfg->fc_dst);
172419baf839SRobert Olsson 
172578055998SDavid Ahern 	if (!fib_valid_key_len(key, plen, extack))
172619baf839SRobert Olsson 		return -EINVAL;
172719baf839SRobert Olsson 
1728d4a975e8SAlexander Duyck 	l = fib_find_node(t, &tp, key);
172919baf839SRobert Olsson 	if (!l)
173019baf839SRobert Olsson 		return -ESRCH;
173119baf839SRobert Olsson 
173232ccf110SGuillaume Nault 	dscp = cfg->fc_dscp;
173332ccf110SGuillaume Nault 	fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
173419baf839SRobert Olsson 	if (!fa)
173519baf839SRobert Olsson 		return -ESRCH;
173619baf839SRobert Olsson 
173732ccf110SGuillaume Nault 	pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
173832ccf110SGuillaume Nault 		 inet_dscp_to_dsfield(dscp), t);
173919baf839SRobert Olsson 
174019baf839SRobert Olsson 	fa_to_delete = NULL;
174156315f9eSAlexander Duyck 	hlist_for_each_entry_from(fa, fa_list) {
174219baf839SRobert Olsson 		struct fib_info *fi = fa->fa_info;
174319baf839SRobert Olsson 
17440b65bd97SAlexander Duyck 		if ((fa->fa_slen != slen) ||
17450b65bd97SAlexander Duyck 		    (fa->tb_id != tb->tb_id) ||
174632ccf110SGuillaume Nault 		    (fa->fa_dscp != dscp))
174719baf839SRobert Olsson 			break;
174819baf839SRobert Olsson 
17494e902c57SThomas Graf 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
17504e902c57SThomas Graf 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
175137e826c5SDavid S. Miller 		     fa->fa_info->fib_scope == cfg->fc_scope) &&
175274cb3c10SJulian Anastasov 		    (!cfg->fc_prefsrc ||
175374cb3c10SJulian Anastasov 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
17544e902c57SThomas Graf 		    (!cfg->fc_protocol ||
17554e902c57SThomas Graf 		     fi->fib_protocol == cfg->fc_protocol) &&
1756faee6769SAlexander Aring 		    fib_nh_match(net, cfg, fi, extack) == 0 &&
17575f9ae3d9SXin Long 		    fib_metrics_match(cfg, fi)) {
175819baf839SRobert Olsson 			fa_to_delete = fa;
175919baf839SRobert Olsson 			break;
176019baf839SRobert Olsson 		}
176119baf839SRobert Olsson 	}
176219baf839SRobert Olsson 
176391b9a277SOlof Johansson 	if (!fa_to_delete)
176491b9a277SOlof Johansson 		return -ESRCH;
176519baf839SRobert Olsson 
1766f613b6e2SIdo Schimmel 	fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
1767d5d6487cSAlexander Duyck 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1768b8f55831SMilan Kocian 		  &cfg->fc_nlinfo, 0);
176919baf839SRobert Olsson 
177021d8c49eSDavid S. Miller 	if (!plen)
177121d8c49eSDavid S. Miller 		tb->tb_num_default--;
177221d8c49eSDavid S. Miller 
1773d5d6487cSAlexander Duyck 	fib_remove_alias(t, tp, l, fa_to_delete);
17747289e6ddSAlexander Duyck 
1775d5d6487cSAlexander Duyck 	if (fa_to_delete->fa_state & FA_S_ACCESSED)
17764ccfe6d4SNicolas Dichtel 		rt_cache_flush(cfg->fc_nlinfo.nl_net);
177719baf839SRobert Olsson 
1778d5d6487cSAlexander Duyck 	fib_release_info(fa_to_delete->fa_info);
1779d5d6487cSAlexander Duyck 	alias_free_mem_rcu(fa_to_delete);
178019baf839SRobert Olsson 	return 0;
178119baf839SRobert Olsson }
178219baf839SRobert Olsson 
17838be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */
leaf_walk_rcu(struct key_vector ** tn,t_key key)178435c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
178519baf839SRobert Olsson {
178635c6edacSAlexander Duyck 	struct key_vector *pn, *n = *tn;
17878be33e95SAlexander Duyck 	unsigned long cindex;
178819baf839SRobert Olsson 
17898be33e95SAlexander Duyck 	/* this loop is meant to try and find the key in the trie */
179088bae714SAlexander Duyck 	do {
179188bae714SAlexander Duyck 		/* record parent and next child index */
179288bae714SAlexander Duyck 		pn = n;
1793c2229fe1SAlexander Duyck 		cindex = (key > pn->key) ? get_index(key, pn) : 0;
179488bae714SAlexander Duyck 
179588bae714SAlexander Duyck 		if (cindex >> pn->bits)
179688bae714SAlexander Duyck 			break;
179788bae714SAlexander Duyck 
179888bae714SAlexander Duyck 		/* descend into the next child */
179988bae714SAlexander Duyck 		n = get_child_rcu(pn, cindex++);
180088bae714SAlexander Duyck 		if (!n)
180188bae714SAlexander Duyck 			break;
18028be33e95SAlexander Duyck 
18038be33e95SAlexander Duyck 		/* guarantee forward progress on the keys */
18048be33e95SAlexander Duyck 		if (IS_LEAF(n) && (n->key >= key))
18058be33e95SAlexander Duyck 			goto found;
180688bae714SAlexander Duyck 	} while (IS_TNODE(n));
18078be33e95SAlexander Duyck 
18088be33e95SAlexander Duyck 	/* this loop will search for the next leaf with a greater key */
180988bae714SAlexander Duyck 	while (!IS_TRIE(pn)) {
18108be33e95SAlexander Duyck 		/* if we exhausted the parent node we will need to climb */
18118be33e95SAlexander Duyck 		if (cindex >= (1ul << pn->bits)) {
18128be33e95SAlexander Duyck 			t_key pkey = pn->key;
18138be33e95SAlexander Duyck 
18148be33e95SAlexander Duyck 			pn = node_parent_rcu(pn);
18158be33e95SAlexander Duyck 			cindex = get_index(pkey, pn) + 1;
18168be33e95SAlexander Duyck 			continue;
18178be33e95SAlexander Duyck 		}
18188be33e95SAlexander Duyck 
18198be33e95SAlexander Duyck 		/* grab the next available node */
1820754baf8dSAlexander Duyck 		n = get_child_rcu(pn, cindex++);
18218be33e95SAlexander Duyck 		if (!n)
182291b9a277SOlof Johansson 			continue;
182319baf839SRobert Olsson 
18248be33e95SAlexander Duyck 		/* no need to compare keys since we bumped the index */
18258be33e95SAlexander Duyck 		if (IS_LEAF(n))
18268be33e95SAlexander Duyck 			goto found;
182782cfbb00SStephen Hemminger 
182882cfbb00SStephen Hemminger 		/* Rescan start scanning in new node */
18298be33e95SAlexander Duyck 		pn = n;
18308be33e95SAlexander Duyck 		cindex = 0;
183119baf839SRobert Olsson 	}
183282cfbb00SStephen Hemminger 
18338be33e95SAlexander Duyck 	*tn = pn;
183482cfbb00SStephen Hemminger 	return NULL; /* Root of trie */
18358be33e95SAlexander Duyck found:
18368be33e95SAlexander Duyck 	/* if we are at the limit for keys just return NULL for the tnode */
183788bae714SAlexander Duyck 	*tn = pn;
1838adaf9816SAlexander Duyck 	return n;
183982cfbb00SStephen Hemminger }
184082cfbb00SStephen Hemminger 
fib_trie_free(struct fib_table * tb)18410ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb)
18420ddcf43dSAlexander Duyck {
18430ddcf43dSAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
18440ddcf43dSAlexander Duyck 	struct key_vector *pn = t->kv;
18450ddcf43dSAlexander Duyck 	unsigned long cindex = 1;
18460ddcf43dSAlexander Duyck 	struct hlist_node *tmp;
18470ddcf43dSAlexander Duyck 	struct fib_alias *fa;
18480ddcf43dSAlexander Duyck 
18490ddcf43dSAlexander Duyck 	/* walk trie in reverse order and free everything */
18500ddcf43dSAlexander Duyck 	for (;;) {
18510ddcf43dSAlexander Duyck 		struct key_vector *n;
18520ddcf43dSAlexander Duyck 
18530ddcf43dSAlexander Duyck 		if (!(cindex--)) {
18540ddcf43dSAlexander Duyck 			t_key pkey = pn->key;
18550ddcf43dSAlexander Duyck 
18560ddcf43dSAlexander Duyck 			if (IS_TRIE(pn))
18570ddcf43dSAlexander Duyck 				break;
18580ddcf43dSAlexander Duyck 
18590ddcf43dSAlexander Duyck 			n = pn;
18600ddcf43dSAlexander Duyck 			pn = node_parent(pn);
18610ddcf43dSAlexander Duyck 
18620ddcf43dSAlexander Duyck 			/* drop emptied tnode */
18630ddcf43dSAlexander Duyck 			put_child_root(pn, n->key, NULL);
18640ddcf43dSAlexander Duyck 			node_free(n);
18650ddcf43dSAlexander Duyck 
18660ddcf43dSAlexander Duyck 			cindex = get_index(pkey, pn);
18670ddcf43dSAlexander Duyck 
18680ddcf43dSAlexander Duyck 			continue;
18690ddcf43dSAlexander Duyck 		}
18700ddcf43dSAlexander Duyck 
18710ddcf43dSAlexander Duyck 		/* grab the next available node */
18720ddcf43dSAlexander Duyck 		n = get_child(pn, cindex);
18730ddcf43dSAlexander Duyck 		if (!n)
18740ddcf43dSAlexander Duyck 			continue;
18750ddcf43dSAlexander Duyck 
18760ddcf43dSAlexander Duyck 		if (IS_TNODE(n)) {
18770ddcf43dSAlexander Duyck 			/* record pn and cindex for leaf walking */
18780ddcf43dSAlexander Duyck 			pn = n;
18790ddcf43dSAlexander Duyck 			cindex = 1ul << n->bits;
18800ddcf43dSAlexander Duyck 
18810ddcf43dSAlexander Duyck 			continue;
18820ddcf43dSAlexander Duyck 		}
18830ddcf43dSAlexander Duyck 
18840ddcf43dSAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
18850ddcf43dSAlexander Duyck 			hlist_del_rcu(&fa->fa_list);
18860ddcf43dSAlexander Duyck 			alias_free_mem_rcu(fa);
18870ddcf43dSAlexander Duyck 		}
18880ddcf43dSAlexander Duyck 
18890ddcf43dSAlexander Duyck 		put_child_root(pn, n->key, NULL);
18900ddcf43dSAlexander Duyck 		node_free(n);
18910ddcf43dSAlexander Duyck 	}
18920ddcf43dSAlexander Duyck 
18930ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
18940ddcf43dSAlexander Duyck 	free_percpu(t->stats);
18950ddcf43dSAlexander Duyck #endif
18960ddcf43dSAlexander Duyck 	kfree(tb);
18970ddcf43dSAlexander Duyck }
18980ddcf43dSAlexander Duyck 
fib_trie_unmerge(struct fib_table * oldtb)18990ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
19000ddcf43dSAlexander Duyck {
19010ddcf43dSAlexander Duyck 	struct trie *ot = (struct trie *)oldtb->tb_data;
19020ddcf43dSAlexander Duyck 	struct key_vector *l, *tp = ot->kv;
19030ddcf43dSAlexander Duyck 	struct fib_table *local_tb;
19040ddcf43dSAlexander Duyck 	struct fib_alias *fa;
19050ddcf43dSAlexander Duyck 	struct trie *lt;
19060ddcf43dSAlexander Duyck 	t_key key = 0;
19070ddcf43dSAlexander Duyck 
19080ddcf43dSAlexander Duyck 	if (oldtb->tb_data == oldtb->__data)
19090ddcf43dSAlexander Duyck 		return oldtb;
19100ddcf43dSAlexander Duyck 
19110ddcf43dSAlexander Duyck 	local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
19120ddcf43dSAlexander Duyck 	if (!local_tb)
19130ddcf43dSAlexander Duyck 		return NULL;
19140ddcf43dSAlexander Duyck 
19150ddcf43dSAlexander Duyck 	lt = (struct trie *)local_tb->tb_data;
19160ddcf43dSAlexander Duyck 
19170ddcf43dSAlexander Duyck 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
19180ddcf43dSAlexander Duyck 		struct key_vector *local_l = NULL, *local_tp;
19190ddcf43dSAlexander Duyck 
192083f35228SIdo Schimmel 		hlist_for_each_entry(fa, &l->leaf, fa_list) {
19210ddcf43dSAlexander Duyck 			struct fib_alias *new_fa;
19220ddcf43dSAlexander Duyck 
19230ddcf43dSAlexander Duyck 			if (local_tb->tb_id != fa->tb_id)
19240ddcf43dSAlexander Duyck 				continue;
19250ddcf43dSAlexander Duyck 
19260ddcf43dSAlexander Duyck 			/* clone fa for new local table */
19270ddcf43dSAlexander Duyck 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
19280ddcf43dSAlexander Duyck 			if (!new_fa)
19290ddcf43dSAlexander Duyck 				goto out;
19300ddcf43dSAlexander Duyck 
19310ddcf43dSAlexander Duyck 			memcpy(new_fa, fa, sizeof(*fa));
19320ddcf43dSAlexander Duyck 
19330ddcf43dSAlexander Duyck 			/* insert clone into table */
19340ddcf43dSAlexander Duyck 			if (!local_l)
19350ddcf43dSAlexander Duyck 				local_l = fib_find_node(lt, &local_tp, l->key);
19360ddcf43dSAlexander Duyck 
19370ddcf43dSAlexander Duyck 			if (fib_insert_alias(lt, local_tp, local_l, new_fa,
19383114cdfeSAlexander Duyck 					     NULL, l->key)) {
19393114cdfeSAlexander Duyck 				kmem_cache_free(fn_alias_kmem, new_fa);
19400ddcf43dSAlexander Duyck 				goto out;
19410ddcf43dSAlexander Duyck 			}
19423114cdfeSAlexander Duyck 		}
19430ddcf43dSAlexander Duyck 
19440ddcf43dSAlexander Duyck 		/* stop loop if key wrapped back to 0 */
19450ddcf43dSAlexander Duyck 		key = l->key + 1;
19460ddcf43dSAlexander Duyck 		if (key < l->key)
19470ddcf43dSAlexander Duyck 			break;
19480ddcf43dSAlexander Duyck 	}
19490ddcf43dSAlexander Duyck 
19500ddcf43dSAlexander Duyck 	return local_tb;
19510ddcf43dSAlexander Duyck out:
19520ddcf43dSAlexander Duyck 	fib_trie_free(local_tb);
19530ddcf43dSAlexander Duyck 
19540ddcf43dSAlexander Duyck 	return NULL;
19550ddcf43dSAlexander Duyck }
19560ddcf43dSAlexander Duyck 
19573b709334SAlexander Duyck /* Caller must hold RTNL */
fib_table_flush_external(struct fib_table * tb)19583b709334SAlexander Duyck void fib_table_flush_external(struct fib_table *tb)
19593b709334SAlexander Duyck {
19603b709334SAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
19613b709334SAlexander Duyck 	struct key_vector *pn = t->kv;
19623b709334SAlexander Duyck 	unsigned long cindex = 1;
19633b709334SAlexander Duyck 	struct hlist_node *tmp;
19643b709334SAlexander Duyck 	struct fib_alias *fa;
19653b709334SAlexander Duyck 
19663b709334SAlexander Duyck 	/* walk trie in reverse order */
19673b709334SAlexander Duyck 	for (;;) {
19683b709334SAlexander Duyck 		unsigned char slen = 0;
19693b709334SAlexander Duyck 		struct key_vector *n;
19703b709334SAlexander Duyck 
19713b709334SAlexander Duyck 		if (!(cindex--)) {
19723b709334SAlexander Duyck 			t_key pkey = pn->key;
19733b709334SAlexander Duyck 
19743b709334SAlexander Duyck 			/* cannot resize the trie vector */
19753b709334SAlexander Duyck 			if (IS_TRIE(pn))
19763b709334SAlexander Duyck 				break;
19773b709334SAlexander Duyck 
1978a52ca62cSAlexander Duyck 			/* update the suffix to address pulled leaves */
1979a52ca62cSAlexander Duyck 			if (pn->slen > pn->pos)
1980a52ca62cSAlexander Duyck 				update_suffix(pn);
1981a52ca62cSAlexander Duyck 
19823b709334SAlexander Duyck 			/* resize completed node */
19833b709334SAlexander Duyck 			pn = resize(t, pn);
19843b709334SAlexander Duyck 			cindex = get_index(pkey, pn);
19853b709334SAlexander Duyck 
19863b709334SAlexander Duyck 			continue;
19873b709334SAlexander Duyck 		}
19883b709334SAlexander Duyck 
19893b709334SAlexander Duyck 		/* grab the next available node */
19903b709334SAlexander Duyck 		n = get_child(pn, cindex);
19913b709334SAlexander Duyck 		if (!n)
19923b709334SAlexander Duyck 			continue;
19933b709334SAlexander Duyck 
19943b709334SAlexander Duyck 		if (IS_TNODE(n)) {
19953b709334SAlexander Duyck 			/* record pn and cindex for leaf walking */
19963b709334SAlexander Duyck 			pn = n;
19973b709334SAlexander Duyck 			cindex = 1ul << n->bits;
19983b709334SAlexander Duyck 
19993b709334SAlexander Duyck 			continue;
20003b709334SAlexander Duyck 		}
20013b709334SAlexander Duyck 
20023b709334SAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
20033b709334SAlexander Duyck 			/* if alias was cloned to local then we just
20043b709334SAlexander Duyck 			 * need to remove the local copy from main
20053b709334SAlexander Duyck 			 */
20063b709334SAlexander Duyck 			if (tb->tb_id != fa->tb_id) {
20073b709334SAlexander Duyck 				hlist_del_rcu(&fa->fa_list);
20083b709334SAlexander Duyck 				alias_free_mem_rcu(fa);
20093b709334SAlexander Duyck 				continue;
20103b709334SAlexander Duyck 			}
20113b709334SAlexander Duyck 
20123b709334SAlexander Duyck 			/* record local slen */
20133b709334SAlexander Duyck 			slen = fa->fa_slen;
20143b709334SAlexander Duyck 		}
20153b709334SAlexander Duyck 
20163b709334SAlexander Duyck 		/* update leaf slen */
20173b709334SAlexander Duyck 		n->slen = slen;
20183b709334SAlexander Duyck 
20193b709334SAlexander Duyck 		if (hlist_empty(&n->leaf)) {
20203b709334SAlexander Duyck 			put_child_root(pn, n->key, NULL);
20213b709334SAlexander Duyck 			node_free(n);
20223b709334SAlexander Duyck 		}
20233b709334SAlexander Duyck 	}
20243b709334SAlexander Duyck }
20253b709334SAlexander Duyck 
20268be33e95SAlexander Duyck /* Caller must hold RTNL. */
fib_table_flush(struct net * net,struct fib_table * tb,bool flush_all)2027f97f4dd8SIdo Schimmel int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
202819baf839SRobert Olsson {
202919baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
2030*4b2b6060SHangbin Liu 	struct nl_info info = { .nl_net = net };
203188bae714SAlexander Duyck 	struct key_vector *pn = t->kv;
203288bae714SAlexander Duyck 	unsigned long cindex = 1;
20337289e6ddSAlexander Duyck 	struct hlist_node *tmp;
20347289e6ddSAlexander Duyck 	struct fib_alias *fa;
203582cfbb00SStephen Hemminger 	int found = 0;
203619baf839SRobert Olsson 
20377289e6ddSAlexander Duyck 	/* walk trie in reverse order */
203888bae714SAlexander Duyck 	for (;;) {
203988bae714SAlexander Duyck 		unsigned char slen = 0;
204088bae714SAlexander Duyck 		struct key_vector *n;
204188bae714SAlexander Duyck 
204288bae714SAlexander Duyck 		if (!(cindex--)) {
20437289e6ddSAlexander Duyck 			t_key pkey = pn->key;
20447289e6ddSAlexander Duyck 
204588bae714SAlexander Duyck 			/* cannot resize the trie vector */
204688bae714SAlexander Duyck 			if (IS_TRIE(pn))
204788bae714SAlexander Duyck 				break;
20487289e6ddSAlexander Duyck 
2049a52ca62cSAlexander Duyck 			/* update the suffix to address pulled leaves */
2050a52ca62cSAlexander Duyck 			if (pn->slen > pn->pos)
2051a52ca62cSAlexander Duyck 				update_suffix(pn);
2052a52ca62cSAlexander Duyck 
20537289e6ddSAlexander Duyck 			/* resize completed node */
205488bae714SAlexander Duyck 			pn = resize(t, pn);
20557289e6ddSAlexander Duyck 			cindex = get_index(pkey, pn);
205688bae714SAlexander Duyck 
205788bae714SAlexander Duyck 			continue;
205864c62723SAlexander Duyck 		}
205964c62723SAlexander Duyck 
20607289e6ddSAlexander Duyck 		/* grab the next available node */
2061754baf8dSAlexander Duyck 		n = get_child(pn, cindex);
206288bae714SAlexander Duyck 		if (!n)
206388bae714SAlexander Duyck 			continue;
206419baf839SRobert Olsson 
206588bae714SAlexander Duyck 		if (IS_TNODE(n)) {
206688bae714SAlexander Duyck 			/* record pn and cindex for leaf walking */
206788bae714SAlexander Duyck 			pn = n;
206888bae714SAlexander Duyck 			cindex = 1ul << n->bits;
206988bae714SAlexander Duyck 
207088bae714SAlexander Duyck 			continue;
207188bae714SAlexander Duyck 		}
20727289e6ddSAlexander Duyck 
20737289e6ddSAlexander Duyck 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
20747289e6ddSAlexander Duyck 			struct fib_info *fi = fa->fa_info;
20757289e6ddSAlexander Duyck 
2076f97f4dd8SIdo Schimmel 			if (!fi || tb->tb_id != fa->tb_id ||
2077f97f4dd8SIdo Schimmel 			    (!(fi->fib_flags & RTNH_F_DEAD) &&
2078f97f4dd8SIdo Schimmel 			     !fib_props[fa->fa_type].error)) {
2079f97f4dd8SIdo Schimmel 				slen = fa->fa_slen;
2080f97f4dd8SIdo Schimmel 				continue;
2081f97f4dd8SIdo Schimmel 			}
2082f97f4dd8SIdo Schimmel 
2083f97f4dd8SIdo Schimmel 			/* Do not flush error routes if network namespace is
2084f97f4dd8SIdo Schimmel 			 * not being dismantled
2085f97f4dd8SIdo Schimmel 			 */
2086f97f4dd8SIdo Schimmel 			if (!flush_all && fib_props[fa->fa_type].error) {
208788bae714SAlexander Duyck 				slen = fa->fa_slen;
208888bae714SAlexander Duyck 				continue;
208988bae714SAlexander Duyck 			}
209088bae714SAlexander Duyck 
2091525bc345SIdo Schimmel 			fib_notify_alias_delete(net, n->key, &n->leaf, fa,
2092525bc345SIdo Schimmel 						NULL);
2093*4b2b6060SHangbin Liu 			if (fi->pfsrc_removed)
2094*4b2b6060SHangbin Liu 				rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
2095*4b2b6060SHangbin Liu 					  KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
20967289e6ddSAlexander Duyck 			hlist_del_rcu(&fa->fa_list);
20977289e6ddSAlexander Duyck 			fib_release_info(fa->fa_info);
20987289e6ddSAlexander Duyck 			alias_free_mem_rcu(fa);
20997289e6ddSAlexander Duyck 			found++;
21007289e6ddSAlexander Duyck 		}
21017289e6ddSAlexander Duyck 
21027289e6ddSAlexander Duyck 		/* update leaf slen */
21037289e6ddSAlexander Duyck 		n->slen = slen;
21047289e6ddSAlexander Duyck 
21057289e6ddSAlexander Duyck 		if (hlist_empty(&n->leaf)) {
210688bae714SAlexander Duyck 			put_child_root(pn, n->key, NULL);
21077289e6ddSAlexander Duyck 			node_free(n);
21087289e6ddSAlexander Duyck 		}
210988bae714SAlexander Duyck 	}
21107289e6ddSAlexander Duyck 
21110c7770c7SStephen Hemminger 	pr_debug("trie_flush found=%d\n", found);
211219baf839SRobert Olsson 	return found;
211319baf839SRobert Olsson }
211419baf839SRobert Olsson 
21151bff1a0cSDavid Ahern /* derived from fib_trie_free */
__fib_info_notify_update(struct net * net,struct fib_table * tb,struct nl_info * info)21161bff1a0cSDavid Ahern static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
21171bff1a0cSDavid Ahern 				     struct nl_info *info)
21181bff1a0cSDavid Ahern {
21191bff1a0cSDavid Ahern 	struct trie *t = (struct trie *)tb->tb_data;
21201bff1a0cSDavid Ahern 	struct key_vector *pn = t->kv;
21211bff1a0cSDavid Ahern 	unsigned long cindex = 1;
21221bff1a0cSDavid Ahern 	struct fib_alias *fa;
21231bff1a0cSDavid Ahern 
21241bff1a0cSDavid Ahern 	for (;;) {
21251bff1a0cSDavid Ahern 		struct key_vector *n;
21261bff1a0cSDavid Ahern 
21271bff1a0cSDavid Ahern 		if (!(cindex--)) {
21281bff1a0cSDavid Ahern 			t_key pkey = pn->key;
21291bff1a0cSDavid Ahern 
21301bff1a0cSDavid Ahern 			if (IS_TRIE(pn))
21311bff1a0cSDavid Ahern 				break;
21321bff1a0cSDavid Ahern 
21331bff1a0cSDavid Ahern 			pn = node_parent(pn);
21341bff1a0cSDavid Ahern 			cindex = get_index(pkey, pn);
21351bff1a0cSDavid Ahern 			continue;
21361bff1a0cSDavid Ahern 		}
21371bff1a0cSDavid Ahern 
21381bff1a0cSDavid Ahern 		/* grab the next available node */
21391bff1a0cSDavid Ahern 		n = get_child(pn, cindex);
21401bff1a0cSDavid Ahern 		if (!n)
21411bff1a0cSDavid Ahern 			continue;
21421bff1a0cSDavid Ahern 
21431bff1a0cSDavid Ahern 		if (IS_TNODE(n)) {
21441bff1a0cSDavid Ahern 			/* record pn and cindex for leaf walking */
21451bff1a0cSDavid Ahern 			pn = n;
21461bff1a0cSDavid Ahern 			cindex = 1ul << n->bits;
21471bff1a0cSDavid Ahern 
21481bff1a0cSDavid Ahern 			continue;
21491bff1a0cSDavid Ahern 		}
21501bff1a0cSDavid Ahern 
21511bff1a0cSDavid Ahern 		hlist_for_each_entry(fa, &n->leaf, fa_list) {
21521bff1a0cSDavid Ahern 			struct fib_info *fi = fa->fa_info;
21531bff1a0cSDavid Ahern 
21541bff1a0cSDavid Ahern 			if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
21551bff1a0cSDavid Ahern 				continue;
21561bff1a0cSDavid Ahern 
21571bff1a0cSDavid Ahern 			rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
21581bff1a0cSDavid Ahern 				  KEYLENGTH - fa->fa_slen, tb->tb_id,
21591bff1a0cSDavid Ahern 				  info, NLM_F_REPLACE);
21601bff1a0cSDavid Ahern 		}
21611bff1a0cSDavid Ahern 	}
21621bff1a0cSDavid Ahern }
21631bff1a0cSDavid Ahern 
fib_info_notify_update(struct net * net,struct nl_info * info)21641bff1a0cSDavid Ahern void fib_info_notify_update(struct net *net, struct nl_info *info)
21651bff1a0cSDavid Ahern {
21661bff1a0cSDavid Ahern 	unsigned int h;
21671bff1a0cSDavid Ahern 
21681bff1a0cSDavid Ahern 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
21691bff1a0cSDavid Ahern 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
21701bff1a0cSDavid Ahern 		struct fib_table *tb;
21711bff1a0cSDavid Ahern 
21727f6f32bbSIdo Schimmel 		hlist_for_each_entry_rcu(tb, head, tb_hlist,
21737f6f32bbSIdo Schimmel 					 lockdep_rtnl_is_held())
21741bff1a0cSDavid Ahern 			__fib_info_notify_update(net, tb, info);
21751bff1a0cSDavid Ahern 	}
21761bff1a0cSDavid Ahern }
21771bff1a0cSDavid Ahern 
fib_leaf_notify(struct key_vector * l,struct fib_table * tb,struct notifier_block * nb,struct netlink_ext_ack * extack)217855c894f7SJiri Pirko static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2179b7a59557SJiri Pirko 			   struct notifier_block *nb,
2180b7a59557SJiri Pirko 			   struct netlink_ext_ack *extack)
2181c3852ef7SIdo Schimmel {
2182c3852ef7SIdo Schimmel 	struct fib_alias *fa;
218320d15652SIdo Schimmel 	int last_slen = -1;
218455c894f7SJiri Pirko 	int err;
2185c3852ef7SIdo Schimmel 
2186c3852ef7SIdo Schimmel 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2187c3852ef7SIdo Schimmel 		struct fib_info *fi = fa->fa_info;
2188c3852ef7SIdo Schimmel 
2189c3852ef7SIdo Schimmel 		if (!fi)
2190c3852ef7SIdo Schimmel 			continue;
2191c3852ef7SIdo Schimmel 
2192c3852ef7SIdo Schimmel 		/* local and main table can share the same trie,
2193c3852ef7SIdo Schimmel 		 * so don't notify twice for the same entry.
2194c3852ef7SIdo Schimmel 		 */
2195c3852ef7SIdo Schimmel 		if (tb->tb_id != fa->tb_id)
2196c3852ef7SIdo Schimmel 			continue;
2197c3852ef7SIdo Schimmel 
219820d15652SIdo Schimmel 		if (fa->fa_slen == last_slen)
219920d15652SIdo Schimmel 			continue;
220020d15652SIdo Schimmel 
220120d15652SIdo Schimmel 		last_slen = fa->fa_slen;
2202446f7391SIdo Schimmel 		err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
220320d15652SIdo Schimmel 					      l->key, KEYLENGTH - fa->fa_slen,
220420d15652SIdo Schimmel 					      fa, extack);
220520d15652SIdo Schimmel 		if (err)
220620d15652SIdo Schimmel 			return err;
2207c3852ef7SIdo Schimmel 	}
220855c894f7SJiri Pirko 	return 0;
2209c3852ef7SIdo Schimmel }
2210c3852ef7SIdo Schimmel 
fib_table_notify(struct fib_table * tb,struct notifier_block * nb,struct netlink_ext_ack * extack)2211b7a59557SJiri Pirko static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2212b7a59557SJiri Pirko 			    struct netlink_ext_ack *extack)
2213c3852ef7SIdo Schimmel {
2214c3852ef7SIdo Schimmel 	struct trie *t = (struct trie *)tb->tb_data;
2215c3852ef7SIdo Schimmel 	struct key_vector *l, *tp = t->kv;
2216c3852ef7SIdo Schimmel 	t_key key = 0;
221755c894f7SJiri Pirko 	int err;
2218c3852ef7SIdo Schimmel 
2219c3852ef7SIdo Schimmel 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2220b7a59557SJiri Pirko 		err = fib_leaf_notify(l, tb, nb, extack);
222155c894f7SJiri Pirko 		if (err)
222255c894f7SJiri Pirko 			return err;
2223c3852ef7SIdo Schimmel 
2224c3852ef7SIdo Schimmel 		key = l->key + 1;
2225c3852ef7SIdo Schimmel 		/* stop in case of wrap around */
2226c3852ef7SIdo Schimmel 		if (key < l->key)
2227c3852ef7SIdo Schimmel 			break;
2228c3852ef7SIdo Schimmel 	}
222955c894f7SJiri Pirko 	return 0;
2230c3852ef7SIdo Schimmel }
2231c3852ef7SIdo Schimmel 
fib_notify(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)2232b7a59557SJiri Pirko int fib_notify(struct net *net, struct notifier_block *nb,
2233b7a59557SJiri Pirko 	       struct netlink_ext_ack *extack)
2234c3852ef7SIdo Schimmel {
2235c3852ef7SIdo Schimmel 	unsigned int h;
223655c894f7SJiri Pirko 	int err;
2237c3852ef7SIdo Schimmel 
2238c3852ef7SIdo Schimmel 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2239c3852ef7SIdo Schimmel 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2240c3852ef7SIdo Schimmel 		struct fib_table *tb;
2241c3852ef7SIdo Schimmel 
224255c894f7SJiri Pirko 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2243b7a59557SJiri Pirko 			err = fib_table_notify(tb, nb, extack);
224455c894f7SJiri Pirko 			if (err)
224555c894f7SJiri Pirko 				return err;
2246c3852ef7SIdo Schimmel 		}
2247c3852ef7SIdo Schimmel 	}
224855c894f7SJiri Pirko 	return 0;
224955c894f7SJiri Pirko }
2250c3852ef7SIdo Schimmel 
__trie_free_rcu(struct rcu_head * head)2251a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head)
22524aa2c466SPavel Emelyanov {
2253a7e53531SAlexander Duyck 	struct fib_table *tb = container_of(head, struct fib_table, rcu);
22548274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
22558274a97aSAlexander Duyck 	struct trie *t = (struct trie *)tb->tb_data;
22568274a97aSAlexander Duyck 
22570ddcf43dSAlexander Duyck 	if (tb->tb_data == tb->__data)
22588274a97aSAlexander Duyck 		free_percpu(t->stats);
22598274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */
22604aa2c466SPavel Emelyanov 	kfree(tb);
22614aa2c466SPavel Emelyanov }
22624aa2c466SPavel Emelyanov 
fib_free_table(struct fib_table * tb)2263a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb)
2264a7e53531SAlexander Duyck {
2265a7e53531SAlexander Duyck 	call_rcu(&tb->rcu, __trie_free_rcu);
2266a7e53531SAlexander Duyck }
2267a7e53531SAlexander Duyck 
fn_trie_dump_leaf(struct key_vector * l,struct fib_table * tb,struct sk_buff * skb,struct netlink_callback * cb,struct fib_dump_filter * filter)226835c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
226918a8021aSDavid Ahern 			     struct sk_buff *skb, struct netlink_callback *cb,
227018a8021aSDavid Ahern 			     struct fib_dump_filter *filter)
227119baf839SRobert Olsson {
227218a8021aSDavid Ahern 	unsigned int flags = NLM_F_MULTI;
227379e5ad2cSAlexander Duyck 	__be32 xkey = htonl(l->key);
2274ee28906fSStefano Brivio 	int i, s_i, i_fa, s_fa, err;
227519baf839SRobert Olsson 	struct fib_alias *fa;
227619baf839SRobert Olsson 
2277ee28906fSStefano Brivio 	if (filter->filter_set ||
2278ee28906fSStefano Brivio 	    !filter->dump_exceptions || !filter->dump_routes)
227918a8021aSDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
228018a8021aSDavid Ahern 
228179e5ad2cSAlexander Duyck 	s_i = cb->args[4];
2282ee28906fSStefano Brivio 	s_fa = cb->args[5];
228319baf839SRobert Olsson 	i = 0;
228419baf839SRobert Olsson 
22852373ce1cSRobert Olsson 	/* rcu_read_lock is hold by caller */
228679e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2287ee28906fSStefano Brivio 		struct fib_info *fi = fa->fa_info;
2288f6c5775fSDavid Ahern 
228918a8021aSDavid Ahern 		if (i < s_i)
229018a8021aSDavid Ahern 			goto next;
229119baf839SRobert Olsson 
2292ee28906fSStefano Brivio 		i_fa = 0;
2293ee28906fSStefano Brivio 
229418a8021aSDavid Ahern 		if (tb->tb_id != fa->tb_id)
229518a8021aSDavid Ahern 			goto next;
229618a8021aSDavid Ahern 
229718a8021aSDavid Ahern 		if (filter->filter_set) {
229818a8021aSDavid Ahern 			if (filter->rt_type && fa->fa_type != filter->rt_type)
229918a8021aSDavid Ahern 				goto next;
230018a8021aSDavid Ahern 
230118a8021aSDavid Ahern 			if ((filter->protocol &&
2302ee28906fSStefano Brivio 			     fi->fib_protocol != filter->protocol))
230318a8021aSDavid Ahern 				goto next;
230418a8021aSDavid Ahern 
230518a8021aSDavid Ahern 			if (filter->dev &&
2306ee28906fSStefano Brivio 			    !fib_info_nh_uses_dev(fi, filter->dev))
230718a8021aSDavid Ahern 				goto next;
23080ddcf43dSAlexander Duyck 		}
23090ddcf43dSAlexander Duyck 
2310885b8b4dSStefano Brivio 		if (filter->dump_routes) {
2311885b8b4dSStefano Brivio 			if (!s_fa) {
23121e301fd0SIdo Schimmel 				struct fib_rt_info fri;
23131e301fd0SIdo Schimmel 
23141e301fd0SIdo Schimmel 				fri.fi = fi;
23151e301fd0SIdo Schimmel 				fri.tb_id = tb->tb_id;
23161e301fd0SIdo Schimmel 				fri.dst = xkey;
23171e301fd0SIdo Schimmel 				fri.dst_len = KEYLENGTH - fa->fa_slen;
2318888ade8fSGuillaume Nault 				fri.dscp = fa->fa_dscp;
23191e301fd0SIdo Schimmel 				fri.type = fa->fa_type;
23209fcf986cSEric Dumazet 				fri.offload = READ_ONCE(fa->offload);
23219fcf986cSEric Dumazet 				fri.trap = READ_ONCE(fa->trap);
23229fcf986cSEric Dumazet 				fri.offload_failed = READ_ONCE(fa->offload_failed);
2323885b8b4dSStefano Brivio 				err = fib_dump_info(skb,
2324885b8b4dSStefano Brivio 						    NETLINK_CB(cb->skb).portid,
2325885b8b4dSStefano Brivio 						    cb->nlh->nlmsg_seq,
23261e301fd0SIdo Schimmel 						    RTM_NEWROUTE, &fri, flags);
2327ee28906fSStefano Brivio 				if (err < 0)
2328ee28906fSStefano Brivio 					goto stop;
2329885b8b4dSStefano Brivio 			}
2330885b8b4dSStefano Brivio 
2331ee28906fSStefano Brivio 			i_fa++;
233219baf839SRobert Olsson 		}
2333ee28906fSStefano Brivio 
2334ee28906fSStefano Brivio 		if (filter->dump_exceptions) {
2335ee28906fSStefano Brivio 			err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2336e93fb3e9SJohn Fastabend 						 &i_fa, s_fa, flags);
2337ee28906fSStefano Brivio 			if (err < 0)
2338ee28906fSStefano Brivio 				goto stop;
2339ee28906fSStefano Brivio 		}
2340ee28906fSStefano Brivio 
234118a8021aSDavid Ahern next:
2342a88ee229SStephen Hemminger 		i++;
234319baf839SRobert Olsson 	}
2344a88ee229SStephen Hemminger 
234571d67e66SStephen Hemminger 	cb->args[4] = i;
234619baf839SRobert Olsson 	return skb->len;
2347ee28906fSStefano Brivio 
2348ee28906fSStefano Brivio stop:
2349ee28906fSStefano Brivio 	cb->args[4] = i;
2350ee28906fSStefano Brivio 	cb->args[5] = i_fa;
2351ee28906fSStefano Brivio 	return err;
235219baf839SRobert Olsson }
235319baf839SRobert Olsson 
2354a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */
fib_table_dump(struct fib_table * tb,struct sk_buff * skb,struct netlink_callback * cb,struct fib_dump_filter * filter)235516c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
235618a8021aSDavid Ahern 		   struct netlink_callback *cb, struct fib_dump_filter *filter)
235719baf839SRobert Olsson {
235819baf839SRobert Olsson 	struct trie *t = (struct trie *)tb->tb_data;
235988bae714SAlexander Duyck 	struct key_vector *l, *tp = t->kv;
2360d5ce8a0eSStephen Hemminger 	/* Dump starting at last key.
2361d5ce8a0eSStephen Hemminger 	 * Note: 0.0.0.0/0 (ie default) is first key.
2362d5ce8a0eSStephen Hemminger 	 */
23638be33e95SAlexander Duyck 	int count = cb->args[2];
23648be33e95SAlexander Duyck 	t_key key = cb->args[3];
2365a88ee229SStephen Hemminger 
23669827c063SDavid Ahern 	/* First time here, count and key are both always 0. Count > 0
23679827c063SDavid Ahern 	 * and key == 0 means the dump has wrapped around and we are done.
23689827c063SDavid Ahern 	 */
23699827c063SDavid Ahern 	if (count && !key)
23709827c063SDavid Ahern 		return skb->len;
23719827c063SDavid Ahern 
23728be33e95SAlexander Duyck 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2373f6c5775fSDavid Ahern 		int err;
2374f6c5775fSDavid Ahern 
237518a8021aSDavid Ahern 		err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2376f6c5775fSDavid Ahern 		if (err < 0) {
23778be33e95SAlexander Duyck 			cb->args[3] = key;
23788be33e95SAlexander Duyck 			cb->args[2] = count;
2379f6c5775fSDavid Ahern 			return err;
238019baf839SRobert Olsson 		}
2381d5ce8a0eSStephen Hemminger 
238271d67e66SStephen Hemminger 		++count;
23838be33e95SAlexander Duyck 		key = l->key + 1;
23848be33e95SAlexander Duyck 
238571d67e66SStephen Hemminger 		memset(&cb->args[4], 0,
238671d67e66SStephen Hemminger 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
23878be33e95SAlexander Duyck 
23888be33e95SAlexander Duyck 		/* stop loop if key wrapped back to 0 */
23898be33e95SAlexander Duyck 		if (key < l->key)
23908be33e95SAlexander Duyck 			break;
2391a88ee229SStephen Hemminger 	}
23928be33e95SAlexander Duyck 
23938be33e95SAlexander Duyck 	cb->args[3] = key;
23948be33e95SAlexander Duyck 	cb->args[2] = count;
23958be33e95SAlexander Duyck 
2396a88ee229SStephen Hemminger 	return skb->len;
2397a88ee229SStephen Hemminger }
239819baf839SRobert Olsson 
fib_trie_init(void)23995348ba85SDavid S. Miller void __init fib_trie_init(void)
24007f9b8052SStephen Hemminger {
2401a07f5f50SStephen Hemminger 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2402a07f5f50SStephen Hemminger 					  sizeof(struct fib_alias),
24036126891cSVasily Averin 					  0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
2404bc3c8c1eSStephen Hemminger 
2405bc3c8c1eSStephen Hemminger 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
240641b489fdSAlexander Duyck 					   LEAF_SIZE,
24076126891cSVasily Averin 					   0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
24087f9b8052SStephen Hemminger }
240919baf839SRobert Olsson 
fib_trie_table(u32 id,struct fib_table * alias)24100ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
241119baf839SRobert Olsson {
241219baf839SRobert Olsson 	struct fib_table *tb;
241319baf839SRobert Olsson 	struct trie *t;
24140ddcf43dSAlexander Duyck 	size_t sz = sizeof(*tb);
241519baf839SRobert Olsson 
24160ddcf43dSAlexander Duyck 	if (!alias)
24170ddcf43dSAlexander Duyck 		sz += sizeof(struct trie);
24180ddcf43dSAlexander Duyck 
24190ddcf43dSAlexander Duyck 	tb = kzalloc(sz, GFP_KERNEL);
242051456b29SIan Morris 	if (!tb)
242119baf839SRobert Olsson 		return NULL;
242219baf839SRobert Olsson 
242319baf839SRobert Olsson 	tb->tb_id = id;
242421d8c49eSDavid S. Miller 	tb->tb_num_default = 0;
24250ddcf43dSAlexander Duyck 	tb->tb_data = (alias ? alias->__data : tb->__data);
24260ddcf43dSAlexander Duyck 
24270ddcf43dSAlexander Duyck 	if (alias)
24280ddcf43dSAlexander Duyck 		return tb;
242919baf839SRobert Olsson 
243019baf839SRobert Olsson 	t = (struct trie *) tb->tb_data;
243188bae714SAlexander Duyck 	t->kv[0].pos = KEYLENGTH;
243288bae714SAlexander Duyck 	t->kv[0].slen = KEYLENGTH;
24338274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS
24348274a97aSAlexander Duyck 	t->stats = alloc_percpu(struct trie_use_stats);
24358274a97aSAlexander Duyck 	if (!t->stats) {
24368274a97aSAlexander Duyck 		kfree(tb);
24378274a97aSAlexander Duyck 		tb = NULL;
24388274a97aSAlexander Duyck 	}
24398274a97aSAlexander Duyck #endif
244019baf839SRobert Olsson 
244119baf839SRobert Olsson 	return tb;
244219baf839SRobert Olsson }
244319baf839SRobert Olsson 
2444cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS
2445cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */
2446cb7b593cSStephen Hemminger struct fib_trie_iter {
24471c340b2fSDenis V. Lunev 	struct seq_net_private p;
24483d3b2d25SStephen Hemminger 	struct fib_table *tb;
244935c6edacSAlexander Duyck 	struct key_vector *tnode;
2450a034ee3cSEric Dumazet 	unsigned int index;
2451a034ee3cSEric Dumazet 	unsigned int depth;
2452cb7b593cSStephen Hemminger };
245319baf839SRobert Olsson 
fib_trie_get_next(struct fib_trie_iter * iter)245435c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
245519baf839SRobert Olsson {
245698293e8dSAlexander Duyck 	unsigned long cindex = iter->index;
245788bae714SAlexander Duyck 	struct key_vector *pn = iter->tnode;
245888bae714SAlexander Duyck 	t_key pkey;
24596640e697SEric W. Biederman 
2460cb7b593cSStephen Hemminger 	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2461cb7b593cSStephen Hemminger 		 iter->tnode, iter->index, iter->depth);
246219baf839SRobert Olsson 
246388bae714SAlexander Duyck 	while (!IS_TRIE(pn)) {
246488bae714SAlexander Duyck 		while (cindex < child_length(pn)) {
246588bae714SAlexander Duyck 			struct key_vector *n = get_child_rcu(pn, cindex++);
246688bae714SAlexander Duyck 
246788bae714SAlexander Duyck 			if (!n)
246888bae714SAlexander Duyck 				continue;
246988bae714SAlexander Duyck 
247019baf839SRobert Olsson 			if (IS_LEAF(n)) {
247188bae714SAlexander Duyck 				iter->tnode = pn;
247288bae714SAlexander Duyck 				iter->index = cindex;
247391b9a277SOlof Johansson 			} else {
2474cb7b593cSStephen Hemminger 				/* push down one level */
2475adaf9816SAlexander Duyck 				iter->tnode = n;
2476cb7b593cSStephen Hemminger 				iter->index = 0;
2477cb7b593cSStephen Hemminger 				++iter->depth;
247819baf839SRobert Olsson 			}
247988bae714SAlexander Duyck 
2480cb7b593cSStephen Hemminger 			return n;
248119baf839SRobert Olsson 		}
248219baf839SRobert Olsson 
2483cb7b593cSStephen Hemminger 		/* Current node exhausted, pop back up */
248488bae714SAlexander Duyck 		pkey = pn->key;
248588bae714SAlexander Duyck 		pn = node_parent_rcu(pn);
248688bae714SAlexander Duyck 		cindex = get_index(pkey, pn) + 1;
2487cb7b593cSStephen Hemminger 		--iter->depth;
2488cb7b593cSStephen Hemminger 	}
2489cb7b593cSStephen Hemminger 
249088bae714SAlexander Duyck 	/* record root node so further searches know we are done */
249188bae714SAlexander Duyck 	iter->tnode = pn;
249288bae714SAlexander Duyck 	iter->index = 0;
249388bae714SAlexander Duyck 
2494cb7b593cSStephen Hemminger 	return NULL;
2495cb7b593cSStephen Hemminger }
2496cb7b593cSStephen Hemminger 
fib_trie_get_first(struct fib_trie_iter * iter,struct trie * t)249735c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2498cb7b593cSStephen Hemminger 					     struct trie *t)
2499cb7b593cSStephen Hemminger {
2500f38b24c9SFiro Yang 	struct key_vector *n, *pn;
25015ddf0eb2SRobert Olsson 
25025ddf0eb2SRobert Olsson 	if (!t)
25035ddf0eb2SRobert Olsson 		return NULL;
25045ddf0eb2SRobert Olsson 
2505f38b24c9SFiro Yang 	pn = t->kv;
250688bae714SAlexander Duyck 	n = rcu_dereference(pn->tnode[0]);
25073d3b2d25SStephen Hemminger 	if (!n)
25085ddf0eb2SRobert Olsson 		return NULL;
2509cb7b593cSStephen Hemminger 
25106640e697SEric W. Biederman 	if (IS_TNODE(n)) {
2511adaf9816SAlexander Duyck 		iter->tnode = n;
2512cb7b593cSStephen Hemminger 		iter->index = 0;
25131d25cd6cSRobert Olsson 		iter->depth = 1;
25146640e697SEric W. Biederman 	} else {
251588bae714SAlexander Duyck 		iter->tnode = pn;
25166640e697SEric W. Biederman 		iter->index = 0;
25176640e697SEric W. Biederman 		iter->depth = 0;
25186640e697SEric W. Biederman 	}
25193d3b2d25SStephen Hemminger 
2520cb7b593cSStephen Hemminger 	return n;
2521cb7b593cSStephen Hemminger }
2522cb7b593cSStephen Hemminger 
trie_collect_stats(struct trie * t,struct trie_stat * s)2523cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s)
252419baf839SRobert Olsson {
252535c6edacSAlexander Duyck 	struct key_vector *n;
2526cb7b593cSStephen Hemminger 	struct fib_trie_iter iter;
2527cb7b593cSStephen Hemminger 
2528cb7b593cSStephen Hemminger 	memset(s, 0, sizeof(*s));
252919baf839SRobert Olsson 
25302373ce1cSRobert Olsson 	rcu_read_lock();
25313d3b2d25SStephen Hemminger 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2532cb7b593cSStephen Hemminger 		if (IS_LEAF(n)) {
253379e5ad2cSAlexander Duyck 			struct fib_alias *fa;
253493672292SStephen Hemminger 
253519baf839SRobert Olsson 			s->leaves++;
2536cb7b593cSStephen Hemminger 			s->totdepth += iter.depth;
2537cb7b593cSStephen Hemminger 			if (iter.depth > s->maxdepth)
2538cb7b593cSStephen Hemminger 				s->maxdepth = iter.depth;
253993672292SStephen Hemminger 
254079e5ad2cSAlexander Duyck 			hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
254193672292SStephen Hemminger 				++s->prefixes;
254291b9a277SOlof Johansson 		} else {
254319baf839SRobert Olsson 			s->tnodes++;
2544adaf9816SAlexander Duyck 			if (n->bits < MAX_STAT_DEPTH)
2545adaf9816SAlexander Duyck 				s->nodesizes[n->bits]++;
25466e22d174SAlexander Duyck 			s->nullpointers += tn_info(n)->empty_children;
254719baf839SRobert Olsson 		}
254898293e8dSAlexander Duyck 	}
25492373ce1cSRobert Olsson 	rcu_read_unlock();
255019baf839SRobert Olsson }
255119baf839SRobert Olsson 
255219baf839SRobert Olsson /*
255319baf839SRobert Olsson  *	This outputs /proc/net/fib_triestats
255419baf839SRobert Olsson  */
trie_show_stats(struct seq_file * seq,struct trie_stat * stat)2555cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
255619baf839SRobert Olsson {
2557a034ee3cSEric Dumazet 	unsigned int i, max, pointers, bytes, avdepth;
255819baf839SRobert Olsson 
255919baf839SRobert Olsson 	if (stat->leaves)
256019baf839SRobert Olsson 		avdepth = stat->totdepth*100 / stat->leaves;
256119baf839SRobert Olsson 	else
256219baf839SRobert Olsson 		avdepth = 0;
256319baf839SRobert Olsson 
2564a07f5f50SStephen Hemminger 	seq_printf(seq, "\tAver depth:     %u.%02d\n",
2565a07f5f50SStephen Hemminger 		   avdepth / 100, avdepth % 100);
2566cb7b593cSStephen Hemminger 	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
2567cb7b593cSStephen Hemminger 
2568cb7b593cSStephen Hemminger 	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
256941b489fdSAlexander Duyck 	bytes = LEAF_SIZE * stat->leaves;
257093672292SStephen Hemminger 
257193672292SStephen Hemminger 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
257279e5ad2cSAlexander Duyck 	bytes += sizeof(struct fib_alias) * stat->prefixes;
257393672292SStephen Hemminger 
2574187b5188SStephen Hemminger 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
257541b489fdSAlexander Duyck 	bytes += TNODE_SIZE(0) * stat->tnodes;
257619baf839SRobert Olsson 
257706ef921dSRobert Olsson 	max = MAX_STAT_DEPTH;
257806ef921dSRobert Olsson 	while (max > 0 && stat->nodesizes[max-1] == 0)
257919baf839SRobert Olsson 		max--;
258019baf839SRobert Olsson 
2581cb7b593cSStephen Hemminger 	pointers = 0;
2582f585a991SJerry Snitselaar 	for (i = 1; i < max; i++)
258319baf839SRobert Olsson 		if (stat->nodesizes[i] != 0) {
2584187b5188SStephen Hemminger 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
258519baf839SRobert Olsson 			pointers += (1<<i) * stat->nodesizes[i];
258619baf839SRobert Olsson 		}
2587cb7b593cSStephen Hemminger 	seq_putc(seq, '\n');
2588187b5188SStephen Hemminger 	seq_printf(seq, "\tPointers: %u\n", pointers);
2589cb7b593cSStephen Hemminger 
259035c6edacSAlexander Duyck 	bytes += sizeof(struct key_vector *) * pointers;
2591187b5188SStephen Hemminger 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2592187b5188SStephen Hemminger 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
259366a2f7fdSStephen Hemminger }
259419baf839SRobert Olsson 
259519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS
trie_show_usage(struct seq_file * seq,const struct trie_use_stats __percpu * stats)259666a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq,
25978274a97aSAlexander Duyck 			    const struct trie_use_stats __percpu *stats)
259866a2f7fdSStephen Hemminger {
25998274a97aSAlexander Duyck 	struct trie_use_stats s = { 0 };
26008274a97aSAlexander Duyck 	int cpu;
26018274a97aSAlexander Duyck 
26028274a97aSAlexander Duyck 	/* loop through all of the CPUs and gather up the stats */
26038274a97aSAlexander Duyck 	for_each_possible_cpu(cpu) {
26048274a97aSAlexander Duyck 		const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
26058274a97aSAlexander Duyck 
26068274a97aSAlexander Duyck 		s.gets += pcpu->gets;
26078274a97aSAlexander Duyck 		s.backtrack += pcpu->backtrack;
26088274a97aSAlexander Duyck 		s.semantic_match_passed += pcpu->semantic_match_passed;
26098274a97aSAlexander Duyck 		s.semantic_match_miss += pcpu->semantic_match_miss;
26108274a97aSAlexander Duyck 		s.null_node_hit += pcpu->null_node_hit;
26118274a97aSAlexander Duyck 		s.resize_node_skipped += pcpu->resize_node_skipped;
26128274a97aSAlexander Duyck 	}
26138274a97aSAlexander Duyck 
261466a2f7fdSStephen Hemminger 	seq_printf(seq, "\nCounters:\n---------\n");
26158274a97aSAlexander Duyck 	seq_printf(seq, "gets = %u\n", s.gets);
26168274a97aSAlexander Duyck 	seq_printf(seq, "backtracks = %u\n", s.backtrack);
2617a07f5f50SStephen Hemminger 	seq_printf(seq, "semantic match passed = %u\n",
26188274a97aSAlexander Duyck 		   s.semantic_match_passed);
26198274a97aSAlexander Duyck 	seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
26208274a97aSAlexander Duyck 	seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
26218274a97aSAlexander Duyck 	seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
262219baf839SRobert Olsson }
262366a2f7fdSStephen Hemminger #endif /*  CONFIG_IP_FIB_TRIE_STATS */
262466a2f7fdSStephen Hemminger 
fib_table_print(struct seq_file * seq,struct fib_table * tb)26253d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2626d717a9a6SStephen Hemminger {
26273d3b2d25SStephen Hemminger 	if (tb->tb_id == RT_TABLE_LOCAL)
26283d3b2d25SStephen Hemminger 		seq_puts(seq, "Local:\n");
26293d3b2d25SStephen Hemminger 	else if (tb->tb_id == RT_TABLE_MAIN)
26303d3b2d25SStephen Hemminger 		seq_puts(seq, "Main:\n");
26313d3b2d25SStephen Hemminger 	else
26323d3b2d25SStephen Hemminger 		seq_printf(seq, "Id %d:\n", tb->tb_id);
2633d717a9a6SStephen Hemminger }
263419baf839SRobert Olsson 
26353d3b2d25SStephen Hemminger 
fib_triestat_seq_show(struct seq_file * seq,void * v)263619baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v)
263719baf839SRobert Olsson {
26382e47eeceSYu Zhe 	struct net *net = seq->private;
26393d3b2d25SStephen Hemminger 	unsigned int h;
2640877a9bffSEric W. Biederman 
2641d717a9a6SStephen Hemminger 	seq_printf(seq,
2642a07f5f50SStephen Hemminger 		   "Basic info: size of leaf:"
26435b5e0928SAlexey Dobriyan 		   " %zd bytes, size of tnode: %zd bytes.\n",
264441b489fdSAlexander Duyck 		   LEAF_SIZE, TNODE_SIZE(0));
264519baf839SRobert Olsson 
2646fbe4e0c1SQian Cai 	rcu_read_lock();
26473d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
26483d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
26493d3b2d25SStephen Hemminger 		struct fib_table *tb;
2650cb7b593cSStephen Hemminger 
2651b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
26523d3b2d25SStephen Hemminger 			struct trie *t = (struct trie *) tb->tb_data;
26533d3b2d25SStephen Hemminger 			struct trie_stat stat;
26543d3b2d25SStephen Hemminger 
26553d3b2d25SStephen Hemminger 			if (!t)
26563d3b2d25SStephen Hemminger 				continue;
26573d3b2d25SStephen Hemminger 
26583d3b2d25SStephen Hemminger 			fib_table_print(seq, tb);
26593d3b2d25SStephen Hemminger 
26603d3b2d25SStephen Hemminger 			trie_collect_stats(t, &stat);
26613d3b2d25SStephen Hemminger 			trie_show_stats(seq, &stat);
26623d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS
26638274a97aSAlexander Duyck 			trie_show_usage(seq, t->stats);
26643d3b2d25SStephen Hemminger #endif
26653d3b2d25SStephen Hemminger 		}
2666fbe4e0c1SQian Cai 		cond_resched_rcu();
26673d3b2d25SStephen Hemminger 	}
2668fbe4e0c1SQian Cai 	rcu_read_unlock();
2669cb7b593cSStephen Hemminger 
267019baf839SRobert Olsson 	return 0;
267119baf839SRobert Olsson }
267219baf839SRobert Olsson 
fib_trie_get_idx(struct seq_file * seq,loff_t pos)267335c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
267419baf839SRobert Olsson {
26751218854aSYOSHIFUJI Hideaki 	struct fib_trie_iter *iter = seq->private;
26761218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
2677cb7b593cSStephen Hemminger 	loff_t idx = 0;
26783d3b2d25SStephen Hemminger 	unsigned int h;
26793d3b2d25SStephen Hemminger 
26803d3b2d25SStephen Hemminger 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
26813d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
26823d3b2d25SStephen Hemminger 		struct fib_table *tb;
26833d3b2d25SStephen Hemminger 
2684b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
268535c6edacSAlexander Duyck 			struct key_vector *n;
2686cb7b593cSStephen Hemminger 
26873d3b2d25SStephen Hemminger 			for (n = fib_trie_get_first(iter,
26883d3b2d25SStephen Hemminger 						    (struct trie *) tb->tb_data);
26893d3b2d25SStephen Hemminger 			     n; n = fib_trie_get_next(iter))
26903d3b2d25SStephen Hemminger 				if (pos == idx++) {
26913d3b2d25SStephen Hemminger 					iter->tb = tb;
2692cb7b593cSStephen Hemminger 					return n;
269319baf839SRobert Olsson 				}
26943d3b2d25SStephen Hemminger 		}
26953d3b2d25SStephen Hemminger 	}
269619baf839SRobert Olsson 
269719baf839SRobert Olsson 	return NULL;
269819baf839SRobert Olsson }
269919baf839SRobert Olsson 
fib_trie_seq_start(struct seq_file * seq,loff_t * pos)270019baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2701c95aaf9aSStephen Hemminger 	__acquires(RCU)
270219baf839SRobert Olsson {
2703cb7b593cSStephen Hemminger 	rcu_read_lock();
27041218854aSYOSHIFUJI Hideaki 	return fib_trie_get_idx(seq, *pos);
270519baf839SRobert Olsson }
270619baf839SRobert Olsson 
fib_trie_seq_next(struct seq_file * seq,void * v,loff_t * pos)270719baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
270819baf839SRobert Olsson {
2709cb7b593cSStephen Hemminger 	struct fib_trie_iter *iter = seq->private;
27101218854aSYOSHIFUJI Hideaki 	struct net *net = seq_file_net(seq);
27113d3b2d25SStephen Hemminger 	struct fib_table *tb = iter->tb;
27123d3b2d25SStephen Hemminger 	struct hlist_node *tb_node;
27133d3b2d25SStephen Hemminger 	unsigned int h;
271435c6edacSAlexander Duyck 	struct key_vector *n;
2715cb7b593cSStephen Hemminger 
271619baf839SRobert Olsson 	++*pos;
27173d3b2d25SStephen Hemminger 	/* next node in same table */
27183d3b2d25SStephen Hemminger 	n = fib_trie_get_next(iter);
27193d3b2d25SStephen Hemminger 	if (n)
27203d3b2d25SStephen Hemminger 		return n;
272191b9a277SOlof Johansson 
27223d3b2d25SStephen Hemminger 	/* walk rest of this hash chain */
27233d3b2d25SStephen Hemminger 	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
27240a5c0475SEric Dumazet 	while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
27253d3b2d25SStephen Hemminger 		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
27263d3b2d25SStephen Hemminger 		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
27273d3b2d25SStephen Hemminger 		if (n)
27283d3b2d25SStephen Hemminger 			goto found;
27293d3b2d25SStephen Hemminger 	}
2730cb7b593cSStephen Hemminger 
27313d3b2d25SStephen Hemminger 	/* new hash chain */
27323d3b2d25SStephen Hemminger 	while (++h < FIB_TABLE_HASHSZ) {
27333d3b2d25SStephen Hemminger 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2734b67bfe0dSSasha Levin 		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
27353d3b2d25SStephen Hemminger 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
27363d3b2d25SStephen Hemminger 			if (n)
27373d3b2d25SStephen Hemminger 				goto found;
27383d3b2d25SStephen Hemminger 		}
27393d3b2d25SStephen Hemminger 	}
2740cb7b593cSStephen Hemminger 	return NULL;
27413d3b2d25SStephen Hemminger 
27423d3b2d25SStephen Hemminger found:
27433d3b2d25SStephen Hemminger 	iter->tb = tb;
27443d3b2d25SStephen Hemminger 	return n;
274519baf839SRobert Olsson }
274619baf839SRobert Olsson 
fib_trie_seq_stop(struct seq_file * seq,void * v)274719baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2748c95aaf9aSStephen Hemminger 	__releases(RCU)
274919baf839SRobert Olsson {
2750cb7b593cSStephen Hemminger 	rcu_read_unlock();
275119baf839SRobert Olsson }
275219baf839SRobert Olsson 
seq_indent(struct seq_file * seq,int n)2753cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n)
2754cb7b593cSStephen Hemminger {
2755a034ee3cSEric Dumazet 	while (n-- > 0)
2756a034ee3cSEric Dumazet 		seq_puts(seq, "   ");
2757cb7b593cSStephen Hemminger }
275819baf839SRobert Olsson 
rtn_scope(char * buf,size_t len,enum rt_scope_t s)275928d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2760cb7b593cSStephen Hemminger {
2761cb7b593cSStephen Hemminger 	switch (s) {
2762cb7b593cSStephen Hemminger 	case RT_SCOPE_UNIVERSE: return "universe";
2763cb7b593cSStephen Hemminger 	case RT_SCOPE_SITE:	return "site";
2764cb7b593cSStephen Hemminger 	case RT_SCOPE_LINK:	return "link";
2765cb7b593cSStephen Hemminger 	case RT_SCOPE_HOST:	return "host";
2766cb7b593cSStephen Hemminger 	case RT_SCOPE_NOWHERE:	return "nowhere";
2767cb7b593cSStephen Hemminger 	default:
276828d36e37SEric Dumazet 		snprintf(buf, len, "scope=%d", s);
2769cb7b593cSStephen Hemminger 		return buf;
2770cb7b593cSStephen Hemminger 	}
2771cb7b593cSStephen Hemminger }
2772cb7b593cSStephen Hemminger 
277336cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = {
2774cb7b593cSStephen Hemminger 	[RTN_UNSPEC] = "UNSPEC",
2775cb7b593cSStephen Hemminger 	[RTN_UNICAST] = "UNICAST",
2776cb7b593cSStephen Hemminger 	[RTN_LOCAL] = "LOCAL",
2777cb7b593cSStephen Hemminger 	[RTN_BROADCAST] = "BROADCAST",
2778cb7b593cSStephen Hemminger 	[RTN_ANYCAST] = "ANYCAST",
2779cb7b593cSStephen Hemminger 	[RTN_MULTICAST] = "MULTICAST",
2780cb7b593cSStephen Hemminger 	[RTN_BLACKHOLE] = "BLACKHOLE",
2781cb7b593cSStephen Hemminger 	[RTN_UNREACHABLE] = "UNREACHABLE",
2782cb7b593cSStephen Hemminger 	[RTN_PROHIBIT] = "PROHIBIT",
2783cb7b593cSStephen Hemminger 	[RTN_THROW] = "THROW",
2784cb7b593cSStephen Hemminger 	[RTN_NAT] = "NAT",
2785cb7b593cSStephen Hemminger 	[RTN_XRESOLVE] = "XRESOLVE",
2786cb7b593cSStephen Hemminger };
2787cb7b593cSStephen Hemminger 
rtn_type(char * buf,size_t len,unsigned int t)2788a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2789cb7b593cSStephen Hemminger {
2790cb7b593cSStephen Hemminger 	if (t < __RTN_MAX && rtn_type_names[t])
2791cb7b593cSStephen Hemminger 		return rtn_type_names[t];
279228d36e37SEric Dumazet 	snprintf(buf, len, "type %u", t);
2793cb7b593cSStephen Hemminger 	return buf;
2794cb7b593cSStephen Hemminger }
2795cb7b593cSStephen Hemminger 
2796cb7b593cSStephen Hemminger /* Pretty print the trie */
fib_trie_seq_show(struct seq_file * seq,void * v)279719baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v)
279819baf839SRobert Olsson {
2799cb7b593cSStephen Hemminger 	const struct fib_trie_iter *iter = seq->private;
280035c6edacSAlexander Duyck 	struct key_vector *n = v;
280119baf839SRobert Olsson 
280288bae714SAlexander Duyck 	if (IS_TRIE(node_parent_rcu(n)))
28033d3b2d25SStephen Hemminger 		fib_table_print(seq, iter->tb);
2804095b8501SRobert Olsson 
2805095b8501SRobert Olsson 	if (IS_TNODE(n)) {
2806adaf9816SAlexander Duyck 		__be32 prf = htonl(n->key);
2807095b8501SRobert Olsson 
28081d25cd6cSRobert Olsson 		seq_indent(seq, iter->depth-1);
2809e9b44019SAlexander Duyck 		seq_printf(seq, "  +-- %pI4/%zu %u %u %u\n",
2810e9b44019SAlexander Duyck 			   &prf, KEYLENGTH - n->pos - n->bits, n->bits,
28116e22d174SAlexander Duyck 			   tn_info(n)->full_children,
28126e22d174SAlexander Duyck 			   tn_info(n)->empty_children);
2813cb7b593cSStephen Hemminger 	} else {
2814adaf9816SAlexander Duyck 		__be32 val = htonl(n->key);
281579e5ad2cSAlexander Duyck 		struct fib_alias *fa;
2816cb7b593cSStephen Hemminger 
2817cb7b593cSStephen Hemminger 		seq_indent(seq, iter->depth);
2818673d57e7SHarvey Harrison 		seq_printf(seq, "  |-- %pI4\n", &val);
281928d36e37SEric Dumazet 
282079e5ad2cSAlexander Duyck 		hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
282128d36e37SEric Dumazet 			char buf1[32], buf2[32];
282228d36e37SEric Dumazet 
2823cb7b593cSStephen Hemminger 			seq_indent(seq, iter->depth + 1);
28245786ec60SAlexander Duyck 			seq_printf(seq, "  /%zu %s %s",
28259b6ebad5SAlexander Duyck 				   KEYLENGTH - fa->fa_slen,
282628d36e37SEric Dumazet 				   rtn_scope(buf1, sizeof(buf1),
282737e826c5SDavid S. Miller 					     fa->fa_info->fib_scope),
282828d36e37SEric Dumazet 				   rtn_type(buf2, sizeof(buf2),
282928d36e37SEric Dumazet 					    fa->fa_type));
283032ccf110SGuillaume Nault 			if (fa->fa_dscp)
283132ccf110SGuillaume Nault 				seq_printf(seq, " tos=%d",
283232ccf110SGuillaume Nault 					   inet_dscp_to_dsfield(fa->fa_dscp));
2833cb7b593cSStephen Hemminger 			seq_putc(seq, '\n');
2834cb7b593cSStephen Hemminger 		}
2835cb7b593cSStephen Hemminger 	}
283619baf839SRobert Olsson 
283719baf839SRobert Olsson 	return 0;
283819baf839SRobert Olsson }
283919baf839SRobert Olsson 
2840f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = {
284119baf839SRobert Olsson 	.start  = fib_trie_seq_start,
284219baf839SRobert Olsson 	.next   = fib_trie_seq_next,
284319baf839SRobert Olsson 	.stop   = fib_trie_seq_stop,
284419baf839SRobert Olsson 	.show   = fib_trie_seq_show,
284519baf839SRobert Olsson };
284619baf839SRobert Olsson 
28478315f5d8SStephen Hemminger struct fib_route_iter {
28488315f5d8SStephen Hemminger 	struct seq_net_private p;
28498be33e95SAlexander Duyck 	struct fib_table *main_tb;
285035c6edacSAlexander Duyck 	struct key_vector *tnode;
28518315f5d8SStephen Hemminger 	loff_t	pos;
28528315f5d8SStephen Hemminger 	t_key	key;
28538315f5d8SStephen Hemminger };
28548315f5d8SStephen Hemminger 
fib_route_get_idx(struct fib_route_iter * iter,loff_t pos)285535c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
285635c6edacSAlexander Duyck 					    loff_t pos)
28578315f5d8SStephen Hemminger {
285835c6edacSAlexander Duyck 	struct key_vector *l, **tp = &iter->tnode;
28598be33e95SAlexander Duyck 	t_key key;
28608315f5d8SStephen Hemminger 
2861fd0285a3SAlexander Duyck 	/* use cached location of previously found key */
28628be33e95SAlexander Duyck 	if (iter->pos > 0 && pos >= iter->pos) {
28638be33e95SAlexander Duyck 		key = iter->key;
28648be33e95SAlexander Duyck 	} else {
2865fd0285a3SAlexander Duyck 		iter->pos = 1;
28668be33e95SAlexander Duyck 		key = 0;
28678315f5d8SStephen Hemminger 	}
28688315f5d8SStephen Hemminger 
2869fd0285a3SAlexander Duyck 	pos -= iter->pos;
2870fd0285a3SAlexander Duyck 
2871fd0285a3SAlexander Duyck 	while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
28728be33e95SAlexander Duyck 		key = l->key + 1;
28738315f5d8SStephen Hemminger 		iter->pos++;
28748be33e95SAlexander Duyck 		l = NULL;
28758be33e95SAlexander Duyck 
28768be33e95SAlexander Duyck 		/* handle unlikely case of a key wrap */
28778be33e95SAlexander Duyck 		if (!key)
28788be33e95SAlexander Duyck 			break;
28798315f5d8SStephen Hemminger 	}
28808315f5d8SStephen Hemminger 
28818315f5d8SStephen Hemminger 	if (l)
2882fd0285a3SAlexander Duyck 		iter->key = l->key;	/* remember it */
28838315f5d8SStephen Hemminger 	else
28848315f5d8SStephen Hemminger 		iter->pos = 0;		/* forget it */
28858315f5d8SStephen Hemminger 
28868315f5d8SStephen Hemminger 	return l;
28878315f5d8SStephen Hemminger }
28888315f5d8SStephen Hemminger 
fib_route_seq_start(struct seq_file * seq,loff_t * pos)28898315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
28908315f5d8SStephen Hemminger 	__acquires(RCU)
28918315f5d8SStephen Hemminger {
28928315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
28938315f5d8SStephen Hemminger 	struct fib_table *tb;
28948be33e95SAlexander Duyck 	struct trie *t;
28958315f5d8SStephen Hemminger 
28968315f5d8SStephen Hemminger 	rcu_read_lock();
28978be33e95SAlexander Duyck 
28981218854aSYOSHIFUJI Hideaki 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
28998315f5d8SStephen Hemminger 	if (!tb)
29008315f5d8SStephen Hemminger 		return NULL;
29018315f5d8SStephen Hemminger 
29028be33e95SAlexander Duyck 	iter->main_tb = tb;
290394d9f1c5SDavid Forster 	t = (struct trie *)tb->tb_data;
290494d9f1c5SDavid Forster 	iter->tnode = t->kv;
29058be33e95SAlexander Duyck 
29068be33e95SAlexander Duyck 	if (*pos != 0)
29078be33e95SAlexander Duyck 		return fib_route_get_idx(iter, *pos);
29088be33e95SAlexander Duyck 
29098be33e95SAlexander Duyck 	iter->pos = 0;
2910fd0285a3SAlexander Duyck 	iter->key = KEY_MAX;
29118be33e95SAlexander Duyck 
29128315f5d8SStephen Hemminger 	return SEQ_START_TOKEN;
29138315f5d8SStephen Hemminger }
29148315f5d8SStephen Hemminger 
fib_route_seq_next(struct seq_file * seq,void * v,loff_t * pos)29158315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
29168315f5d8SStephen Hemminger {
29178315f5d8SStephen Hemminger 	struct fib_route_iter *iter = seq->private;
291835c6edacSAlexander Duyck 	struct key_vector *l = NULL;
2919fd0285a3SAlexander Duyck 	t_key key = iter->key + 1;
29208315f5d8SStephen Hemminger 
29218315f5d8SStephen Hemminger 	++*pos;
29228be33e95SAlexander Duyck 
29238be33e95SAlexander Duyck 	/* only allow key of 0 for start of sequence */
29248be33e95SAlexander Duyck 	if ((v == SEQ_START_TOKEN) || key)
29258be33e95SAlexander Duyck 		l = leaf_walk_rcu(&iter->tnode, key);
29268be33e95SAlexander Duyck 
29278be33e95SAlexander Duyck 	if (l) {
2928fd0285a3SAlexander Duyck 		iter->key = l->key;
29298315f5d8SStephen Hemminger 		iter->pos++;
29308be33e95SAlexander Duyck 	} else {
29318be33e95SAlexander Duyck 		iter->pos = 0;
29328315f5d8SStephen Hemminger 	}
29338315f5d8SStephen Hemminger 
29348315f5d8SStephen Hemminger 	return l;
29358315f5d8SStephen Hemminger }
29368315f5d8SStephen Hemminger 
fib_route_seq_stop(struct seq_file * seq,void * v)29378315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v)
29388315f5d8SStephen Hemminger 	__releases(RCU)
29398315f5d8SStephen Hemminger {
29408315f5d8SStephen Hemminger 	rcu_read_unlock();
29418315f5d8SStephen Hemminger }
29428315f5d8SStephen Hemminger 
fib_flag_trans(int type,__be32 mask,struct fib_info * fi)29435481d73fSDavid Ahern static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2944cb7b593cSStephen Hemminger {
2945a034ee3cSEric Dumazet 	unsigned int flags = 0;
2946cb7b593cSStephen Hemminger 
2947a034ee3cSEric Dumazet 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2948a034ee3cSEric Dumazet 		flags = RTF_REJECT;
29495481d73fSDavid Ahern 	if (fi) {
2950dcb1ecb5SDavid Ahern 		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
29515481d73fSDavid Ahern 
2952dcb1ecb5SDavid Ahern 		if (nhc->nhc_gw.ipv4)
2953cb7b593cSStephen Hemminger 			flags |= RTF_GATEWAY;
29545481d73fSDavid Ahern 	}
295532ab5f80SAl Viro 	if (mask == htonl(0xFFFFFFFF))
2956cb7b593cSStephen Hemminger 		flags |= RTF_HOST;
2957cb7b593cSStephen Hemminger 	flags |= RTF_UP;
2958cb7b593cSStephen Hemminger 	return flags;
2959cb7b593cSStephen Hemminger }
2960cb7b593cSStephen Hemminger 
2961cb7b593cSStephen Hemminger /*
2962cb7b593cSStephen Hemminger  *	This outputs /proc/net/route.
2963cb7b593cSStephen Hemminger  *	The format of the file is not supposed to be changed
2964cb7b593cSStephen Hemminger  *	and needs to be same as fib_hash output to avoid breaking
2965cb7b593cSStephen Hemminger  *	legacy utilities
2966cb7b593cSStephen Hemminger  */
fib_route_seq_show(struct seq_file * seq,void * v)2967cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v)
2968cb7b593cSStephen Hemminger {
2969654eff45SAlexander Duyck 	struct fib_route_iter *iter = seq->private;
2970654eff45SAlexander Duyck 	struct fib_table *tb = iter->main_tb;
297179e5ad2cSAlexander Duyck 	struct fib_alias *fa;
297235c6edacSAlexander Duyck 	struct key_vector *l = v;
29739b6ebad5SAlexander Duyck 	__be32 prefix;
2974cb7b593cSStephen Hemminger 
2975cb7b593cSStephen Hemminger 	if (v == SEQ_START_TOKEN) {
2976cb7b593cSStephen Hemminger 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2977cb7b593cSStephen Hemminger 			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2978cb7b593cSStephen Hemminger 			   "\tWindow\tIRTT");
2979cb7b593cSStephen Hemminger 		return 0;
2980cb7b593cSStephen Hemminger 	}
2981cb7b593cSStephen Hemminger 
29829b6ebad5SAlexander Duyck 	prefix = htonl(l->key);
29839b6ebad5SAlexander Duyck 
298479e5ad2cSAlexander Duyck 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
29855481d73fSDavid Ahern 		struct fib_info *fi = fa->fa_info;
29869b6ebad5SAlexander Duyck 		__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2987a034ee3cSEric Dumazet 		unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2988cb7b593cSStephen Hemminger 
298979e5ad2cSAlexander Duyck 		if ((fa->fa_type == RTN_BROADCAST) ||
299079e5ad2cSAlexander Duyck 		    (fa->fa_type == RTN_MULTICAST))
2991cb7b593cSStephen Hemminger 			continue;
2992cb7b593cSStephen Hemminger 
2993654eff45SAlexander Duyck 		if (fa->tb_id != tb->tb_id)
2994654eff45SAlexander Duyck 			continue;
2995654eff45SAlexander Duyck 
2996652586dfSTetsuo Handa 		seq_setwidth(seq, 127);
2997652586dfSTetsuo Handa 
29985481d73fSDavid Ahern 		if (fi) {
2999dcb1ecb5SDavid Ahern 			struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
3000dcb1ecb5SDavid Ahern 			__be32 gw = 0;
3001dcb1ecb5SDavid Ahern 
3002dcb1ecb5SDavid Ahern 			if (nhc->nhc_gw_family == AF_INET)
3003dcb1ecb5SDavid Ahern 				gw = nhc->nhc_gw.ipv4;
30045481d73fSDavid Ahern 
30055e659e4cSPavel Emelyanov 			seq_printf(seq,
30065e659e4cSPavel Emelyanov 				   "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
3007652586dfSTetsuo Handa 				   "%d\t%08X\t%d\t%u\t%u",
3008dcb1ecb5SDavid Ahern 				   nhc->nhc_dev ? nhc->nhc_dev->name : "*",
3009dcb1ecb5SDavid Ahern 				   prefix, gw, flags, 0, 0,
3010cb7b593cSStephen Hemminger 				   fi->fib_priority,
3011cb7b593cSStephen Hemminger 				   mask,
3012a07f5f50SStephen Hemminger 				   (fi->fib_advmss ?
3013a07f5f50SStephen Hemminger 				    fi->fib_advmss + 40 : 0),
3014cb7b593cSStephen Hemminger 				   fi->fib_window,
3015652586dfSTetsuo Handa 				   fi->fib_rtt >> 3);
30165481d73fSDavid Ahern 		} else {
30175e659e4cSPavel Emelyanov 			seq_printf(seq,
30185e659e4cSPavel Emelyanov 				   "*\t%08X\t%08X\t%04X\t%d\t%u\t"
3019652586dfSTetsuo Handa 				   "%d\t%08X\t%d\t%u\t%u",
3020cb7b593cSStephen Hemminger 				   prefix, 0, flags, 0, 0, 0,
3021652586dfSTetsuo Handa 				   mask, 0, 0, 0);
30225481d73fSDavid Ahern 		}
3023652586dfSTetsuo Handa 		seq_pad(seq, '\n');
3024cb7b593cSStephen Hemminger 	}
3025cb7b593cSStephen Hemminger 
3026cb7b593cSStephen Hemminger 	return 0;
3027cb7b593cSStephen Hemminger }
3028cb7b593cSStephen Hemminger 
3029f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = {
30308315f5d8SStephen Hemminger 	.start  = fib_route_seq_start,
30318315f5d8SStephen Hemminger 	.next   = fib_route_seq_next,
30328315f5d8SStephen Hemminger 	.stop   = fib_route_seq_stop,
3033cb7b593cSStephen Hemminger 	.show   = fib_route_seq_show,
3034cb7b593cSStephen Hemminger };
3035cb7b593cSStephen Hemminger 
fib_proc_init(struct net * net)303661a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net)
303719baf839SRobert Olsson {
3038c3506372SChristoph Hellwig 	if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
3039c3506372SChristoph Hellwig 			sizeof(struct fib_trie_iter)))
3040cb7b593cSStephen Hemminger 		goto out1;
3041cb7b593cSStephen Hemminger 
30423617d949SChristoph Hellwig 	if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
30433617d949SChristoph Hellwig 			fib_triestat_seq_show, NULL))
3044cb7b593cSStephen Hemminger 		goto out2;
3045cb7b593cSStephen Hemminger 
3046c3506372SChristoph Hellwig 	if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
3047c3506372SChristoph Hellwig 			sizeof(struct fib_route_iter)))
3048cb7b593cSStephen Hemminger 		goto out3;
3049cb7b593cSStephen Hemminger 
305019baf839SRobert Olsson 	return 0;
3051cb7b593cSStephen Hemminger 
3052cb7b593cSStephen Hemminger out3:
3053ece31ffdSGao feng 	remove_proc_entry("fib_triestat", net->proc_net);
3054cb7b593cSStephen Hemminger out2:
3055ece31ffdSGao feng 	remove_proc_entry("fib_trie", net->proc_net);
3056cb7b593cSStephen Hemminger out1:
3057cb7b593cSStephen Hemminger 	return -ENOMEM;
305819baf839SRobert Olsson }
305919baf839SRobert Olsson 
fib_proc_exit(struct net * net)306061a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net)
306119baf839SRobert Olsson {
3062ece31ffdSGao feng 	remove_proc_entry("fib_trie", net->proc_net);
3063ece31ffdSGao feng 	remove_proc_entry("fib_triestat", net->proc_net);
3064ece31ffdSGao feng 	remove_proc_entry("route", net->proc_net);
306519baf839SRobert Olsson }
306619baf839SRobert Olsson 
306719baf839SRobert Olsson #endif /* CONFIG_PROC_FS */
3068